[add]fix vision

This commit is contained in:
TS-Sun-P 2024-05-07 16:12:25 +08:00
parent b4145e7fbf
commit 1af61d2eef
8 changed files with 216 additions and 19 deletions

8
.vscode/settings.json vendored Normal file
View File

@ -0,0 +1,8 @@
{
"files.associations": {
"xstring": "cpp",
"iterator": "cpp",
"ostream": "cpp",
"vector": "cpp"
}
}

View File

@ -10,7 +10,7 @@ using namespace std;
int main(int argc, char** argv) {
NFA nfa = RexToNFA();
printNFA(nfa);
//printNFA(nfa);
DFA dfa = nfaToDFA(nfa);
//printDFA(dfa);
@ -58,19 +58,20 @@ int main(int argc, char** argv) {
ll.build_LL1_predict();
// ll.print_LL1_predict();
//ll.print_LL1_predict();
ll.get_token_strings(token_strings);
// ll.print_token_strings();
ll.print_token_strings();
ll.build_LL1_grammar();
ll.fileout_LL1_grammar_log(outputs_grammar[i]);
// ll.print_LL1_grammar_log();
//ll.print_LL1_grammar_log();
cout << endl;
cout<<"end"<<endl;
i++;
}

View File

@ -22,6 +22,7 @@ typedef enum WordType {
KW_VOID, // void
KW_RETURN, // return
KW_CONST, // const
KW_MAIN, //main
OP_ADD, // +
OP_SUB, // -
@ -95,6 +96,7 @@ typedef struct Token {
//定义函数判断输入的字符类别
InputCharType getInputCharType(char c);
string getWordTypeName(WordType type,string buffer);
string getWordAttribute(WordType type,string buffer);
//定义状态类
class State {

View File

@ -142,6 +142,7 @@ DFA minimizeDFA(const DFA& dfa) {
}
return DFA(minimizedStartState, minimizedEndStates, minimizedStates);
}
void removeUnreachableStates(DFA& dfa) {
set<State*> reachableStates; //可达状态集合
queue<State*> statesQueue; //状态队列
@ -175,6 +176,7 @@ void removeUnreachableStates(DFA& dfa) {
}
}
}
vector<string> recognize(const DFA& dfa, const string& input, const string& output) {
State* currentState = dfa.startState;
@ -206,8 +208,8 @@ vector<string> recognize(const DFA& dfa, const string& input, const string& outp
if (nextIt == nextState->transitions.end()) {// 如果没有更多匹配的转换
// 输出识别到的单词符号和对应的类型
cout << buffer << "\t<" << getWordTypeName(nextState->wordType,buffer) << ">" << endl;
file << buffer << "\t<" << getWordTypeName(nextState->wordType, buffer) << ">" << endl;
cout << buffer << "\t<" << getWordTypeName(nextState->wordType,buffer)<<","<<getWordAttribute(nextState->wordType,buffer) << ">" << endl;
file << buffer << "\t<" << getWordTypeName(nextState->wordType, buffer)<<","<<getWordAttribute(nextState->wordType,buffer) << ">" << endl;
tokens.push_back(getGrammarName(nextState->wordType, buffer));
buffer.clear();
currentState = dfa.startState;
@ -223,8 +225,8 @@ vector<string> recognize(const DFA& dfa, const string& input, const string& outp
else {// 如果没有找到匹配的转换
if (currentState->isFinalState) {// 如果当前状态是终止状态
// 输出识别到的单词符号和对应的类型
cout << buffer << "\t<" << getWordTypeName(currentState->wordType,buffer) << ">" << endl;
file << buffer << "\t<" << getWordTypeName(currentState->wordType, buffer) << ">" << endl;
cout << buffer << "\t<" << getWordTypeName(currentState->wordType,buffer) <<","<<getWordAttribute(nextState->wordType,buffer) << ">" << endl;
file << buffer << "\t<" << getWordTypeName(currentState->wordType, buffer) << ","<<getWordAttribute(nextState->wordType,buffer) << ">" << endl;
tokens.push_back(getGrammarName(currentState->wordType, buffer) );
buffer.clear();
}
@ -242,8 +244,8 @@ vector<string> recognize(const DFA& dfa, const string& input, const string& outp
}
// 处理最后一个字符,如果缓冲区不为空且当前状态是终止状态,对应第一个if里面的else
if (!buffer.empty() && currentState->isFinalState) {
cout << buffer << "\t<" << getWordTypeName(currentState->wordType,buffer) << ">" << endl;
file << buffer << "\t<" << getWordTypeName(currentState->wordType, buffer) << ">" << endl;
cout << buffer << "\t<" << getWordTypeName(currentState->wordType,buffer) << ","<<getWordAttribute(nextState->wordType,buffer) << ">" << endl;
file << buffer << "\t<" << getWordTypeName(currentState->wordType, buffer) << ","<<getWordAttribute(nextState->wordType,buffer) << ">" << endl;
tokens.push_back(getGrammarName(currentState->wordType, buffer));
}
file.close();//关闭文件

View File

@ -12,11 +12,13 @@ NFA RexToNFA() {
//由于正则里面本身存在||,所以不同正则之间使用空格分隔(空格代表|);l代表letter,_代表下划线,0代表数字(也可以用d,但这里为了复用已有的函数而使用0)
//[lu]代表l|u
string rex = "+ - * / % = > < == <= >= != && || ( ) { } , ; [l_][l_0]* -?00*";
//下面给出正则对应的输出(终态)
vector<WordType> finalState = {
OP_ADD, OP_SUB,OP_MUL,OP_DIV,OP_MOD,OP_ASSIGN,OP_GT,OP_LT, OP_EQ,OP_LE,OP_GE,OP_NE, OP_AND, OP_OR,SE_LBRAC, SE_RBRAC,
SE_LCBRAC,SE_RCBRAC,SE_COMMA,SE_SEMI,IDN,INT_VAL
};
stringstream ss(rex);
string target;
@ -26,6 +28,8 @@ NFA RexToNFA() {
State* startState = new State(stateIndex++);
set<State*, StatePtrCompare> endStates;
set<State*, StatePtrCompare> allStates = { startState };
// 按空格分割
while (getline(ss, target,' ')) {
//如获得[l_][l_0]*
State* currentState = startState;

View File

@ -15,13 +15,17 @@
#include <queue>
#include <algorithm>
using namespace std;
//单词符号的类型,返回<待测代码中的单词符号,WordType>
// 保留关键字
typedef enum WordType {
//当识别成标识符后,先判断是不是保留字,然后再判断为IDN
KW_INT = 0, // int
KW_VOID, // void
KW_RETURN, // return
KW_CONST, // const
KW_MAIN, //main
OP_ADD, // +
OP_SUB, // -
@ -50,7 +54,10 @@ typedef enum WordType {
UNKOWN
}WordType;
string getWordTypeName(WordType type);
//定义输入的字符类别
// 定义输入的字符类别
// 输入与实际不完全匹配
// 注意:此处定义的;和,顺序与实验指导书中不同
typedef enum InputCharType {
LETTER = 0, // 字母 0
UNDERLINE, // _ 1
@ -78,6 +85,9 @@ typedef enum InputCharType {
EPSILON, // 空字符 20
}InputCharType;
string getInputChartypeName(InputCharType type);
// 定义 token类型
enum class TokenType {
KW = 0,
OP,
@ -87,34 +97,46 @@ enum class TokenType {
UNKNOWN
};
TokenType getTokenType(WordType wordType,string buffer);
// Token record that is finally returned: it consists of two parts,
// a value and a type.
struct Token {
    string value;    // value part of the token
    TokenType type;  // type part of the token
};
//定义函数判断输入的字符类别
// 定义函数判断输入的字符类别
InputCharType getInputCharType(char c);
string getWordTypeName(WordType type,string buffer);
string getWordAttribute(WordType type,string buffer);
//定义状态类
// 定义状态类
// A single automaton state: an id, a transition table keyed by input
// character type, a final-state flag, and the word type tied to that state.
class State {
public:
int id; // state number
map<InputCharType, set<State*>> transitions; // transition table: for each input character type, the set of target states
bool isFinalState; // whether this is a final state
WordType wordType; // lexical unit type to return when this state is reached
map<InputCharType, set<State*>> transitions; // transition table: for each input character type, the set of target states. NOTE(review): same declaration as above; this looks like the removed/added pair of a diff view — confirm only one remains in the real header
// Constructor: stores the id; the state starts non-final with wordType UNKOWN.
State(int id) : id(id), isFinalState(false), wordType(UNKOWN) {}
// Add a transition: inserts targetState into the target set for `input`.
void addTransition(InputCharType input, State* targetState) {
transitions[input].insert(targetState);
}
// Set the final-state flag and the associated word type together.
void setFinalState(bool isFinal, WordType type) {
isFinalState = isFinal;
wordType = type;
}
// Ordering by state id.
bool operator<(const State& other) const {
return id < other.id;
}
};
//为了使set内部有序,定义排序结构体StatePtrCompare
struct StatePtrCompare {
bool operator()(const State* lhs, const State* rhs) const {
@ -128,10 +150,14 @@ public:
State* startState; // 起始状态
set<State*, StatePtrCompare> endStates; // 终止状态集合
set<State*, StatePtrCompare> states; // 状态集合
// Constructor: initializes the start state, the end-state set, and the full
// state set from the arguments. Both sets are taken by value and copied into
// the members of the same name.
NFA(State* startState, set<State*, StatePtrCompare> endStates, set<State*, StatePtrCompare> states) :
startState(startState), endStates(endStates), states(states) {}
// void printNFA();
};
// 正则表达式构建状态机
NFA RexToNFA();
void printNFA(const NFA& nfa);
NFA buildNFA(string filename);

View File

@ -1,6 +1,16 @@
#include "nfa.h"
/*
-->
1.
*/
// 获取输入串的类型
// 单独一位的读入-->下一步标识终态
InputCharType getInputCharType(char c) {
switch (c) {
case '_': return UNDERLINE;
@ -33,6 +43,8 @@ InputCharType getInputCharType(char c) {
}
}
}
// 根据状态获取名称
string getInputChartypeName(InputCharType type) {
switch (type)
{
@ -82,8 +94,11 @@ string getInputChartypeName(InputCharType type) {
return "UNKOWN";
}
}
//根据关键字类型获取其所属的种别
string getWordTypeName(WordType type, string buffer) {
switch (type) {
// 运算符
case OP_ADD:
case OP_SUB:
case OP_MUL:
@ -100,6 +115,7 @@ string getWordTypeName(WordType type, string buffer) {
case OP_OR:
return "OP";
// 界符
case SE_LBRAC:
case SE_RBRAC:
case SE_LCBRAC:
@ -108,29 +124,95 @@ string getWordTypeName(WordType type, string buffer) {
case SE_SEMI:
return "SE";
// 标识符和关键字
case IDN:
if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")){
if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")||!buffer.compare("main")){
return "KW";
}
else {
return "IDN";
}
// 整数
case INT_VAL:
return "INT";
//default
default:
return "UNKNOWN";
}
}
// Returns the attribute string of a recognized word.
//   - INT_VAL: the buffer text itself.
//   - IDN: "1".."5" when buffer is int / void / return / const / main
//     (in that order); otherwise the buffer text itself.
//   - operators and delimiters: a fixed numeric code, "6" through "25".
//   - any other type: "UNKNOWN".
string getWordAttribute(WordType type, string buffer) {
    // Integer literals carry their own text as the attribute.
    if (type == INT_VAL) {
        return buffer;
    }

    // Identifiers: reserved words map to codes, everything else keeps its text.
    if (type == IDN) {
        if (buffer == "int")    return "1";
        if (buffer == "void")   return "2";
        if (buffer == "return") return "3";
        if (buffer == "const")  return "4";
        if (buffer == "main")   return "5";
        return buffer;
    }

    // Operators and delimiters share one lookup; unmatched types stay "UNKNOWN".
    const char* code = "UNKNOWN";
    switch (type) {
        // operators
        case OP_ADD:    code = "6";  break;
        case OP_SUB:    code = "7";  break;
        case OP_MUL:    code = "8";  break;
        case OP_DIV:    code = "9";  break;
        case OP_MOD:    code = "10"; break;
        case OP_ASSIGN: code = "11"; break;
        case OP_GT:     code = "12"; break;
        case OP_LT:     code = "13"; break;
        case OP_EQ:     code = "14"; break;
        case OP_LE:     code = "15"; break;
        case OP_GE:     code = "16"; break;
        case OP_NE:     code = "17"; break;
        case OP_AND:    code = "18"; break;
        case OP_OR:     code = "19"; break;
        // delimiters (semicolon is 24 and comma is 25, as in the original table)
        case SE_LBRAC:  code = "20"; break;
        case SE_RBRAC:  code = "21"; break;
        case SE_LCBRAC: code = "22"; break;
        case SE_RCBRAC: code = "23"; break;
        case SE_SEMI:   code = "24"; break;
        case SE_COMMA:  code = "25"; break;
        default:        break;
    }
    return code;
}
// 读取文件
string readfile(const string& filename)
{
// 打开文件流并读取文件内容
ifstream file(filename);
string content((istreambuf_iterator<char>(file)),
istreambuf_iterator<char>());
// 使用istreambuf_iterator类逐字符从file中读取到content中
string content((istreambuf_iterator<char>(file)),istreambuf_iterator<char>());
// 去掉换行符
//remove函数会把不等于给定值的字符依次前移(覆盖被移除的字符),并返回指向新逻辑末尾的迭代器;它本身不改变字符串长度,通常需配合erase使用。
@ -139,6 +221,8 @@ string readfile(const string& filename)
return content;
}
// 获取关键字的Token种类
TokenType getTokenType(WordType type,string buffer) {
switch (type) {
case OP_ADD:
@ -166,7 +250,7 @@ TokenType getTokenType(WordType type,string buffer) {
return TokenType::SE;
case IDN:
if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")) {
if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")|| !buffer.compare("main")) {
return TokenType::KW;
}
else {
@ -181,6 +265,7 @@ TokenType getTokenType(WordType type,string buffer) {
}
}
// 获取token名称
string getWordTypeName(WordType type) {
switch (type) {
case KW_INT:
@ -191,6 +276,8 @@ string getWordTypeName(WordType type) {
return "KW_RETURN";
case KW_CONST:
return "KW_CONST";
case KW_MAIN:
return "KW_MAIN";
case OP_ADD:
return "OP_ADD";
case OP_SUB:
@ -240,6 +327,7 @@ string getWordTypeName(WordType type) {
}
}
// 获取语法名称
string getGrammarName(WordType type, string buffer) {
switch (type) {
@ -278,6 +366,9 @@ string getGrammarName(WordType type, string buffer) {
else if (!buffer.compare("const")) {
return "const";
}
else if (!buffer.compare("main")) {
return "main";
}
else {
return "IDN";
}

63
read.md Normal file
View File

@ -0,0 +1,63 @@
# 任务划分
- 两部分任务
1. 词法分析器
2. 语法分析器
## 词法分析器
### 流程
- 关键字的识别 --> 超前搜索
- 构建词法识别的状态转换图
```
1. 构造出关键字与种类编码的对应表
2. 根据对应表 构造状态转换图 --->先根据所做内容画出语法分析器
```
### 输出格式
[待测代码中的单词符号] [TAB] <[单词符号种别],[单词符号内容]>
- 其中单词符号种别为 KW(关键字)、OP(运算符)、SE(界符)、IDN(标识符)、INT(整型数)
- 输出的二元组中,第一个维度为单词符号的种别,第二个维度为其属性。
# 代码结构
```
📁2024-compiler-mods
├─ 📄.gitignore
├─ 📄CMakeLists.txt
├─ 📄read.md
├─ 📄README.md
├─ 📁LL1
│ ├─ 📄CMakeLists.txt
│ ├─ 📄grammar.cpp
│ ├─ 📄grammar.h
│ ├─ 📄LL1.cpp
│ └─ 📄LL1.h
├─ 📁main
│ ├─ 📄CMakeLists.txt
│ ├─ 📄grammar.h
│ ├─ 📄LL1.h
│ ├─ 📄main.cpp
│ └─ 📄nfa.h
├─ 📁nfa
│ ├─ 📄CMakeLists.txt
│ ├─ 📄dfa.cpp
│ ├─ 📄nfa.cpp
│ ├─ 📄nfa.h
│ └─ 📄tool.cpp
├─ 📁.git
└─ 📁build
```
# 需做改动
1. 可视化部分需要做一些改动:原有代码中的可视化基本可用,但最终考虑不提交;考虑添加 DFA、NFA 的画图可视化
2. 拆分词法分析器和语法分析器为两部分,对两部分测试独立进行,保证至少有一部分无误
# 需考虑
1. 错误处理部分
2. 数字 01 情况
3. 标识符一定包含字母、数、下划线三部分
4. 测试部分复合的标识符定义读取