diff --git a/.vscode/settings.json b/.vscode/settings.json index 6cab0b3..36833e4 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -6,6 +6,44 @@ "vector": "cpp", "*.tcc": "cpp", "iostream": "cpp", - "map": "cpp" + "map": "cpp", + "array": "cpp", + "atomic": "cpp", + "cctype": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "cstdint": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "deque": "cpp", + "unordered_map": "cpp", + "unordered_set": "cpp", + "exception": "cpp", + "algorithm": "cpp", + "memory": "cpp", + "memory_resource": "cpp", + "optional": "cpp", + "set": "cpp", + "string": "cpp", + "string_view": "cpp", + "system_error": "cpp", + "tuple": "cpp", + "type_traits": "cpp", + "utility": "cpp", + "fstream": "cpp", + "initializer_list": "cpp", + "iosfwd": "cpp", + "istream": "cpp", + "limits": "cpp", + "new": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "streambuf": "cpp", + "typeinfo": "cpp" } } \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index d48c4c0..1bedd0b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ project(compiler-bin) # 收集所有的cpp源文件 file(GLOB SOURCES_LL "LL1/*.cpp") -file(GLOB SOURCES_NFA "nfa/*.cpp") +file(GLOB SOURCES_NFA "nfa/src/*.cpp") file(GLOB SOURCES_MAIN "main/*.cpp") # 设置输出目录为 bin @@ -15,7 +15,7 @@ add_library(nfa STATIC ${SOURCES_NFA}) # 添加头文件目录 target_include_directories(LL PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/LL1) -target_include_directories(nfa PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/nfa) +target_include_directories(nfa PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/nfa/include) # 添加可执行文件 add_executable(main ${SOURCES_MAIN}) diff --git a/nfa/include/nfa.h b/nfa/include/nfa.h index 8ad82ac..5b05fa2 100644 --- a/nfa/include/nfa.h +++ b/nfa/include/nfa.h @@ -1,7 +1,4 @@ #pragma once -#ifndef __NFA__H__ -#define __NFA__H__ - #include #include #include @@ -14,14 +11,17 @@ #include #include #include +#include + using namespace std; + //单词符号的类型,返回<待测代码中的单词符号,WordType> //当识别成标识符后,先判断是不是保留字,让后再判断IDN // Token 类型定义 -typedef enum WordType { +enum class WordType { //关键字 KW_INT = 0, // int @@ -63,11 +63,11 @@ typedef enum WordType { FLOAT_VAL, // -?[0-9]+\\.[0-9]+ UNKOWN -}WordType; -string getWordTypeName(WordType type); +}; +extern std::unordered_map WordTypeNames; // 定义输入的字符类别 -typedef enum InputCharType { +enum class InputCharType { LETTER = 0, // 字母 0 UNDERLINE, // _ 1 DIGIT, // 数字 2 当识别成功一个数字时,为了避免出现数字01的情况,返回前先进行一个判断,对GCC,01可以识别并等于1的 @@ -93,8 +93,9 @@ typedef enum InputCharType { POINT, // . 20 如果浮点按整数缓存判断,则将小数点作为数字类型加载,最后在缓冲区内判断 EPSILON, // 空字符 21 -}InputCharType; -string getInputChartypeName(InputCharType type); +}; + +extern std::unordered_map CharTypeNames; // 定义 token类型 enum class TokenType { @@ -108,10 +109,16 @@ enum class TokenType { }; // 定义最终返回/输出的token的组成类型,包含值和类型两部分 -typedef struct Token { +class Token { string value; TokenType type; -} Token; +}; + +string getWordTypeName(WordType type); +string getInputChartypeName(InputCharType type); + + + // 定义函数判断输入的字符类别 InputCharType getInputCharType(char c); @@ -128,7 +135,7 @@ public: map> transitions; // 转移函数映射表,记录每个输入字符类型对应的目标状态集合 // 构造函数 - State(int id) : id(id), isFinalState(false), wordType(UNKOWN) {} + State(int id) : id(id), isFinalState(false), wordType(WordType::UNKOWN) {} // 添加状态转移映射 void addTransition(InputCharType input, State* targetState) { @@ -204,4 +211,4 @@ string getGrammarName(WordType type, string buffer); DFA minimizeDFA(const DFA& dfa); vector recognize(const DFA& dfa, const string& input, const string& output); string readfile(const string& filename); -#endif \ No newline at end of file + diff --git a/nfa/src/dfa.cpp b/nfa/src/dfa.cpp index 87328e2..317ae05 100644 --- a/nfa/src/dfa.cpp +++ b/nfa/src/dfa.cpp @@ -84,7 +84,7 @@ DFA minimizeDFA(const DFA& dfa) { size_t oldSize;//分割集初始大小 do { oldSize = partitions.size(); - for (InputCharType input = static_cast(0); input < EPSILON; input = static_cast(input + 1)) {//类似于求Ia,Ib等 + for (InputCharType input = static_cast(0); input < InputCharType::EPSILON; input = static_cast(static_cast(input) + 1)) {//类似于求Ia,Ib等 for (Partition* partition : set(partitions)) {//遍历现存分割的每一个割集,看是否可再分割 if (partition->states.size() > 1) {//为1的集合不可再分割 split(partition->states, input, partitions);//核心分割函数 diff --git a/nfa/src/nfa.cpp b/nfa/src/nfa.cpp index 0e617e8..96b0bc1 100644 --- a/nfa/src/nfa.cpp +++ b/nfa/src/nfa.cpp @@ -7,12 +7,32 @@ NFA RexToNFA() { //由于里面存在||,所以不同正则间使用空格分隔代表| l代表letter,_代表下划线,0代表数字(也可以是d,但是为了使用已经有的函数), //[lu]代表l|u - string rex = "+ - * / % = > < == <= >= != && || ( ) { } , ; [l_][l_0]* -?00*"; - - //下面给出正则对应的输出(终态) + std::string rex = "+ - * / % = > < == <= >= != && || ( ) { } , ; [l_][l_0]* -?00*"; + + //正则对应的输出(终态) vector finalState = { - OP_ADD, OP_SUB,OP_MUL,OP_DIV,OP_MOD,OP_ASSIGN,OP_GT,OP_LT, OP_EQ,OP_LE,OP_GE,OP_NE, OP_AND, OP_OR,SE_LBRAC, SE_RBRAC, - SE_LCBRAC,SE_RCBRAC,SE_COMMA,SE_SEMI,IDN,INT_VAL + WordType::OP_ADD, + WordType::OP_SUB, + WordType::OP_MUL, + WordType::OP_DIV, + WordType::OP_MOD, + WordType::OP_ASSIGN, + WordType::OP_GT, + WordType::OP_LT, + WordType::OP_EQ, + WordType::OP_LE, + WordType::OP_GE, + WordType::OP_NE, + WordType::OP_AND, + WordType::OP_OR, + WordType::SE_LBRAC, + WordType::SE_RBRAC, + WordType::SE_LCBRAC, + WordType::SE_RCBRAC, + WordType::SE_COMMA, + WordType::SE_SEMI, + WordType::IDN, + WordType::INT_VAL }; stringstream ss(rex); @@ -39,7 +59,7 @@ NFA RexToNFA() { //[...]构成一种输入,查看]后面是否有?或者*,来判断当前状态的构成 for (i=i+1; i < target.length() && target[i] != ']'; i++) { InputCharType input = getInputCharType(target[i]); - if (input != EPSILON) { + if (input != InputCharType::EPSILON) { // 添加转移函数,从当前状态向新状态转移 currentState->addTransition(input, newState); } @@ -54,8 +74,8 @@ NFA RexToNFA() { //创建EPSILON转移状态 State* epsState = new State(stateIndex++); allStates.insert(epsState); - currentState->addTransition(EPSILON, epsState); - newState->addTransition(EPSILON, epsState); + currentState->addTransition(InputCharType::EPSILON, epsState); + newState->addTransition(InputCharType::EPSILON, epsState); currentState = epsState; // 跳过'?'字符 i++; @@ -63,9 +83,9 @@ NFA RexToNFA() { else if (i + 1 < target.length() && target[i + 1] == '*') { State* epsState = new State(stateIndex++); allStates.insert(epsState); - currentState->addTransition(EPSILON, epsState); - newState->addTransition(EPSILON, epsState); - epsState->addTransition(EPSILON, currentState); + currentState->addTransition(InputCharType::EPSILON, epsState); + newState->addTransition(InputCharType::EPSILON, epsState); + epsState->addTransition(InputCharType::EPSILON, currentState); currentState = epsState; // 跳过'*'字符 i++; @@ -131,7 +151,7 @@ set epsilonClosure(const set& while (!stateStack.empty()) { State* currentState = stateStack.top(); stateStack.pop(); - auto it = currentState->transitions.find(EPSILON); + auto it = currentState->transitions.find(InputCharType::EPSILON); if (it != currentState->transitions.end()) { for (State* nextState : it->second) { if (closure.find(nextState) == closure.end()) {//防止同一状态多次进栈,set自带去重 @@ -174,7 +194,7 @@ DFA nfaToDFA(const NFA& nfa) { } // 遍历所有输入字符类型 - for (int i = 0; i < static_cast(EPSILON); i++) { + for (int i = 0; i < static_cast(InputCharType::EPSILON); i++) { InputCharType inputCharType = static_cast(i); set nextNFAStates = epsilonClosure(move(currentNFAStates, inputCharType)); if (nextNFAStates.empty()) { diff --git a/nfa/src/tool.cpp b/nfa/src/tool.cpp index 9e4fd2d..c43f2c4 100644 --- a/nfa/src/tool.cpp +++ b/nfa/src/tool.cpp @@ -1,103 +1,87 @@ #include "nfa.h" -/* -扫描读入-->以字符的格式读入 -对于界符和部分运算符,显然是单个组成,即可以单独代表一个状态 +std::unordered_map CharTypeNames = { + {InputCharType::UNDERLINE, "_"}, + {InputCharType::ADD, "+"}, + {InputCharType::SUB, "-"}, + {InputCharType::MUL, "*"}, + {InputCharType::DIV, "/"}, + {InputCharType::MOD, "%"}, + {InputCharType::EQ, "="}, + {InputCharType::GT, ">"}, + {InputCharType::LT, "<"}, + {InputCharType::NOT, "!"}, + {InputCharType::AND, "&"}, + {InputCharType::OR, "|"}, + {InputCharType::LBRACKET, "("}, + {InputCharType::RBRACKET, ")"}, + {InputCharType::LCBRAC, "{"}, + {InputCharType::RCBRAC, "}"}, + {InputCharType::COMMA, ","}, + {InputCharType::SEMI, ";"}, + {InputCharType::POINT, "."} // 小数点 +}; -注意: -1.字母需要区分大小写 -*/ - -// 获取输入串的类型 -// 单独一位的读入-->下一步标识终态 +std::unordered_map WordTypeNames = { + {WordType::KW_INT, "INT"}, {WordType::KW_VOID, "VOID"}, {WordType::KW_RETURN, "RETURN"}, + {WordType::KW_CONST, "CONST"}, {WordType::KW_MAIN, "MAIN"}, {WordType::KW_IF, "IF"}, + {WordType::KW_ELSE, "ELSE"}, {WordType::KW_FLOAT, "FLOAT"}, {WordType::OP_ADD, "+"}, + {WordType::OP_SUB, "-"}, {WordType::OP_MUL, "*"}, {WordType::OP_DIV, "/"}, + {WordType::OP_MOD, "%"}, {WordType::OP_ASSIGN, "="}, {WordType::OP_GT, ">"}, + {WordType::OP_LT, "<"}, {WordType::OP_EQ, "=="}, {WordType::OP_LE, "<="}, + {WordType::OP_GE, ">="}, {WordType::OP_NE, "!="}, {WordType::OP_AND, "&&"}, + {WordType::OP_OR, "||"}, {WordType::SE_LBRAC, "("}, {WordType::SE_RBRAC, ")"}, + {WordType::SE_LCBRAC, "{"}, {WordType::SE_RCBRAC, "}"}, {WordType::SE_COMMA, ","}, + {WordType::SE_SEMI, ";"}, {WordType::IDN, "IDENTIFIER"}, {WordType::INT_VAL, "INTEGER"}, + {WordType::FLOAT_VAL, "FLOAT"}, {WordType::UNKOWN, "UNKNOWN"} +}; +//扫描,以字符的格式读入 +//对于界符和部分运算符,是单个组成,即可以单独代表一个状态 InputCharType getInputCharType(char c) { switch (c) { - case '_': return UNDERLINE; - case '+': return ADD; - case '-': return SUB; - case '*': return MUL; - case '/': return DIV; - case '%': return MOD; - case '=': return EQ; - case '>': return GT; - case '<': return LT; - case '!': return NOT; - case '&': return AND; - case '|': return OR; - case '(': return LBRACKET; - case ')': return RBRACKET; - case '{': return LCBRAC; - case '}': return RCBRAC; - case ',': return COMMA; - case ';': return SEMI; + case '_': return InputCharType::UNDERLINE; + case '+': return InputCharType::ADD; + case '-': return InputCharType::SUB; + case '*': return InputCharType::MUL; + case '/': return InputCharType::DIV; + case '%': return InputCharType::MOD; + case '=': return InputCharType::EQ; + case '>': return InputCharType::GT; + case '<': return InputCharType::LT; + case '!': return InputCharType::NOT; + case '&': return InputCharType::AND; + case '|': return InputCharType::OR; + case '(': return InputCharType::LBRACKET; + case ')': return InputCharType::RBRACKET; + case '{': return InputCharType::LCBRAC; + case '}': return InputCharType::RCBRAC; + case ',': return InputCharType::COMMA; + case ';': return InputCharType::SEMI; //小数点作为数字读入 - case '.': return DIGIT; + case '.': return InputCharType::DIGIT; default: if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { - return LETTER; + return InputCharType::LETTER; } else if (c >= '0' && c <= '9') { - return DIGIT; + return InputCharType::DIGIT; } else { - return EPSILON; + return InputCharType::EPSILON; } } } // 根据状态获取名称 -string getInputChartypeName(InputCharType type) { - switch (type) - { - case LETTER: - return "LETTER"; - case UNDERLINE: - return "UNDERLINE"; - case DIGIT: - return "DIGIT"; - case ADD: - return "+"; - case SUB: - return "-"; - case MUL: - return "*"; - case DIV: - return "/"; - case MOD: - return "%"; - case EQ: - return "="; - case GT: - return ">"; - case LT: - return "<"; - case NOT: - return "!"; - case AND: - return "&"; - case OR: - return "|"; - case LBRACKET: - return "("; - case RBRACKET: - return ")"; - case LCBRAC: - return "{"; - case RCBRAC: - return "}"; - case COMMA: - return ","; - case SEMI: - return ";"; - case POINT: //暂不使用 - return "."; - case EPSILON: - return "EPSILON"; - default: - return "UNKOWN"; +std::string getInputChartypeName(InputCharType type) { + auto it = CharTypeNames.find(type); + if (it != CharTypeNames.end()) { + return it->second; + } else { + return "UNKNOWN"; } } @@ -118,33 +102,33 @@ string judeFloat(string buffer){ string getWordTypeName(WordType type, string buffer) { switch (type) { // 运算符 - case OP_ADD: - case OP_SUB: - case OP_MUL: - case OP_DIV: - case OP_MOD: - case OP_ASSIGN: - case OP_GT: - case OP_LT: - case OP_EQ: - case OP_LE: - case OP_GE: - case OP_NE: - case OP_AND: - case OP_OR: + case WordType::OP_ADD: + case WordType::OP_SUB: + case WordType::OP_MUL: + case WordType::OP_DIV: + case WordType::OP_MOD: + case WordType::OP_ASSIGN: + case WordType::OP_GT: + case WordType::OP_LT: + case WordType::OP_EQ: + case WordType::OP_LE: + case WordType::OP_GE: + case WordType::OP_NE: + case WordType::OP_AND: + case WordType::OP_OR: return "OP"; // 界符 - case SE_LBRAC: - case SE_RBRAC: - case SE_LCBRAC: - case SE_RCBRAC: - case SE_COMMA: - case SE_SEMI: + case WordType::SE_LBRAC: + case WordType::SE_RBRAC: + case WordType::SE_LCBRAC: + case WordType::SE_RCBRAC: + case WordType::SE_COMMA: + case WordType::SE_SEMI: return "SE"; // 标识符和关键字 - case IDN: + case WordType::IDN: if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || @@ -160,7 +144,7 @@ string getWordTypeName(WordType type, string buffer) { } // 整数(添加了浮点判断) - case INT_VAL: + case WordType::INT_VAL: return judeFloat(buffer); //浮点 @@ -177,32 +161,32 @@ string getWordTypeName(WordType type, string buffer) { string getWordAttribute(WordType type, string buffer) { switch (type) { // 运算符 - case OP_ADD: return "6"; - case OP_SUB: return "7"; - case OP_MUL: return "8"; - case OP_DIV: return "9"; - case OP_MOD: return "10"; - case OP_ASSIGN: return "11"; - case OP_GT: return "12"; - case OP_LT: return "13"; - case OP_EQ: return "14"; - case OP_LE: return "15"; - case OP_GE: return "16"; - case OP_NE: return "17"; - case OP_AND:return "18"; - case OP_OR: return "19"; + case WordType::OP_ADD: return "6"; + case WordType::OP_SUB: return "7"; + case WordType::OP_MUL: return "8"; + case WordType::OP_DIV: return "9"; + case WordType::OP_MOD: return "10"; + case WordType::OP_ASSIGN: return "11"; + case WordType::OP_GT: return "12"; + case WordType::OP_LT: return "13"; + case WordType::OP_EQ: return "14"; + case WordType::OP_LE: return "15"; + case WordType::OP_GE: return "16"; + case WordType::OP_NE: return "17"; + case WordType::OP_AND:return "18"; + case WordType::OP_OR: return "19"; // 界符 - case SE_LBRAC: return "20"; - case SE_RBRAC: return "21"; - case SE_LCBRAC: return "22"; - case SE_RCBRAC: return "23"; - case SE_COMMA: return "25"; - case SE_SEMI: return "24"; + case WordType::SE_LBRAC: return "20"; + case WordType::SE_RBRAC: return "21"; + case WordType::SE_LCBRAC: return "22"; + case WordType::SE_RCBRAC: return "23"; + case WordType::SE_COMMA: return "25"; + case WordType::SE_SEMI: return "24"; // 标识符和关键字 - case IDN: + case WordType::IDN: if (!buffer.compare("int")){ return "1"; } @@ -232,11 +216,11 @@ string getWordAttribute(WordType type, string buffer) { } // 整数 - case INT_VAL: + case WordType::INT_VAL: return buffer; //浮点类型,理论不使用 - case FLOAT_VAL: + case WordType::FLOAT_VAL: return buffer; //default @@ -265,70 +249,10 @@ string readfile(const string& filename) // 获取token名称 string getWordTypeName(WordType type) { - switch (type) { - case KW_INT: - return "KW_INT"; - case KW_VOID: - return "KW_VOID"; - case KW_RETURN: - return "KW_RETURN"; - case KW_CONST: - return "KW_CONST"; - case KW_MAIN: - return "KW_MAIN"; - case KW_IF: - return "KW_IF"; - case KW_ELSE: - return "KW_ELSE"; - case KW_FLOAT: - return "KW_FLOAT"; - case OP_ADD: - return "OP_ADD"; - case OP_SUB: - return "OP_SUB"; - case OP_MUL: - return "OP_MUL"; - case OP_DIV: - return "OP_DIV"; - case OP_MOD: - return "OP_MOD"; - case OP_ASSIGN: - return "OP_ASSIGN"; - case OP_GT: - return "OP_GT"; - case OP_LT: - return "OP_LT"; - case OP_EQ: - return "OP_EQ"; - case OP_LE: - return "OP_LE"; - case OP_GE: - return "OP_GE"; - case OP_NE: - return "OP_NE"; - case OP_AND: - return "OP_AND"; - case OP_OR: - return "OP_OR"; - case SE_LBRAC: - return "SE_LBRAC"; - case SE_RBRAC: - return "SE_RBRAC"; - case SE_LCBRAC: - return "SE_LCBRAC"; - case SE_RCBRAC: - return "SE_RCBRAC"; - case SE_COMMA: - return "SE_COMMA"; - case SE_SEMI: - return "SE_SEMI"; - case IDN: - return "IDN"; - case INT_VAL: - return "INT_VAL"; - case FLOAT_VAL: - return "FLOAT_VAL"; - default: + auto it = WordTypeNames.find(type); + if (it != WordTypeNames.end()) { + return it->second; + } else { return "UNKNOWN"; } } @@ -337,29 +261,29 @@ string getWordTypeName(WordType type) { string getGrammarName(WordType type, string buffer) { switch (type) { - case OP_ADD: return "+"; - case OP_SUB: return "-"; - case OP_MUL: return "*"; - case OP_DIV: return "/"; - case OP_MOD: return "%"; - case OP_ASSIGN: return "="; - case OP_GT: return ">"; - case OP_LT: return "<"; - case OP_EQ: return "=="; - case OP_LE: return "<="; - case OP_GE: return ">="; - case OP_NE: return "!="; - case OP_AND: return "&&"; - case OP_OR: return "||"; + case WordType::OP_ADD: return "+"; + case WordType::OP_SUB: return "-"; + case WordType::OP_MUL: return "*"; + case WordType::OP_DIV: return "/"; + case WordType::OP_MOD: return "%"; + case WordType::OP_ASSIGN: return "="; + case WordType::OP_GT: return ">"; + case WordType::OP_LT: return "<"; + case WordType::OP_EQ: return "=="; + case WordType::OP_LE: return "<="; + case WordType::OP_GE: return ">="; + case WordType::OP_NE: return "!="; + case WordType::OP_AND: return "&&"; + case WordType::OP_OR: return "||"; - case SE_LBRAC: return "("; - case SE_RBRAC: return ")"; - case SE_LCBRAC: return "{"; - case SE_RCBRAC: return "}"; - case SE_COMMA: return ","; - case SE_SEMI: return ";"; + case WordType::SE_LBRAC: return "("; + case WordType::SE_RBRAC: return ")"; + case WordType::SE_LCBRAC: return "{"; + case WordType::SE_RCBRAC: return "}"; + case WordType::SE_COMMA: return ","; + case WordType::SE_SEMI: return ";"; - case IDN: + case WordType::IDN: if (!buffer.compare("int")) { return "int"; } @@ -378,8 +302,8 @@ string getGrammarName(WordType type, string buffer) { else { return "IDN"; } - case INT_VAL: return "INT"; - case FLOAT_VAL: return "FLOAT"; - default: cerr << "Token Error: "<< type << endl; exit(-1); + case WordType::INT_VAL: return "INT"; + case WordType::FLOAT_VAL: return "FLOAT"; + default: cerr << "Token Error: "<< WordTypeNames.find(type)->second << endl; exit(-1); } } \ No newline at end of file