#pragma once

// NOTE(review): the header names of the original #include directives were not
// recoverable from the text this file was restored from. The list below covers
// everything this header itself needs plus the usual stream/container headers
// an implementation file is likely to rely on -- confirm and trim as needed.
#include <algorithm>
#include <cctype>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <map>
#include <queue>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

// Kept because existing translation units that include this header use
// unqualified standard-library names.
using namespace std;

// Lexeme categories. The analyzer yields <lexeme from the code under test, WordType>.
// Once an identifier has been recognized, first check whether it is a reserved
// word and only then classify it as IDN.
enum class WordType {
    // keywords
    KW_INT = 0,   // int
    KW_VOID,      // void
    KW_RETURN,    // return
    KW_CONST,     // const
    KW_MAIN,      // main
    KW_IF,        // if
    KW_ELSE,      // else
    KW_FLOAT,     // float
    // operators
    OP_ADD,       // +
    OP_SUB,       // -
    OP_MUL,       // *
    OP_DIV,       // /
    OP_MOD,       // %
    OP_ASSIGN,    // =
    OP_GT,        // >
    OP_LT,        // <
    OP_EQ,        // ==
    OP_LE,        // <=
    OP_GE,        // >=
    OP_NE,        // !=
    OP_AND,       // &&
    OP_OR,        // ||
    // separators
    SE_LBRAC,     // (  left bracket
    SE_RBRAC,     // )  right bracket
    SE_LCBRAC,    // {  left curly bracket
    SE_RCBRAC,    // }  right curly bracket
    SE_COMMA,     // ,
    SE_SEMI,      // ;
    IDN,          // [a-zA-Z][a-zA-Z_0-9]*
    INT_VAL,      // -*[0-9]+
    FLOAT_VAL,    // -?[0-9]+\\.[0-9]+
    UNKOWN        // (sic) spelling kept: it is part of the public interface
};

// Category of a single input character; these label the automaton transitions.
enum class InputCharType {
    LETTER = 0,   // letter                                              0
    UNDERLINE,    // _                                                   1
    DIGIT,        // digit                                               2
                  // After a number has been recognized, a check is made
                  // before returning to avoid forms such as 01 (GCC accepts
                  // 01 and treats it as 1).
    // operators
    ADD,          // +                                                   3
    SUB,          // -                                                   4
    MUL,          // *                                                   5
    DIV,          // /                                                   6
    MOD,          // %                                                   7
    EQ,           // =                                                   8
    GT,           // >                                                   9
    LT,           // <                                                   10
    NOT,          // !                                                   11
    AND,          // &                                                   12
    OR,           // |                                                   13
    // separators
    LBRACKET,     // (                                                   14
    RBRACKET,     // )                                                   15
    LCBRAC,       // {                                                   16
    RCBRAC,       // }                                                   17
    COMMA,        // ,                                                   18
    SEMI,         // ;                                                   19
    POINT,        // .                                                   20
                  // If floats are detected from the integer buffer, the
                  // decimal point is loaded like a digit and the decision
                  // is made on the buffer afterwards.
    EPSILON,      // empty input (epsilon transition)                    21
};

// Coarse token class.
enum class TokenType {
    KW = 0,
    OP,
    SE,
    IDN,
    INT,
    FLOAT,
    UNKNOWN
};

// Token as finally returned/emitted: a value plus its class.
// The members are public: the class declares no accessors, so with the default
// private access nothing outside the class could read or set them.
class Token {
public:
    string value;
    TokenType type = TokenType::UNKNOWN;
};

// Enum-to-name lookup tables (defined in an implementation file).
// NOTE(review): the template arguments were lost; enum -> string is assumed
// from the getters below -- confirm against the definitions.
extern std::unordered_map<InputCharType, std::string> CharTypeNames;
extern std::unordered_map<WordType, std::string> WordTypeNames;

string getWordTypeName(WordType type);
string getInputChartypeName(InputCharType type);

// Classifies one input character.
InputCharType getInputCharType(char c);

string getWordTypeName(WordType type, string buffer);
string getWordAttribute(WordType type, string buffer);

// One automaton state.
class State {
public:
    int id;               // state number
    bool isFinalState;    // whether this is an accepting state
    WordType wordType;    // lexeme category to report when this state is reached
    // Transition table: for every input character category, the set of target states.
    // NOTE(review): template arguments were lost; a plain pointer-ordered set is
    // assumed because StatePtrCompare is only defined after this class.
    map<InputCharType, set<State*>> transitions;

    // Creates a non-accepting state with category UNKOWN.
    State(int id) : id(id), isFinalState(false), wordType(WordType::UNKOWN) {}

    // Adds targetState to the targets reachable on `input`.
    void addTransition(InputCharType input, State* targetState) {
        transitions[input].insert(targetState);
    }

    // Sets the accepting flag together with the category to report.
    void setFinalState(bool isFinal, WordType type) {
        isFinalState = isFinal;
        wordType = type;
    }

    // Orders states by their number.
    bool operator<(const State& other) const {
        return id < other.id;
    }
};

// Comparator that keeps a set of State pointers ordered by state number.
struct StatePtrCompare {
    bool operator()(const State* lhs, const State* rhs) const {
        return lhs->id < rhs->id;
    }
};

// Non-deterministic finite automaton.
// NOTE(review): the set template arguments were lost in this class and in the
// declarations that follow; set<State*, StatePtrCompare> is assumed -- confirm.
class NFA {
public:
    State* startState;                        // start state
    set<State*, StatePtrCompare> endStates;   // accepting states
    set<State*, StatePtrCompare> states;      // all states

    NFA(State* startState,
        set<State*, StatePtrCompare> endStates,
        set<State*, StatePtrCompare> states)
        : startState(startState),
          endStates(std::move(endStates)),   // by-value sink parameters: move, don't copy again
          states(std::move(states)) {}
};

// Builds the state machine from the regular expressions.
NFA RexToNFA();
void printNFA(const NFA& nfa);

set<State*, StatePtrCompare> move(const set<State*, StatePtrCompare>& states, InputCharType input);
set<State*, StatePtrCompare> epsilonClosure(const set<State*, StatePtrCompare>& states);

// Deterministic finite automaton.
class DFA {
public:
    State* startState;                        // start state
    set<State*, StatePtrCompare> endStates;   // accepting states
    set<State*, StatePtrCompare> states;      // all states

    DFA(State* startState,
        set<State*, StatePtrCompare> endStates,
        set<State*, StatePtrCompare> states)
        : startState(startState),
          endStates(std::move(endStates)),
          states(std::move(states)) {}
};

void removeUnreachableStates(DFA& dfa);
void printDFA(const DFA& dfa);
DFA nfaToDFA(const NFA& nfa);

// Strict weak ordering over sets of State pointers, usable as the comparator of
// an ordered container keyed by state sets.
//
// Smaller sets order first. Sets of equal size are compared element by element
// after sorting by state number, so the result depends on the ids rather than
// on where the State objects happen to live in memory. Comparing the raw
// pointers instead would make the order change from run to run. The address
// is used only as a tie-break between different objects that carry the same id,
// via std::less, so such objects are still treated as distinct.
//
// The call operator is a template so that it accepts a set of State* whatever
// comparator that set was declared with.
struct SetComparator {
    template <typename StateSet>
    bool operator()(const StateSet& a, const StateSet& b) const {
        if (a.size() != b.size()) {
            return a.size() < b.size();
        }

        const auto byIdThenAddress = [](const State* lhs, const State* rhs) {
            if (lhs->id != rhs->id) {
                return lhs->id < rhs->id;
            }
            return std::less<const State*>()(lhs, rhs);
        };

        vector<State*> vecA(a.begin(), a.end());
        vector<State*> vecB(b.begin(), b.end());
        // The input may be address-ordered, so normalise both sides first.
        std::sort(vecA.begin(), vecA.end(), byIdThenAddress);
        std::sort(vecB.begin(), vecB.end(), byIdThenAddress);

        return std::lexicographical_compare(vecA.begin(), vecA.end(),
                                            vecB.begin(), vecB.end(),
                                            byIdThenAddress);
    }
};

string getGrammarName(WordType type, string buffer);
DFA minimizeDFA(const DFA& dfa);

// NOTE(review): the element type of the returned vector was lost; `string` is
// assumed -- confirm against the definition.
vector<string> recognize(const DFA& dfa, const string& input, const string& output);
string readfile(const string& filename);