#pragma once
#ifndef __NFA__H__
#define __NFA__H__

// NOTE(review): the original include targets were not recoverable from this
// copy of the header; the list below is a conservative standard-library
// superset covering everything this header uses. Confirm against the
// translation units that include this file.
#include <algorithm>
#include <cctype>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <map>
#include <queue>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

// Kept for backward compatibility: code including this header relies on
// unqualified standard-library names.
using namespace std;

/// Kind of a lexical unit; paired with the lexeme found in the code under test.
/// After something is recognised as an identifier, it is first checked against
/// the reserved words and only then classified as IDN.
typedef enum WordType {
    KW_INT = 0,   // int
    KW_VOID,      // void
    KW_RETURN,    // return
    KW_CONST,     // const
    OP_ADD,       // +
    OP_SUB,       // -
    OP_MUL,       // *
    OP_DIV,       // /
    OP_MOD,       // %
    OP_ASSIGN,    // =
    OP_GT,        // >
    OP_LT,        // <
    OP_EQ,        // ==
    OP_LE,        // <=
    OP_GE,        // >=
    OP_NE,        // !=
    OP_AND,       // &&
    OP_OR,        // ||
    SE_LBRAC,     // (  left bracket
    SE_RBRAC,     // )  right bracket
    SE_LCBRAC,    // {  left curly bracket
    SE_RCBRAC,    // }  right curly bracket
    SE_COMMA,     // ,
    SE_SEMI,      // ;
    IDN,          // [a-zA-Z][a-zA-Z_0-9]*
    INT_VAL,      // -*[0-9]+
    UNKOWN        // unclassified (spelling kept: it is part of the public interface)
} WordType;

/// Returns a printable name for @p type (defined elsewhere).
string getWordTypeName(WordType type);

/// Category of a single input character, used as the automaton alphabet.
typedef enum InputCharType {
    LETTER = 0,   // letter                                   0
    UNDERLINE,    // _                                        1
    DIGIT,        // digit                                    2
                  // When a number has been recognised, a check is made before
                  // returning to avoid literals such as 01 (GCC accepts 01 and
                  // treats it as 1).
    // Operators
    ADD,          // +                                        3
    SUB,          // -                                        4
    MUL,          // *                                        5
    DIV,          // /                                        6
    MOD,          // %                                        7
    EQ,           // =                                        8
    GT,           // >                                        9
    LT,           // <                                        10
    NOT,          // !                                        11
    AND,          // &                                        12
    OR,           // |                                        13
    // Separators
    LBRACKET,     // (                                        14
    RBRACKET,     // )                                        15
    LCBRAC,       // {                                        16
    RCBRAC,       // }                                        17
    COMMA,        // ,                                        18
    SEMI,         // ;                                        19
    EPSILON,      // empty (epsilon) input                    20
} InputCharType;

/// Returns a printable name for @p type (defined elsewhere).
string getInputChartypeName(InputCharType type);

/// Coarse token class: keyword, operator, separator, identifier, integer, unknown.
enum class TokenType {
    KW = 0,
    OP,
    SE,
    IDN,
    INT,
    UNKNOWN
};

/// Maps a WordType plus its lexeme to a TokenType (defined elsewhere).
TokenType getTokenType(WordType wordType, string buffer);

/// A lexeme together with its coarse class.
typedef struct Token {
    string value;
    TokenType type;
} Token;

/// Classifies a single character (defined elsewhere).
InputCharType getInputCharType(char c);

/// Overload taking the lexeme as well (defined elsewhere).
string getWordTypeName(WordType type, string buffer);

/// One automaton state. States are referenced through non-owning raw pointers;
/// this header does not show who allocates or frees them.
class State {
public:
    int id;                                            // state number
    map<InputCharType, set<State*>> transitions;       // input category -> set of target states
    bool isFinalState;                                 // whether this is an accepting state
    WordType wordType;                                 // lexical unit type reported at this state

    /// Creates a non-accepting state numbered @p id with word type UNKOWN.
    State(int id) : id(id), isFinalState(false), wordType(UNKOWN) {}

    /// Adds an edge labelled @p input to @p targetState; duplicates are ignored by the set.
    void addTransition(InputCharType input, State* targetState) {
        transitions[input].insert(targetState);
    }

    /// Sets the accepting flag and the word type reported for this state.
    void setFinalState(bool isFinal, WordType type) {
        isFinalState = isFinal;
        wordType = type;
    }

    /// Orders states by id.
    bool operator<(const State& other) const {
        return id < other.id;
    }
};

/// Orders State pointers by state id so that sets of states iterate in id order.
struct StatePtrCompare {
    bool operator()(const State* lhs, const State* rhs) const {
        return lhs->id < rhs->id;
    }
};

/// Non-deterministic finite automaton: start state, accepting states, all states.
class NFA {
public:
    State* startState;                         // start state
    set<State*, StatePtrCompare> endStates;    // accepting states
    set<State*, StatePtrCompare> states;       // all states

    /// Stores the given start state and state sets; the by-value sets are moved
    /// into the members instead of being copied a second time.
    NFA(State* startState, set<State*, StatePtrCompare> endStates, set<State*, StatePtrCompare> states)
        : startState(startState), endStates(std::move(endStates)), states(std::move(states)) {}
    // void printNFA();
};

// NFA construction and helpers (all defined elsewhere).
NFA RexToNFA();
void printNFA(const NFA& nfa);
NFA buildNFA(string filename);
set<State*, StatePtrCompare> move(const set<State*, StatePtrCompare>& states, InputCharType input);
set<State*, StatePtrCompare> epsilonClosure(const set<State*, StatePtrCompare>& states);

/// Deterministic finite automaton; same layout as NFA.
class DFA {
public:
    State* startState;                         // start state
    set<State*, StatePtrCompare> endStates;    // accepting states
    set<State*, StatePtrCompare> states;       // all states

    /// Stores the given start state and state sets; the by-value sets are moved
    /// into the members instead of being copied a second time.
    DFA(State* startState, set<State*, StatePtrCompare> endStates, set<State*, StatePtrCompare> states)
        : startState(startState), endStates(std::move(endStates)), states(std::move(states)) {}
};

// DFA construction and helpers (all defined elsewhere).
void removeUnreachableStates(DFA& dfa);
void printDFA(const DFA& dfa);
DFA nfaToDFA(const NFA& nfa);

/// Strict weak ordering over sets of states, usable as a key comparator for
/// containers keyed by state sets. Smaller sets order first; equal-sized sets
/// are compared element by element.
struct SetComparator {
    bool operator()(const set<State*, StatePtrCompare>& a, const set<State*, StatePtrCompare>& b) const {
        if (a.size() != b.size()) {
            return a.size() < b.size();
        }
        // The sets already iterate in ascending id order (StatePtrCompare), so
        // no copy or re-sort is needed. std::less gives a well-defined total
        // order on pointers, unlike the built-in '<' on unrelated objects.
        return std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end(), std::less<State*>());
    }
};

/// Returns the grammar symbol name for a word type and lexeme (defined elsewhere).
string getGrammarName(WordType type, string buffer);

/// Minimises @p dfa (defined elsewhere).
DFA minimizeDFA(const DFA& dfa);

/// Runs @p dfa over @p input (defined elsewhere).
/// NOTE(review): the element type of the returned vector is presumed to be
/// Token, and the role of @p output is not visible here; confirm both against
/// the definition.
vector<Token> recognize(const DFA& dfa, const string& input, const string& output);

/// Reads the file named @p filename (defined elsewhere).
string readfile(const string& filename);

#endif