// 将正则表达式转换为非确定性有限自动机 #include "nfa.h" // 处理正则表达式,描述终态 NFA RexToNFA() { //由于里面存在||,所以不同正则间使用空格分隔代表| l代表letter,_代表下划线,0代表数字(也可以是d,但是为了使用已经有的函数), //[lu]代表l|u std::string rex = "+ - * / % = > < == <= >= != && || ( ) { } , ; [l_][l_0]* -?00*"; //正则对应的输出(终态) vector finalState = { WordType::OP_ADD, WordType::OP_SUB, WordType::OP_MUL, WordType::OP_DIV, WordType::OP_MOD, WordType::OP_ASSIGN, WordType::OP_GT, WordType::OP_LT, WordType::OP_EQ, WordType::OP_LE, WordType::OP_GE, WordType::OP_NE, WordType::OP_AND, WordType::OP_OR, WordType::SE_LBRAC, WordType::SE_RBRAC, WordType::SE_LCBRAC, WordType::SE_RCBRAC, WordType::SE_COMMA, WordType::SE_SEMI, WordType::IDN, WordType::INT_VAL }; stringstream ss(rex); string target; // 创建初始状态 int stateIndex = 0; int finalIndex = 0; State* startState = new State(stateIndex++); set endStates; set allStates = { startState }; // 按空格分割 while (getline(ss, target,' ')) { //如获得[l_][l_0]* State* currentState = startState; for (size_t i = 0; i < target.length();i++) { //创建一个新状态,startState通过输入InputCharType到达该状态 State* newState = new State(stateIndex++); allStates.insert(newState); //需要往后看一个符号 if (target[i] == '[') { //[...]构成一种输入,查看]后面是否有?或者*,来判断当前状态的构成 for (i=i+1; i < target.length() && target[i] != ']'; i++) { InputCharType input = getInputCharType(target[i]); if (input != InputCharType::EPSILON) { // 添加转移函数,从当前状态向新状态转移 currentState->addTransition(input, newState); } } } else { InputCharType input = getInputCharType(target[i]); currentState->addTransition(input, newState); } //往后查看一个输入 if (i + 1 < target.length() && target[i + 1] == '?') { //创建EPSILON转移状态 State* epsState = new State(stateIndex++); allStates.insert(epsState); currentState->addTransition(InputCharType::EPSILON, epsState); newState->addTransition(InputCharType::EPSILON, epsState); currentState = epsState; // 跳过'?'字符 i++; } else if (i + 1 < target.length() && target[i + 1] == '*') { State* epsState = new State(stateIndex++); allStates.insert(epsState); currentState->addTransition(InputCharType::EPSILON, epsState); newState->addTransition(InputCharType::EPSILON, epsState); epsState->addTransition(InputCharType::EPSILON, currentState); currentState = epsState; // 跳过'*'字符 i++; } else { currentState = newState; } //判断是否是终止状态 if (i == (target.length() - 1)) { // 到达最后一个字符,将当前状态设置为终止状态 currentState->setFinalState(true, finalState[endStates.size()]); endStates.insert(currentState); } }//for } // 返回字符集合对应的NFA return NFA(startState, endStates, allStates); } void printNFA(const NFA& nfa) { cout << "Start state: " << nfa.startState->id << endl; cout << "End states: "<id << " " << getWordTypeName(state->wordType) << " " << (state->isFinalState == true) << endl; } cout << endl; cout << "States and transitions:" << endl; for (auto state : nfa.states) { cout << "State " << state->id << ":" << endl; for (auto transition : state->transitions) { cout << "\tInput " << getInputChartypeName(transition.first) << ": "; for (auto targetState : transition.second) { cout << targetState->id << " "; } cout << endl; } } } set move(const set& states, InputCharType input) { set targetStates; for (State* state : states) { auto it = state->transitions.find(input); if (it != state->transitions.end()) { for (State* targetState : it->second) { if (targetStates.find(targetState) == targetStates.end()) { targetStates.insert(targetState); } } } } return targetStates; } set epsilonClosure(const set& states) { set closure = states; stack stateStack; for (State* state : states) { stateStack.push(state); } while (!stateStack.empty()) { State* currentState = stateStack.top(); stateStack.pop(); auto it = currentState->transitions.find(InputCharType::EPSILON); if (it != currentState->transitions.end()) { for (State* nextState : it->second) { if (closure.find(nextState) == closure.end()) {//防止同一状态多次进栈,set自带去重 closure.insert(nextState); stateStack.push(nextState); } } } } return closure; } DFA nfaToDFA(const NFA& nfa) { map, State*, SetComparator> dfaStatesMap; // 用于映射NFA状态集合到DFA状态的映射表 queue> nfaStatesQueue; // 用于BFS遍历的集合队列 set dfaStates; set dfaEndStates; set nfaStartClosure = epsilonClosure({ nfa.startState }); State* dfaStartState = new State(0); dfaStatesMap[nfaStartClosure] = dfaStartState; dfaStates.insert(dfaStartState); nfaStatesQueue.push(nfaStartClosure); int nextStateId = 1; //set nfaStartClosure while (!nfaStatesQueue.empty()) { set currentNFAStates = nfaStatesQueue.front(); nfaStatesQueue.pop(); State* currentDFAState = dfaStatesMap[currentNFAStates]; // 检查是否有终止状态,如果有,设置DFA状态为终止状态 for (State* nfaState : currentNFAStates) { if (nfaState->isFinalState) { // cout << nfaState->id << "is FinalState" << endl; currentDFAState->setFinalState(true, nfaState->wordType); dfaEndStates.insert(currentDFAState); break; } } // 遍历所有输入字符类型 for (int i = 0; i < static_cast(InputCharType::EPSILON); i++) { InputCharType inputCharType = static_cast(i); set nextNFAStates = epsilonClosure(move(currentNFAStates, inputCharType)); if (nextNFAStates.empty()) { continue; } // 如果NFA状态集合不存在于映射表中,则创建新的DFA状态 if (dfaStatesMap.find(nextNFAStates) == dfaStatesMap.end()) { State* newDFAState = new State(nextStateId++); dfaStatesMap[nextNFAStates] = newDFAState; dfaStates.insert(newDFAState); nfaStatesQueue.push(nextNFAStates); } currentDFAState->addTransition(inputCharType, dfaStatesMap[nextNFAStates]); } } return DFA(dfaStartState, dfaEndStates, dfaStates); } void printDFA(const DFA& dfa) { cout << "Start state: " << dfa.startState->id << endl; cout << "End states: "<id << " " << getWordTypeName(state->wordType) << endl; } cout << endl; cout << "States and transitions:" << endl; for (auto state : dfa.states) { cout << "State " << state->id << ":" << endl; for (auto transition : state->transitions) { cout << "\tInput " << getInputChartypeName(transition.first) << ": "; for (auto targetState : transition.second) { cout << targetState->id << " "; } cout << endl; } } }