#include "nfa.h" NFA RexToNFA() { //由于里面存在||,所以不同正则间使用空格分隔代表| l代表letter,_代表下划线,0代表数字(也可以是d,但是为了使用已经有的函数), //[lu]代表l|u string rex = "+ - * / % = > < == <= >= != && || ( ) { } , ; [l_][l_0]* -?00*"; //下面给出正则对应的输出(终态) vector finalState = { OP_ADD, OP_SUB,OP_MUL,OP_DIV,OP_MOD,OP_ASSIGN,OP_GT,OP_LT, OP_EQ,OP_LE,OP_GE,OP_NE, OP_AND, OP_OR,SE_LBRAC, SE_RBRAC, SE_LCBRAC,SE_RCBRAC,SE_COMMA,SE_SEMI,IDN,INT_VAL }; stringstream ss(rex); string target; // 创建初始状态 int stateIndex = 0; int finalIndex = 0; State* startState = new State(stateIndex++); set endStates; set allStates = { startState }; while (getline(ss, target,' ')) { //如获得[l_][l_0]* State* currentState = startState; for (size_t i = 0; i < target.length();i++) { //创建一个新状态,startState通过输入InputCharType到达该状态 State* newState = new State(stateIndex++); allStates.insert(newState); //需要往后看一个符号 if (target[i] == '[') { //[...]构成一种输入,查看]后面是否有?或者*,来判断当前状态的构成 for (i=i+1; i < target.length() && target[i] != ']'; i++) { InputCharType input = getInputCharType(target[i]); if (input != EPSILON) { // 添加转移函数,从当前状态向新状态转移 currentState->addTransition(input, newState); } } } else { InputCharType input = getInputCharType(target[i]); currentState->addTransition(input, newState); } //往后查看一个输入 if (i + 1 < target.length() && target[i + 1] == '?') { //创建EPSILON转移状态 State* epsState = new State(stateIndex++); allStates.insert(epsState); currentState->addTransition(EPSILON, epsState); newState->addTransition(EPSILON, epsState); currentState = epsState; // 跳过'?'字符 i++; } else if (i + 1 < target.length() && target[i + 1] == '*') { State* epsState = new State(stateIndex++); allStates.insert(epsState); currentState->addTransition(EPSILON, epsState); newState->addTransition(EPSILON, epsState); epsState->addTransition(EPSILON, currentState); currentState = epsState; // 跳过'*'字符 i++; } else { currentState = newState; } //判断是否是终止状态 if (i == (target.length() - 1)) { // 到达最后一个字符,将当前状态设置为终止状态 currentState->setFinalState(true, finalState[endStates.size()]); endStates.insert(currentState); } }//for } // 返回字符集合对应的NFA return NFA(startState, endStates, allStates); } NFA buildNFA(string filename) { ifstream ifs(filename); if (!ifs) { cerr << "Cannot open file: " << filename << endl; exit(EXIT_FAILURE); } int stateNum, inputNum; ifs >> stateNum >> inputNum; vector states(stateNum); for (int i = 0; i < stateNum; i++) { states[i] = new State(i); } State* startState = states[0]; set endStates; for (int i = 0; i < stateNum; i++) { for (int j = 0; j < inputNum; j++) { string targetStateIDs; ifs >> targetStateIDs; if (targetStateIDs.compare("#") != 0) { stringstream ss(targetStateIDs); string targetStateIDStr; while (getline(ss, targetStateIDStr, ',')) { int targetStateID = stoi(targetStateIDStr); states[i]->addTransition(static_cast(j), states[targetStateID]); } } } } int endStateNum; ifs >> endStateNum; for (int i = 0; i < endStateNum; i++) { int endStateID, wordTypeID; ifs >> endStateID >> wordTypeID; states[endStateID]->setFinalState(true, static_cast(wordTypeID)); endStates.insert(states[endStateID]); } return NFA(startState, endStates, set(states.begin(), states.end())); } void printNFA(const NFA& nfa) { cout << "Start state: " << nfa.startState->id << endl; cout << "End states: "<id << " " << getWordTypeName(state->wordType) << " " << (state->isFinalState == true) << endl; } cout << endl; cout << "States and transitions:" << endl; for (auto state : nfa.states) { cout << "State " << state->id << ":" << endl; for (auto transition : state->transitions) { cout << "\tInput " << getInputChartypeName(transition.first) << ": "; for (auto targetState : transition.second) { cout << targetState->id << " "; } cout << endl; } } } set move(const set& states, InputCharType input) { set targetStates; for (State* state : states) { auto it = state->transitions.find(input); if (it != state->transitions.end()) { for (State* targetState : it->second) { if (targetStates.find(targetState) == targetStates.end()) { targetStates.insert(targetState); } } } } return targetStates; } set epsilonClosure(const set& states) { set closure = states; stack stateStack; for (State* state : states) { stateStack.push(state); } while (!stateStack.empty()) { State* currentState = stateStack.top(); stateStack.pop(); auto it = currentState->transitions.find(EPSILON); if (it != currentState->transitions.end()) { for (State* nextState : it->second) { if (closure.find(nextState) == closure.end()) {//防止同一状态多次进栈,set自带去重 closure.insert(nextState); stateStack.push(nextState); } } } } return closure; } DFA nfaToDFA(const NFA& nfa) { map, State*, SetComparator> dfaStatesMap; // 用于映射NFA状态集合到DFA状态的映射表 queue> nfaStatesQueue; // 用于BFS遍历的集合队列 set dfaStates; set dfaEndStates; set nfaStartClosure = epsilonClosure({ nfa.startState }); State* dfaStartState = new State(0); dfaStatesMap[nfaStartClosure] = dfaStartState; dfaStates.insert(dfaStartState); nfaStatesQueue.push(nfaStartClosure); int nextStateId = 1; //set nfaStartClosure while (!nfaStatesQueue.empty()) { set currentNFAStates = nfaStatesQueue.front(); nfaStatesQueue.pop(); State* currentDFAState = dfaStatesMap[currentNFAStates]; // 检查是否有终止状态,如果有,设置DFA状态为终止状态 for (State* nfaState : currentNFAStates) { if (nfaState->isFinalState) { // cout << nfaState->id << "is FinalState" << endl; currentDFAState->setFinalState(true, nfaState->wordType); dfaEndStates.insert(currentDFAState); break; } } // 遍历所有输入字符类型 for (int i = 0; i < static_cast(EPSILON); i++) { InputCharType inputCharType = static_cast(i); set nextNFAStates = epsilonClosure(move(currentNFAStates, inputCharType)); if (nextNFAStates.empty()) { continue; } // 如果NFA状态集合不存在于映射表中,则创建新的DFA状态 if (dfaStatesMap.find(nextNFAStates) == dfaStatesMap.end()) { State* newDFAState = new State(nextStateId++); dfaStatesMap[nextNFAStates] = newDFAState; dfaStates.insert(newDFAState); nfaStatesQueue.push(nextNFAStates); } currentDFAState->addTransition(inputCharType, dfaStatesMap[nextNFAStates]); } } return DFA(dfaStartState, dfaEndStates, dfaStates); } void printDFA(const DFA& dfa) { cout << "Start state: " << dfa.startState->id << endl; cout << "End states: "<id << " " << getWordTypeName(state->wordType) << endl; } cout << endl; cout << "States and transitions:" << endl; for (auto state : dfa.states) { cout << "State " << state->id << ":" << endl; for (auto transition : state->transitions) { cout << "\tInput " << getInputChartypeName(transition.first) << ": "; for (auto targetState : transition.second) { cout << targetState->id << " "; } cout << endl; } } }