254 lines
9.3 KiB
C++
254 lines
9.3 KiB
C++
#include "nfa.h"
|
||
NFA RexToNFA() {
|
||
//由于里面存在||,所以不同正则间使用空格分隔代表| l代表letter,_代表下划线,0代表数字(也可以是d,但是为了使用已经有的函数),
|
||
//[lu]代表l|u
|
||
string rex = "+ - * / % = > < == <= >= != && || ( ) { } , ; [l_][l_0]* -?00*";
|
||
//下面给出正则对应的输出(终态)
|
||
vector<WordType> finalState = {
|
||
OP_ADD, OP_SUB,OP_MUL,OP_DIV,OP_MOD,OP_ASSIGN,OP_GT,OP_LT, OP_EQ,OP_LE,OP_GE,OP_NE, OP_AND, OP_OR,SE_LBRAC, SE_RBRAC,
|
||
SE_LCBRAC,SE_RCBRAC,SE_COMMA,SE_SEMI,IDN,INT_VAL
|
||
};
|
||
stringstream ss(rex);
|
||
string target;
|
||
|
||
// 创建初始状态
|
||
int stateIndex = 0;
|
||
int finalIndex = 0;
|
||
State* startState = new State(stateIndex++);
|
||
set<State*, StatePtrCompare> endStates;
|
||
set<State*, StatePtrCompare> allStates = { startState };
|
||
while (getline(ss, target,' ')) {
|
||
//如获得[l_][l_0]*
|
||
State* currentState = startState;
|
||
|
||
for (size_t i = 0; i < target.length();i++) {
|
||
//创建一个新状态,startState通过输入InputCharType到达该状态
|
||
State* newState = new State(stateIndex++);
|
||
allStates.insert(newState);
|
||
//需要往后看一个符号
|
||
if (target[i] == '[') {
|
||
//[...]构成一种输入,查看]后面是否有?或者*,来判断当前状态的构成
|
||
for (i=i+1; i < target.length() && target[i] != ']'; i++) {
|
||
InputCharType input = getInputCharType(target[i]);
|
||
if (input != EPSILON) {
|
||
// 添加转移函数,从当前状态向新状态转移
|
||
currentState->addTransition(input, newState);
|
||
}
|
||
}
|
||
}
|
||
else {
|
||
InputCharType input = getInputCharType(target[i]);
|
||
currentState->addTransition(input, newState);
|
||
}
|
||
//往后查看一个输入
|
||
if (i + 1 < target.length() && target[i + 1] == '?') {
|
||
//创建EPSILON转移状态
|
||
State* epsState = new State(stateIndex++);
|
||
allStates.insert(epsState);
|
||
currentState->addTransition(EPSILON, epsState);
|
||
newState->addTransition(EPSILON, epsState);
|
||
currentState = epsState;
|
||
// 跳过'?'字符
|
||
i++;
|
||
}
|
||
else if (i + 1 < target.length() && target[i + 1] == '*') {
|
||
State* epsState = new State(stateIndex++);
|
||
allStates.insert(epsState);
|
||
currentState->addTransition(EPSILON, epsState);
|
||
newState->addTransition(EPSILON, epsState);
|
||
epsState->addTransition(EPSILON, currentState);
|
||
currentState = epsState;
|
||
// 跳过'*'字符
|
||
i++;
|
||
}
|
||
else {
|
||
currentState = newState;
|
||
}
|
||
//判断是否是终止状态
|
||
if (i == (target.length() - 1)) {
|
||
// 到达最后一个字符,将当前状态设置为终止状态
|
||
currentState->setFinalState(true, finalState[endStates.size()]);
|
||
endStates.insert(currentState);
|
||
}
|
||
}//for
|
||
}
|
||
// 返回字符集合对应的NFA
|
||
return NFA(startState, endStates, allStates);
|
||
}
|
||
|
||
|
||
NFA buildNFA(string filename) {
|
||
ifstream ifs(filename);
|
||
if (!ifs) {
|
||
cerr << "Cannot open file: " << filename << endl;
|
||
exit(EXIT_FAILURE);
|
||
}
|
||
|
||
int stateNum, inputNum;
|
||
ifs >> stateNum >> inputNum;
|
||
|
||
vector<State*> states(stateNum);
|
||
for (int i = 0; i < stateNum; i++) {
|
||
states[i] = new State(i);
|
||
}
|
||
|
||
State* startState = states[0];
|
||
set<State*, StatePtrCompare> endStates;
|
||
for (int i = 0; i < stateNum; i++) {
|
||
for (int j = 0; j < inputNum; j++) {
|
||
string targetStateIDs;
|
||
ifs >> targetStateIDs;
|
||
if (targetStateIDs.compare("#") != 0) {
|
||
stringstream ss(targetStateIDs);
|
||
string targetStateIDStr;
|
||
while (getline(ss, targetStateIDStr, ',')) {
|
||
int targetStateID = stoi(targetStateIDStr);
|
||
states[i]->addTransition(static_cast<InputCharType>(j), states[targetStateID]);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
int endStateNum;
|
||
ifs >> endStateNum;
|
||
for (int i = 0; i < endStateNum; i++) {
|
||
int endStateID, wordTypeID;
|
||
ifs >> endStateID >> wordTypeID;
|
||
states[endStateID]->setFinalState(true, static_cast<WordType>(wordTypeID));
|
||
endStates.insert(states[endStateID]);
|
||
}
|
||
|
||
return NFA(startState, endStates, set<State*, StatePtrCompare>(states.begin(), states.end()));
|
||
}
|
||
|
||
void printNFA(const NFA& nfa) {
|
||
cout << "Start state: " << nfa.startState->id << endl;
|
||
cout << "End states: "<<endl;
|
||
for (auto state : nfa.endStates) {
|
||
cout << state->id << " " << getWordTypeName(state->wordType) << " " << (state->isFinalState == true) << endl;
|
||
}
|
||
cout << endl;
|
||
|
||
cout << "States and transitions:" << endl;
|
||
for (auto state : nfa.states) {
|
||
cout << "State " << state->id << ":" << endl;
|
||
for (auto transition : state->transitions) {
|
||
cout << "\tInput " << getInputChartypeName(transition.first) << ": ";
|
||
for (auto targetState : transition.second) {
|
||
cout << targetState->id << " ";
|
||
}
|
||
cout << endl;
|
||
}
|
||
}
|
||
}
|
||
|
||
set<State*, StatePtrCompare> move(const set<State*, StatePtrCompare>& states, InputCharType input) {
|
||
set<State*, StatePtrCompare> targetStates;
|
||
for (State* state : states) {
|
||
auto it = state->transitions.find(input);
|
||
if (it != state->transitions.end()) {
|
||
for (State* targetState : it->second) {
|
||
if (targetStates.find(targetState) == targetStates.end()) {
|
||
targetStates.insert(targetState);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
return targetStates;
|
||
}
|
||
|
||
|
||
set<State*, StatePtrCompare> epsilonClosure(const set<State*, StatePtrCompare>& states) {
|
||
set<State*, StatePtrCompare> closure = states;
|
||
stack<State*> stateStack;
|
||
for (State* state : states) {
|
||
stateStack.push(state);
|
||
}
|
||
while (!stateStack.empty()) {
|
||
State* currentState = stateStack.top();
|
||
stateStack.pop();
|
||
auto it = currentState->transitions.find(EPSILON);
|
||
if (it != currentState->transitions.end()) {
|
||
for (State* nextState : it->second) {
|
||
if (closure.find(nextState) == closure.end()) {//防止同一状态多次进栈,set自带去重
|
||
closure.insert(nextState);
|
||
stateStack.push(nextState);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
return closure;
|
||
}
|
||
|
||
DFA nfaToDFA(const NFA& nfa) {
|
||
map<set<State*, StatePtrCompare>, State*, SetComparator> dfaStatesMap; // 用于映射NFA状态集合到DFA状态的映射表
|
||
queue<set<State*, StatePtrCompare>> nfaStatesQueue; // 用于BFS遍历的集合队列
|
||
set<State*, StatePtrCompare> dfaStates;
|
||
set<State*, StatePtrCompare> dfaEndStates;
|
||
|
||
set<State*, StatePtrCompare> nfaStartClosure = epsilonClosure({ nfa.startState });
|
||
State* dfaStartState = new State(0);
|
||
dfaStatesMap[nfaStartClosure] = dfaStartState;
|
||
dfaStates.insert(dfaStartState);
|
||
nfaStatesQueue.push(nfaStartClosure);
|
||
|
||
int nextStateId = 1;
|
||
//set<State*, StatePtrCompare> nfaStartClosure
|
||
while (!nfaStatesQueue.empty()) {
|
||
set<State*, StatePtrCompare> currentNFAStates = nfaStatesQueue.front();
|
||
nfaStatesQueue.pop();
|
||
State* currentDFAState = dfaStatesMap[currentNFAStates];
|
||
|
||
// 检查是否有终止状态,如果有,设置DFA状态为终止状态
|
||
for (State* nfaState : currentNFAStates) {
|
||
if (nfaState->isFinalState) {
|
||
// cout << nfaState->id << "is FinalState" << endl;
|
||
currentDFAState->setFinalState(true, nfaState->wordType);
|
||
dfaEndStates.insert(currentDFAState);
|
||
break;
|
||
}
|
||
}
|
||
|
||
// 遍历所有输入字符类型
|
||
for (int i = 0; i < static_cast<int>(EPSILON); i++) {
|
||
InputCharType inputCharType = static_cast<InputCharType>(i);
|
||
set<State*, StatePtrCompare> nextNFAStates = epsilonClosure(move(currentNFAStates, inputCharType));
|
||
if (nextNFAStates.empty()) {
|
||
continue;
|
||
}
|
||
|
||
// 如果NFA状态集合不存在于映射表中,则创建新的DFA状态
|
||
if (dfaStatesMap.find(nextNFAStates) == dfaStatesMap.end()) {
|
||
State* newDFAState = new State(nextStateId++);
|
||
dfaStatesMap[nextNFAStates] = newDFAState;
|
||
dfaStates.insert(newDFAState);
|
||
nfaStatesQueue.push(nextNFAStates);
|
||
}
|
||
currentDFAState->addTransition(inputCharType, dfaStatesMap[nextNFAStates]);
|
||
}
|
||
}
|
||
|
||
return DFA(dfaStartState, dfaEndStates, dfaStates);
|
||
}
|
||
|
||
void printDFA(const DFA& dfa) {
|
||
cout << "Start state: " << dfa.startState->id << endl;
|
||
cout << "End states: "<<endl;
|
||
for (auto state : dfa.endStates) {
|
||
cout << state->id << " " << getWordTypeName(state->wordType) << endl;
|
||
}
|
||
cout << endl;
|
||
cout << "States and transitions:" << endl;
|
||
for (auto state : dfa.states) {
|
||
cout << "State " << state->id << ":" << endl;
|
||
for (auto transition : state->transitions) {
|
||
cout << "\tInput " << getInputChartypeName(transition.first) << ": ";
|
||
for (auto targetState : transition.second) {
|
||
cout << targetState->id << " ";
|
||
}
|
||
cout << endl;
|
||
}
|
||
}
|
||
}
|
||
|