compiler-temp/nfa/nfa.cpp

239 lines
9.0 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// 将正则表达式转换为非确定性有限自动机
#include "nfa.h"
// 处理正则表达式,描述终态
NFA RexToNFA() {
//由于里面存在||,所以不同正则间使用空格分隔代表| l代表letter_代表下划线0代表数字(也可以是d但是为了使用已经有的函数)
//[lu]代表l|u
string rex = "+ - * / % = > < == <= >= != && || ( ) { } , ; [l_][l_0]* -?00* -?[0-9]+\\.[0-9]+";
//下面给出正则对应的输出(终态)
vector<WordType> finalState = {
OP_ADD, OP_SUB,OP_MUL,OP_DIV,OP_MOD,OP_ASSIGN,OP_GT,OP_LT, OP_EQ,OP_LE,OP_GE,OP_NE, OP_AND, OP_OR,SE_LBRAC, SE_RBRAC,
SE_LCBRAC,SE_RCBRAC,SE_COMMA,SE_SEMI,IDN,INT_VAL,FLOAT_CONST
};
stringstream ss(rex);
string target;
// 创建初始状态
int stateIndex = 0;
int finalIndex = 0;
State* startState = new State(stateIndex++);
set<State*, StatePtrCompare> endStates;
set<State*, StatePtrCompare> allStates = { startState };
// 按空格分割
while (getline(ss, target,' ')) {
//如获得[l_][l_0]*
State* currentState = startState;
cout<<target<<endl;
if (target == "-?[0-9]+\\.[0-9]+") {
// 处理浮点数
for (size_t i = 0; i < target.length(); i++) {
State* newState = new State(stateIndex++);
allStates.insert(newState);
InputCharType input = getInputCharType(target[i]);
currentState->addTransition(input, newState);
currentState = newState;
}
currentState->setFinalState(true, finalState[endStates.size()]);
endStates.insert(currentState);
continue;
}
for (size_t i = 0; i < target.length();i++) {
//创建一个新状态startState通过输入InputCharType到达该状态
State* newState = new State(stateIndex++);
allStates.insert(newState);
//需要往后看一个符号
if (target[i] == '[') {
//[...]构成一种输入,查看]后面是否有?或者*,来判断当前状态的构成
for (i=i+1; i < target.length() && target[i] != ']'; i++) {
InputCharType input = getInputCharType(target[i]);
if (input != EPSILON) {
// 添加转移函数,从当前状态向新状态转移
currentState->addTransition(input, newState);
}
}
}
else {
InputCharType input = getInputCharType(target[i]);
currentState->addTransition(input, newState);
}
//往后查看一个输入
if (i + 1 < target.length() && target[i + 1] == '?') {
//创建EPSILON转移状态
State* epsState = new State(stateIndex++);
allStates.insert(epsState);
currentState->addTransition(EPSILON, epsState);
newState->addTransition(EPSILON, epsState);
currentState = epsState;
// 跳过'?'字符
i++;
}
else if (i + 1 < target.length() && target[i + 1] == '*') {
State* epsState = new State(stateIndex++);
allStates.insert(epsState);
currentState->addTransition(EPSILON, epsState);
newState->addTransition(EPSILON, epsState);
epsState->addTransition(EPSILON, currentState);
currentState = epsState;
// 跳过'*'字符
i++;
}
else {
currentState = newState;
}
//判断是否是终止状态
if (i == (target.length() - 1)) {
// 到达最后一个字符,将当前状态设置为终止状态
currentState->setFinalState(true, finalState[endStates.size()]);
endStates.insert(currentState);
}
}//for
}
// 返回字符集合对应的NFA
cout<<"ok"<<endl;
return NFA(startState, endStates, allStates);
}
void printNFA(const NFA& nfa) {
cout << "Start state: " << nfa.startState->id << endl;
cout << "End states: "<<endl;
for (auto state : nfa.endStates) {
cout << state->id << " " << getWordTypeName(state->wordType) << " " << (state->isFinalState == true) << endl;
}
cout << endl;
cout << "States and transitions:" << endl;
for (auto state : nfa.states) {
cout << "State " << state->id << ":" << endl;
for (auto transition : state->transitions) {
cout << "\tInput " << getInputChartypeName(transition.first) << ": ";
for (auto targetState : transition.second) {
cout << targetState->id << " ";
}
cout << endl;
}
}
}
set<State*, StatePtrCompare> move(const set<State*, StatePtrCompare>& states, InputCharType input) {
set<State*, StatePtrCompare> targetStates;
for (State* state : states) {
auto it = state->transitions.find(input);
if (it != state->transitions.end()) {
for (State* targetState : it->second) {
if (targetStates.find(targetState) == targetStates.end()) {
targetStates.insert(targetState);
}
}
}
}
return targetStates;
}
set<State*, StatePtrCompare> epsilonClosure(const set<State*, StatePtrCompare>& states) {
set<State*, StatePtrCompare> closure = states;
stack<State*> stateStack;
for (State* state : states) {
stateStack.push(state);
}
while (!stateStack.empty()) {
State* currentState = stateStack.top();
stateStack.pop();
auto it = currentState->transitions.find(EPSILON);
if (it != currentState->transitions.end()) {
for (State* nextState : it->second) {
if (closure.find(nextState) == closure.end()) {//防止同一状态多次进栈set自带去重
closure.insert(nextState);
stateStack.push(nextState);
}
}
}
}
return closure;
}
DFA nfaToDFA(const NFA& nfa) {
map<set<State*, StatePtrCompare>, State*, SetComparator> dfaStatesMap; // 用于映射NFA状态集合到DFA状态的映射表
queue<set<State*, StatePtrCompare>> nfaStatesQueue; // 用于BFS遍历的集合队列
set<State*, StatePtrCompare> dfaStates;
set<State*, StatePtrCompare> dfaEndStates;
set<State*, StatePtrCompare> nfaStartClosure = epsilonClosure({ nfa.startState });
State* dfaStartState = new State(0);
dfaStatesMap[nfaStartClosure] = dfaStartState;
dfaStates.insert(dfaStartState);
nfaStatesQueue.push(nfaStartClosure);
int nextStateId = 1;
//set<State*, StatePtrCompare> nfaStartClosure
while (!nfaStatesQueue.empty()) {
set<State*, StatePtrCompare> currentNFAStates = nfaStatesQueue.front();
nfaStatesQueue.pop();
State* currentDFAState = dfaStatesMap[currentNFAStates];
// 检查是否有终止状态如果有设置DFA状态为终止状态
for (State* nfaState : currentNFAStates) {
if (nfaState->isFinalState) {
// cout << nfaState->id << "is FinalState" << endl;
currentDFAState->setFinalState(true, nfaState->wordType);
dfaEndStates.insert(currentDFAState);
break;
}
}
// 遍历所有输入字符类型
for (int i = 0; i < static_cast<int>(EPSILON); i++) {
InputCharType inputCharType = static_cast<InputCharType>(i);
set<State*, StatePtrCompare> nextNFAStates = epsilonClosure(move(currentNFAStates, inputCharType));
if (nextNFAStates.empty()) {
continue;
}
// 如果NFA状态集合不存在于映射表中则创建新的DFA状态
if (dfaStatesMap.find(nextNFAStates) == dfaStatesMap.end()) {
State* newDFAState = new State(nextStateId++);
dfaStatesMap[nextNFAStates] = newDFAState;
dfaStates.insert(newDFAState);
nfaStatesQueue.push(nextNFAStates);
}
currentDFAState->addTransition(inputCharType, dfaStatesMap[nextNFAStates]);
}
}
return DFA(dfaStartState, dfaEndStates, dfaStates);
}
void printDFA(const DFA& dfa) {
cout << "Start state: " << dfa.startState->id << endl;
cout << "End states: "<<endl;
for (auto state : dfa.endStates) {
cout << state->id << " " << getWordTypeName(state->wordType) << endl;
}
cout << endl;
cout << "States and transitions:" << endl;
for (auto state : dfa.states) {
cout << "State " << state->id << ":" << endl;
for (auto transition : state->transitions) {
cout << "\tInput " << getInputChartypeName(transition.first) << ": ";
for (auto targetState : transition.second) {
cout << targetState->id << " ";
}
cout << endl;
}
}
}