This commit is contained in:
LYC 2024-05-09 16:29:51 +08:00
parent 1af61d2eef
commit 424def7590
8 changed files with 66 additions and 50 deletions

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
*/build/ */build/
build/ build/
.vscode/settings.json

View File

@ -3,6 +3,9 @@
"xstring": "cpp", "xstring": "cpp",
"iterator": "cpp", "iterator": "cpp",
"ostream": "cpp", "ostream": "cpp",
"vector": "cpp" "vector": "cpp",
"sstream": "cpp",
"queue": "cpp",
"set": "cpp"
} }
} }

View File

@ -22,3 +22,6 @@ add_executable(main ${SOURCES_MAIN})
# #
target_link_libraries(main PRIVATE LL nfa) target_link_libraries(main PRIVATE LL nfa)

View File

@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10)
project(nfa) project(nfa)
# cpp # cpp
file(GLOB SOURCES "*.cpp") file(GLOB SOURCES dfa.cpp nfa.cpp tool.cpp test_main.cpp)
# bin # bin
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
@ -12,3 +12,9 @@ add_library(nfa STATIC ${SOURCES})
# #
target_include_directories(nfa PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(nfa PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
# Standalone test driver for the nfa library.
# Only real source files are listed here: `nfa` is the static library target
# declared by add_library() above, not a source file, so it is attached through
# target_link_libraries() instead of the source list.
# NOTE(review): test_main.cpp is also part of SOURCES for the nfa library itself;
# confirm that compiling a main() into the library is intended.
add_executable(test_nfa test_main.cpp)
# Link the test driver against the nfa library (PRIVATE: nothing links against test_nfa).
target_link_libraries(test_nfa PRIVATE nfa)

View File

@ -11,12 +11,12 @@
NFA RexToNFA() { NFA RexToNFA() {
//由于里面存在||,所以不同正则间使用空格分隔代表| l代表letter_代表下划线0代表数字(也可以是d但是为了使用已经有的函数) //由于里面存在||,所以不同正则间使用空格分隔代表| l代表letter_代表下划线0代表数字(也可以是d但是为了使用已经有的函数)
//[lu]代表l|u //[lu]代表l|u
string rex = "+ - * / % = > < == <= >= != && || ( ) { } , ; [l_][l_0]* -?00*"; string rex = "+ - * / % = > < == <= >= != && || ( ) { } , ; [l_][l_0]* -?00* -?[0-9]+\\.[0-9]+";
//下面给出正则对应的输出(终态) //下面给出正则对应的输出(终态)
vector<WordType> finalState = { vector<WordType> finalState = {
OP_ADD, OP_SUB,OP_MUL,OP_DIV,OP_MOD,OP_ASSIGN,OP_GT,OP_LT, OP_EQ,OP_LE,OP_GE,OP_NE, OP_AND, OP_OR,SE_LBRAC, SE_RBRAC, OP_ADD, OP_SUB,OP_MUL,OP_DIV,OP_MOD,OP_ASSIGN,OP_GT,OP_LT, OP_EQ,OP_LE,OP_GE,OP_NE, OP_AND, OP_OR,SE_LBRAC, SE_RBRAC,
SE_LCBRAC,SE_RCBRAC,SE_COMMA,SE_SEMI,IDN,INT_VAL SE_LCBRAC,SE_RCBRAC,SE_COMMA,SE_SEMI,IDN,INT_VAL,FLOAT_CONST
}; };
stringstream ss(rex); stringstream ss(rex);
@ -34,6 +34,21 @@ NFA RexToNFA() {
//如获得[l_][l_0]* //如获得[l_][l_0]*
State* currentState = startState; State* currentState = startState;
cout<<target<<endl;
if (target == "-?[0-9]+\\.[0-9]+") {
// 处理浮点数
for (size_t i = 0; i < target.length(); i++) {
State* newState = new State(stateIndex++);
allStates.insert(newState);
InputCharType input = getInputCharType(target[i]);
currentState->addTransition(input, newState);
currentState = newState;
}
currentState->setFinalState(true, finalState[endStates.size()]);
endStates.insert(currentState);
continue;
}
for (size_t i = 0; i < target.length();i++) { for (size_t i = 0; i < target.length();i++) {
//创建一个新状态startState通过输入InputCharType到达该状态 //创建一个新状态startState通过输入InputCharType到达该状态
State* newState = new State(stateIndex++); State* newState = new State(stateIndex++);
@ -86,53 +101,10 @@ NFA RexToNFA() {
}//for }//for
} }
// 返回字符集合对应的NFA // 返回字符集合对应的NFA
cout<<"ok"<<endl;
return NFA(startState, endStates, allStates); return NFA(startState, endStates, allStates);
} }
// Builds an NFA from a textual transition-table file.
//
// Layout of the file, as consumed below (whitespace-separated tokens):
//   <stateNum> <inputNum>
//   stateNum * inputNum cells, one row per source state and one column per
//       input index; a cell is "#" (no transition) or a comma-separated list
//       of target state ids
//   <endStateNum>
//   endStateNum pairs: <endStateID> <wordTypeID>
//
// State 0 is the start state. Column index j is cast to InputCharType and
// wordTypeID is cast to WordType without a range check.
// NOTE(review): confirm callers only supply values covered by those enums.
//
// Any unreadable, malformed or out-of-range value prints a diagnostic to cerr
// and terminates with EXIT_FAILURE, exactly like an unopenable file does.
NFA buildNFA(string filename) {
    ifstream ifs(filename);
    if (!ifs) {
        cerr << "Cannot open file: " << filename << endl;
        exit(EXIT_FAILURE);
    }

    // Single exit path for every format error found while parsing.
    auto fail = [&filename](const string& reason) {
        cerr << "Invalid NFA file " << filename << ": " << reason << endl;
        exit(EXIT_FAILURE);
    };

    int stateNum = 0;
    int inputNum = 0;
    if (!(ifs >> stateNum >> inputNum)) {
        fail("cannot read state and input counts");
    }
    // states[0] is used as the start state, so at least one state must exist.
    if (stateNum <= 0) {
        fail("state count must be positive");
    }
    if (inputNum < 0) {
        fail("input count must not be negative");
    }

    vector<State*> states(stateNum);
    for (int i = 0; i < stateNum; i++) {
        states[i] = new State(i);
    }

    State* startState = states[0];
    set<State*, StatePtrCompare> endStates;

    for (int i = 0; i < stateNum; i++) {
        for (int j = 0; j < inputNum; j++) {
            string targetStateIDs;
            // A failed read would leave the stream unusable for the rest of
            // the file, so stop here instead of continuing with garbage.
            if (!(ifs >> targetStateIDs)) {
                fail("transition table is truncated");
            }
            if (targetStateIDs.compare("#") != 0) {
                stringstream ss(targetStateIDs);
                string targetStateIDStr;
                while (getline(ss, targetStateIDStr, ',')) {
                    // Stream extraction reports bad or overflowing numbers via
                    // the fail bit instead of throwing like stoi does.
                    stringstream idStream(targetStateIDStr);
                    int targetStateID = 0;
                    if (!(idStream >> targetStateID)) {
                        fail("target state id is not a number: '" + targetStateIDStr + "'");
                    }
                    if (targetStateID < 0 || targetStateID >= stateNum) {
                        fail("target state id is out of range: '" + targetStateIDStr + "'");
                    }
                    states[i]->addTransition(static_cast<InputCharType>(j), states[targetStateID]);
                }
            }
        }
    }

    int endStateNum = 0;
    if (!(ifs >> endStateNum) || endStateNum < 0) {
        fail("cannot read end state count");
    }
    for (int i = 0; i < endStateNum; i++) {
        int endStateID = 0;
        int wordTypeID = 0;
        if (!(ifs >> endStateID >> wordTypeID)) {
            fail("end state list is truncated");
        }
        if (endStateID < 0 || endStateID >= stateNum) {
            fail("end state id is out of range");
        }
        states[endStateID]->setFinalState(true, static_cast<WordType>(wordTypeID));
        endStates.insert(states[endStateID]);
    }

    return NFA(startState, endStates, set<State*, StatePtrCompare>(states.begin(), states.end()));
}
void printNFA(const NFA& nfa) { void printNFA(const NFA& nfa) {
cout << "Start state: " << nfa.startState->id << endl; cout << "Start state: " << nfa.startState->id << endl;

View File

@ -26,6 +26,9 @@ typedef enum WordType {
KW_RETURN, // return KW_RETURN, // return
KW_CONST, // const KW_CONST, // const
KW_MAIN, //main KW_MAIN, //main
KW_IF,
KW_ELSE,
KW_FLOAT,
OP_ADD, // + OP_ADD, // +
OP_SUB, // - OP_SUB, // -
@ -51,6 +54,7 @@ typedef enum WordType {
IDN, // [a-zA-Z][a-zA-Z_0-9]* IDN, // [a-zA-Z][a-zA-Z_0-9]*
INT_VAL, // -*[0-9]+ INT_VAL, // -*[0-9]+
FLOAT_CONST, //[0-9]+'.'[0-9]+
UNKOWN UNKOWN
}WordType; }WordType;
string getWordTypeName(WordType type); string getWordTypeName(WordType type);
@ -81,6 +85,7 @@ typedef enum InputCharType {
RCBRAC, // } 17 RCBRAC, // } 17
COMMA, // , 18 COMMA, // , 18
SEMI, // ; 19 SEMI, // ; 19
POINT, // .
EPSILON, // 空字符 20 EPSILON, // 空字符 20
}InputCharType; }InputCharType;
@ -160,8 +165,6 @@ public:
// 正则表达式构建状态机 // 正则表达式构建状态机
NFA RexToNFA(); NFA RexToNFA();
void printNFA(const NFA& nfa); void printNFA(const NFA& nfa);
NFA buildNFA(string filename);
NFA RexToNFA();
set<State*, StatePtrCompare> move(const set<State*, StatePtrCompare>& states, InputCharType input); set<State*, StatePtrCompare> move(const set<State*, StatePtrCompare>& states, InputCharType input);
set<State*, StatePtrCompare> epsilonClosure(const set<State*, StatePtrCompare>& states); set<State*, StatePtrCompare> epsilonClosure(const set<State*, StatePtrCompare>& states);

23
nfa/test_main.cpp Normal file
View File

@ -0,0 +1,23 @@
#include <iostream>
#include <fstream>
#include <cassert>
#include "nfa.h"
using namespace std;
// Smoke test for the automaton pipeline: builds the NFA from the built-in
// regular expressions, converts it to a DFA, minimizes the DFA twice and then
// strips unreachable states. "OK1" / "OK2" are printed as progress markers
// after the NFA and DFA stages respectively.
int main(int argc, char** argv) {
    // Stage 1: regular expressions -> NFA.
    NFA lexerNfa = RexToNFA();
    // printNFA(lexerNfa);   // enable to dump the NFA
    cout << "OK1";

    // Stage 2: NFA -> DFA.
    DFA lexerDfa = nfaToDFA(lexerNfa);
    cout << "OK2";
    // printDFA(lexerDfa);   // enable to dump the raw DFA

    // Stage 3: run the minimizer over its own output, then prune unreachable states.
    DFA minimizedDFA = minimizeDFA(minimizeDFA(lexerDfa));
    removeUnreachableStates(minimizedDFA);
    // printDFA(minimizedDFA);   // enable to dump the final DFA

    return 0;
}

View File

@ -31,6 +31,7 @@ InputCharType getInputCharType(char c) {
case '}': return RCBRAC; case '}': return RCBRAC;
case ',': return COMMA; case ',': return COMMA;
case ';': return SEMI; case ';': return SEMI;
case '.': return POINT;
default: default:
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
return LETTER; return LETTER;
@ -88,6 +89,8 @@ string getInputChartypeName(InputCharType type) {
return ","; return ",";
case SEMI: case SEMI:
return ";"; return ";";
case POINT:
return ".";
case EPSILON: case EPSILON:
return "EPSILON"; return "EPSILON";
default: default:
@ -136,6 +139,8 @@ string getWordTypeName(WordType type, string buffer) {
// 整数 // 整数
case INT_VAL: case INT_VAL:
return "INT"; return "INT";
case FLOAT_CONST:
return "FLOAT";
//default //default
default: default: