202 lines
5.2 KiB
C++
202 lines
5.2 KiB
C++
#pragma once
|
||
#ifndef __NFA__H__
|
||
#define __NFA__H__
|
||
|
||
#include <stdio.h>

#include <algorithm>
#include <deque>
#include <fstream>
#include <iostream>
#include <map>
#include <queue>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <utility>
#include <vector>

using namespace std;
|
||
|
||
|
||
// Lexical category of a word symbol. The lexer reports each word found in the
// code under test as a <lexeme, WordType> pair.
//
// Enumerators are numbered consecutively from 0 in declaration order, so
// inserting or reordering entries changes every later numeric value.
enum WordType {
    // Reserved keywords. Once an identifier has been recognized, first check
    // whether it is a reserved word; only otherwise classify it as IDN.
    KW_INT = 0,   // int
    KW_VOID,      // void
    KW_RETURN,    // return
    KW_CONST,     // const
    KW_MAIN,      // main
    KW_IF,        // if
    KW_ELSE,      // else
    KW_FLOAT,     // float

    // Operators.
    OP_ADD,       // +
    OP_SUB,       // -
    OP_MUL,       // *
    OP_DIV,       // /
    OP_MOD,       // %
    OP_ASSIGN,    // =
    OP_GT,        // >
    OP_LT,        // <
    OP_EQ,        // ==
    OP_LE,        // <=
    OP_GE,        // >=
    OP_NE,        // !=
    OP_AND,       // &&
    OP_OR,        // ||

    // Separators.
    SE_LBRAC,     // ( left bracket
    SE_RBRAC,     // ) right bracket
    SE_LCBRAC,    // { left curly bracket
    SE_RCBRAC,    // } right curly bracket
    SE_COMMA,     // ,
    SE_SEMI,      // ;

    IDN,          // identifier: [a-zA-Z][a-zA-Z_0-9]*
    INT_VAL,      // integer literal: -*[0-9]+
    FLOAT_CONST,  // floating-point literal: [0-9]+'.'[0-9]+
    UNKOWN        // fallback category; the misspelling is kept because callers use this name
};
|
||
// Returns a string name for `type`. The definition is not in this header;
// presumably the result is the enumerator's printable spelling — confirm
// against the implementation.
string getWordTypeName(WordType type);
|
||
|
||
// Category of a single input character, used as the alphabet of the automata.
// The categories do not map one-to-one onto the actual input characters.
// Note: the relative order of ';' and ',' defined here differs from the one in
// the lab manual.
//
// Enumerators are numbered consecutively from 0 in declaration order; the
// number after each comment is the resulting value.
enum InputCharType {
    LETTER = 0,  // letter                                   0
    UNDERLINE,   // _                                        1
    // When a number has been recognized, a check is made before returning it
    // to avoid literals such as 01 (GCC accepts 01 and treats it as 1).
    DIGIT,       // digit                                    2

    // Operator characters.
    ADD,         // +                                        3
    SUB,         // -                                        4
    MUL,         // *                                        5
    DIV,         // /                                        6
    MOD,         // %                                        7
    EQ,          // =                                        8
    GT,          // >                                        9
    LT,          // <                                        10
    NOT,         // !                                        11
    AND,         // &                                        12
    OR,          // |                                        13

    // Separator characters.
    LBRACKET,    // (                                        14
    RBRACKET,    // )                                        15
    LCBRAC,      // {                                        16
    RCBRAC,      // }                                        17
    COMMA,       // ,                                        18
    SEMI,        // ;                                        19
    POINT,       // .                                        20

    EPSILON,     // empty (epsilon) input                    21
};
|
||
// Returns a string name for the input character category `type`. The
// definition is not in this header; the exact text returned should be checked
// against the implementation.
string getInputChartypeName(InputCharType type);
|
||
|
||
|
||
// Coarse token class attached to every Token.
// NOTE(review): there is no dedicated floating-point entry here although
// WordType has FLOAT_CONST; confirm how getTokenType maps such words.
enum class TokenType {
    KW = 0,   // keyword
    OP,       // operator
    SE,       // separator
    IDN,      // identifier
    INT,      // integer literal
    UNKNOWN   // anything else
};
|
||
// Maps a WordType (together with the lexeme text in `buffer`) to a TokenType.
// The definition is not in this header; how `buffer` influences the result
// should be checked against the implementation.
TokenType getTokenType(WordType wordType,string buffer);
|
||
|
||
// 定义最终返回的token的组成类型,包含值和类型两部分
|
||
typedef struct Token {
|
||
string value;
|
||
TokenType type;
|
||
} Token;
|
||
|
||
// 定义函数判断输入的字符类别
|
||
InputCharType getInputCharType(char c);
|
||
string getWordTypeName(WordType type,string buffer);
|
||
string getWordAttribute(WordType type,string buffer);
|
||
|
||
// 定义状态类
|
||
class State {
|
||
public:
|
||
int id; // 状态编号
|
||
bool isFinalState; // 是否为最终状态
|
||
WordType wordType; // 到达该状态时应该返回的词法单元类型
|
||
map<InputCharType, set<State*>> transitions; // 转移函数映射表,记录每个输入字符类型对应的目标状态集合
|
||
|
||
// 构造函数
|
||
State(int id) : id(id), isFinalState(false), wordType(UNKOWN) {}
|
||
|
||
// 添加状态转移映射
|
||
void addTransition(InputCharType input, State* targetState) {
|
||
transitions[input].insert(targetState);
|
||
}
|
||
|
||
// 转换终态
|
||
void setFinalState(bool isFinal, WordType type) {
|
||
isFinalState = isFinal;
|
||
wordType = type;
|
||
}
|
||
|
||
// 对象序号比较
|
||
bool operator<(const State& other) const {
|
||
return id < other.id;
|
||
}
|
||
};
|
||
|
||
//为了是set内部有序,定义排序结构体StatePtrCompare
|
||
struct StatePtrCompare {
|
||
bool operator()(const State* lhs, const State* rhs) const {
|
||
return lhs->id < rhs->id;
|
||
}
|
||
};
|
||
|
||
//定义NFA类
|
||
class NFA {
|
||
public:
|
||
State* startState; // 起始状态
|
||
set<State*, StatePtrCompare> endStates; // 终止状态集合
|
||
set<State*, StatePtrCompare> states; // 状态集合
|
||
|
||
// 构造函数
|
||
NFA(State* startState, set<State*, StatePtrCompare> endStates, set<State*, StatePtrCompare> states) :
|
||
startState(startState), endStates(endStates), states(states) {}
|
||
// void printNFA();
|
||
};
|
||
|
||
// 正则表达式构建状态机
|
||
NFA RexToNFA();
|
||
void printNFA(const NFA& nfa);
|
||
set<State*, StatePtrCompare> move(const set<State*, StatePtrCompare>& states, InputCharType input);
|
||
set<State*, StatePtrCompare> epsilonClosure(const set<State*, StatePtrCompare>& states);
|
||
|
||
class DFA {
|
||
public:
|
||
State* startState; // 起始状态
|
||
set<State*, StatePtrCompare> endStates; // 终止状态集合
|
||
set<State*, StatePtrCompare> states; // 状态集合
|
||
DFA(State* startState, set<State*, StatePtrCompare> endStates, set<State*, StatePtrCompare> states) :
|
||
startState(startState), endStates(endStates), states(states) {}
|
||
};
|
||
void removeUnreachableStates(DFA& dfa);
|
||
void printDFA(const DFA& dfa);
|
||
DFA nfaToDFA(const NFA& nfa);
|
||
void printDFA(const DFA& dfa);
|
||
struct SetComparator {
|
||
bool operator()(const set<State*, StatePtrCompare>& a, const set<State*, StatePtrCompare>& b) const {
|
||
if (a.size() != b.size()) {
|
||
return a.size() < b.size();
|
||
}
|
||
|
||
vector<State*> vecA(a.begin(), a.end());
|
||
vector<State*> vecB(b.begin(), b.end());
|
||
|
||
sort(vecA.begin(), vecA.end(), [](const State* a, const State* b) { return a->id < b->id; });
|
||
sort(vecB.begin(), vecB.end(), [](const State* a, const State* b) { return a->id < b->id; });
|
||
|
||
return vecA < vecB;
|
||
}
|
||
};
|
||
string getGrammarName(WordType type, string buffer);
|
||
DFA minimizeDFA(const DFA& dfa);
|
||
vector<string> recognize(const DFA& dfa, const string& input, const string& output);
|
||
string readfile(const string& filename);
|
||
#endif |