compiler-temp/nfa/nfa.h

197 lines
5.1 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#pragma once
#ifndef __NFA__H__
#define __NFA__H__
#include <stdio.h>

#include <algorithm>
#include <deque>
#include <fstream>
#include <iostream>
#include <map>
#include <queue>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <utility>
#include <vector>
using namespace std;
// Category of a word symbol. Per the original note, the scanner returns
// pairs of <word symbol from the code under test, WordType>.
enum WordType {
    // --- Reserved keywords ---
    // Per the original note: once a lexeme has been recognized as an
    // identifier, it is first checked against the reserved words and only
    // then classified as IDN.
    KW_INT = 0,  // int
    KW_VOID,     // void
    KW_RETURN,   // return
    KW_CONST,    // const
    KW_MAIN,     // main

    // --- Operators ---
    OP_ADD,      // +
    OP_SUB,      // -
    OP_MUL,      // *
    OP_DIV,      // /
    OP_MOD,      // %
    OP_ASSIGN,   // =
    OP_GT,       // >
    OP_LT,       // <
    OP_EQ,       // ==
    OP_LE,       // <=
    OP_GE,       // >=
    OP_NE,       // !=
    OP_AND,      // &&
    OP_OR,       // ||

    // --- Separators ---
    SE_LBRAC,    // (  left bracket
    SE_RBRAC,    // )  right bracket
    SE_LCBRAC,   // {  left curly bracket
    SE_RCBRAC,   // }  right curly bracket
    SE_COMMA,    // ,
    SE_SEMI,     // ;

    // --- Everything else ---
    IDN,         // [a-zA-Z][a-zA-Z_0-9]*
    INT_VAL,     // -*[0-9]+
    UNKOWN       // unclassified (the spelling is part of the public name)
};
// Returns a string for the given WordType. Declaration only; the definition
// is not part of this header.
// NOTE(review): presumably the enumerator's printable name — confirm against
// the definition.
string getWordTypeName(WordType type);
// Categories of input characters; these are the keys of State::transitions.
// Original author's notes:
//   - the input categories do not exactly match the actual input;
//   - the order of ';' and ',' defined here differs from the lab guide.
// The trailing number in each comment is the enumerator's numeric value.
enum InputCharType {
    LETTER = 0,  // letter                      0
    UNDERLINE,   // _                           1
    // Original note: after a number has been recognized, a check is made
    // before returning it to avoid forms such as "01" (GCC accepts 01 and
    // treats it as 1).
    DIGIT,       // digit                       2

    // Operator characters
    ADD,         // +                           3
    SUB,         // -                           4
    MUL,         // *                           5
    DIV,         // /                           6
    MOD,         // %                           7
    EQ,          // =                           8
    GT,          // >                           9
    LT,          // <                           10
    NOT,         // !                           11
    AND,         // &                           12
    OR,          // |                           13

    // Separator characters
    LBRACKET,    // (                           14
    RBRACKET,    // )                           15
    LCBRAC,      // {                           16
    RCBRAC,      // }                           17
    COMMA,       // ,                           18
    SEMI,        // ;                           19

    EPSILON      // empty character (epsilon)   20
};
// Returns a string for the given InputCharType. Declaration only; the
// definition is not part of this header.
// NOTE(review): presumably a printable name for the category — confirm
// against the definition.
string getInputChartypeName(InputCharType type);
// Token classes. The short comments expand the abbreviations the same way
// the WordType prefixes (KW_/OP_/SE_) are used in this header.
enum class TokenType {
    KW = 0,   // keyword
    OP,       // operator
    SE,       // separator
    IDN,      // identifier
    INT,      // integer value
    UNKNOWN   // none of the above
};
// Derives a TokenType from a WordType and a `buffer` string. Declaration
// only; the definition is not part of this header.
// NOTE(review): `buffer` is presumably the matched lexeme — confirm against
// the definition and callers.
TokenType getTokenType(WordType wordType,string buffer);
// The token finally returned by the lexer: a value together with its type.
struct Token {
    std::string value;  // token value
    TokenType type;     // token class
};
// Determines the input character category of a single character.
// Declaration only; the definition is not part of this header.
InputCharType getInputCharType(char c);
// The next two functions take a word type plus a `buffer` string and return
// a string. Declarations only.
// NOTE(review): `buffer` is presumably the matched lexeme, and the returned
// strings presumably feed the token output — confirm against the definitions.
string getWordTypeName(WordType type,string buffer);
string getWordAttribute(WordType type,string buffer);
// 定义状态类
class State {
public:
int id; // 状态编号
bool isFinalState; // 是否为最终状态
WordType wordType; // 到达该状态时应该返回的词法单元类型
map<InputCharType, set<State*>> transitions; // 转移函数映射表,记录每个输入字符类型对应的目标状态集合
// 构造函数
State(int id) : id(id), isFinalState(false), wordType(UNKOWN) {}
// 添加状态转移映射
void addTransition(InputCharType input, State* targetState) {
transitions[input].insert(targetState);
}
// 转换终态
void setFinalState(bool isFinal, WordType type) {
isFinalState = isFinal;
wordType = type;
}
// 对象序号比较
bool operator<(const State& other) const {
return id < other.id;
}
};
// Comparator that orders State pointers by the id of the state they point
// to, so that a set using it is kept in id order. Both pointers are
// dereferenced, so neither may be null.
struct StatePtrCompare {
    bool operator()(const State* lhs, const State* rhs) const {
        const int leftId = lhs->id;
        const int rightId = rhs->id;
        return leftId < rightId;
    }
};
//定义NFA类
class NFA {
public:
State* startState; // 起始状态
set<State*, StatePtrCompare> endStates; // 终止状态集合
set<State*, StatePtrCompare> states; // 状态集合
// 构造函数
NFA(State* startState, set<State*, StatePtrCompare> endStates, set<State*, StatePtrCompare> states) :
startState(startState), endStates(endStates), states(states) {}
// void printNFA();
};
// Builds the state machine from the regular expressions (per the original
// comment). Takes no arguments; declaration only.
NFA RexToNFA();
// Prints an NFA. NOTE(review): output destination and format are defined
// elsewhere — confirm there.
void printNFA(const NFA& nfa);
// NOTE(review): by their names and signatures these look like the usual
// subset-construction helpers — move(S, a): states reachable from S on input
// a; epsilonClosure(S): S plus everything reachable through EPSILON
// transitions. Confirm against the definitions.
// Because of the `using namespace std;` above, this `move` shares a name
// with std::move; always qualify std::move inside this project.
set<State*, StatePtrCompare> move(const set<State*, StatePtrCompare>& states, InputCharType input);
set<State*, StatePtrCompare> epsilonClosure(const set<State*, StatePtrCompare>& states);
class DFA {
public:
State* startState; // 起始状态
set<State*, StatePtrCompare> endStates; // 终止状态集合
set<State*, StatePtrCompare> states; // 状态集合
DFA(State* startState, set<State*, StatePtrCompare> endStates, set<State*, StatePtrCompare> states) :
startState(startState), endStates(endStates), states(states) {}
};
// Modifies `dfa` in place (it is taken by non-const reference).
// NOTE(review): by its name, drops states that cannot be reached from the
// start state — confirm against the definition.
void removeUnreachableStates(DFA& dfa);
// Prints a DFA. NOTE(review): output destination and format are defined
// elsewhere — confirm there.
void printDFA(const DFA& dfa);
// Builds a DFA from the given NFA. NOTE(review): presumably by subset
// construction — confirm against the definition.
DFA nfaToDFA(const NFA& nfa);
struct SetComparator {
bool operator()(const set<State*, StatePtrCompare>& a, const set<State*, StatePtrCompare>& b) const {
if (a.size() != b.size()) {
return a.size() < b.size();
}
vector<State*> vecA(a.begin(), a.end());
vector<State*> vecB(b.begin(), b.end());
sort(vecA.begin(), vecA.end(), [](const State* a, const State* b) { return a->id < b->id; });
sort(vecB.begin(), vecB.end(), [](const State* a, const State* b) { return a->id < b->id; });
return vecA < vecB;
}
};
// Returns a string for a word type and a `buffer` string. Declaration only.
// NOTE(review): by its name, the grammar symbol name for the word; `buffer`
// is presumably the matched lexeme — confirm against the definition.
string getGrammarName(WordType type, string buffer);
// Returns a DFA derived from `dfa`, which is not modified (const reference).
// NOTE(review): by its name, a minimized equivalent — confirm.
DFA minimizeDFA(const DFA& dfa);
// Runs `dfa` over `input` and returns a list of strings.
// NOTE(review): `output` is presumably an output file name and the returned
// strings presumably describe the recognized tokens — confirm.
vector<string> recognize(const DFA& dfa, const string& input, const string& output);
// NOTE(review): by its name, returns the contents of the file `filename`;
// error behavior is defined elsewhere — confirm.
string readfile(const string& filename);
#endif