compiler-temp/main/nfa.h

173 lines
4.7 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#pragma once
#ifndef __NFA__H__
#define __NFA__H__
#include <stdio.h>

#include <algorithm>
#include <deque>
#include <fstream>
#include <iostream>
#include <map>
#include <queue>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <utility>
#include <vector>
using namespace std;
// Lexical category of a recognized word. Per the original note, results are
// reported as <lexeme from the code under test, WordType> pairs.
enum WordType {
    // --- Keywords ---
    // Per the original note: a lexeme recognized as an identifier is first
    // checked against the reserved words, and only then classified as IDN.
    KW_INT = 0,  // int
    KW_VOID,     // void
    KW_RETURN,   // return
    KW_CONST,    // const

    // --- Operators ---
    OP_ADD,     // +
    OP_SUB,     // -
    OP_MUL,     // *
    OP_DIV,     // /
    OP_MOD,     // %
    OP_ASSIGN,  // =
    OP_GT,      // >
    OP_LT,      // <
    OP_EQ,      // ==
    OP_LE,      // <=
    OP_GE,      // >=
    OP_NE,      // !=
    OP_AND,     // &&
    OP_OR,      // ||

    // --- Separators ---
    SE_LBRAC,   // (  left bracket
    SE_RBRAC,   // )  right bracket
    SE_LCBRAC,  // {  left curly bracket
    SE_RCBRAC,  // }  right curly bracket
    SE_COMMA,   // ,
    SE_SEMI,    // ;

    // --- Identifiers and literals ---
    IDN,      // [a-zA-Z][a-zA-Z_0-9]*
    INT_VAL,  // -*[0-9]+

    // Fallback category; also the initial wordType of a State.
    // (The spelling "UNKOWN" is part of the public interface and is kept as-is.)
    UNKOWN
};
// Presumably returns a printable name for `type`; the definition is not in
// this header -- TODO confirm against the implementation file.
string getWordTypeName(WordType type);
// Categories of input characters. State::transitions is keyed by these values.
enum InputCharType {
    LETTER = 0,     // letter
    UNDERLINE = 1,  // _
    // Digit. Per the original note: once a number has been recognized, a check
    // is made before returning to avoid forms such as "01" (the note remarks
    // that GCC accepts 01 and treats it as 1).
    DIGIT = 2,

    // --- Operator characters ---
    ADD = 3,   // +
    SUB = 4,   // -
    MUL = 5,   // *
    DIV = 6,   // /
    MOD = 7,   // %
    EQ = 8,    // =
    GT = 9,    // >
    LT = 10,   // <
    NOT = 11,  // !
    AND = 12,  // &
    OR = 13,   // |

    // --- Separator characters ---
    LBRACKET = 14,  // (
    RBRACKET = 15,  // )
    LCBRAC = 16,    // {
    RCBRAC = 17,    // }
    COMMA = 18,     // ,
    SEMI = 19,      // ;

    EPSILON = 20    // empty (epsilon) input
};
// Presumably returns a printable name for `type`; the definition is not in
// this header -- TODO confirm against the implementation file.
string getInputChartypeName(InputCharType type);
// Coarse token class stored in Token::type. The names appear to mirror the
// KW_/OP_/SE_/IDN/INT_VAL groups of WordType -- confirm the exact mapping in
// getTokenType.
enum class TokenType {
    KW = 0,      // presumably: keyword
    OP = 1,      // presumably: operator
    SE = 2,      // presumably: separator
    IDN = 3,     // presumably: identifier
    INT = 4,     // presumably: integer literal
    UNKNOWN = 5  // anything else
};
// Presumably maps a WordType (plus the matched text in `buffer`) to its coarse
// TokenType; the definition is not in this header -- TODO confirm.
// Note: `buffer` is passed by value, so each call copies the string.
TokenType getTokenType(WordType wordType,string buffer);
// A token: a text value paired with its coarse TokenType.
// Plain aggregate: `type` has no default initializer, so it must be set
// explicitly before it is read.
struct Token {
    string value;    // token text
    TokenType type;  // coarse class of `value`
};
// Determines the input-character category of `c` (translated from the
// original comment). How unrecognized characters are reported is not visible
// in this header -- TODO confirm in the definition.
InputCharType getInputCharType(char c);
// Overload of getWordTypeName that additionally receives the matched text in
// `buffer`; presumably used to refine the printed name -- TODO confirm.
// `buffer` is passed by value, so each call copies the string.
string getWordTypeName(WordType type,string buffer);
//定义状态类
class State {
public:
int id; // 状态编号
map<InputCharType, set<State*>> transitions; // 转移函数映射表,记录每个输入字符类型对应的目标状态集合
bool isFinalState; // 是否为最终状态
WordType wordType; // 到达该状态时应该返回的词法单元类型
State(int id) : id(id), isFinalState(false), wordType(UNKOWN) {}
void addTransition(InputCharType input, State* targetState) {
transitions[input].insert(targetState);
}
void setFinalState(bool isFinal, WordType type) {
isFinalState = isFinal;
wordType = type;
}
bool operator<(const State& other) const {
return id < other.id;
}
};
// Comparator that keeps a set of State pointers ordered by state number
// rather than by address. Two pointers whose states share the same id are
// treated as equivalent by containers using this comparator.
// Both pointers are dereferenced unconditionally, so neither may be null.
struct StatePtrCompare {
    bool operator()(const State* lhs, const State* rhs) const {
        const int leftId = lhs->id;
        const int rightId = rhs->id;
        return leftId < rightId;
    }
};
// Nondeterministic finite automaton.
// The class stores raw State pointers and declares no destructor, so it never
// frees the states it refers to; their lifetime is managed elsewhere.
class NFA {
public:
    State* startState;                       // start state
    set<State*, StatePtrCompare> endStates;  // set of final (accepting) states
    set<State*, StatePtrCompare> states;     // set of all states

    // Builds an NFA from its start state, final states and full state set.
    // The two sets are taken by value and moved into the members, so each
    // argument is copied at most once (and not at all for rvalue arguments).
    // std::move is qualified on purpose: this header also declares a free
    // function named `move`.
    NFA(State* startState, set<State*, StatePtrCompare> endStates, set<State*, StatePtrCompare> states) :
        startState(startState), endStates(std::move(endStates)), states(std::move(states)) {}
    // void printNFA();
};
// NFA construction and simulation helpers. Only the declarations are visible
// in this header; the behavior notes below are inferred from the names and
// signatures -- TODO confirm against the definitions.

// Presumably builds the NFA from built-in regular expressions.
NFA RexToNFA();
// Presumably prints a description of `nfa`.
void printNFA(const NFA& nfa);
// Presumably builds an NFA from the description stored in `filename`.
// `filename` is passed by value, so each call copies the string.
NFA buildNFA(string filename);
// Presumably returns the states reachable from `states` on one `input` transition.
// Because this header contains `using namespace std;`, an unqualified call to
// `move` also considers the std::move overloads; qualify calls where needed.
set<State*, StatePtrCompare> move(const set<State*, StatePtrCompare>& states, InputCharType input);
// Presumably returns the epsilon-closure of `states`.
set<State*, StatePtrCompare> epsilonClosure(const set<State*, StatePtrCompare>& states);
// Deterministic finite automaton.
// The class stores raw State pointers and declares no destructor, so it never
// frees the states it refers to; their lifetime is managed elsewhere.
class DFA {
public:
    State* startState;                       // start state
    set<State*, StatePtrCompare> endStates;  // set of final (accepting) states
    set<State*, StatePtrCompare> states;     // set of all states

    // Builds a DFA from its start state, final states and full state set.
    // The two sets are taken by value and moved into the members, so each
    // argument is copied at most once (and not at all for rvalue arguments).
    // std::move is qualified on purpose: this header also declares a free
    // function named `move`.
    DFA(State* startState, set<State*, StatePtrCompare> endStates, set<State*, StatePtrCompare> states) :
        startState(startState), endStates(std::move(endStates)), states(std::move(states)) {}
};
// DFA construction helpers. Only the declarations are visible in this header;
// the behavior notes below are inferred from the names and signatures --
// TODO confirm against the definitions.

// Presumably removes from `dfa` (modified in place) every state that cannot
// be reached from its start state.
void removeUnreachableStates(DFA& dfa);
// Presumably prints a description of `dfa`.
void printDFA(const DFA& dfa);
// Presumably converts `nfa` into an equivalent DFA.
DFA nfaToDFA(const NFA& nfa);
// Strict weak ordering over sets of states, suitable as the comparator of an
// ordered container keyed by a state set.
//
// Sets are ordered first by size, then lexicographically by state id.
// A set<State*, StatePtrCompare> already iterates in ascending id order, so no
// copying or re-sorting is needed. Comparing by id (rather than by raw pointer
// value) keeps the order deterministic across runs and consistent with
// StatePtrCompare, which identifies a state by its id.
// The stored pointers are dereferenced, so the sets must not contain null.
struct SetComparator {
    bool operator()(const set<State*, StatePtrCompare>& a, const set<State*, StatePtrCompare>& b) const {
        if (a.size() != b.size()) {
            return a.size() < b.size();
        }
        return std::lexicographical_compare(
            a.begin(), a.end(), b.begin(), b.end(),
            [](const State* lhs, const State* rhs) { return lhs->id < rhs->id; });
    }
};
// Only the declarations are visible in this header; the behavior notes below
// are inferred from the names and signatures -- TODO confirm against the
// definitions.

// Presumably returns the grammar-symbol name for a word of the given type and
// text. `buffer` is passed by value, so each call copies the string.
string getGrammarName(WordType type, string buffer);
// Presumably returns a minimized DFA equivalent to `dfa`.
DFA minimizeDFA(const DFA& dfa);
// Presumably runs `dfa` over the text `input` and returns the recognized
// tokens as strings; `output` looks like an output file path -- verify.
vector<string> recognize(const DFA& dfa, const string& input, const string& output);
// Presumably reads the file named `filename` and returns its contents.
string readfile(const string& filename);
#endif