// compiler-temp/nfa/nfa.h

#pragma once
#ifndef __NFA__H__
#define __NFA__H__

#include <stdio.h>

#include <algorithm>
#include <deque>
#include <fstream>
#include <iostream>
#include <map>
#include <queue>
#include <set>
#include <sstream>
#include <stack>
#include <string>
#include <utility>
#include <vector>
using namespace std;


// Kind of a lexical word; the lexer reports <word found in the code under test, WordType>.
// Enumerator values are consecutive starting at 0, in declaration order.
enum WordType {
	// Reserved keywords. Once a lexeme has been recognised as an identifier it is
	// first checked against the reserved words, and only then classified as IDN.
	KW_INT = 0,   // keyword "int"
	KW_VOID,      // keyword "void"
	KW_RETURN,    // keyword "return"
	KW_CONST,     // keyword "const"
	KW_MAIN,      // keyword "main"

	// Operators.
	OP_ADD,       // "+"
	OP_SUB,       // "-"
	OP_MUL,       // "*"
	OP_DIV,       // "/"
	OP_MOD,       // "%"
	OP_ASSIGN,    // "="
	OP_GT,        // ">"
	OP_LT,        // "<"
	OP_EQ,        // "=="
	OP_LE,        // "<="
	OP_GE,        // ">="
	OP_NE,        // "!="
	OP_AND,       // "&&"
	OP_OR,        // "||"

	// Separators.
	SE_LBRAC,     // "(" left bracket
	SE_RBRAC,     // ")" right bracket
	SE_LCBRAC,    // "{" left curly bracket
	SE_RCBRAC,    // "}" right curly bracket
	SE_COMMA,     // ","
	SE_SEMI,      // ";"

	IDN,          // identifier: [a-zA-Z][a-zA-Z_0-9]*
	INT_VAL,      // integer value: -*[0-9]+
	UNKOWN        // unrecognised word (identifier spelling kept as-is)
};
// Returns a string for the given WordType. The definition is not in this header;
// presumably the string is a printable name of the enumerator — confirm against
// the implementation.
string getWordTypeName(WordType type);

// Classes of input characters.
// The classes do not correspond one-to-one to the actual input characters.
// Note: the order in which ';' and ',' are defined here differs from the lab handout.
enum InputCharType {
	LETTER    = 0,   // a letter
	UNDERLINE = 1,   // _
	DIGIT     = 2,   // a digit. When a number has been recognised, a check is made before
	                 // returning to avoid forms such as 01 (GCC itself accepts 01 as 1).
	// Operator characters
	ADD       = 3,   // +
	SUB       = 4,   // -
	MUL       = 5,   // *
	DIV       = 6,   // /
	MOD       = 7,   // %
	EQ        = 8,   // =
	GT        = 9,   // >
	LT        = 10,  // <
	NOT       = 11,  // !
	AND       = 12,  // &
	OR        = 13,  // |
	// Separator characters
	LBRACKET  = 14,  // (
	RBRACKET  = 15,  // )
	LCBRAC    = 16,  // {
	RCBRAC    = 17,  // }
	COMMA     = 18,  // ,
	SEMI      = 19,  // ;

	EPSILON   = 20,  // the empty (epsilon) input
};
// Returns a string for the given InputCharType. The definition is not in this
// header; presumably the string is a printable name of the character class —
// confirm against the implementation.
string getInputChartypeName(InputCharType type);


// Token categories. The category names match the prefixes used by the WordType
// enumerators (KW_*, OP_*, SE_*, IDN, INT_VAL); presumably each WordType is
// folded into one of these — confirm against getTokenType.
enum class TokenType {
	KW      = 0,
	OP      = 1,
	SE      = 2,
	IDN     = 3,
	INT     = 4,
	UNKNOWN = 5
};
// Returns the TokenType for a word given its WordType and text buffer (both
// passed by value). Definition not in this header; how `buffer` influences the
// result is not visible here — confirm against the implementation.
TokenType getTokenType(WordType wordType,string buffer);

// The token finally returned by the lexer; it consists of two parts, a value and a type.
struct Token {
	std::string value;  // the token's value
	TokenType type;     // the token's type
};

// Determines the character class (InputCharType) of a single input character.
InputCharType getInputCharType(char c);
// Two-argument overload of getWordTypeName taking the word type and a text buffer.
// Definition not in this header; the role of `buffer` is not visible here — confirm.
string getWordTypeName(WordType type,string buffer);
// Returns a string "attribute" for a word of the given type and text.
// NOTE(review): the exact meaning of the attribute is defined by the implementation — confirm.
string getWordAttribute(WordType type,string buffer);

// A single automaton state.
class State {
public:
	int id;                             // number identifying the state
	bool isFinalState = false;          // whether this is a final (accepting) state
	WordType wordType = UNKOWN;         // word type that should be returned when this state is reached
	// Transition table: for every input character class, the set of target states.
	// NOTE: the inner sets use the default std::set ordering on State*, not StatePtrCompare.
	std::map<InputCharType, std::set<State*>> transitions;

	// Creates a non-final state with the given id and word type UNKOWN.
	State(int id) : id(id) {}

	// Adds a transition on `input` leading to `targetState`.
	void addTransition(InputCharType input, State* targetState) {
		std::set<State*>& targets = transitions[input];
		targets.insert(targetState);
	}

	// Sets (or clears) the final-state flag together with the word type to report.
	void setFinalState(bool isFinal, WordType type) {
		wordType = type;
		isFinalState = isFinal;
	}

	// Orders states by their id.
	bool operator<(const State& other) const {
		return other.id > id;
	}
};

// Comparator that keeps a set of State pointers ordered by ascending state id.
// Both pointers are dereferenced unconditionally, so neither may be null.
struct StatePtrCompare {
	bool operator()(const State* lhs, const State* rhs) const {
		const int leftId = lhs->id;
		const int rightId = rhs->id;
		return leftId < rightId;
	}
};

// Nondeterministic finite automaton: a start state, the set of end (accepting)
// states and the set of all states.
// The class stores the State pointers exactly as given and declares no
// destructor, so it never deletes the states it refers to.
// Printing is provided by the free function printNFA(const NFA&).
class NFA {
public:
	State* startState; // start state
	set<State*, StatePtrCompare> endStates; // set of end (accepting) states
	set<State*, StatePtrCompare> states; // set of all states

	// Builds an NFA from its parts.
	// The two sets are taken by value and moved into the members, so a caller
	// passing temporaries (or std::move'd sets) pays for no copy at all, and a
	// caller passing lvalues pays for exactly one copy instead of two.
	NFA(State* startState, set<State*, StatePtrCompare> endStates, set<State*, StatePtrCompare> states) :
		startState(startState), endStates(std::move(endStates)), states(std::move(states)) {}
};

// Builds the state machine (NFA) from the regular expressions.
// Takes no arguments, so the expressions are fixed by the implementation.
NFA RexToNFA();
// Prints an NFA (output destination and format are defined by the implementation).
void printNFA(const NFA& nfa);
// Given a set of states and an input character class, returns a set of states.
// Presumably the usual subset-construction "move": the states reachable from
// `states` on `input` — confirm against the implementation.
set<State*, StatePtrCompare> move(const set<State*, StatePtrCompare>& states, InputCharType input);
// Given a set of states, returns a set of states. Presumably the closure of
// `states` under EPSILON transitions — confirm against the implementation.
set<State*, StatePtrCompare> epsilonClosure(const set<State*, StatePtrCompare>& states);

// Deterministic finite automaton: a start state, the set of end (accepting)
// states and the set of all states.
// The class stores the State pointers exactly as given and declares no
// destructor, so it never deletes the states it refers to.
class DFA {
public:
	State* startState; // start state
	set<State*, StatePtrCompare> endStates; // set of end (accepting) states
	set<State*, StatePtrCompare> states; // set of all states

	// Builds a DFA from its parts.
	// The two sets are taken by value and moved into the members, which avoids
	// the second copy the by-value parameters would otherwise incur.
	DFA(State* startState, set<State*, StatePtrCompare> endStates, set<State*, StatePtrCompare> states) :
		startState(startState), endStates(std::move(endStates)), states(std::move(states)) {}
};
// DFA helpers; all are defined outside this header.
// Takes the DFA by non-const reference, so it presumably prunes states in place
// that cannot be reached from the start state — confirm against the implementation.
void removeUnreachableStates(DFA& dfa);
// Prints a DFA (output destination and format are defined by the implementation).
void printDFA(const DFA& dfa);
// Produces a DFA from the given NFA.
DFA nfaToDFA(const NFA& nfa);
// Strict weak ordering over sets of states, usable as the comparator of a
// std::map / std::set whose keys are themselves sets of states.
//
// Order: a smaller set comes first; sets of equal size are compared
// lexicographically by the ids of their states.
//
// Both arguments use StatePtrCompare, so they already iterate in ascending id
// order and can be compared in place without copying or re-sorting.
// Elements are compared by id (via StatePtrCompare) rather than by raw pointer
// value: the resulting order is the same on every run, and no relational
// comparison is made between unrelated pointers.
// Like StatePtrCompare itself, this treats two states with the same id as the
// same state.
struct SetComparator {
	bool operator()(const set<State*, StatePtrCompare>& a, const set<State*, StatePtrCompare>& b) const {
		if (a.size() != b.size()) {
			return a.size() < b.size();
		}

		return std::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end(), StatePtrCompare());
	}
};
// Returns a string for a word of the given type and text. Presumably the name
// under which the word is known to the grammar — confirm against the implementation.
string getGrammarName(WordType type, string buffer);
// Returns a DFA derived from `dfa`; the argument is taken by const reference
// and therefore not modified. Presumably the minimised equivalent — confirm.
DFA minimizeDFA(const DFA& dfa);
// Runs recognition with the given DFA and returns a list of strings.
// NOTE(review): `input` and `output` are both strings; whether they are text
// or file paths, and what the returned strings contain, is not visible here — confirm.
vector<string> recognize(const DFA& dfa, const string& input, const string& output);
// Returns a string for the given file name; presumably the file's contents — confirm.
string readfile(const string& filename);
#endif