commit d6861884f28c1ff326e233a1bbc14b777c4fd493 Author: LiuYuanchi Date: Sun May 5 21:51:08 2024 +0800 init diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..13a0149 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*/build/ +build/ diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..8f4d0b4 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required(VERSION 3.10) +project(compiler-bin) + +# 收集所有的cpp源文件 +file(GLOB SOURCES_LL "LL1/*.cpp") +file(GLOB SOURCES_NFA "nfa/*.cpp") +file(GLOB SOURCES_MAIN "main/*.cpp") + +# 设置输出目录为 bin +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) + +# 创建静态链接库 +add_library(LL STATIC ${SOURCES_LL}) +add_library(nfa STATIC ${SOURCES_NFA}) + +# 添加头文件目录 +target_include_directories(LL PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/LL) +target_include_directories(nfa PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/nfa) + +# 添加可执行文件 +add_executable(main ${SOURCES_MAIN}) + +# 链接静态库 +target_link_libraries(main PRIVATE LL nfa) diff --git a/LL1/CMakeLists.txt b/LL1/CMakeLists.txt new file mode 100644 index 0000000..22471ff --- /dev/null +++ b/LL1/CMakeLists.txt @@ -0,0 +1,14 @@ +cmake_minimum_required(VERSION 3.10) +project(LL) + +# 收集所有的cpp源文件 +file(GLOB SOURCES "*.cpp") + +# 设置输出目录为 bin +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +# 创建静态链接库 +add_library(LL STATIC ${SOURCES}) + +# 添加头文件目录 +target_include_directories(LL PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) \ No newline at end of file diff --git a/LL1/LL1.cpp b/LL1/LL1.cpp new file mode 100644 index 0000000..f057f29 --- /dev/null +++ b/LL1/LL1.cpp @@ -0,0 +1,351 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "LL1.h" + + +LL1::LL1() +{ + read_grammar(); + init_grammar_set(); +} + +LL1::~LL1() +{ +} + +bool LL1::IsLL1() +{ + string symbol; + vector right_first = vector(); + vector left_follow; + for (int i = 0; i < grammar_rules.size(); i++) { + symbol.clear(); + right_first.clear(); + 
left_follow.clear(); + + symbol = grammar_rules[i].first; + + + + // 计算 产生式左侧 FOLLOW 集 + left_follow = follow[symbol]; + + // 计算 产生式右侧 FIRST 集 + + // 对 X1 的 非 $ 符号 加入 + for (int j = 0; j < first[grammar_rules[i].second[0]].size(); j++) { + if (first[grammar_rules[i].second[0]][j] == "$") { + continue; + } + right_first.push_back(first[grammar_rules[i].second[0]][j]); + } + + int cnt; + for (cnt = 1; cnt < grammar_rules[i].second.size(); cnt++) { + + // 当且仅当 有 $ 符号时 继续加入 + if (!infer_empty[grammar_rules[i].second[cnt - 1]]) { + break; + } + for (int j = 0; j < first[grammar_rules[i].second[cnt]].size(); j++) { + if (first[grammar_rules[i].second[cnt]][j] == "$") { + continue; + } + right_first.push_back(first[grammar_rules[i].second[cnt]][j]); + } + } + + // 若都能推导至 $ 符号时 加入 + if (cnt == grammar_rules[i].second.size() && infer_empty[grammar_rules[i].second[0]]) { + right_first.push_back("$"); + } + + // 对产生式右侧 FIRST 集 进行 去重 + set sright_first(right_first.begin(), right_first.end()); + right_first.clear(); + right_first.resize(sright_first.size()); + right_first.assign(sright_first.begin(), sright_first.end()); + + + + vector symbol_select; + + // 若产生式右侧 FIRST 集为 {$} 时 + if (right_first.size() == 1 && right_first[0] == "$") { + + // SELECT 集为 产生式右侧 FOLLOW 集 与 {$} 的交集 + symbol_select = left_follow; + if (find(left_follow.begin(), left_follow.end(), "$") == left_follow.end()) { + symbol_select.push_back("$"); + } + } + else + { + // SELECT 集为 产生式左侧 FIRST 集 + symbol_select = right_first; + } + + // 对 SELECT 集 进行排序 方便接下来进行集合运算 + sort(symbol_select.begin(), symbol_select.end()); + + vector new_select = vector(); + + // 判断 SELECT 表中有无现有数据 + if (select.find(symbol) == select.end()) { + + select[symbol] = symbol_select; + } + else { + + // 判断两个相同产生式左侧 SELECT 集 是否相交 + set_intersection(symbol_select.begin(), symbol_select.end(), select[symbol].begin(), select[symbol].end(), back_inserter(new_select)); + + if (new_select.size() == 0) { + // 不相交,继续运算,存入两者并集 + 
set_union(symbol_select.begin(), symbol_select.end(), select[symbol].begin(), select[symbol].end(), back_inserter(new_select)); + } + else + { + // 非 LL(1) 文法,退出 + cout << "This grammar is not LL (1) grammar" << endl; + return false; + } + + } + + } + + // cout << "该文法为 LL(1) 文法!" << endl; + return true; +} + +void LL1::build_LL1_predict() +{ + // 对每一个 非终结符 进行初始化行 + for (int i = 0; i < VNs.size(); i++) { + if (LL1_predict.find(VNs[i]) == LL1_predict.end()) { + LL1_predict[VNs[i]] = unordered_map(); + } + } + + string symbol; + vector right_first = vector(); + vector left_follow; + + // 遍历 产生式 构建 预测分析表 + for (int i = 0; i < grammar_rules.size(); i++) { + symbol.clear(); + right_first.clear(); + left_follow.clear(); + + symbol = grammar_rules[i].first; + + + // 计算 产生式左侧 FOLLOW 集 + left_follow = follow[symbol]; + + unordered_map &symbol_predict = LL1_predict[symbol]; + + + // 计算 产生式右侧 FIRST 集 + + // 对 X1 的 非 $ 符号 加入 + for (int j = 0; j < first[grammar_rules[i].second[0]].size(); j++) { + if (first[grammar_rules[i].second[0]][j] == "$") { + continue; + } + right_first.push_back(first[grammar_rules[i].second[0]][j]); + } + + int cnt; + for (cnt = 1; cnt < grammar_rules[i].second.size(); cnt++) { + + // 当且仅当 有 $ 符号时 继续加入 + if (!infer_empty[grammar_rules[i].second[cnt - 1]]) { + break; + } + for (int j = 0; j < first[grammar_rules[i].second[cnt]].size(); j++) { + if (first[grammar_rules[i].second[cnt]][j] == "$") { + continue; + } + right_first.push_back(first[grammar_rules[i].second[cnt]][j]); + } + } + + // 若都能推导至 $ 符号时 加入 + if (cnt == grammar_rules[i].second.size() && infer_empty[grammar_rules[i].second[0]]) { + right_first.push_back("$"); + } + + // 对产生式右侧 FIRST 集 进行 去重 + set sright_first(right_first.begin(), right_first.end()); + right_first.clear(); + right_first.resize(sright_first.size()); + right_first.assign(sright_first.begin(), sright_first.end()); + + // 循环遍历 FIRST 集进行初始化 + for (int j = 0; j < right_first.size(); j++) { + if (right_first[j] == "$") { + pair> 
new_rule (grammar_rules[i].first, vector()); + new_rule.second.push_back("$"); + int rule_id = insert_rule(new_rule); + + for (int k = 0; k < left_follow.size(); k++) { + symbol_predict[left_follow[k]] = rule_id; + } + } + symbol_predict[right_first[j]] = i; + + } + + } + + +} + +void LL1::print_LL1_predict() +{ + cout << "[LL1_predict]:" << endl; + for (auto iter = LL1_predict.begin(); iter != LL1_predict.end(); ++iter) { + cout << (*iter).first << " "; + for (auto j = (*iter).second.begin(); j != (*iter).second.end(); ++j) { + cout << (*j).first << "," << (*j).second << " "; + } + cout << endl; + + } + cout << endl << endl; + +} + +void LL1::build_LL1_grammar() +{ + // 符号栈 + stack stack; + int token_cnt = 0; + + // 起始符 入栈 + stack.push(start); + + while (!stack.empty()) + { + LL1_grammar_log.push_back(string()); + + // 栈顶符号 + // 判断栈顶是否为 空符号 + if (stack.top() == "$") { + // 栈空 以 EOF 表示 + LL1_grammar_log.back() += "EOF"; + } + else + { + LL1_grammar_log.back() += stack.top(); + } + + // 添加 # 分割 + LL1_grammar_log.back() += "#"; + + // 面临输入的符号 + string this_token; + if (token_cnt == token_strings.size()) { + // 栈空 以 EOF 表示 + this_token = "$"; + LL1_grammar_log.back() += "EOF"; + } + else + { + this_token = token_strings[token_cnt]; + LL1_grammar_log.back() += token_strings[token_cnt]; + } + + // 对栈顶元素与即将输入的符号进行比较 + if (stack.top() == this_token) { + // 栈顶出栈 token 指向下一位 + token_cnt++; + stack.pop(); + + if (this_token == "$") { + // 分析成功 结束分析 + LL1_grammar_log.back() += "\taccept"; + } + else + { + // 跳过 + LL1_grammar_log.back() += "\tmove"; + } + } + // 若为终结符 + else if (find(VTs.begin(), VTs.end(), stack.top()) != VTs.end()) { + if (stack.top() == "$") { + stack.pop(); + LL1_grammar_log.pop_back(); + } + else { + LL1_grammar_log.back() += "\terror"; + return; + } + } + else + { + auto tab = LL1_predict[stack.top()]; + + if (tab.find(this_token) == tab.end()) { + LL1_grammar_log.back() += "\terror"; + return; + } + else + { + auto this_rule = 
grammar_rules[tab[this_token]]; + stack.pop(); + for (int i = this_rule.second.size() - 1; i >= 0; i--) { + stack.push(this_rule.second[i]); + } + LL1_grammar_log.back() += "\treduction"; + } + } + } + +} + +void LL1::print_LL1_grammar_log() +{ + for (int i = 0; i < LL1_grammar_log.size(); ++i) { + cout << LL1_grammar_log[i] << endl; + } +} + +void LL1::fileout_LL1_grammar_log(string file_name) +{ + //打开结果输出文件 + fstream outfile(file_name); + + if (!outfile.is_open()) { + cout << "[FILEOUT] fail to open file" << endl; + } + + for (int i = 0; i < LL1_grammar_log.size(); ++i) { + outfile << LL1_grammar_log[i] << endl; + } + outfile.close(); +} + +int LL1::insert_rule(pair>& new_rule) +{ + int cnt; + for (cnt = 0; cnt < grammar_rules.size(); cnt++) { + // 当 产生式规则 中存在这条产生式时 返回序号 + if (grammar_rules[cnt].first == new_rule.first && grammar_rules[cnt].second == new_rule.second) { + return cnt; + } + } + // 若不存在 返回序号的同时加入 + grammar_rules.push_back(new_rule); + return cnt; +} + diff --git a/LL1/LL1.h b/LL1/LL1.h new file mode 100644 index 0000000..5eaba92 --- /dev/null +++ b/LL1/LL1.h @@ -0,0 +1,32 @@ +// LL1 语法分析器 +#ifndef LL1_H +#define LL1_H + +#include "grammar.h" + +using namespace std; + +class LL1:public Grammar{ +public: + LL1(); + ~LL1(); + + bool IsLL1(); // 判断该文法是否为 LL1 文法 + void build_LL1_predict(); // 构建 LL1 的预测分析表 + void print_LL1_predict(); // 打印 LL1 的预测分析表 + void build_LL1_grammar(); // 构建规约序列 + void print_LL1_grammar_log(); + void fileout_LL1_grammar_log(string file_name); + + +private: + unordered_map> select; // 计算符号的 SELECT 集合 + unordered_map> LL1_predict; // LL1 的预测分析表 + vector LL1_grammar_log; // 规约序列 + + int insert_rule(pair>& new_rule); // 增加新的规则 +}; + + + +#endif // !LL1_H diff --git a/LL1/grammar.cpp b/LL1/grammar.cpp new file mode 100644 index 0000000..2aefe3e --- /dev/null +++ b/LL1/grammar.cpp @@ -0,0 +1,520 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "grammar.h" + + +Grammar::Grammar() +{ + +} + 
+Grammar::~Grammar() +{ + +} + + + +void Grammar::read_grammar() { + fstream infile; + infile.open(grammar_file,ios::in); + if (!infile.is_open()) + { + cout << "[READ_GRAMMAR] fail to open file: "<< grammar_file << endl; + return; + } + + string buf; + string arrow = "->"; + string farrow; + bool start_flag = true; + string left; + string forms; + + while (!infile.eof()) { + // 清理 string + buf.clear(); + left.clear(); + forms.clear(); + farrow.clear(); + + + grammar_rules.push_back(pair>()); + getline(infile, buf); + + stringstream ss(buf); + + // 读取产生式左侧 + ss >> left; + grammar_rules.back().first = left; + symbols.push_back(left); + VNs.push_back(left); + + // 存储 start + if (start_flag) { + start = left; + start_flag = false; + } + + // 读取 -> 符号 并保证合法 + ss >> farrow; + if (farrow != arrow) { + cout << "Grammar reading error" << endl; + } + + // 读取产生式右侧 + while (ss >> forms) + { + grammar_rules.back().second.push_back(forms); + symbols.push_back(forms); + forms.clear(); + } + } + + // 符号集 和 非终结符 去重 + set ssymbols(symbols.begin(), symbols.end()); + symbols.clear(); + symbols.resize(ssymbols.size()); + symbols.assign(ssymbols.begin(), ssymbols.end()); + + set sVNs(VNs.begin(), VNs.end()); + VNs.clear(); + VNs.resize(sVNs.size()); + VNs.assign(sVNs.begin(), sVNs.end()); + + // 符号集 和 非终结符 排序 以保证差集的成功 + sort(symbols.begin(), symbols.end()); + sort(VNs.begin(), VNs.end()); + + // 取差集 得到终极符 + set_difference(symbols.begin(), symbols.end(), VNs.begin(), VNs.end(), back_inserter(VTs)); + + infile.close(); + +} + +void Grammar::print_grammar() +{ + cout << "[start]: " << endl << start << endl << endl; + + cout << "[VTs]:" << endl; + for (int i = 0; i < VTs.size(); i++) { + cout << VTs[i] << " "; + if (((i + 1) % 5) == 0) + cout << endl; + } + cout << endl << endl; + + cout << "[VNs]:" << endl; + for (int i = 0; i < VNs.size(); i++) { + cout << VNs[i] << " "; + if (((i + 1) % 5) == 0) + cout << endl; + } + cout << endl << endl; + + cout << "[symbols]:" << endl; + for (int i = 
0; i < symbols.size(); i++) { + cout << symbols[i] << " "; + if (((i + 1) % 5) == 0) + cout << endl; + } + cout << endl << endl; + + cout << "[grammar_rules]: " << grammar_rules.size() << endl; + for (int i = 0; i < grammar_rules.size(); ++i) { + cout << grammar_rules[i].first << " -> "; + for (int j = 0; j < grammar_rules[i].second.size(); ++j) { + cout << "\"" << grammar_rules[i].second[j] << "\" "; + } + cout << endl; + } + cout << endl << endl; +} + +void Grammar::expand_grammar() +{ + string new_start = start + "\'"; + pair> new_rule = pair>(new_start, vector()); + new_rule.second.push_back(start); + + VNs.push_back(new_start); + symbols.push_back(new_start); + grammar_rules.insert(grammar_rules.begin(), new_rule); + start = new_start; + + // 符号集排序 + sort(symbols.begin(), symbols.end()); + +} + +void Grammar::init_grammar_set() +{ + string symbol; + + + + // 对符号集中各符号进行推导 是否可以到达 $ 空符号 + for (int i = 0; i < symbols.size(); i++) { + symbol = symbols[i]; + this->symbol_infer_empty(symbol); + symbol.clear(); + } + + // 初始化符号在产生式的 出现 依赖 情况 + init_appears_depend(); + + // 对符号集中各符号进行推导 FIRST 集 + for (int i = 0; i < symbols.size(); i++) { + symbol = symbols[i]; + this->symbol_infer_first(symbol); + symbol.clear(); + } + + // 对符号集中各符号进行推导 FOLLOW 集 + + // 符号队列 + deque queue; + + // 初次遍历所有符号 生成初始的 FOLLOW 集 + + // 构建 start 的 FOLLOW 集 + follow[start] = this->symbol_infer_follow(start); + follow[start].push_back("$"); + queue.push_back(start); + + // 构建除 start 的 FOLLOW 集 + for (int i = 0; i < symbols.size(); i++) { + symbol = symbols[i]; + if (symbol == start) { + symbol.clear(); + continue; + } + follow[symbol] = this->symbol_infer_follow(symbol); + queue.push_back(symbol); + symbol.clear(); + } + + // 对 符号队列 进行进一步生成 + while (!queue.empty()) { + // 读取 符号队列 开头 + symbol = queue.front(); + queue.pop_front(); + + // 若 FOLLOW 集发生改变 + vector new_symbol_follow = this->symbol_infer_follow(symbol); + if (follow[symbol].size() < new_symbol_follow.size()) { + // 对依赖 该符号 的所有符号添加至 符号队列 
+ vector dep = depend[symbol]; + for (int i = 0; i < dep.size(); i++) { + queue.push_back(dep[i]); + } + follow[symbol] = new_symbol_follow; + } + symbol.clear(); + } + + +} + +void Grammar::print_grammar_set() +{ + // 打印符号在产生式的出现情况 + cout << "[left_appears]:" << endl; + for (int i = 0; i < symbols.size(); i++) { + cout << "LEFT( " << symbols[i] << " ) = {"; + for (int j = 0; j < left_appears[symbols[i]].size(); j++) { + cout << " " << left_appears[symbols[i]][j] << " "; + } + cout << "}" << endl; + } + cout << endl << endl; + + cout << "[right_appears]:" << endl; + for (int i = 0; i < symbols.size(); i++) { + cout << "RIGHT( " << symbols[i] << " ) = {"; + for (int j = 0; j < right_appears[symbols[i]].size(); j++) { + cout << " " << right_appears[symbols[i]][j] << " "; + } + cout << "}" << endl; + } + cout << endl << endl; + + // 打印 FOLLOW 集的依赖关系 + cout << "[depend]:" << endl; + for (int i = 0; i < symbols.size(); i++) { + cout << "DEPEND( " << symbols[i] << " ) = {"; + for (int j = 0; j < depend[symbols[i]].size(); j++) { + cout << " " << depend[symbols[i]][j] << " "; + } + cout << "}" << endl; + } + cout << endl << endl; + + + // 打印是否可以推导出 $ 空符号 + cout << "[infer_empty]:" << endl; + for (int i = 0; i < symbols.size(); i++) { + cout << symbols[i]<<" -> " << infer_empty[symbols[i]] << endl; + } + cout << endl << endl; + + // 打印 FIRST 集 + cout << "[FIRST]:" << endl; + for (int i = 0; i < symbols.size(); i++) { + cout << "FIRST( " << symbols[i] << " ) = {"; + for (int j = 0; j < first[symbols[i]].size(); j++) { + cout << " " << first[symbols[i]][j] << " "; + } + cout << "}" << endl; + } + cout << endl << endl; + + // 打印 FOLLOW 集 + cout << "[FOLLOW]:" << endl; + for (int i = 0; i < symbols.size(); i++) { + cout << "FOLLOW( " << symbols[i] << " ) = {"; + for (int j = 0; j < follow[symbols[i]].size(); j++) { + cout << " " << follow[symbols[i]][j] << " "; + } + cout << "}" << endl; + } + cout << endl << endl; + +} + +void Grammar::get_token_strings(vector& 
my_token_strings) +{ + token_strings.resize(my_token_strings.size()); + token_strings.assign(my_token_strings.begin(), my_token_strings.end()); + +} + +void Grammar::print_token_strings() +{ + for (int i = 0; i < token_strings.size(); i++) { + cout << token_strings[i] << endl; + } +} + +void Grammar::init_appears_depend() +{ + for (int k = 0; k < symbols.size(); k++) { + left_appears[symbols[k]] = vector(); + right_appears[symbols[k]] = vector(); + depend[symbols[k]] = vector(); + for (int i = 0; i < grammar_rules.size(); i++) { + if (grammar_rules[i].first == symbols[k]) { + // 产生式左侧相等 存入 left + left_appears[symbols[k]].push_back(i); + + // 对该产生式构建依赖关系 + for (int m = 0; m < grammar_rules[i].second.size(); m++) { + int n; + + // 判断该产生式右侧符号是否可以推导至 $ 空符号 + for (n = m + 1; n < grammar_rules[i].second.size(); n++) { + if (!infer_empty[grammar_rules[i].second[n]]) { + break; + } + } + // 若可以推导 按照入栈的方式依次加入 + if (n == grammar_rules[i].second.size()) { + if (symbols[k] != grammar_rules[i].second[m]) { + depend[symbols[k]].push_back(grammar_rules[i].second[m]); + } + } + + } + } + for (int j = 0; j < grammar_rules[i].second.size(); j++) { + // 产生式右侧相等 存入 left + if (grammar_rules[i].second[j] == symbols[k]) { + right_appears[symbols[k]].push_back(i); + break; + } + } + } + } + +} + +bool Grammar::symbol_infer_empty(const string& symbol) { + + // 已经进行推导过 + if (infer_empty.find(symbol) != infer_empty.end()) { + return infer_empty[symbol]; + } + + // 当符号为终结符时,当且仅当为 $ 可以推导出 $ + if (find(VTs.begin(), VTs.end(), symbol) != VTs.end()) { + infer_empty[symbol] = (symbol == "$") ; + return infer_empty[symbol]; + } + + // 当符号为非终结符时,通过产生式进行推导 + for (int i = 0; i < grammar_rules.size(); i++) { + // 当该符号为产生式左侧时 + if (grammar_rules[i].first == symbol) { + int j; + vector rule_right = grammar_rules[i].second; + for (j = 0; j < rule_right.size(); j++) { + // 递归推导 产生式右侧无法推导至 $ 时 + if (!(this->symbol_infer_empty(rule_right[j]))) { + break; + } + } + + // 当且仅当产生式右侧可以推导至 $ 时 + if (j == 
rule_right.size()) { + infer_empty[symbol] = true; + return infer_empty[symbol]; + } + } + } + + // 当各产生式都无法推导至 $ 时,则无法推导 + infer_empty[symbol] = false; + return infer_empty[symbol]; + +} + +vector Grammar::symbol_infer_first(const string& symbol) +{ + // 已经推导过 FIRST 集 + if (first.find(symbol) != first.end()) { + return first[symbol]; + } + + vector symbol_first; + + // 当符号为终结符时 FIRST 集为它本身 + if (find(VTs.begin(), VTs.end(), symbol) != VTs.end()) { + symbol_first.push_back(symbol); + first[symbol] = symbol_first; + return first[symbol]; + } + + // 当符号为非终结符时,通过产生式进行推导 + for (int i = 0; i < grammar_rules.size(); i++) { + // 当该符号为产生式左侧时 + if (grammar_rules[i].first == symbol) { + int j; + for (j = 0; j < grammar_rules[i].second.size(); j++) { + + // 依次添加所有产生式右侧的 + vector firsts = symbol_infer_first(grammar_rules[i].second[j]); + for (int k = 0; k < firsts.size(); k++) { + symbol_first.push_back(firsts[k]); + } + + // 若产生式右侧无法推导至 $ 空字符时 中断 + if (!infer_empty[grammar_rules[i].second[j]]) { + break; + } + + } + + // 当且仅当产生式右侧可以推导至 $ 时 将 $ 加入到 FIRST 集中 + if (j == grammar_rules[i].second.size()) { + symbol_first.push_back("$"); + } + } + } + + // 对当前 FIRST 集进行 去重 与 排序 + set ssymbol_first(symbol_first.begin(), symbol_first.end()); + symbol_first.clear(); + symbol_first.resize(ssymbol_first.size()); + symbol_first.assign(ssymbol_first.begin(), ssymbol_first.end()); + + sort(symbol_first.begin(), symbol_first.end()); + + // 返回非终结符的 FIRST 集 + first[symbol] = symbol_first; + return first[symbol]; +} + +vector Grammar::symbol_infer_follow(const string& symbol) +{ + vector symbol_follow; + + // 获取该符号出现在哪些产生式右侧 + vector right_appear = right_appears[symbol]; + for (int i = 0; i < right_appear.size(); i++) { + int cnt; + + // 获取该产生式右侧的符号 + vector rule_right = grammar_rules[right_appear[i]].second; + + // 依次遍历 该产生式右侧 至 该符号 后一位 + for (cnt = 0; cnt < rule_right.size(); cnt++) { + if (rule_right[cnt] == symbol) { + break; + } + } + cnt++; + + // 遍历 剩余产生式右侧 + for (; cnt < 
rule_right.size(); cnt++) { + + // 依次获取 后置元素 的 FIRST 集 + vector symbol_first = first[rule_right[cnt]]; + + // 将 该 FIRST 集 循环添加至 symbol_follow 中 + for (int j = 0; j < symbol_first.size(); j++) { + symbol_follow.push_back(symbol_first[j]); + } + + // 若不可达 $ 中断遍历 + if (!infer_empty[rule_right[cnt]]) { + break; + } + } + + // 当剩余产生式右侧均可到达 $ 时 + if (cnt == rule_right.size()) { + if (follow.find(grammar_rules[right_appear[i]].first) != follow.end()) { + + // 将产生式左侧的 FOLLOW 集 加入到 当前符号的 FOLLOW 集中 + vector first_follow = follow[grammar_rules[right_appear[i]].first]; + for (int j = 0; j < first_follow.size(); j++) { + symbol_follow.push_back(first_follow[j]); + } + + } + + } + + } + + // 删除不需要的 $ 空字符 + auto it = remove(symbol_follow.begin(), symbol_follow.end(), "$"); + auto it1 = symbol_follow.erase(it, symbol_follow.end()); + + + // 对当前 FOLLOW 集 进行去重排序 + set ssymbol_follow(symbol_follow.begin(), symbol_follow.end()); + symbol_follow.clear(); + symbol_follow.resize(ssymbol_follow.size()); + symbol_follow.assign(ssymbol_follow.begin(), ssymbol_follow.end()); + + sort(symbol_follow.begin(), symbol_follow.end()); + + + + return symbol_follow; +} + + + + + + + diff --git a/LL1/grammar.h b/LL1/grammar.h new file mode 100644 index 0000000..95e91b1 --- /dev/null +++ b/LL1/grammar.h @@ -0,0 +1,55 @@ +// 语法生成器 +#ifndef GRAMMAR_H +#define GRAMMAR_H + + +#include +#include +#include +#include +#include + +using namespace std; + +class Grammar +{ +public: + const string grammar_file = "./tests/grammar.txt"; + + Grammar(); + ~Grammar(); + void read_grammar(); // 读取语法规则 + void print_grammar(); // 打印语法规则 + void expand_grammar(); // 拓展语法规则 + void init_grammar_set(); // 初始化语法相关集合 + void print_grammar_set(); // 打印语法相关集合 + void get_token_strings(vector &); // 获取 token_stirngs + void print_token_strings(); + +protected: + vector>> grammar_rules; // 产生式规则 + string start; // 起始字符 + vector symbols; // 符号 + vector VTs; // 终结符 + vector VNs; // 非终结符 + unordered_map> first; // FIRST 集 + 
unordered_map> follow; // FOLLOW 集 + unordered_map infer_empty; // 是否可以推导出 $ 空字符 + vector token_strings; + + +private: + unordered_map> left_appears; // 该符号出现在哪些产生式左侧 + unordered_map> right_appears; // 该符号出现在哪些产生式右侧 + unordered_map> depend; // FOLLOW 集的依赖关系 + + + void init_appears_depend(); // 获取 appear depend 集合 + bool symbol_infer_empty(const string& symbol); // 判断符号是否可以推导出 $ 空字符 + vector symbol_infer_first(const string& symbol);// 推导符号的 FIRST 集 + vector symbol_infer_follow(const string& symbol);// 推导符号的 FOLLOW 集 + +}; + + +#endif // !GRAMMAR_H \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..cbea495 --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +三个模块 LL1 NFA MAIN +MAIN负责整合LL1和NFA + +每个模块基于 cmake 实现静态连接,cmakelist已经写好 +``` +mkdir build +cd build + +``` \ No newline at end of file diff --git a/bin/LL.lib b/bin/LL.lib new file mode 100644 index 0000000..b5d67a3 Binary files /dev/null and b/bin/LL.lib differ diff --git a/bin/nfa.lib b/bin/nfa.lib new file mode 100644 index 0000000..4e55d26 Binary files /dev/null and b/bin/nfa.lib differ diff --git a/main/CMakeLists.txt b/main/CMakeLists.txt new file mode 100644 index 0000000..3b9a75c --- /dev/null +++ b/main/CMakeLists.txt @@ -0,0 +1,14 @@ +cmake_minimum_required(VERSION 3.10) +project(main) + +file(GLOB SOURCES "*.cpp") + +add_executable(main ${SOURCES}) + +# 链接静态库 +target_link_libraries(main PRIVATE ${CMAKE_BINARY_DIR}/../../bin/LL.lib) +target_link_libraries(main PRIVATE ${CMAKE_BINARY_DIR}/../../bin/nfa.lib) + + +# 添加头文件目录 +# target_include_directories(main PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/main/LL1.h b/main/LL1.h new file mode 100644 index 0000000..5eaba92 --- /dev/null +++ b/main/LL1.h @@ -0,0 +1,32 @@ +// LL1 语法分析器 +#ifndef LL1_H +#define LL1_H + +#include "grammar.h" + +using namespace std; + +class LL1:public Grammar{ +public: + LL1(); + ~LL1(); + + bool IsLL1(); // 判断该文法是否为 LL1 文法 + void build_LL1_predict(); // 构建 LL1 的预测分析表 + void 
print_LL1_predict(); // 打印 LL1 的预测分析表 + void build_LL1_grammar(); // 构建规约序列 + void print_LL1_grammar_log(); + void fileout_LL1_grammar_log(string file_name); + + +private: + unordered_map> select; // 计算符号的 SELECT 集合 + unordered_map> LL1_predict; // LL1 的预测分析表 + vector LL1_grammar_log; // 规约序列 + + int insert_rule(pair>& new_rule); // 增加新的规则 +}; + + + +#endif // !LL1_H diff --git a/main/grammar.h b/main/grammar.h new file mode 100644 index 0000000..95e91b1 --- /dev/null +++ b/main/grammar.h @@ -0,0 +1,55 @@ +// 语法生成器 +#ifndef GRAMMAR_H +#define GRAMMAR_H + + +#include +#include +#include +#include +#include + +using namespace std; + +class Grammar +{ +public: + const string grammar_file = "./tests/grammar.txt"; + + Grammar(); + ~Grammar(); + void read_grammar(); // 读取语法规则 + void print_grammar(); // 打印语法规则 + void expand_grammar(); // 拓展语法规则 + void init_grammar_set(); // 初始化语法相关集合 + void print_grammar_set(); // 打印语法相关集合 + void get_token_strings(vector &); // 获取 token_stirngs + void print_token_strings(); + +protected: + vector>> grammar_rules; // 产生式规则 + string start; // 起始字符 + vector symbols; // 符号 + vector VTs; // 终结符 + vector VNs; // 非终结符 + unordered_map> first; // FIRST 集 + unordered_map> follow; // FOLLOW 集 + unordered_map infer_empty; // 是否可以推导出 $ 空字符 + vector token_strings; + + +private: + unordered_map> left_appears; // 该符号出现在哪些产生式左侧 + unordered_map> right_appears; // 该符号出现在哪些产生式右侧 + unordered_map> depend; // FOLLOW 集的依赖关系 + + + void init_appears_depend(); // 获取 appear depend 集合 + bool symbol_infer_empty(const string& symbol); // 判断符号是否可以推导出 $ 空字符 + vector symbol_infer_first(const string& symbol);// 推导符号的 FIRST 集 + vector symbol_infer_follow(const string& symbol);// 推导符号的 FOLLOW 集 + +}; + + +#endif // !GRAMMAR_H \ No newline at end of file diff --git a/main/main.cpp b/main/main.cpp new file mode 100644 index 0000000..c3ed26d --- /dev/null +++ b/main/main.cpp @@ -0,0 +1,80 @@ +#include +#include +#include + +#include "nfa.h" +#include "grammar.h" +#include 
"LL1.h" +using namespace std; + +int main(int argc, char** argv) { + + NFA nfa = RexToNFA(); + printNFA(nfa); + + DFA dfa = nfaToDFA(nfa); + //printDFA(dfa); + DFA minimizedDFA = minimizeDFA(minimizeDFA(dfa)); + removeUnreachableStates(minimizedDFA); + //printDFA(minimizedDFA); + + string inputs[6] = { + "tests/00/00.txt", + "tests/01/01.txt", + "tests/02/02.txt", + "tests/07/07.txt", + "tests/08_err/08.txt", + "tests/10_err/10.txt" + }; + + string outputs_lexical[6] = { + "tests/00/00_my_lexical.txt", + "tests/01/01_my_lexical.txt", + "tests/02/02_my_lexical.txt", + "tests/07/07_my_lexical.txt", + "tests/08_err/08_my_lexical.txt", + "tests/10_err/10_my_lexical.txt" + }; + + string outputs_grammar[6] = { + "tests/00/00_my_grammar.txt", + "tests/01/01_my_grammar.txt", + "tests/02/02_my_grammar.txt", + "tests/07/07_my_grammar.txt", + "tests/08_err/08_my_grammar.txt", + "tests/10_err/10_my_grammar.txt" + }; + + + int i = 0; + for (auto input : inputs) { + LL1 ll; + //ll.print_grammar_set(); + + string content = readfile(input); + vector token_strings = recognize(minimizedDFA, content,outputs_lexical[i]); + + bool flag = ll.IsLL1(); + ll.build_LL1_predict(); + + + // ll.print_LL1_predict(); + ll.get_token_strings(token_strings); + + + // ll.print_token_strings(); + ll.build_LL1_grammar(); + + + ll.fileout_LL1_grammar_log(outputs_grammar[i]); + + + // ll.print_LL1_grammar_log(); + cout << endl; + i++; + } + + + + return 0; +} \ No newline at end of file diff --git a/main/nfa.h b/main/nfa.h new file mode 100644 index 0000000..184b0cf --- /dev/null +++ b/main/nfa.h @@ -0,0 +1,173 @@ +#pragma once +#ifndef __NFA__H__ +#define __NFA__H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; +//单词符号的类型,返回<待测代码中的单词符号,WordType> +typedef enum WordType { + //当识别成标识符后,先判断是不是保留字,让后再判断IDN + KW_INT = 0, // int + KW_VOID, // void + KW_RETURN, // return + KW_CONST, // const + + OP_ADD, // + + 
OP_SUB, // - + OP_MUL, // * + OP_DIV, // / + OP_MOD, // % + OP_ASSIGN, // = + OP_GT, // > + OP_LT, // < + OP_EQ, // == + OP_LE, // <= + OP_GE, // >= + OP_NE, // != + OP_AND, // && + OP_OR, // || + + SE_LBRAC, // ( left backet + SE_RBRAC, // ) right bracket + SE_LCBRAC, // { left curly bracket + SE_RCBRAC, // } right curly bracket + SE_COMMA, // , + SE_SEMI, // ; + + IDN, // [a-zA-Z][a-zA-Z_0-9]* + INT_VAL, // -*[0-9]+ + UNKOWN +}WordType; +string getWordTypeName(WordType type); +//定义输入的字符类别 +typedef enum InputCharType { + LETTER = 0, // 字母 0 + UNDERLINE, // _ 1 + DIGIT, // 数字 2 当识别成功一个数字时,为了避免出现数字01的情况,返回前先进行一个判断,对GCC,01可以识别并等于1的 + //OP + ADD, // + 3 + SUB, // - 4 + MUL, // * 5 + DIV, // / 6 + MOD, // % 7 + EQ, // = 8 + GT, // > 9 + LT, // < 10 + NOT, // ! 11 + AND, // & 12 + OR, // | 13 + //SE + LBRACKET, // ( 14 + RBRACKET, // ) 15 + LCBRAC, // { 16 + RCBRAC, // } 17 + COMMA, // , 18 + SEMI, // ; 19 + + EPSILON, // 空字符 20 +}InputCharType; +string getInputChartypeName(InputCharType type); +enum class TokenType { + KW = 0, + OP, + SE, + IDN, + INT, + UNKNOWN +}; +TokenType getTokenType(WordType wordType,string buffer); +typedef struct Token { + string value; + TokenType type; +} Token; + +//定义函数判断输入的字符类别 +InputCharType getInputCharType(char c); +string getWordTypeName(WordType type,string buffer); + +//定义状态类 +class State { +public: + int id; // 状态编号 + map> transitions; // 转移函数映射表,记录每个输入字符类型对应的目标状态集合 + bool isFinalState; // 是否为最终状态 + WordType wordType; // 到达该状态时应该返回的词法单元类型 + State(int id) : id(id), isFinalState(false), wordType(UNKOWN) {} + void addTransition(InputCharType input, State* targetState) { + transitions[input].insert(targetState); + } + void setFinalState(bool isFinal, WordType type) { + isFinalState = isFinal; + wordType = type; + } + bool operator<(const State& other) const { + return id < other.id; + } +}; +//为了是set内部有序,定义排序结构体StatePtrCompare +struct StatePtrCompare { + bool operator()(const State* lhs, const State* rhs) const { + return lhs->id < 
rhs->id; + } +}; + +//定义NFA类 +class NFA { +public: + State* startState; // 起始状态 + set endStates; // 终止状态集合 + set states; // 状态集合 + NFA(State* startState, set endStates, set states) : + startState(startState), endStates(endStates), states(states) {} + // void printNFA(); +}; +NFA RexToNFA(); +void printNFA(const NFA& nfa); +NFA buildNFA(string filename); +NFA RexToNFA(); +set move(const set& states, InputCharType input); +set epsilonClosure(const set& states); + +class DFA { +public: + State* startState; // 起始状态 + set endStates; // 终止状态集合 + set states; // 状态集合 + DFA(State* startState, set endStates, set states) : + startState(startState), endStates(endStates), states(states) {} +}; +void removeUnreachableStates(DFA& dfa); +void printDFA(const DFA& dfa); +DFA nfaToDFA(const NFA& nfa); +void printDFA(const DFA& dfa); +struct SetComparator { + bool operator()(const set& a, const set& b) const { + if (a.size() != b.size()) { + return a.size() < b.size(); + } + + vector vecA(a.begin(), a.end()); + vector vecB(b.begin(), b.end()); + + sort(vecA.begin(), vecA.end(), [](const State* a, const State* b) { return a->id < b->id; }); + sort(vecB.begin(), vecB.end(), [](const State* a, const State* b) { return a->id < b->id; }); + + return vecA < vecB; + } +}; +string getGrammarName(WordType type, string buffer); +DFA minimizeDFA(const DFA& dfa); +vector recognize(const DFA& dfa, const string& input, const string& output); +string readfile(const string& filename); +#endif \ No newline at end of file diff --git a/nfa/CMakeLists.txt b/nfa/CMakeLists.txt new file mode 100644 index 0000000..cfc094e --- /dev/null +++ b/nfa/CMakeLists.txt @@ -0,0 +1,14 @@ +cmake_minimum_required(VERSION 3.10) +project(nfa) + +# 收集所有的cpp源文件 +file(GLOB SOURCES "*.cpp") + +# 设置输出目录为 bin +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +# 创建静态链接库 +add_library(nfa STATIC ${SOURCES}) + +# 添加头文件目录 +target_include_directories(nfa PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/nfa/dfa.cpp 
b/nfa/dfa.cpp new file mode 100644 index 0000000..8b5009e --- /dev/null +++ b/nfa/dfa.cpp @@ -0,0 +1,251 @@ +#include "nfa.h" + +class Partition { +public: + set states; + Partition(set states) : states(states) {} +}; +/* +最小化算法步骤: +首先把所有节点分为N和A两个集合,集非结束态和结束态 +S = {N,A},然后遍历所有字符,去看每个字符都否对S中的状态集进行划分,每轮遍历下来,如果S仍然在扩大,则从头再来一轮。直到S不再扩大,即没有状态集可分为止。 +c can split s这里s指的是S中的一个状态集 +1.遍历s中每个状态,记录每个状态吃了字符c之后到达的状态,吃不了的不管。 +2.把到达的状态分类,分类依据:把属于同一个状态集的合在一起。这里的同一个状态集指的是S中现在有的状态集。 +3.按照第二步的分法把s分割。 +注意:是从s中分割出去,s最后保留下来的是吃了字符c还在状态集s中的状态或者吃不了c字符的状态。 +*/ + +// split 函数用于将给定的状态集合(group)根据转移函数进一步细分。 +// group: 要细分的状态集合 +// input: 当前考虑的输入字符类型 +// partitions: 存储所有分区的集合,如果需要细分,将在该集合中添加新分区 +void split(const set& group, InputCharType input, set& partitions) { + // 用于存储每个目标分区与对应新分组状态集合的映射 + map> targetPartitionsMap; + + for (State* state : group) { + auto it = state->transitions.find(input); + if (it != state->transitions.end()) { + State* targetState = *(it->second.begin());//DFA状态转移具有唯一性 + // 在当前所有分区中查找包含目标状态的分区 + for (Partition* partition : partitions) { + if (partition->states.find(targetState) != partition->states.end()) { + // 在映射表中将当前状态添加到对应的目标分区 + targetPartitionsMap[partition].insert(state); + break; + } + } + } + } + // 经过上述操作,将在group里的状态根据到达目标Partiset分到不同set + // 遍历目标分区映射表,检查是否需要进一步细分,即将经过input输入状态转换后处于不同目标分区的集合内部拆分开 + for (auto& entry : targetPartitionsMap) { + Partition* targetPartition = entry.first; + //到达该targetPartition的group部分状态合集如下: + set& newGroupStates = entry.second; + //等于的情况不拆分,不会出现大于的情况,将targetPartition拆分开来,也可以将到达不同割集的源状态分割开来,也可以分割目标状态,总之是状态转移结果在现存割集即可 + if (newGroupStates.size() < targetPartition->states.size()) { + for (State* state : newGroupStates) { + targetPartition->states.erase(state); + } + Partition* newGroup = new Partition(newGroupStates); + partitions.insert(newGroup); + } + } +} + +DFA minimizeDFA(const DFA& dfa) { + set partitions; + + // 将所有非终止状态分成一组,将所有终止状态按照 WordType 分组 + /* + * 不同 WordType 的终止状态表示的是不同的词法单元类型。 + * 这些状态在词法分析过程中具有不同的语义,不能被合并为同一个状态。 + */ + 
map> endStateGroups; //初始终态集合 + set nonEndStates; //初始非终态集合 + for (State* state : dfa.states) { + if (state->isFinalState) { + endStateGroups[state->wordType].insert(state);//使用wordType对终态集合进一步拆分 + } + else { + nonEndStates.insert(state); + } + } + //构造初始分割,是对{N,A}中A的扩展(即终态,加快算法速度,扩展原因见上) + for (auto& entry : endStateGroups) { + Partition* endStateGroup = new Partition(entry.second); + partitions.insert(endStateGroup); + } + Partition* nonEndStateGroup = new Partition(nonEndStates); + partitions.insert(nonEndStateGroup); + //对现有分隔进行再分隔,以获得最小化分割 + size_t oldSize;//分割集初始大小 + do { + oldSize = partitions.size(); + for (InputCharType input = static_cast(0); input < EPSILON; input = static_cast(input + 1)) {//类似于求Ia,Ib等 + for (Partition* partition : set(partitions)) {//遍历现存分割的每一个割集,看是否可再分割 + if (partition->states.size() > 1) {//为1的集合不可再分割 + split(partition->states, input, partitions);//核心分割函数 + } + } + } + } while (partitions.size() != oldSize);//当割集集合大小不再变化时停止 + + // 创建新的最小化 DFA,即重新映射dfa,重新编号状态 + // 构造DFA参数为DFA(State* set setset set states) + set minimizedStates; + set minimizedEndStates; + State* minimizedStartState = nullptr; + map stateMap; + + for (Partition* partition : partitions) {//遍历获得的每个割集 + State* newState = new State(minimizedStates.size());//编号 + // 检查当前划分是否包含旧DFA的开始状态,如果是,则将新状态设置为最小化DFA的开始状态 + if (partition->states.find(dfa.startState) != partition->states.end()) { + minimizedStartState = newState; + } + // 如果划分的状态集合不为空,选择一个代表状态 + if (!partition->states.empty()) { + State* representative = *(partition->states.begin());//因为在前面终止状态都分到了不同割集,且大小为1,所以如果是终止状态begin已经可以代表了 + //在分割状态集合的过程中,已经确保了一个划分中所有状态具有相同的属性,要么所有状态都是终止状态,要么都不是终止状态。所以我们只需要检查一个状态来确定新状态是否应该是终止状态。 + // 如果代表状态是终止状态,则设置新状态为终止状态,并保留相应的单词类型 + if (representative->isFinalState) { + newState->setFinalState(true, representative->wordType); + minimizedEndStates.insert(newState); + } + } + // 将集合里面所有旧状态映射到同一个新状态 + for (State* state : partition->states) + { + stateMap[state] = newState; + } + // 
将新状态插入到最小化DFA的状态集合中 + minimizedStates.insert(newState); + } + // 遍历旧DFA中的所有状态 + for (State* oldState : dfa.states) { + // 通过映射找到与旧状态对应的新状态 + State* newState = stateMap[oldState]; + for (const auto& transition : oldState->transitions) { + InputCharType input = transition.first; + State* oldTargetState = *(transition.second.begin());//dfa每个状态只有一个转移状态,沿用了nfa的结构,所以集合大小<=1 + State* newTargetState = stateMap[oldTargetState];// 获取旧状态的目标状态 + newState->addTransition(input, newTargetState);// 通过映射找到新的目标状态 + } + } + + // 清理并删除原始分区 + for (Partition* partition : partitions) { + delete partition; + } + return DFA(minimizedStartState, minimizedEndStates, minimizedStates); +} +void removeUnreachableStates(DFA& dfa) { + set reachableStates; //可达状态集合 + queue statesQueue; //状态队列 + + //将初始状态加入可达状态集合和队列 + reachableStates.insert(dfa.startState); + statesQueue.push(dfa.startState); + + // BFS 遍历 DFA,找出所有可达状态 + while (!statesQueue.empty()) { + State* currentState = statesQueue.front(); + statesQueue.pop(); + for (const auto& transition : currentState->transitions) { + State* targetState = *(transition.second.begin());//dfa每个状态只有一个转移状态,沿用了nfa的结构,所以集合大小<=1 + if (reachableStates.find(targetState) == reachableStates.end()) {//若未访问 + reachableStates.insert(targetState); + statesQueue.push(targetState); + } + } + } + + // 删除所有不可达状态 + for (auto it = dfa.states.begin(); it != dfa.states.end();) { + State* state = *it; + if (reachableStates.find(state) == reachableStates.end()) {//若当前状态不可达,删除 + it = dfa.states.erase(it); + delete state; + } + else { + ++it; + } + } +} +vector recognize(const DFA& dfa, const string& input, const string& output) { + + State* currentState = dfa.startState; + State* nextState = nullptr; + string buffer; + vector tokens; // 用于收集识别到的Token + //打开结果输出文件 + ofstream file(output); + if (!file.is_open()) { + + cout << "Error opening file!" 
<< endl; + return tokens; + } + for (size_t i = 0; i < input.length(); ++i) { + char c = input[i]; + if (c == ' '||c=='\n'||c=='\r\n'||c==' ')// 如果是空格、换行等分隔符,则跳过 + {continue; } + InputCharType inputCharType = getInputCharType(c); + auto it = currentState->transitions.find(inputCharType); + + if (it != currentState->transitions.end()) { + nextState = *(it->second.begin()); + buffer.push_back(c); + + if (nextState->isFinalState && i + 1 < input.length()) {// 如果下一个状态是终止状态并且还有剩余字符 + char nextChar = input[i + 1]; + InputCharType nextInputCharType = getInputCharType(nextChar); + auto nextIt = nextState->transitions.find(nextInputCharType);// 查找下一个状态的转换表中是否有对应的输入字符类型 + + if (nextIt == nextState->transitions.end()) {// 如果没有更多匹配的转换 + // 输出识别到的单词符号和对应的类型 + cout << buffer << "\t<" << getWordTypeName(nextState->wordType,buffer) << ">" << endl; + file << buffer << "\t<" << getWordTypeName(nextState->wordType, buffer) << ">" << endl; + tokens.push_back(getGrammarName(nextState->wordType, buffer)); + buffer.clear(); + currentState = dfa.startState; + } + else { + currentState = nextState;// 更新当前状态为下一个状态 + } + } + else { + currentState = nextState;// 更新当前状态为下一个状态 + } + } + else {// 如果没有找到匹配的转换 + if (currentState->isFinalState) {// 如果当前状态是终止状态 + // 输出识别到的单词符号和对应的类型 + cout << buffer << "\t<" << getWordTypeName(currentState->wordType,buffer) << ">" << endl; + file << buffer << "\t<" << getWordTypeName(currentState->wordType, buffer) << ">" << endl; + tokens.push_back(getGrammarName(currentState->wordType, buffer) ); + buffer.clear(); + } + else { + // 如果当前状态不是终止状态 + // 输出无法识别的字符信息 + cout << "Unrecognized characters: " << c << endl; + file << "Unrecognized characters: " << c << endl; + + buffer.clear(); + } + currentState = dfa.startState;// 回到起始状态 + //--i;// 重新处理当前字符,还是跳过吧,这里可以添加错误处理 + } + } + // 处理最后一个字符,如果缓冲区不为空且当前状态是终止状态,对应第一个if里面的else + if (!buffer.empty() && currentState->isFinalState) { + cout << buffer << "\t<" << getWordTypeName(currentState->wordType,buffer) << ">" << endl; 
+ file << buffer << "\t<" << getWordTypeName(currentState->wordType, buffer) << ">" << endl; + tokens.push_back(getGrammarName(currentState->wordType, buffer)); + } + file.close();//关闭文件 + return tokens; +} \ No newline at end of file diff --git a/nfa/nfa.cpp b/nfa/nfa.cpp new file mode 100644 index 0000000..cc3e3ca --- /dev/null +++ b/nfa/nfa.cpp @@ -0,0 +1,262 @@ +// 将正则表达式转换为非确定性有限自动机 + + + + +#include "nfa.h" + + + +// 处理正则表达式,描述终态 +NFA RexToNFA() { + //由于里面存在||,所以不同正则间使用空格分隔代表| l代表letter,_代表下划线,0代表数字(也可以是d,但是为了使用已经有的函数), + //[lu]代表l|u + string rex = "+ - * / % = > < == <= >= != && || ( ) { } , ; [l_][l_0]* -?00*"; + //下面给出正则对应的输出(终态) + vector finalState = { + OP_ADD, OP_SUB,OP_MUL,OP_DIV,OP_MOD,OP_ASSIGN,OP_GT,OP_LT, OP_EQ,OP_LE,OP_GE,OP_NE, OP_AND, OP_OR,SE_LBRAC, SE_RBRAC, + SE_LCBRAC,SE_RCBRAC,SE_COMMA,SE_SEMI,IDN,INT_VAL + }; + stringstream ss(rex); + string target; + + // 创建初始状态 + int stateIndex = 0; + int finalIndex = 0; + State* startState = new State(stateIndex++); + set endStates; + set allStates = { startState }; + while (getline(ss, target,' ')) { + //如获得[l_][l_0]* + State* currentState = startState; + + for (size_t i = 0; i < target.length();i++) { + //创建一个新状态,startState通过输入InputCharType到达该状态 + State* newState = new State(stateIndex++); + allStates.insert(newState); + //需要往后看一个符号 + if (target[i] == '[') { + //[...]构成一种输入,查看]后面是否有?或者*,来判断当前状态的构成 + for (i=i+1; i < target.length() && target[i] != ']'; i++) { + InputCharType input = getInputCharType(target[i]); + if (input != EPSILON) { + // 添加转移函数,从当前状态向新状态转移 + currentState->addTransition(input, newState); + } + } + } + else { + InputCharType input = getInputCharType(target[i]); + currentState->addTransition(input, newState); + } + //往后查看一个输入 + if (i + 1 < target.length() && target[i + 1] == '?') { + //创建EPSILON转移状态 + State* epsState = new State(stateIndex++); + allStates.insert(epsState); + currentState->addTransition(EPSILON, epsState); + newState->addTransition(EPSILON, epsState); + currentState = 
epsState; + // 跳过'?'字符 + i++; + } + else if (i + 1 < target.length() && target[i + 1] == '*') { + State* epsState = new State(stateIndex++); + allStates.insert(epsState); + currentState->addTransition(EPSILON, epsState); + newState->addTransition(EPSILON, epsState); + epsState->addTransition(EPSILON, currentState); + currentState = epsState; + // 跳过'*'字符 + i++; + } + else { + currentState = newState; + } + //判断是否是终止状态 + if (i == (target.length() - 1)) { + // 到达最后一个字符,将当前状态设置为终止状态 + currentState->setFinalState(true, finalState[endStates.size()]); + endStates.insert(currentState); + } + }//for + } + // 返回字符集合对应的NFA + return NFA(startState, endStates, allStates); +} + +// 构造状态机 +NFA buildNFA(string filename) { + ifstream ifs(filename); + if (!ifs) { + cerr << "Cannot open file: " << filename << endl; + exit(EXIT_FAILURE); + } + + int stateNum, inputNum; + ifs >> stateNum >> inputNum; + + vector states(stateNum); + for (int i = 0; i < stateNum; i++) { + states[i] = new State(i); + } + + State* startState = states[0]; + set endStates; + for (int i = 0; i < stateNum; i++) { + for (int j = 0; j < inputNum; j++) { + string targetStateIDs; + ifs >> targetStateIDs; + if (targetStateIDs.compare("#") != 0) { + stringstream ss(targetStateIDs); + string targetStateIDStr; + while (getline(ss, targetStateIDStr, ',')) { + int targetStateID = stoi(targetStateIDStr); + states[i]->addTransition(static_cast(j), states[targetStateID]); + } + } + } + } + + int endStateNum; + ifs >> endStateNum; + for (int i = 0; i < endStateNum; i++) { + int endStateID, wordTypeID; + ifs >> endStateID >> wordTypeID; + states[endStateID]->setFinalState(true, static_cast(wordTypeID)); + endStates.insert(states[endStateID]); + } + + return NFA(startState, endStates, set(states.begin(), states.end())); +} + +void printNFA(const NFA& nfa) { + cout << "Start state: " << nfa.startState->id << endl; + cout << "End states: "<id << " " << getWordTypeName(state->wordType) << " " << (state->isFinalState == true) << 
endl; + } + cout << endl; + + cout << "States and transitions:" << endl; + for (auto state : nfa.states) { + cout << "State " << state->id << ":" << endl; + for (auto transition : state->transitions) { + cout << "\tInput " << getInputChartypeName(transition.first) << ": "; + for (auto targetState : transition.second) { + cout << targetState->id << " "; + } + cout << endl; + } + } +} + +set move(const set& states, InputCharType input) { + set targetStates; + for (State* state : states) { + auto it = state->transitions.find(input); + if (it != state->transitions.end()) { + for (State* targetState : it->second) { + if (targetStates.find(targetState) == targetStates.end()) { + targetStates.insert(targetState); + } + } + } + } + return targetStates; +} + + +set epsilonClosure(const set& states) { + set closure = states; + stack stateStack; + for (State* state : states) { + stateStack.push(state); + } + while (!stateStack.empty()) { + State* currentState = stateStack.top(); + stateStack.pop(); + auto it = currentState->transitions.find(EPSILON); + if (it != currentState->transitions.end()) { + for (State* nextState : it->second) { + if (closure.find(nextState) == closure.end()) {//防止同一状态多次进栈,set自带去重 + closure.insert(nextState); + stateStack.push(nextState); + } + } + } + } + return closure; +} + +DFA nfaToDFA(const NFA& nfa) { + map, State*, SetComparator> dfaStatesMap; // 用于映射NFA状态集合到DFA状态的映射表 + queue> nfaStatesQueue; // 用于BFS遍历的集合队列 + set dfaStates; + set dfaEndStates; + + set nfaStartClosure = epsilonClosure({ nfa.startState }); + State* dfaStartState = new State(0); + dfaStatesMap[nfaStartClosure] = dfaStartState; + dfaStates.insert(dfaStartState); + nfaStatesQueue.push(nfaStartClosure); + + int nextStateId = 1; + //set nfaStartClosure + while (!nfaStatesQueue.empty()) { + set currentNFAStates = nfaStatesQueue.front(); + nfaStatesQueue.pop(); + State* currentDFAState = dfaStatesMap[currentNFAStates]; + + // 检查是否有终止状态,如果有,设置DFA状态为终止状态 + for (State* nfaState : 
currentNFAStates) { + if (nfaState->isFinalState) { + // cout << nfaState->id << "is FinalState" << endl; + currentDFAState->setFinalState(true, nfaState->wordType); + dfaEndStates.insert(currentDFAState); + break; + } + } + + // 遍历所有输入字符类型 + for (int i = 0; i < static_cast(EPSILON); i++) { + InputCharType inputCharType = static_cast(i); + set nextNFAStates = epsilonClosure(move(currentNFAStates, inputCharType)); + if (nextNFAStates.empty()) { + continue; + } + + // 如果NFA状态集合不存在于映射表中,则创建新的DFA状态 + if (dfaStatesMap.find(nextNFAStates) == dfaStatesMap.end()) { + State* newDFAState = new State(nextStateId++); + dfaStatesMap[nextNFAStates] = newDFAState; + dfaStates.insert(newDFAState); + nfaStatesQueue.push(nextNFAStates); + } + currentDFAState->addTransition(inputCharType, dfaStatesMap[nextNFAStates]); + } + } + + return DFA(dfaStartState, dfaEndStates, dfaStates); +} + +void printDFA(const DFA& dfa) { + cout << "Start state: " << dfa.startState->id << endl; + cout << "End states: "<id << " " << getWordTypeName(state->wordType) << endl; + } + cout << endl; + cout << "States and transitions:" << endl; + for (auto state : dfa.states) { + cout << "State " << state->id << ":" << endl; + for (auto transition : state->transitions) { + cout << "\tInput " << getInputChartypeName(transition.first) << ": "; + for (auto targetState : transition.second) { + cout << targetState->id << " "; + } + cout << endl; + } + } +} + diff --git a/nfa/nfa.h b/nfa/nfa.h new file mode 100644 index 0000000..184b0cf --- /dev/null +++ b/nfa/nfa.h @@ -0,0 +1,173 @@ +#pragma once +#ifndef __NFA__H__ +#define __NFA__H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; +//单词符号的类型,返回<待测代码中的单词符号,WordType> +typedef enum WordType { + //当识别成标识符后,先判断是不是保留字,让后再判断IDN + KW_INT = 0, // int + KW_VOID, // void + KW_RETURN, // return + KW_CONST, // const + + OP_ADD, // + + OP_SUB, // - + OP_MUL, // * + OP_DIV, // / + 
// ===== nfa/nfa.h =====
// Shared declarations for the lexer: token/word types, input character
// classes, the State/NFA/DFA data structures, and the helper functions
// implemented in nfa.cpp, dfa.cpp and tool.cpp.
#pragma once
#ifndef __NFA__H__
#define __NFA__H__

#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <set>
#include <map>
#include <queue>
#include <stack>
#include <algorithm>
#include <iterator>
#include <cstdlib>
using namespace std;

// Lexical token categories: the recognizer returns <lexeme, WordType>.
typedef enum WordType {
    // Keywords are first recognized as IDN, then reclassified by lexeme.
    KW_INT = 0,   // int
    KW_VOID,      // void
    KW_RETURN,    // return
    KW_CONST,     // const

    OP_ADD,       // +
    OP_SUB,       // -
    OP_MUL,       // *
    OP_DIV,       // /
    OP_MOD,       // %
    OP_ASSIGN,    // =
    OP_GT,        // >
    OP_LT,        // <
    OP_EQ,        // ==
    OP_LE,        // <=
    OP_GE,        // >=
    OP_NE,        // !=
    OP_AND,       // &&
    OP_OR,        // ||

    SE_LBRAC,     // ( left bracket
    SE_RBRAC,     // ) right bracket
    SE_LCBRAC,    // { left curly bracket
    SE_RCBRAC,    // } right curly bracket
    SE_COMMA,     // ,
    SE_SEMI,      // ;

    IDN,          // [a-zA-Z][a-zA-Z_0-9]*
    INT_VAL,      // integer literal
    UNKOWN        // (sic — spelling kept: existing code depends on this name)
} WordType;
string getWordTypeName(WordType type);

// Character classes consumed by the automata.
typedef enum InputCharType {
    LETTER = 0,   // a-z A-Z
    UNDERLINE,    // _
    DIGIT,        // 0-9
    // operators
    ADD,          // +
    SUB,          // -
    MUL,          // *
    DIV,          // /
    MOD,          // %
    EQ,           // =
    GT,           // >
    LT,           // <
    NOT,          // !
    AND,          // &
    OR,           // |
    // separators
    LBRACKET,     // (
    RBRACKET,     // )
    LCBRAC,       // {
    RCBRAC,       // }
    COMMA,        // ,
    SEMI,         // ;

    EPSILON       // empty input; also the sentinel past the last real class
} InputCharType;
string getInputChartypeName(InputCharType type);

// Coarse token categories used by the parser-facing API.
enum class TokenType {
    KW = 0,
    OP,
    SE,
    IDN,
    INT,
    UNKNOWN
};
TokenType getTokenType(WordType wordType, string buffer);

typedef struct Token {
    string value;
    TokenType type;
} Token;

// Classify a raw character; unknown characters map to EPSILON.
InputCharType getInputCharType(char c);
string getWordTypeName(WordType type, string buffer);

// An automaton state shared by both NFA and DFA (a DFA simply stores at
// most one target per input in `transitions`).
class State {
public:
    int id;                                      // unique state number
    map<InputCharType, set<State*>> transitions; // input class -> target states
    bool isFinalState;                           // accepting state?
    WordType wordType;                           // token emitted on acceptance
    State(int id) : id(id), isFinalState(false), wordType(UNKOWN) {}
    void addTransition(InputCharType input, State* targetState) {
        transitions[input].insert(targetState);
    }
    void setFinalState(bool isFinal, WordType type) {
        isFinalState = isFinal;
        wordType = type;
    }
    bool operator<(const State& other) const {
        return id < other.id;
    }
};

// Orders State pointers by id so containers can be made deterministic.
struct StatePtrCompare {
    bool operator()(const State* lhs, const State* rhs) const {
        return lhs->id < rhs->id;
    }
};

// A non-deterministic finite automaton.
class NFA {
public:
    State* startState;      // start state
    set<State*> endStates;  // accepting states
    set<State*> states;     // all states
    NFA(State* startState, set<State*> endStates, set<State*> states) :
        startState(startState), endStates(endStates), states(states) {}
};
NFA RexToNFA();
void printNFA(const NFA& nfa);
NFA buildNFA(string filename);
set<State*> move(const set<State*>& states, InputCharType input);
set<State*> epsilonClosure(const set<State*>& states);

// A deterministic finite automaton (each transition set holds one target).
class DFA {
public:
    State* startState;      // start state
    set<State*> endStates;  // accepting states
    set<State*> states;     // all states
    DFA(State* startState, set<State*> endStates, set<State*> states) :
        startState(startState), endStates(endStates), states(states) {}
};
void removeUnreachableStates(DFA& dfa);
void printDFA(const DFA& dfa);
DFA nfaToDFA(const NFA& nfa);

// Strict weak ordering over NFA state subsets so they can key a std::map
// (used by nfaToDFA).  Fix: the original sorted by id but then compared the
// pointer vectors with operator<, whose relational order on unrelated
// pointers is unspecified; comparing the sorted ids gives a well-defined,
// deterministic order with identical equality semantics (ids are unique
// within a machine).
struct SetComparator {
    bool operator()(const set<State*>& a, const set<State*>& b) const {
        if (a.size() != b.size()) {
            return a.size() < b.size();
        }
        vector<int> idsA;
        vector<int> idsB;
        idsA.reserve(a.size());
        idsB.reserve(b.size());
        for (const State* s : a) idsA.push_back(s->id);
        for (const State* s : b) idsB.push_back(s->id);
        sort(idsA.begin(), idsA.end());
        sort(idsB.begin(), idsB.end());
        return idsA < idsB;
    }
};

string getGrammarName(WordType type, string buffer);
DFA minimizeDFA(const DFA& dfa);
vector<string> recognize(const DFA& dfa, const string& input, const string& output);
string readfile(const string& filename);
#endif
LBRACKET; + case ')': return RBRACKET; + case '{': return LCBRAC; + case '}': return RCBRAC; + case ',': return COMMA; + case ';': return SEMI; + default: + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { + return LETTER; + } + else if (c >= '0' && c <= '9') { + return DIGIT; + } + else { + return EPSILON; + } + } +} +string getInputChartypeName(InputCharType type) { + switch (type) + { + case LETTER: + return "LETTER"; + case UNDERLINE: + return "UNDERLINE"; + case DIGIT: + return "DIGIT"; + case ADD: + return "+"; + case SUB: + return "-"; + case MUL: + return "*"; + case DIV: + return "/"; + case MOD: + return "%"; + case EQ: + return "="; + case GT: + return ">"; + case LT: + return "<"; + case NOT: + return "!"; + case AND: + return "&"; + case OR: + return "|"; + case LBRACKET: + return "("; + case RBRACKET: + return ")"; + case LCBRAC: + return "{"; + case RCBRAC: + return "}"; + case COMMA: + return ","; + case SEMI: + return ";"; + case EPSILON: + return "EPSILON"; + default: + return "UNKOWN"; + } +} +string getWordTypeName(WordType type, string buffer) { + switch (type) { + case OP_ADD: + case OP_SUB: + case OP_MUL: + case OP_DIV: + case OP_MOD: + case OP_ASSIGN: + case OP_GT: + case OP_LT: + case OP_EQ: + case OP_LE: + case OP_GE: + case OP_NE: + case OP_AND: + case OP_OR: + return "OP"; + + case SE_LBRAC: + case SE_RBRAC: + case SE_LCBRAC: + case SE_RCBRAC: + case SE_COMMA: + case SE_SEMI: + return "SE"; + + case IDN: + if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")){ + return "KW"; + } + else { + return "IDN"; + } + + case INT_VAL: + return "INT"; + + default: + return "UNKNOWN"; + } +} + +string readfile(const string& filename) +{ + // 打开文件流并读取文件内容 + ifstream file(filename); + + string content((istreambuf_iterator(file)), + istreambuf_iterator()); + + // 去掉换行符 + //remove函数的作用是将字符串中的某个字符移动到字符串的末尾,并返回一个指向该字符后面位置的指针。 + //erase 函数的作用是删除字符串中指定区间内的所有字符,返回修改后的字符串 + 
//content.erase(remove(content.begin(), content.end(), '\n'), content.end()); + + return content; +} +TokenType getTokenType(WordType type,string buffer) { + switch (type) { + case OP_ADD: + case OP_SUB: + case OP_MUL: + case OP_DIV: + case OP_MOD: + case OP_ASSIGN: + case OP_GT: + case OP_LT: + case OP_EQ: + case OP_LE: + case OP_GE: + case OP_NE: + case OP_AND: + case OP_OR: + return TokenType::OP; + + case SE_LBRAC: + case SE_RBRAC: + case SE_LCBRAC: + case SE_RCBRAC: + case SE_COMMA: + case SE_SEMI: + return TokenType::SE; + + case IDN: + if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")) { + return TokenType::KW; + } + else { + return TokenType::IDN; + } + + case INT_VAL: + return TokenType::INT; + + default: + return TokenType::UNKNOWN; + } +} + +string getWordTypeName(WordType type) { + switch (type) { + case KW_INT: + return "KW_INT"; + case KW_VOID: + return "KW_VOID"; + case KW_RETURN: + return "KW_RETURN"; + case KW_CONST: + return "KW_CONST"; + case OP_ADD: + return "OP_ADD"; + case OP_SUB: + return "OP_SUB"; + case OP_MUL: + return "OP_MUL"; + case OP_DIV: + return "OP_DIV"; + case OP_MOD: + return "OP_MOD"; + case OP_ASSIGN: + return "OP_ASSIGN"; + case OP_GT: + return "OP_GT"; + case OP_LT: + return "OP_LT"; + case OP_EQ: + return "OP_EQ"; + case OP_LE: + return "OP_LE"; + case OP_GE: + return "OP_GE"; + case OP_NE: + return "OP_NE"; + case OP_AND: + return "OP_AND"; + case OP_OR: + return "OP_OR"; + case SE_LBRAC: + return "SE_LBRAC"; + case SE_RBRAC: + return "SE_RBRAC"; + case SE_LCBRAC: + return "SE_LCBRAC"; + case SE_RCBRAC: + return "SE_RCBRAC"; + case SE_COMMA: + return "SE_COMMA"; + case SE_SEMI: + return "SE_SEMI"; + case IDN: + return "IDN"; + case INT_VAL: + return "INT_VAL"; + default: + return "UNKNOWN"; + } +} + +string getGrammarName(WordType type, string buffer) { + switch (type) { + + case OP_ADD: return "+"; + case OP_SUB: return "-"; + case OP_MUL: return "*"; + case 
OP_DIV: return "/"; + case OP_MOD: return "%"; + case OP_ASSIGN: return "="; + case OP_GT: return ">"; + case OP_LT: return "<"; + case OP_EQ: return "=="; + case OP_LE: return "<="; + case OP_GE: return ">="; + case OP_NE: return "!="; + case OP_AND: return "&&"; + case OP_OR: return "||"; + + case SE_LBRAC: return "("; + case SE_RBRAC: return ")"; + case SE_LCBRAC: return "{"; + case SE_RCBRAC: return "}"; + case SE_COMMA: return ","; + case SE_SEMI: return ";"; + + case IDN: + if (!buffer.compare("int")) { + return "int"; + } + else if (!buffer.compare("void")) { + return "void"; + } + else if (!buffer.compare("return")) { + return "return"; + } + else if (!buffer.compare("const")) { + return "const"; + } + else { + return "IDN"; + } + case INT_VAL: return "INT"; + default: cerr << "Token Error: "<< type << endl; exit(-1); + } +} \ No newline at end of file