2024-compiler/LL1.cpp

#include <set>
#include <algorithm>
#include <stack>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>

#include "LL1.h"


// Constructs the analyzer by calling read_grammar() and then
// init_grammar_set(), in that order.
// NOTE(review): presumably read_grammar() loads the productions and
// init_grammar_set() derives the FIRST/FOLLOW/nullable data the other methods
// read; confirm against their definitions, which are not in this file section.
LL1::LL1()
{
	read_grammar();
	init_grammar_set();
}

// Nothing is released by hand here, so the compiler-generated destructor
// body is used.
LL1::~LL1() = default;

bool LL1::IsLL1()
{
	string symbol;
	vector<string> right_first = vector<string>();
	vector<string> left_follow;
	for (int i = 0; i < grammar_rules.size(); i++) {
		symbol.clear();
		right_first.clear();
		left_follow.clear();

		symbol = grammar_rules[i].first;


		// 计算 产生式左侧 FOLLOW 集
		left_follow = follow[symbol];

		// 计算 产生式右侧 FIRST 集

		// 对 X1 的 非 $ 符号 加入
		for (int j = 0; j < first[grammar_rules[i].second[0]].size(); j++) {
			if (first[grammar_rules[i].second[0]][j] == "$") {
				continue;
			}
			right_first.push_back(first[grammar_rules[i].second[0]][j]);
		}

		int cnt;
		for (cnt = 1; cnt < grammar_rules[i].second.size(); cnt++) {

			// 当且仅当 有 $ 符号时 继续加入
			if (!infer_empty[grammar_rules[i].second[cnt - 1]]) {
				break;
			}
			for (int j = 0; j < first[grammar_rules[i].second[cnt]].size(); j++) {
				if (first[grammar_rules[i].second[cnt]][j] == "$") {
					continue;
				}
				right_first.push_back(first[grammar_rules[i].second[cnt]][j]);
			}
		}

		// 若都能推导至 $ 符号时 加入
		if (cnt == grammar_rules[i].second.size() && infer_empty[grammar_rules[i].second[0]]) {
			right_first.push_back("$");
		}

		// 对产生式右侧 FIRST 集 进行 去重
		set<string> sright_first(right_first.begin(), right_first.end());
		right_first.clear();
		right_first.resize(sright_first.size());
		right_first.assign(sright_first.begin(), sright_first.end());


		vector<string> symbol_select;

		// 若产生式右侧 FIRST 集为 {$} 时
		if (right_first.size() == 1 && right_first[0] == "$") {

			// SELECT 集为 产生式右侧 FOLLOW 集 与 {$} 的交集
			symbol_select = left_follow;
			if (find(left_follow.begin(), left_follow.end(), "$") == left_follow.end()) {
				symbol_select.push_back("$");
			}
		}
		else
		{
			// SELECT 集为 产生式左侧 FIRST 集
			symbol_select = right_first;
		}

		// 对 SELECT 集 进行排序 方便接下来进行集合运算
		sort(symbol_select.begin(), symbol_select.end());

		vector<string> new_select = vector<string>();

		// 判断 SELECT 表中有无现有数据
		if (select.find(symbol) == select.end()) {

			select[symbol] = symbol_select;
		}
		else {

			// 判断两个相同产生式左侧 SELECT 集 是否相交
			set_intersection(symbol_select.begin(), symbol_select.end(), select[symbol].begin(), select[symbol].end(), back_inserter(new_select));

			if (new_select.size() == 0) {
				// 不相交，继续运算，存入两者并集
				set_union(symbol_select.begin(), symbol_select.end(), select[symbol].begin(), select[symbol].end(), back_inserter(new_select));
			}
			else
			{
				// 非 LL(1) 文法，退出
				cout << "该文法不为 LL(1) 文法！" << endl;
				return false;
			}

		}

	}

	// cout << "该文法为 LL(1) 文法！" << endl;
	return true;
}

void LL1::build_LL1_predict()
{
	// 对每一个 非终结符 进行初始化行
	for (int i = 0; i < VNs.size(); i++) {
		if (LL1_predict.find(VNs[i]) == LL1_predict.end()) {
			LL1_predict[VNs[i]] = unordered_map<string, int>();
		}
	}

	string symbol;
	vector<string> right_first = vector<string>();
	vector<string> left_follow;

	// 遍历 产生式 构建 预测分析表
	for (int i = 0; i < grammar_rules.size(); i++) {
		symbol.clear();
		right_first.clear();
		left_follow.clear();

		symbol = grammar_rules[i].first;


		// 计算 产生式左侧 FOLLOW 集
		left_follow = follow[symbol];

		unordered_map<string, int> &symbol_predict = LL1_predict[symbol];


		// 计算 产生式右侧 FIRST 集

		// 对 X1 的 非 $ 符号 加入
		for (int j = 0; j < first[grammar_rules[i].second[0]].size(); j++) {
			if (first[grammar_rules[i].second[0]][j] == "$") {
				continue;
			}
			right_first.push_back(first[grammar_rules[i].second[0]][j]);
		}

		int cnt;
		for (cnt = 1; cnt < grammar_rules[i].second.size(); cnt++) {

			// 当且仅当 有 $ 符号时 继续加入
			if (!infer_empty[grammar_rules[i].second[cnt - 1]]) {
				break;
			}
			for (int j = 0; j < first[grammar_rules[i].second[cnt]].size(); j++) {
				if (first[grammar_rules[i].second[cnt]][j] == "$") {
					continue;
				}
				right_first.push_back(first[grammar_rules[i].second[cnt]][j]);
			}
		}

		// 若都能推导至 $ 符号时 加入
		if (cnt == grammar_rules[i].second.size() && infer_empty[grammar_rules[i].second[0]]) {
			right_first.push_back("$");
		}

		// 对产生式右侧 FIRST 集 进行 去重
		set<string> sright_first(right_first.begin(), right_first.end());
		right_first.clear();
		right_first.resize(sright_first.size());
		right_first.assign(sright_first.begin(), sright_first.end());

		// 循环遍历 FIRST 集进行初始化
		for (int j = 0; j < right_first.size(); j++) {
			if (right_first[j] == "$") {
				pair<string, vector<string>> new_rule (grammar_rules[i].first, vector<string>());
				new_rule.second.push_back("$");
				int rule_id = insert_rule(new_rule);

				for (int k = 0; k < left_follow.size(); k++) {
					symbol_predict[left_follow[k]] = rule_id;
				}
			}
			symbol_predict[right_first[j]] = i;

		}

	}


}

// Dumps the predictive table to stdout: a header line, then one line per
// non-terminal of the form "<non-terminal> <lookahead>,<rule index> ...",
// followed by two blank lines.
void LL1::print_LL1_predict()
{
	cout << "[LL1_predict]:" << endl;

	for (const auto& row : LL1_predict) {
		cout << row.first << " ";
		for (const auto& cell : row.second) {
			cout << cell.first << "," << cell.second << " ";
		}
		cout << endl;
	}

	cout << endl << endl;
}

void LL1::build_LL1_grammar()
{
	// 符号栈
	stack<string> stack;
	int token_cnt = 0;

	// 起始符 入栈
	stack.push(start);

	while (!stack.empty())
	{
		LL1_grammar_log.push_back(string());

		// 栈顶符号
		// 判断栈顶是否为 空符号
		if (stack.top() == "$") {
			// 栈空 以 EOF 表示
			LL1_grammar_log.back() += "EOF";
		}
		else
		{
			LL1_grammar_log.back() += stack.top();
		}

		// 添加 # 分割
		LL1_grammar_log.back() += "#";

		// 面临输入的符号
		string this_token;
		if (token_cnt == token_strings.size()) {
			// 栈空 以 EOF 表示
			this_token = "$";
			LL1_grammar_log.back() += "EOF";
		}
		else
		{
			this_token = token_strings[token_cnt];
			LL1_grammar_log.back() += token_strings[token_cnt];
		}

		// 对栈顶元素与即将输入的符号进行比较
		if (stack.top() == this_token) {
			// 栈顶出栈 token 指向下一位
			token_cnt++;
			stack.pop();

			if (this_token == "$") {
				// 分析成功 结束分析
				LL1_grammar_log.back() += "\taccept";
			}
			else
			{
				// 跳过
				LL1_grammar_log.back() += "\tmove";
			}
		}
		// 若为终结符
		else if (find(VTs.begin(), VTs.end(), stack.top()) != VTs.end()) {
			if (stack.top() == "$") {
				stack.pop();
				LL1_grammar_log.pop_back();
			}
			else {
				LL1_grammar_log.back() += "\terror";
				return;
			}
		}
		else
		{
			auto tab = LL1_predict[stack.top()];

			if (tab.find(this_token) == tab.end()) {
				LL1_grammar_log.back() += "\terror";
				return;
			}
			else
			{
				auto this_rule = grammar_rules[tab[this_token]];
				stack.pop();
				for (int i = this_rule.second.size() - 1; i >= 0; i--) {
					stack.push(this_rule.second[i]);
				}
				LL1_grammar_log.back() += "\treduction";
			}
		}
	}

}

// Writes every recorded parse step to stdout, one entry per line.
void LL1::print_LL1_grammar_log()
{
	for (const auto& entry : LL1_grammar_log) {
		cout << entry << endl;
	}
}

/**
 * Writes every recorded parse step to a file, one entry per line.
 *
 * @param file_name path of the output file; it is opened with the default
 *                  std::ofstream mode.
 *
 * If the file cannot be opened, a message is printed to stdout and nothing
 * is written.
 */
void LL1::fileout_LL1_grammar_log(string file_name)
{
	ofstream outfile(file_name);

	if (!outfile.is_open()) {
		cout << "打开文件失败" << endl;
		// Writing to a stream that failed to open would silently do nothing.
		return;
	}

	for (const auto& entry : LL1_grammar_log) {
		// '\n' instead of endl: same bytes in the file, no flush per line.
		outfile << entry << '\n';
	}
	// outfile is flushed and closed by its destructor.
}

// Returns the index in grammar_rules of a production with the same left-hand
// side and right-hand side as new_rule. When there is none, new_rule is
// appended and the index of the new element is returned.
int LL1::insert_rule(pair<string, vector<string>>& new_rule)
{
	int index = 0;
	for (const auto& rule : grammar_rules) {
		const bool same_lhs = rule.first == new_rule.first;
		if (same_lhs && rule.second == new_rule.second) {
			return index;
		}
		++index;
	}

	// Not present yet: index now equals the old size, i.e. the new slot.
	grammar_rules.push_back(new_rule);
	return index;
}