// C++ source, 352 lines, 7.7 KiB
#include <set>
|
|
#include <algorithm>
|
|
#include <stack>
|
|
#include <cstring>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <sstream>
|
|
|
|
#include "LL1.h"
|
|
|
|
|
|
/**
 * Constructs the analyzer by running read_grammar() followed by
 * init_grammar_set(), in that order.
 *
 * NOTE(review): both helpers are defined outside this section; judging by
 * their names they load the grammar and derive its symbol sets -- confirm.
 */
LL1::LL1()
{
    this->read_grammar();
    this->init_grammar_set();
}
|
|
|
|
/// Destructor: performs no explicit work; members clean up after themselves.
LL1::~LL1() = default;
|
|
|
|
bool LL1::IsLL1()
|
|
{
|
|
string symbol;
|
|
vector<string> right_first = vector<string>();
|
|
vector<string> left_follow;
|
|
for (int i = 0; i < grammar_rules.size(); i++) {
|
|
symbol.clear();
|
|
right_first.clear();
|
|
left_follow.clear();
|
|
|
|
symbol = grammar_rules[i].first;
|
|
|
|
|
|
|
|
// 计算 产生式左侧 FOLLOW 集
|
|
left_follow = follow[symbol];
|
|
|
|
// 计算 产生式右侧 FIRST 集
|
|
|
|
// 对 X1 的 非 $ 符号 加入
|
|
for (int j = 0; j < first[grammar_rules[i].second[0]].size(); j++) {
|
|
if (first[grammar_rules[i].second[0]][j] == "$") {
|
|
continue;
|
|
}
|
|
right_first.push_back(first[grammar_rules[i].second[0]][j]);
|
|
}
|
|
|
|
int cnt;
|
|
for (cnt = 1; cnt < grammar_rules[i].second.size(); cnt++) {
|
|
|
|
// 当且仅当 有 $ 符号时 继续加入
|
|
if (!infer_empty[grammar_rules[i].second[cnt - 1]]) {
|
|
break;
|
|
}
|
|
for (int j = 0; j < first[grammar_rules[i].second[cnt]].size(); j++) {
|
|
if (first[grammar_rules[i].second[cnt]][j] == "$") {
|
|
continue;
|
|
}
|
|
right_first.push_back(first[grammar_rules[i].second[cnt]][j]);
|
|
}
|
|
}
|
|
|
|
// 若都能推导至 $ 符号时 加入
|
|
if (cnt == grammar_rules[i].second.size() && infer_empty[grammar_rules[i].second[0]]) {
|
|
right_first.push_back("$");
|
|
}
|
|
|
|
// 对产生式右侧 FIRST 集 进行 去重
|
|
set<string> sright_first(right_first.begin(), right_first.end());
|
|
right_first.clear();
|
|
right_first.resize(sright_first.size());
|
|
right_first.assign(sright_first.begin(), sright_first.end());
|
|
|
|
|
|
|
|
vector<string> symbol_select;
|
|
|
|
// 若产生式右侧 FIRST 集为 {$} 时
|
|
if (right_first.size() == 1 && right_first[0] == "$") {
|
|
|
|
// SELECT 集为 产生式右侧 FOLLOW 集 与 {$} 的交集
|
|
symbol_select = left_follow;
|
|
if (find(left_follow.begin(), left_follow.end(), "$") == left_follow.end()) {
|
|
symbol_select.push_back("$");
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// SELECT 集为 产生式左侧 FIRST 集
|
|
symbol_select = right_first;
|
|
}
|
|
|
|
// 对 SELECT 集 进行排序 方便接下来进行集合运算
|
|
sort(symbol_select.begin(), symbol_select.end());
|
|
|
|
vector<string> new_select = vector<string>();
|
|
|
|
// 判断 SELECT 表中有无现有数据
|
|
if (select.find(symbol) == select.end()) {
|
|
|
|
select[symbol] = symbol_select;
|
|
}
|
|
else {
|
|
|
|
// 判断两个相同产生式左侧 SELECT 集 是否相交
|
|
set_intersection(symbol_select.begin(), symbol_select.end(), select[symbol].begin(), select[symbol].end(), back_inserter(new_select));
|
|
|
|
if (new_select.size() == 0) {
|
|
// 不相交,继续运算,存入两者并集
|
|
set_union(symbol_select.begin(), symbol_select.end(), select[symbol].begin(), select[symbol].end(), back_inserter(new_select));
|
|
}
|
|
else
|
|
{
|
|
// 非 LL(1) 文法,退出
|
|
cout << "该文法不为 LL(1) 文法!" << endl;
|
|
return false;
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// cout << "该文法为 LL(1) 文法!" << endl;
|
|
return true;
|
|
}
|
|
|
|
void LL1::build_LL1_predict()
|
|
{
|
|
// 对每一个 非终结符 进行初始化行
|
|
for (int i = 0; i < VNs.size(); i++) {
|
|
if (LL1_predict.find(VNs[i]) == LL1_predict.end()) {
|
|
LL1_predict[VNs[i]] = unordered_map<string, int>();
|
|
}
|
|
}
|
|
|
|
string symbol;
|
|
vector<string> right_first = vector<string>();
|
|
vector<string> left_follow;
|
|
|
|
// 遍历 产生式 构建 预测分析表
|
|
for (int i = 0; i < grammar_rules.size(); i++) {
|
|
symbol.clear();
|
|
right_first.clear();
|
|
left_follow.clear();
|
|
|
|
symbol = grammar_rules[i].first;
|
|
|
|
|
|
// 计算 产生式左侧 FOLLOW 集
|
|
left_follow = follow[symbol];
|
|
|
|
unordered_map<string, int> &symbol_predict = LL1_predict[symbol];
|
|
|
|
|
|
// 计算 产生式右侧 FIRST 集
|
|
|
|
// 对 X1 的 非 $ 符号 加入
|
|
for (int j = 0; j < first[grammar_rules[i].second[0]].size(); j++) {
|
|
if (first[grammar_rules[i].second[0]][j] == "$") {
|
|
continue;
|
|
}
|
|
right_first.push_back(first[grammar_rules[i].second[0]][j]);
|
|
}
|
|
|
|
int cnt;
|
|
for (cnt = 1; cnt < grammar_rules[i].second.size(); cnt++) {
|
|
|
|
// 当且仅当 有 $ 符号时 继续加入
|
|
if (!infer_empty[grammar_rules[i].second[cnt - 1]]) {
|
|
break;
|
|
}
|
|
for (int j = 0; j < first[grammar_rules[i].second[cnt]].size(); j++) {
|
|
if (first[grammar_rules[i].second[cnt]][j] == "$") {
|
|
continue;
|
|
}
|
|
right_first.push_back(first[grammar_rules[i].second[cnt]][j]);
|
|
}
|
|
}
|
|
|
|
// 若都能推导至 $ 符号时 加入
|
|
if (cnt == grammar_rules[i].second.size() && infer_empty[grammar_rules[i].second[0]]) {
|
|
right_first.push_back("$");
|
|
}
|
|
|
|
// 对产生式右侧 FIRST 集 进行 去重
|
|
set<string> sright_first(right_first.begin(), right_first.end());
|
|
right_first.clear();
|
|
right_first.resize(sright_first.size());
|
|
right_first.assign(sright_first.begin(), sright_first.end());
|
|
|
|
// 循环遍历 FIRST 集进行初始化
|
|
for (int j = 0; j < right_first.size(); j++) {
|
|
if (right_first[j] == "$") {
|
|
pair<string, vector<string>> new_rule (grammar_rules[i].first, vector<string>());
|
|
new_rule.second.push_back("$");
|
|
int rule_id = insert_rule(new_rule);
|
|
|
|
for (int k = 0; k < left_follow.size(); k++) {
|
|
symbol_predict[left_follow[k]] = rule_id;
|
|
}
|
|
}
|
|
symbol_predict[right_first[j]] = i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
void LL1::print_LL1_predict()
|
|
{
|
|
cout << "[LL1_predict]:" << endl;
|
|
for (auto iter = LL1_predict.begin(); iter != LL1_predict.end(); ++iter) {
|
|
cout << (*iter).first << " ";
|
|
for (auto j = (*iter).second.begin(); j != (*iter).second.end(); ++j) {
|
|
cout << (*j).first << "," << (*j).second << " ";
|
|
}
|
|
cout << endl;
|
|
|
|
}
|
|
cout << endl << endl;
|
|
|
|
}
|
|
|
|
void LL1::build_LL1_grammar()
|
|
{
|
|
// 符号栈
|
|
stack<string> stack;
|
|
int token_cnt = 0;
|
|
|
|
// 起始符 入栈
|
|
stack.push(start);
|
|
|
|
while (!stack.empty())
|
|
{
|
|
LL1_grammar_log.push_back(string());
|
|
|
|
// 栈顶符号
|
|
// 判断栈顶是否为 空符号
|
|
if (stack.top() == "$") {
|
|
// 栈空 以 EOF 表示
|
|
LL1_grammar_log.back() += "EOF";
|
|
}
|
|
else
|
|
{
|
|
LL1_grammar_log.back() += stack.top();
|
|
}
|
|
|
|
// 添加 # 分割
|
|
LL1_grammar_log.back() += "#";
|
|
|
|
// 面临输入的符号
|
|
string this_token;
|
|
if (token_cnt == token_strings.size()) {
|
|
// 栈空 以 EOF 表示
|
|
this_token = "$";
|
|
LL1_grammar_log.back() += "EOF";
|
|
}
|
|
else
|
|
{
|
|
this_token = token_strings[token_cnt];
|
|
LL1_grammar_log.back() += token_strings[token_cnt];
|
|
}
|
|
|
|
// 对栈顶元素与即将输入的符号进行比较
|
|
if (stack.top() == this_token) {
|
|
// 栈顶出栈 token 指向下一位
|
|
token_cnt++;
|
|
stack.pop();
|
|
|
|
if (this_token == "$") {
|
|
// 分析成功 结束分析
|
|
LL1_grammar_log.back() += "\taccept";
|
|
}
|
|
else
|
|
{
|
|
// 跳过
|
|
LL1_grammar_log.back() += "\tmove";
|
|
}
|
|
}
|
|
// 若为终结符
|
|
else if (find(VTs.begin(), VTs.end(), stack.top()) != VTs.end()) {
|
|
if (stack.top() == "$") {
|
|
stack.pop();
|
|
LL1_grammar_log.pop_back();
|
|
}
|
|
else {
|
|
LL1_grammar_log.back() += "\terror";
|
|
return;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
auto tab = LL1_predict[stack.top()];
|
|
|
|
if (tab.find(this_token) == tab.end()) {
|
|
LL1_grammar_log.back() += "\terror";
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
auto this_rule = grammar_rules[tab[this_token]];
|
|
stack.pop();
|
|
for (int i = this_rule.second.size() - 1; i >= 0; i--) {
|
|
stack.push(this_rule.second[i]);
|
|
}
|
|
LL1_grammar_log.back() += "\treduction";
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
void LL1::print_LL1_grammar_log()
|
|
{
|
|
for (int i = 0; i < LL1_grammar_log.size(); ++i) {
|
|
cout << LL1_grammar_log[i] << endl;
|
|
}
|
|
}
|
|
|
|
/**
 * Writes every entry of `LL1_grammar_log` to a file, one per line.
 *
 * If the file cannot be opened, a message is printed to stdout and nothing
 * is written.
 *
 * @param file_name path of the output file (created or truncated).
 */
void LL1::fileout_LL1_grammar_log(string file_name)
{
    // Open the result file.
    ofstream outfile(file_name);

    if (!outfile.is_open()) {
        cout << "打开文件失败" << endl;
        // Stop here instead of streaming into a failed stream.
        return;
    }

    // '\n' rather than endl: no need to flush after every single line.
    for (size_t i = 0; i < LL1_grammar_log.size(); ++i) {
        outfile << LL1_grammar_log[i] << '\n';
    }

    outfile.close();
}
|
|
|
|
/**
 * Returns the index of `new_rule` in `grammar_rules`, appending it first if
 * no identical production (same left-hand side and same right-hand side) is
 * stored yet.
 *
 * @param new_rule production as (left-hand side, right-hand side symbols).
 * @return index of the existing or newly appended production.
 */
int LL1::insert_rule(pair<string, vector<string>>& new_rule)
{
    const int rule_count = static_cast<int>(grammar_rules.size());

    // Look for an identical production that is already stored.
    int index = 0;
    while (index < rule_count) {
        const auto& candidate = grammar_rules[index];
        const bool same_rule = candidate.first == new_rule.first
                            && candidate.second == new_rule.second;
        if (same_rule) {
            return index;
        }
        ++index;
    }

    // Not present: append it; its index is the previous rule count.
    grammar_rules.push_back(new_rule);
    return rule_count;
}
|
|
|