521 lines
12 KiB
C++
521 lines
12 KiB
C++
#include <deque>
|
|
#include <cstring>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <sstream>
|
|
#include <algorithm>
|
|
#include <set>
|
|
|
|
#include "grammar.h"
|
|
|
|
|
|
Grammar::Grammar()
|
|
{
|
|
|
|
}
|
|
|
|
Grammar::~Grammar()
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
void Grammar::read_grammar() {
|
|
ifstream infile;
|
|
infile.open(grammar_file, ios::in);
|
|
if (!infile.is_open())
|
|
{
|
|
cout << "读取文件失败" << endl;
|
|
return;
|
|
}
|
|
|
|
string buf;
|
|
string arrow = "->";
|
|
string farrow;
|
|
bool start_flag = true;
|
|
string left;
|
|
string forms;
|
|
|
|
while (!infile.eof()) {
|
|
// 清理 string
|
|
buf.clear();
|
|
left.clear();
|
|
forms.clear();
|
|
farrow.clear();
|
|
|
|
|
|
grammar_rules.push_back(pair<string, vector<string>>());
|
|
getline(infile, buf);
|
|
|
|
stringstream ss(buf);
|
|
|
|
// 读取产生式左侧
|
|
ss >> left;
|
|
grammar_rules.back().first = left;
|
|
symbols.push_back(left);
|
|
VNs.push_back(left);
|
|
|
|
// 存储 start
|
|
if (start_flag) {
|
|
start = left;
|
|
start_flag = false;
|
|
}
|
|
|
|
// 读取 -> 符号 并保证合法
|
|
ss >> farrow;
|
|
if (farrow != arrow) {
|
|
cout << "语法读取出错" << endl;
|
|
}
|
|
|
|
// 读取产生式右侧
|
|
while (ss >> forms)
|
|
{
|
|
grammar_rules.back().second.push_back(forms);
|
|
symbols.push_back(forms);
|
|
forms.clear();
|
|
}
|
|
}
|
|
|
|
// 符号集 和 非终结符 去重
|
|
set<string> ssymbols(symbols.begin(), symbols.end());
|
|
symbols.clear();
|
|
symbols.resize(ssymbols.size());
|
|
symbols.assign(ssymbols.begin(), ssymbols.end());
|
|
|
|
set<string> sVNs(VNs.begin(), VNs.end());
|
|
VNs.clear();
|
|
VNs.resize(sVNs.size());
|
|
VNs.assign(sVNs.begin(), sVNs.end());
|
|
|
|
// 符号集 和 非终结符 排序 以保证差集的成功
|
|
sort(symbols.begin(), symbols.end());
|
|
sort(VNs.begin(), VNs.end());
|
|
|
|
// 取差集 得到终极符
|
|
set_difference(symbols.begin(), symbols.end(), VNs.begin(), VNs.end(), back_inserter(VTs));
|
|
|
|
infile.close();
|
|
|
|
}
|
|
|
|
void Grammar::print_grammar()
|
|
{
|
|
cout << "[start]: " << endl << start << endl << endl;
|
|
|
|
cout << "[VTs]:" << endl;
|
|
for (int i = 0; i < VTs.size(); i++) {
|
|
cout << VTs[i] << " ";
|
|
if (((i + 1) % 5) == 0)
|
|
cout << endl;
|
|
}
|
|
cout << endl << endl;
|
|
|
|
cout << "[VNs]:" << endl;
|
|
for (int i = 0; i < VNs.size(); i++) {
|
|
cout << VNs[i] << " ";
|
|
if (((i + 1) % 5) == 0)
|
|
cout << endl;
|
|
}
|
|
cout << endl << endl;
|
|
|
|
cout << "[symbols]:" << endl;
|
|
for (int i = 0; i < symbols.size(); i++) {
|
|
cout << symbols[i] << " ";
|
|
if (((i + 1) % 5) == 0)
|
|
cout << endl;
|
|
}
|
|
cout << endl << endl;
|
|
|
|
cout << "[grammar_rules]: " << grammar_rules.size() << endl;
|
|
for (int i = 0; i < grammar_rules.size(); ++i) {
|
|
cout << grammar_rules[i].first << " -> ";
|
|
for (int j = 0; j < grammar_rules[i].second.size(); ++j) {
|
|
cout << "\"" << grammar_rules[i].second[j] << "\" ";
|
|
}
|
|
cout << endl;
|
|
}
|
|
cout << endl << endl;
|
|
}
|
|
|
|
void Grammar::expand_grammar()
|
|
{
|
|
string new_start = start + "\'";
|
|
pair<string, vector<string>> new_rule = pair<string, vector<string>>(new_start, vector<string>());
|
|
new_rule.second.push_back(start);
|
|
|
|
VNs.push_back(new_start);
|
|
symbols.push_back(new_start);
|
|
grammar_rules.insert(grammar_rules.begin(), new_rule);
|
|
start = new_start;
|
|
|
|
// 符号集排序
|
|
sort(symbols.begin(), symbols.end());
|
|
|
|
}
|
|
|
|
void Grammar::init_grammar_set()
|
|
{
|
|
string symbol;
|
|
|
|
|
|
|
|
// 对符号集中各符号进行推导 是否可以到达 $ 空符号
|
|
for (int i = 0; i < symbols.size(); i++) {
|
|
symbol = symbols[i];
|
|
this->symbol_infer_empty(symbol);
|
|
symbol.clear();
|
|
}
|
|
|
|
// 初始化符号在产生式的 出现 依赖 情况
|
|
init_appears_depend();
|
|
|
|
// 对符号集中各符号进行推导 FIRST 集
|
|
for (int i = 0; i < symbols.size(); i++) {
|
|
symbol = symbols[i];
|
|
this->symbol_infer_first(symbol);
|
|
symbol.clear();
|
|
}
|
|
|
|
// 对符号集中各符号进行推导 FOLLOW 集
|
|
|
|
// 符号队列
|
|
deque<string> queue;
|
|
|
|
// 初次遍历所有符号 生成初始的 FOLLOW 集
|
|
|
|
// 构建 start 的 FOLLOW 集
|
|
follow[start] = this->symbol_infer_follow(start);
|
|
follow[start].push_back("$");
|
|
queue.push_back(start);
|
|
|
|
// 构建除 start 的 FOLLOW 集
|
|
for (int i = 0; i < symbols.size(); i++) {
|
|
symbol = symbols[i];
|
|
if (symbol == start) {
|
|
symbol.clear();
|
|
continue;
|
|
}
|
|
follow[symbol] = this->symbol_infer_follow(symbol);
|
|
queue.push_back(symbol);
|
|
symbol.clear();
|
|
}
|
|
|
|
// 对 符号队列 进行进一步生成
|
|
while (!queue.empty()) {
|
|
// 读取 符号队列 开头
|
|
symbol = queue.front();
|
|
queue.pop_front();
|
|
|
|
// 若 FOLLOW 集发生改变
|
|
vector<string> new_symbol_follow = this->symbol_infer_follow(symbol);
|
|
if (follow[symbol].size() < new_symbol_follow.size()) {
|
|
// 对依赖 该符号 的所有符号添加至 符号队列
|
|
vector<string> dep = depend[symbol];
|
|
for (int i = 0; i < dep.size(); i++) {
|
|
queue.push_back(dep[i]);
|
|
}
|
|
follow[symbol] = new_symbol_follow;
|
|
}
|
|
symbol.clear();
|
|
}
|
|
|
|
|
|
}
|
|
|
|
void Grammar::print_grammar_set()
|
|
{
|
|
// 打印符号在产生式的出现情况
|
|
cout << "[left_appears]:" << endl;
|
|
for (int i = 0; i < symbols.size(); i++) {
|
|
cout << "LEFT( " << symbols[i] << " ) = {";
|
|
for (int j = 0; j < left_appears[symbols[i]].size(); j++) {
|
|
cout << " " << left_appears[symbols[i]][j] << " ";
|
|
}
|
|
cout << "}" << endl;
|
|
}
|
|
cout << endl << endl;
|
|
|
|
cout << "[right_appears]:" << endl;
|
|
for (int i = 0; i < symbols.size(); i++) {
|
|
cout << "RIGHT( " << symbols[i] << " ) = {";
|
|
for (int j = 0; j < right_appears[symbols[i]].size(); j++) {
|
|
cout << " " << right_appears[symbols[i]][j] << " ";
|
|
}
|
|
cout << "}" << endl;
|
|
}
|
|
cout << endl << endl;
|
|
|
|
// 打印 FOLLOW 集的依赖关系
|
|
cout << "[depend]:" << endl;
|
|
for (int i = 0; i < symbols.size(); i++) {
|
|
cout << "DEPEND( " << symbols[i] << " ) = {";
|
|
for (int j = 0; j < depend[symbols[i]].size(); j++) {
|
|
cout << " " << depend[symbols[i]][j] << " ";
|
|
}
|
|
cout << "}" << endl;
|
|
}
|
|
cout << endl << endl;
|
|
|
|
|
|
// 打印是否可以推导出 $ 空符号
|
|
cout << "[infer_empty]:" << endl;
|
|
for (int i = 0; i < symbols.size(); i++) {
|
|
cout << symbols[i]<<" -> " << infer_empty[symbols[i]] << endl;
|
|
}
|
|
cout << endl << endl;
|
|
|
|
// 打印 FIRST 集
|
|
cout << "[FIRST]:" << endl;
|
|
for (int i = 0; i < symbols.size(); i++) {
|
|
cout << "FIRST( " << symbols[i] << " ) = {";
|
|
for (int j = 0; j < first[symbols[i]].size(); j++) {
|
|
cout << " " << first[symbols[i]][j] << " ";
|
|
}
|
|
cout << "}" << endl;
|
|
}
|
|
cout << endl << endl;
|
|
|
|
// 打印 FOLLOW 集
|
|
cout << "[FOLLOW]:" << endl;
|
|
for (int i = 0; i < symbols.size(); i++) {
|
|
cout << "FOLLOW( " << symbols[i] << " ) = {";
|
|
for (int j = 0; j < follow[symbols[i]].size(); j++) {
|
|
cout << " " << follow[symbols[i]][j] << " ";
|
|
}
|
|
cout << "}" << endl;
|
|
}
|
|
cout << endl << endl;
|
|
|
|
}
|
|
|
|
void Grammar::get_token_strings(vector<string>& my_token_strings)
|
|
{
|
|
token_strings.resize(my_token_strings.size());
|
|
token_strings.assign(my_token_strings.begin(), my_token_strings.end());
|
|
|
|
}
|
|
|
|
void Grammar::print_token_strings()
|
|
{
|
|
for (int i = 0; i < token_strings.size(); i++) {
|
|
cout << token_strings[i] << endl;
|
|
}
|
|
}
|
|
|
|
void Grammar::init_appears_depend()
|
|
{
|
|
for (int k = 0; k < symbols.size(); k++) {
|
|
left_appears[symbols[k]] = vector<int>();
|
|
right_appears[symbols[k]] = vector<int>();
|
|
depend[symbols[k]] = vector<string>();
|
|
for (int i = 0; i < grammar_rules.size(); i++) {
|
|
if (grammar_rules[i].first == symbols[k]) {
|
|
// 产生式左侧相等 存入 left
|
|
left_appears[symbols[k]].push_back(i);
|
|
|
|
// 对该产生式构建依赖关系
|
|
for (int m = 0; m < grammar_rules[i].second.size(); m++) {
|
|
int n;
|
|
|
|
// 判断该产生式右侧符号是否可以推导至 $ 空符号
|
|
for (n = m + 1; n < grammar_rules[i].second.size(); n++) {
|
|
if (!infer_empty[grammar_rules[i].second[n]]) {
|
|
break;
|
|
}
|
|
}
|
|
// 若可以推导 按照入栈的方式依次加入
|
|
if (n == grammar_rules[i].second.size()) {
|
|
if (symbols[k] != grammar_rules[i].second[m]) {
|
|
depend[symbols[k]].push_back(grammar_rules[i].second[m]);
|
|
}
|
|
}
|
|
|
|
}
|
|
}
|
|
for (int j = 0; j < grammar_rules[i].second.size(); j++) {
|
|
// 产生式右侧相等 存入 left
|
|
if (grammar_rules[i].second[j] == symbols[k]) {
|
|
right_appears[symbols[k]].push_back(i);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
bool Grammar::symbol_infer_empty(const string& symbol) {
|
|
|
|
// 已经进行推导过
|
|
if (infer_empty.find(symbol) != infer_empty.end()) {
|
|
return infer_empty[symbol];
|
|
}
|
|
|
|
// 当符号为终结符时,当且仅当为 $ 可以推导出 $
|
|
if (find(VTs.begin(), VTs.end(), symbol) != VTs.end()) {
|
|
infer_empty[symbol] = (symbol == "$") ;
|
|
return infer_empty[symbol];
|
|
}
|
|
|
|
// 当符号为非终结符时,通过产生式进行推导
|
|
for (int i = 0; i < grammar_rules.size(); i++) {
|
|
// 当该符号为产生式左侧时
|
|
if (grammar_rules[i].first == symbol) {
|
|
int j;
|
|
vector<string> rule_right = grammar_rules[i].second;
|
|
for (j = 0; j < rule_right.size(); j++) {
|
|
// 递归推导 产生式右侧无法推导至 $ 时
|
|
if (!(this->symbol_infer_empty(rule_right[j]))) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
// 当且仅当产生式右侧可以推导至 $ 时
|
|
if (j == rule_right.size()) {
|
|
infer_empty[symbol] = true;
|
|
return infer_empty[symbol];
|
|
}
|
|
}
|
|
}
|
|
|
|
// 当各产生式都无法推导至 $ 时,则无法推导
|
|
infer_empty[symbol] = false;
|
|
return infer_empty[symbol];
|
|
|
|
}
|
|
|
|
vector<string> Grammar::symbol_infer_first(const string& symbol)
|
|
{
|
|
// 已经推导过 FIRST 集
|
|
if (first.find(symbol) != first.end()) {
|
|
return first[symbol];
|
|
}
|
|
|
|
vector<string> symbol_first;
|
|
|
|
// 当符号为终结符时 FIRST 集为它本身
|
|
if (find(VTs.begin(), VTs.end(), symbol) != VTs.end()) {
|
|
symbol_first.push_back(symbol);
|
|
first[symbol] = symbol_first;
|
|
return first[symbol];
|
|
}
|
|
|
|
// 当符号为非终结符时,通过产生式进行推导
|
|
for (int i = 0; i < grammar_rules.size(); i++) {
|
|
// 当该符号为产生式左侧时
|
|
if (grammar_rules[i].first == symbol) {
|
|
int j;
|
|
for (j = 0; j < grammar_rules[i].second.size(); j++) {
|
|
|
|
// 依次添加所有产生式右侧的
|
|
vector<string> firsts = symbol_infer_first(grammar_rules[i].second[j]);
|
|
for (int k = 0; k < firsts.size(); k++) {
|
|
symbol_first.push_back(firsts[k]);
|
|
}
|
|
|
|
// 若产生式右侧无法推导至 $ 空字符时 中断
|
|
if (!infer_empty[grammar_rules[i].second[j]]) {
|
|
break;
|
|
}
|
|
|
|
}
|
|
|
|
// 当且仅当产生式右侧可以推导至 $ 时 将 $ 加入到 FIRST 集中
|
|
if (j == grammar_rules[i].second.size()) {
|
|
symbol_first.push_back("$");
|
|
}
|
|
}
|
|
}
|
|
|
|
// 对当前 FIRST 集进行 去重 与 排序
|
|
set<string> ssymbol_first(symbol_first.begin(), symbol_first.end());
|
|
symbol_first.clear();
|
|
symbol_first.resize(ssymbol_first.size());
|
|
symbol_first.assign(ssymbol_first.begin(), ssymbol_first.end());
|
|
|
|
sort(symbol_first.begin(), symbol_first.end());
|
|
|
|
// 返回非终结符的 FIRST 集
|
|
first[symbol] = symbol_first;
|
|
return first[symbol];
|
|
}
|
|
|
|
vector<string> Grammar::symbol_infer_follow(const string& symbol)
|
|
{
|
|
vector<string> symbol_follow;
|
|
|
|
// 获取该符号出现在哪些产生式右侧
|
|
vector<int> right_appear = right_appears[symbol];
|
|
for (int i = 0; i < right_appear.size(); i++) {
|
|
int cnt;
|
|
|
|
// 获取该产生式右侧的符号
|
|
vector<string> rule_right = grammar_rules[right_appear[i]].second;
|
|
|
|
// 依次遍历 该产生式右侧 至 该符号 后一位
|
|
for (cnt = 0; cnt < rule_right.size(); cnt++) {
|
|
if (rule_right[cnt] == symbol) {
|
|
break;
|
|
}
|
|
}
|
|
cnt++;
|
|
|
|
// 遍历 剩余产生式右侧
|
|
for (; cnt < rule_right.size(); cnt++) {
|
|
|
|
// 依次获取 后置元素 的 FIRST 集
|
|
vector<string> symbol_first = first[rule_right[cnt]];
|
|
|
|
// 将 该 FIRST 集 循环添加至 symbol_follow 中
|
|
for (int j = 0; j < symbol_first.size(); j++) {
|
|
symbol_follow.push_back(symbol_first[j]);
|
|
}
|
|
|
|
// 若不可达 $ 中断遍历
|
|
if (!infer_empty[rule_right[cnt]]) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
// 当剩余产生式右侧均可到达 $ 时
|
|
if (cnt == rule_right.size()) {
|
|
if (follow.find(grammar_rules[right_appear[i]].first) != follow.end()) {
|
|
|
|
// 将产生式左侧的 FOLLOW 集 加入到 当前符号的 FOLLOW 集中
|
|
vector<string> first_follow = follow[grammar_rules[right_appear[i]].first];
|
|
for (int j = 0; j < first_follow.size(); j++) {
|
|
symbol_follow.push_back(first_follow[j]);
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// 删除不需要的 $ 空字符
|
|
auto it = remove(symbol_follow.begin(), symbol_follow.end(), "$");
|
|
auto it1 = symbol_follow.erase(it, symbol_follow.end());
|
|
|
|
|
|
// 对当前 FOLLOW 集 进行去重排序
|
|
set<string> ssymbol_follow(symbol_follow.begin(), symbol_follow.end());
|
|
symbol_follow.clear();
|
|
symbol_follow.resize(ssymbol_follow.size());
|
|
symbol_follow.assign(ssymbol_follow.begin(), ssymbol_follow.end());
|
|
|
|
sort(symbol_follow.begin(), symbol_follow.end());
|
|
|
|
|
|
|
|
return symbol_follow;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|