#include #include #include #include #include #include #include #include "grammar.h" Grammar::Grammar() { } Grammar::~Grammar() { } void Grammar::read_grammar() { ifstream infile; infile.open(grammar_file, ios::in); if (!infile.is_open()) { cout << "读取文件失败" << endl; return; } string buf; string arrow = "->"; string farrow; bool start_flag = true; string left; string forms; while (!infile.eof()) { // 清理 string buf.clear(); left.clear(); forms.clear(); farrow.clear(); grammar_rules.push_back(pair>()); getline(infile, buf); stringstream ss(buf); // 读取产生式左侧 ss >> left; grammar_rules.back().first = left; symbols.push_back(left); VNs.push_back(left); // 存储 start if (start_flag) { start = left; start_flag = false; } // 读取 -> 符号并保证合法 ss >> farrow; if (farrow != arrow) { cout << "语法读取出错" << endl; } // 读取产生式右侧 while (ss >> forms) { grammar_rules.back().second.push_back(forms); symbols.push_back(forms); forms.clear(); } } // 符号集和非终结符去重 set ssymbols(symbols.begin(), symbols.end()); symbols.clear(); symbols.resize(ssymbols.size()); symbols.assign(ssymbols.begin(), ssymbols.end()); set sVNs(VNs.begin(), VNs.end()); VNs.clear(); VNs.resize(sVNs.size()); VNs.assign(sVNs.begin(), sVNs.end()); // 符号集和非终结符排序以保证差集的成功 sort(symbols.begin(), symbols.end()); sort(VNs.begin(), VNs.end()); // 取差集得到终极符 set_difference(symbols.begin(), symbols.end(), VNs.begin(), VNs.end(), back_inserter(VTs)); infile.close(); } void Grammar::print_grammar() { cout << "[start]: " << endl << start << endl << endl; cout << "[VTs]:" << endl; for (int i = 0; i < VTs.size(); i++) { cout << VTs[i] << " "; if (((i + 1) % 5) == 0) cout << endl; } cout << endl << endl; cout << "[VNs]:" << endl; for (int i = 0; i < VNs.size(); i++) { cout << VNs[i] << " "; if (((i + 1) % 5) == 0) cout << endl; } cout << endl << endl; cout << "[symbols]:" << endl; for (int i = 0; i < symbols.size(); i++) { cout << symbols[i] << " "; if (((i + 1) % 5) == 0) cout << endl; } cout << endl << endl; cout << "[grammar_rules]: " << grammar_rules.size() << endl; for (int i = 0; i < grammar_rules.size(); ++i) { cout << grammar_rules[i].first << " -> "; for (int j = 0; j < grammar_rules[i].second.size(); ++j) { cout << "\"" << grammar_rules[i].second[j] << "\" "; } cout << endl; } cout << endl << endl; } void Grammar::expand_grammar() { string new_start = start + "\'"; pair> new_rule = pair>(new_start, vector()); new_rule.second.push_back(start); VNs.push_back(new_start); symbols.push_back(new_start); grammar_rules.insert(grammar_rules.begin(), new_rule); start = new_start; // 符号集排序 sort(symbols.begin(), symbols.end()); } void Grammar::init_grammar_set() { string symbol; // 对符号集中各符号进行推导是否可以到达 $ 空符号 for (int i = 0; i < symbols.size(); i++) { symbol = symbols[i]; this->symbol_infer_empty(symbol); symbol.clear(); } // 初始化符号在产生式的出现依赖情况 init_appears_depend(); // 对符号集中各符号进行推导 FIRST 集 for (int i = 0; i < symbols.size(); i++) { symbol = symbols[i]; this->symbol_infer_first(symbol); symbol.clear(); } // 对符号集中各符号进行推导 FOLLOW 集 // 符号队列 deque queue; // 初次遍历所有符号生成初始的 FOLLOW 集 // 构建 start 的 FOLLOW 集 follow[start] = this->symbol_infer_follow(start); follow[start].push_back("$"); queue.push_back(start); // 构建除 start 的 FOLLOW 集 for (int i = 0; i < symbols.size(); i++) { symbol = symbols[i]; if (symbol == start) { symbol.clear(); continue; } follow[symbol] = this->symbol_infer_follow(symbol); queue.push_back(symbol); symbol.clear(); } // 对符号队列进行进一步生成 while (!queue.empty()) { // 读取符号队列开头 symbol = queue.front(); queue.pop_front(); // 若 FOLLOW 集发生改变 vector new_symbol_follow = this->symbol_infer_follow(symbol); if (follow[symbol].size() < new_symbol_follow.size()) { // 对依赖该符号的所有符号添加至符号队列 vector dep = depend[symbol]; for (int i = 0; i < dep.size(); i++) { queue.push_back(dep[i]); } follow[symbol] = new_symbol_follow; } symbol.clear(); } } void Grammar::print_grammar_set() { // 打印符号在产生式的出现情况 cout << "[left_appears]:" << endl; for (int i = 0; i < symbols.size(); i++) { cout << "LEFT( " << symbols[i] << " ) = {"; for (int j = 0; j < left_appears[symbols[i]].size(); j++) { cout << " " << left_appears[symbols[i]][j] << " "; } cout << "}" << endl; } cout << endl << endl; cout << "[right_appears]:" << endl; for (int i = 0; i < symbols.size(); i++) { cout << "RIGHT( " << symbols[i] << " ) = {"; for (int j = 0; j < right_appears[symbols[i]].size(); j++) { cout << " " << right_appears[symbols[i]][j] << " "; } cout << "}" << endl; } cout << endl << endl; // 打印 FOLLOW 集的依赖关系 cout << "[depend]:" << endl; for (int i = 0; i < symbols.size(); i++) { cout << "DEPEND( " << symbols[i] << " ) = {"; for (int j = 0; j < depend[symbols[i]].size(); j++) { cout << " " << depend[symbols[i]][j] << " "; } cout << "}" << endl; } cout << endl << endl; // 打印是否可以推导出 $ 空符号 cout << "[infer_empty]:" << endl; for (int i = 0; i < symbols.size(); i++) { cout << symbols[i]<<" -> " << infer_empty[symbols[i]] << endl; } cout << endl << endl; // 打印 FIRST 集 cout << "[FIRST]:" << endl; for (int i = 0; i < symbols.size(); i++) { cout << "FIRST( " << symbols[i] << " ) = {"; for (int j = 0; j < first[symbols[i]].size(); j++) { cout << " " << first[symbols[i]][j] << " "; } cout << "}" << endl; } cout << endl << endl; // 打印 FOLLOW 集 cout << "[FOLLOW]:" << endl; for (int i = 0; i < symbols.size(); i++) { cout << "FOLLOW( " << symbols[i] << " ) = {"; for (int j = 0; j < follow[symbols[i]].size(); j++) { cout << " " << follow[symbols[i]][j] << " "; } cout << "}" << endl; } cout << endl << endl; } void Grammar::get_token_strings(vector& my_token_strings) { token_strings.resize(my_token_strings.size()); token_strings.assign(my_token_strings.begin(), my_token_strings.end()); } void Grammar::print_token_strings() { for (int i = 0; i < token_strings.size(); i++) { cout << token_strings[i] << endl; } } void Grammar::init_appears_depend() { for (int k = 0; k < symbols.size(); k++) { left_appears[symbols[k]] = vector(); right_appears[symbols[k]] = vector(); depend[symbols[k]] = vector(); for (int i = 0; i < grammar_rules.size(); i++) { if (grammar_rules[i].first == symbols[k]) { // 产生式左侧相等存入 left left_appears[symbols[k]].push_back(i); // 对该产生式构建依赖关系 for (int m = 0; m < grammar_rules[i].second.size(); m++) { int n; // 判断该产生式右侧符号是否可以推导至 $ 空符号 for (n = m + 1; n < grammar_rules[i].second.size(); n++) { if (!infer_empty[grammar_rules[i].second[n]]) { break; } } // 若可以推导按照入栈的方式依次加入 if (n == grammar_rules[i].second.size()) { if (symbols[k] != grammar_rules[i].second[m]) { depend[symbols[k]].push_back(grammar_rules[i].second[m]); } } } } for (int j = 0; j < grammar_rules[i].second.size(); j++) { // 产生式右侧相等存入 left if (grammar_rules[i].second[j] == symbols[k]) { right_appears[symbols[k]].push_back(i); break; } } } } } bool Grammar::symbol_infer_empty(const string& symbol) { // 已经进行推导过 if (infer_empty.find(symbol) != infer_empty.end()) { return infer_empty[symbol]; } // 当符号为终结符时，当且仅当为 $ 可以推导出 $ if (find(VTs.begin(), VTs.end(), symbol) != VTs.end()) { infer_empty[symbol] = (symbol == "$") ; return infer_empty[symbol]; } // 当符号为非终结符时，通过产生式进行推导 for (int i = 0; i < grammar_rules.size(); i++) { // 当该符号为产生式左侧时 if (grammar_rules[i].first == symbol) { int j; vector rule_right = grammar_rules[i].second; for (j = 0; j < rule_right.size(); j++) { // 递归推导产生式右侧无法推导至 $ 时 if (!(this->symbol_infer_empty(rule_right[j]))) { break; } } // 当且仅当产生式右侧可以推导至 $ 时 if (j == rule_right.size()) { infer_empty[symbol] = true; return infer_empty[symbol]; } } } // 当各产生式都无法推导至 $ 时，则无法推导 infer_empty[symbol] = false; return infer_empty[symbol]; } vector Grammar::symbol_infer_first(const string& symbol) { // 已经推导过 FIRST 集 if (first.find(symbol) != first.end()) { return first[symbol]; } vector symbol_first; // 当符号为终结符时 FIRST 集为它本身 if (find(VTs.begin(), VTs.end(), symbol) != VTs.end()) { symbol_first.push_back(symbol); first[symbol] = symbol_first; return first[symbol]; } // 当符号为非终结符时，通过产生式进行推导 for (int i = 0; i < grammar_rules.size(); i++) { // 当该符号为产生式左侧时 if (grammar_rules[i].first == symbol) { int j; for (j = 0; j < grammar_rules[i].second.size(); j++) { // 依次添加所有产生式右侧的 vector firsts = symbol_infer_first(grammar_rules[i].second[j]); for (int k = 0; k < firsts.size(); k++) { symbol_first.push_back(firsts[k]); } // 若产生式右侧无法推导至 $ 空字符时中断 if (!infer_empty[grammar_rules[i].second[j]]) { break; } } // 当且仅当产生式右侧可以推导至 $ 时将 $ 加入到 FIRST 集中 if (j == grammar_rules[i].second.size()) { symbol_first.push_back("$"); } } } // 对当前 FIRST 集进行去重与排序 set ssymbol_first(symbol_first.begin(), symbol_first.end()); symbol_first.clear(); symbol_first.resize(ssymbol_first.size()); symbol_first.assign(ssymbol_first.begin(), ssymbol_first.end()); sort(symbol_first.begin(), symbol_first.end()); // 返回非终结符的 FIRST 集 first[symbol] = symbol_first; return first[symbol]; } vector Grammar::symbol_infer_follow(const string& symbol) { vector symbol_follow; // 获取该符号出现在哪些产生式右侧 vector right_appear = right_appears[symbol]; for (int i = 0; i < right_appear.size(); i++) { int cnt; // 获取该产生式右侧的符号 vector rule_right = grammar_rules[right_appear[i]].second; // 依次遍历该产生式右侧至该符号后一位 for (cnt = 0; cnt < rule_right.size(); cnt++) { if (rule_right[cnt] == symbol) { break; } } cnt++; // 遍历剩余产生式右侧 for (; cnt < rule_right.size(); cnt++) { // 依次获取后置元素的 FIRST 集 vector symbol_first = first[rule_right[cnt]]; // 将该 FIRST 集循环添加至 symbol_follow 中 for (int j = 0; j < symbol_first.size(); j++) { symbol_follow.push_back(symbol_first[j]); } // 若不可达 $ 中断遍历 if (!infer_empty[rule_right[cnt]]) { break; } } // 当剩余产生式右侧均可到达 $ 时 if (cnt == rule_right.size()) { if (follow.find(grammar_rules[right_appear[i]].first) != follow.end()) { // 将产生式左侧的 FOLLOW 集加入到当前符号的 FOLLOW 集中 vector first_follow = follow[grammar_rules[right_appear[i]].first]; for (int j = 0; j < first_follow.size(); j++) { symbol_follow.push_back(first_follow[j]); } } } } // 删除不需要的 $ 空字符 auto it = remove(symbol_follow.begin(), symbol_follow.end(), "$"); auto it1 = symbol_follow.erase(it, symbol_follow.end()); // 对当前 FOLLOW 集进行去重排序 set ssymbol_follow(symbol_follow.begin(), symbol_follow.end()); symbol_follow.clear(); symbol_follow.resize(ssymbol_follow.size()); symbol_follow.assign(ssymbol_follow.begin(), ssymbol_follow.end()); sort(symbol_follow.begin(), symbol_follow.end()); return symbol_follow; }