From 0600b113d6baf76f89c20ac40ecc3cdd60b14134 Mon Sep 17 00:00:00 2001 From: LiuYuanchi Date: Sat, 11 May 2024 21:41:45 +0800 Subject: [PATCH] =?UTF-8?q?[copy]=E6=AD=A3=E5=9C=A8=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E8=AF=AD=E6=B3=95=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 8 +- LL1/CMakeLists.txt | 15 +- LL1/archive/CMakeLists.txt | 14 + LL1/{ => archive}/LL1.cpp | 0 LL1/{ => archive}/LL1.h | 0 LL1/{ => archive}/grammar.cpp | 0 LL1/{ => archive}/grammar.h | 0 LL1/include/LL1.h | 32 +++ LL1/include/grammar.h | 55 ++++ LL1/src/LL1.cpp | 351 +++++++++++++++++++++++ LL1/src/grammar.cpp | 520 ++++++++++++++++++++++++++++++++++ LL1/test/test_main.cpp | 73 +++++ main/main.cpp | 122 ++++---- 13 files changed, 1127 insertions(+), 63 deletions(-) create mode 100644 LL1/archive/CMakeLists.txt rename LL1/{ => archive}/LL1.cpp (100%) rename LL1/{ => archive}/LL1.h (100%) rename LL1/{ => archive}/grammar.cpp (100%) rename LL1/{ => archive}/grammar.h (100%) create mode 100644 LL1/include/LL1.h create mode 100644 LL1/include/grammar.h create mode 100644 LL1/src/LL1.cpp create mode 100644 LL1/src/grammar.cpp create mode 100644 LL1/test/test_main.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 1bedd0b..6edaa56 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10) project(compiler-bin) # 收集所有的cpp源文件 -file(GLOB SOURCES_LL "LL1/*.cpp") +file(GLOB SOURCES_LL1 "LL1/src/*.cpp") file(GLOB SOURCES_NFA "nfa/src/*.cpp") file(GLOB SOURCES_MAIN "main/*.cpp") @@ -10,15 +10,15 @@ file(GLOB SOURCES_MAIN "main/*.cpp") # set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) # 创建静态链接库 -add_library(LL STATIC ${SOURCES_LL}) +add_library(LL1 STATIC ${SOURCES_LL1}) add_library(nfa STATIC ${SOURCES_NFA}) # 添加头文件目录 -target_include_directories(LL PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/LL1) +target_include_directories(LL1 PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/LL1/include) target_include_directories(nfa PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/nfa/include) # 添加可执行文件 add_executable(main ${SOURCES_MAIN}) # 链接静态库 -target_link_libraries(main PRIVATE LL nfa) +target_link_libraries(main PRIVATE LL1 nfa) diff --git a/LL1/CMakeLists.txt b/LL1/CMakeLists.txt index 22471ff..633e877 100644 --- a/LL1/CMakeLists.txt +++ b/LL1/CMakeLists.txt @@ -1,14 +1,15 @@ cmake_minimum_required(VERSION 3.10) -project(LL) +project(LL1) # 收集所有的cpp源文件 -file(GLOB SOURCES "*.cpp") - -# 设置输出目录为 bin -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) +file(GLOB SOURCES "src/*.cpp") # 创建静态链接库 -add_library(LL STATIC ${SOURCES}) +add_library(LL1 STATIC ${SOURCES}) + +# Test:添加可执行文件并链接目标库 +add_executable(test_LL1 test/test_main.cpp) +target_link_libraries(test_LL1 LL1) # 添加头文件目录 -target_include_directories(LL PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) \ No newline at end of file +target_include_directories(LL PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) \ No newline at end of file diff --git a/LL1/archive/CMakeLists.txt b/LL1/archive/CMakeLists.txt new file mode 100644 index 0000000..22471ff --- /dev/null +++ b/LL1/archive/CMakeLists.txt @@ -0,0 +1,14 @@ +cmake_minimum_required(VERSION 3.10) +project(LL) + +# 收集所有的cpp源文件 +file(GLOB SOURCES "*.cpp") + +# 设置输出目录为 bin +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) + +# 创建静态链接库 +add_library(LL STATIC ${SOURCES}) + +# 添加头文件目录 +target_include_directories(LL PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) \ No newline at end of file diff --git a/LL1/LL1.cpp b/LL1/archive/LL1.cpp similarity index 100% rename from LL1/LL1.cpp rename to LL1/archive/LL1.cpp diff --git a/LL1/LL1.h b/LL1/archive/LL1.h similarity index 100% rename from LL1/LL1.h rename to LL1/archive/LL1.h diff --git a/LL1/grammar.cpp b/LL1/archive/grammar.cpp similarity index 100% rename from LL1/grammar.cpp rename to LL1/archive/grammar.cpp diff --git a/LL1/grammar.h b/LL1/archive/grammar.h similarity index 100% rename from LL1/grammar.h rename to LL1/archive/grammar.h diff --git a/LL1/include/LL1.h b/LL1/include/LL1.h new file mode 100644 index 0000000..5eaba92 --- /dev/null +++ b/LL1/include/LL1.h @@ -0,0 +1,32 @@ +// LL1 语法分析器 +#ifndef LL1_H +#define LL1_H + +#include "grammar.h" + +using namespace std; + +class LL1:public Grammar{ +public: + LL1(); + ~LL1(); + + bool IsLL1(); // 判断该文法是否为 LL1 文法 + void build_LL1_predict(); // 构建 LL1 的预测分析表 + void print_LL1_predict(); // 打印 LL1 的预测分析表 + void build_LL1_grammar(); // 构建规约序列 + void print_LL1_grammar_log(); + void fileout_LL1_grammar_log(string file_name); + + +private: + unordered_map> select; // 计算符号的 SELECT 集合 + unordered_map> LL1_predict; // LL1 的预测分析表 + vector LL1_grammar_log; // 规约序列 + + int insert_rule(pair>& new_rule); // 增加新的规则 +}; + + + +#endif // !LL1_H diff --git a/LL1/include/grammar.h b/LL1/include/grammar.h new file mode 100644 index 0000000..95e91b1 --- /dev/null +++ b/LL1/include/grammar.h @@ -0,0 +1,55 @@ +// 语法生成器 +#ifndef GRAMMAR_H +#define GRAMMAR_H + + +#include +#include +#include +#include +#include + +using namespace std; + +class Grammar +{ +public: + const string grammar_file = "./tests/grammar.txt"; + + Grammar(); + ~Grammar(); + void read_grammar(); // 读取语法规则 + void print_grammar(); // 打印语法规则 + void expand_grammar(); // 拓展语法规则 + void init_grammar_set(); // 初始化语法相关集合 + void print_grammar_set(); // 打印语法相关集合 + void get_token_strings(vector &); // 获取 token_stirngs + void print_token_strings(); + +protected: + vector>> grammar_rules; // 产生式规则 + string start; // 起始字符 + vector symbols; // 符号 + vector VTs; // 终结符 + vector VNs; // 非终结符 + unordered_map> first; // FIRST 集 + unordered_map> follow; // FOLLOW 集 + unordered_map infer_empty; // 是否可以推导出 $ 空字符 + vector token_strings; + + +private: + unordered_map> left_appears; // 该符号出现在哪些产生式左侧 + unordered_map> right_appears; // 该符号出现在哪些产生式右侧 + unordered_map> depend; // FOLLOW 集的依赖关系 + + + void init_appears_depend(); // 获取 appear depend 集合 + bool symbol_infer_empty(const string& symbol); // 判断符号是否可以推导出 $ 空字符 + vector symbol_infer_first(const string& symbol);// 推导符号的 FIRST 集 + vector symbol_infer_follow(const string& symbol);// 推导符号的 FOLLOW 集 + +}; + + +#endif // !GRAMMAR_H \ No newline at end of file diff --git a/LL1/src/LL1.cpp b/LL1/src/LL1.cpp new file mode 100644 index 0000000..f057f29 --- /dev/null +++ b/LL1/src/LL1.cpp @@ -0,0 +1,351 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "LL1.h" + + +LL1::LL1() +{ + read_grammar(); + init_grammar_set(); +} + +LL1::~LL1() +{ +} + +bool LL1::IsLL1() +{ + string symbol; + vector right_first = vector(); + vector left_follow; + for (int i = 0; i < grammar_rules.size(); i++) { + symbol.clear(); + right_first.clear(); + left_follow.clear(); + + symbol = grammar_rules[i].first; + + + + // 计算 产生式左侧 FOLLOW 集 + left_follow = follow[symbol]; + + // 计算 产生式右侧 FIRST 集 + + // 对 X1 的 非 $ 符号 加入 + for (int j = 0; j < first[grammar_rules[i].second[0]].size(); j++) { + if (first[grammar_rules[i].second[0]][j] == "$") { + continue; + } + right_first.push_back(first[grammar_rules[i].second[0]][j]); + } + + int cnt; + for (cnt = 1; cnt < grammar_rules[i].second.size(); cnt++) { + + // 当且仅当 有 $ 符号时 继续加入 + if (!infer_empty[grammar_rules[i].second[cnt - 1]]) { + break; + } + for (int j = 0; j < first[grammar_rules[i].second[cnt]].size(); j++) { + if (first[grammar_rules[i].second[cnt]][j] == "$") { + continue; + } + right_first.push_back(first[grammar_rules[i].second[cnt]][j]); + } + } + + // 若都能推导至 $ 符号时 加入 + if (cnt == grammar_rules[i].second.size() && infer_empty[grammar_rules[i].second[0]]) { + right_first.push_back("$"); + } + + // 对产生式右侧 FIRST 集 进行 去重 + set sright_first(right_first.begin(), right_first.end()); + right_first.clear(); + right_first.resize(sright_first.size()); + right_first.assign(sright_first.begin(), sright_first.end()); + + + + vector symbol_select; + + // 若产生式右侧 FIRST 集为 {$} 时 + if (right_first.size() == 1 && right_first[0] == "$") { + + // SELECT 集为 产生式右侧 FOLLOW 集 与 {$} 的交集 + symbol_select = left_follow; + if (find(left_follow.begin(), left_follow.end(), "$") == left_follow.end()) { + symbol_select.push_back("$"); + } + } + else + { + // SELECT 集为 产生式左侧 FIRST 集 + symbol_select = right_first; + } + + // 对 SELECT 集 进行排序 方便接下来进行集合运算 + sort(symbol_select.begin(), symbol_select.end()); + + vector new_select = vector(); + + // 判断 SELECT 表中有无现有数据 + if (select.find(symbol) == select.end()) { + + select[symbol] = symbol_select; + } + else { + + // 判断两个相同产生式左侧 SELECT 集 是否相交 + set_intersection(symbol_select.begin(), symbol_select.end(), select[symbol].begin(), select[symbol].end(), back_inserter(new_select)); + + if (new_select.size() == 0) { + // 不相交,继续运算,存入两者并集 + set_union(symbol_select.begin(), symbol_select.end(), select[symbol].begin(), select[symbol].end(), back_inserter(new_select)); + } + else + { + // 非 LL(1) 文法,退出 + cout << "This grammar is not LL (1) grammar" << endl; + return false; + } + + } + + } + + // cout << "该文法为 LL(1) 文法!" << endl; + return true; +} + +void LL1::build_LL1_predict() +{ + // 对每一个 非终结符 进行初始化行 + for (int i = 0; i < VNs.size(); i++) { + if (LL1_predict.find(VNs[i]) == LL1_predict.end()) { + LL1_predict[VNs[i]] = unordered_map(); + } + } + + string symbol; + vector right_first = vector(); + vector left_follow; + + // 遍历 产生式 构建 预测分析表 + for (int i = 0; i < grammar_rules.size(); i++) { + symbol.clear(); + right_first.clear(); + left_follow.clear(); + + symbol = grammar_rules[i].first; + + + // 计算 产生式左侧 FOLLOW 集 + left_follow = follow[symbol]; + + unordered_map &symbol_predict = LL1_predict[symbol]; + + + // 计算 产生式右侧 FIRST 集 + + // 对 X1 的 非 $ 符号 加入 + for (int j = 0; j < first[grammar_rules[i].second[0]].size(); j++) { + if (first[grammar_rules[i].second[0]][j] == "$") { + continue; + } + right_first.push_back(first[grammar_rules[i].second[0]][j]); + } + + int cnt; + for (cnt = 1; cnt < grammar_rules[i].second.size(); cnt++) { + + // 当且仅当 有 $ 符号时 继续加入 + if (!infer_empty[grammar_rules[i].second[cnt - 1]]) { + break; + } + for (int j = 0; j < first[grammar_rules[i].second[cnt]].size(); j++) { + if (first[grammar_rules[i].second[cnt]][j] == "$") { + continue; + } + right_first.push_back(first[grammar_rules[i].second[cnt]][j]); + } + } + + // 若都能推导至 $ 符号时 加入 + if (cnt == grammar_rules[i].second.size() && infer_empty[grammar_rules[i].second[0]]) { + right_first.push_back("$"); + } + + // 对产生式右侧 FIRST 集 进行 去重 + set sright_first(right_first.begin(), right_first.end()); + right_first.clear(); + right_first.resize(sright_first.size()); + right_first.assign(sright_first.begin(), sright_first.end()); + + // 循环遍历 FIRST 集进行初始化 + for (int j = 0; j < right_first.size(); j++) { + if (right_first[j] == "$") { + pair> new_rule (grammar_rules[i].first, vector()); + new_rule.second.push_back("$"); + int rule_id = insert_rule(new_rule); + + for (int k = 0; k < left_follow.size(); k++) { + symbol_predict[left_follow[k]] = rule_id; + } + } + symbol_predict[right_first[j]] = i; + + } + + } + + +} + +void LL1::print_LL1_predict() +{ + cout << "[LL1_predict]:" << endl; + for (auto iter = LL1_predict.begin(); iter != LL1_predict.end(); ++iter) { + cout << (*iter).first << " "; + for (auto j = (*iter).second.begin(); j != (*iter).second.end(); ++j) { + cout << (*j).first << "," << (*j).second << " "; + } + cout << endl; + + } + cout << endl << endl; + +} + +void LL1::build_LL1_grammar() +{ + // 符号栈 + stack stack; + int token_cnt = 0; + + // 起始符 入栈 + stack.push(start); + + while (!stack.empty()) + { + LL1_grammar_log.push_back(string()); + + // 栈顶符号 + // 判断栈顶是否为 空符号 + if (stack.top() == "$") { + // 栈空 以 EOF 表示 + LL1_grammar_log.back() += "EOF"; + } + else + { + LL1_grammar_log.back() += stack.top(); + } + + // 添加 # 分割 + LL1_grammar_log.back() += "#"; + + // 面临输入的符号 + string this_token; + if (token_cnt == token_strings.size()) { + // 栈空 以 EOF 表示 + this_token = "$"; + LL1_grammar_log.back() += "EOF"; + } + else + { + this_token = token_strings[token_cnt]; + LL1_grammar_log.back() += token_strings[token_cnt]; + } + + // 对栈顶元素与即将输入的符号进行比较 + if (stack.top() == this_token) { + // 栈顶出栈 token 指向下一位 + token_cnt++; + stack.pop(); + + if (this_token == "$") { + // 分析成功 结束分析 + LL1_grammar_log.back() += "\taccept"; + } + else + { + // 跳过 + LL1_grammar_log.back() += "\tmove"; + } + } + // 若为终结符 + else if (find(VTs.begin(), VTs.end(), stack.top()) != VTs.end()) { + if (stack.top() == "$") { + stack.pop(); + LL1_grammar_log.pop_back(); + } + else { + LL1_grammar_log.back() += "\terror"; + return; + } + } + else + { + auto tab = LL1_predict[stack.top()]; + + if (tab.find(this_token) == tab.end()) { + LL1_grammar_log.back() += "\terror"; + return; + } + else + { + auto this_rule = grammar_rules[tab[this_token]]; + stack.pop(); + for (int i = this_rule.second.size() - 1; i >= 0; i--) { + stack.push(this_rule.second[i]); + } + LL1_grammar_log.back() += "\treduction"; + } + } + } + +} + +void LL1::print_LL1_grammar_log() +{ + for (int i = 0; i < LL1_grammar_log.size(); ++i) { + cout << LL1_grammar_log[i] << endl; + } +} + +void LL1::fileout_LL1_grammar_log(string file_name) +{ + //打开结果输出文件 + fstream outfile(file_name); + + if (!outfile.is_open()) { + cout << "[FILEOUT] fail to open file" << endl; + } + + for (int i = 0; i < LL1_grammar_log.size(); ++i) { + outfile << LL1_grammar_log[i] << endl; + } + outfile.close(); +} + +int LL1::insert_rule(pair>& new_rule) +{ + int cnt; + for (cnt = 0; cnt < grammar_rules.size(); cnt++) { + // 当 产生式规则 中存在这条产生式时 返回序号 + if (grammar_rules[cnt].first == new_rule.first && grammar_rules[cnt].second == new_rule.second) { + return cnt; + } + } + // 若不存在 返回序号的同时加入 + grammar_rules.push_back(new_rule); + return cnt; +} + diff --git a/LL1/src/grammar.cpp b/LL1/src/grammar.cpp new file mode 100644 index 0000000..2aefe3e --- /dev/null +++ b/LL1/src/grammar.cpp @@ -0,0 +1,520 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "grammar.h" + + +Grammar::Grammar() +{ + +} + +Grammar::~Grammar() +{ + +} + + + +void Grammar::read_grammar() { + fstream infile; + infile.open(grammar_file,ios::in); + if (!infile.is_open()) + { + cout << "[READ_GRAMMAR] fail to open file: "<< grammar_file << endl; + return; + } + + string buf; + string arrow = "->"; + string farrow; + bool start_flag = true; + string left; + string forms; + + while (!infile.eof()) { + // 清理 string + buf.clear(); + left.clear(); + forms.clear(); + farrow.clear(); + + + grammar_rules.push_back(pair>()); + getline(infile, buf); + + stringstream ss(buf); + + // 读取产生式左侧 + ss >> left; + grammar_rules.back().first = left; + symbols.push_back(left); + VNs.push_back(left); + + // 存储 start + if (start_flag) { + start = left; + start_flag = false; + } + + // 读取 -> 符号 并保证合法 + ss >> farrow; + if (farrow != arrow) { + cout << "Grammar reading error" << endl; + } + + // 读取产生式右侧 + while (ss >> forms) + { + grammar_rules.back().second.push_back(forms); + symbols.push_back(forms); + forms.clear(); + } + } + + // 符号集 和 非终结符 去重 + set ssymbols(symbols.begin(), symbols.end()); + symbols.clear(); + symbols.resize(ssymbols.size()); + symbols.assign(ssymbols.begin(), ssymbols.end()); + + set sVNs(VNs.begin(), VNs.end()); + VNs.clear(); + VNs.resize(sVNs.size()); + VNs.assign(sVNs.begin(), sVNs.end()); + + // 符号集 和 非终结符 排序 以保证差集的成功 + sort(symbols.begin(), symbols.end()); + sort(VNs.begin(), VNs.end()); + + // 取差集 得到终极符 + set_difference(symbols.begin(), symbols.end(), VNs.begin(), VNs.end(), back_inserter(VTs)); + + infile.close(); + +} + +void Grammar::print_grammar() +{ + cout << "[start]: " << endl << start << endl << endl; + + cout << "[VTs]:" << endl; + for (int i = 0; i < VTs.size(); i++) { + cout << VTs[i] << " "; + if (((i + 1) % 5) == 0) + cout << endl; + } + cout << endl << endl; + + cout << "[VNs]:" << endl; + for (int i = 0; i < VNs.size(); i++) { + cout << VNs[i] << " "; + if (((i + 1) % 5) == 0) + cout << endl; + } + cout << endl << endl; + + cout << "[symbols]:" << endl; + for (int i = 0; i < symbols.size(); i++) { + cout << symbols[i] << " "; + if (((i + 1) % 5) == 0) + cout << endl; + } + cout << endl << endl; + + cout << "[grammar_rules]: " << grammar_rules.size() << endl; + for (int i = 0; i < grammar_rules.size(); ++i) { + cout << grammar_rules[i].first << " -> "; + for (int j = 0; j < grammar_rules[i].second.size(); ++j) { + cout << "\"" << grammar_rules[i].second[j] << "\" "; + } + cout << endl; + } + cout << endl << endl; +} + +void Grammar::expand_grammar() +{ + string new_start = start + "\'"; + pair> new_rule = pair>(new_start, vector()); + new_rule.second.push_back(start); + + VNs.push_back(new_start); + symbols.push_back(new_start); + grammar_rules.insert(grammar_rules.begin(), new_rule); + start = new_start; + + // 符号集排序 + sort(symbols.begin(), symbols.end()); + +} + +void Grammar::init_grammar_set() +{ + string symbol; + + + + // 对符号集中各符号进行推导 是否可以到达 $ 空符号 + for (int i = 0; i < symbols.size(); i++) { + symbol = symbols[i]; + this->symbol_infer_empty(symbol); + symbol.clear(); + } + + // 初始化符号在产生式的 出现 依赖 情况 + init_appears_depend(); + + // 对符号集中各符号进行推导 FIRST 集 + for (int i = 0; i < symbols.size(); i++) { + symbol = symbols[i]; + this->symbol_infer_first(symbol); + symbol.clear(); + } + + // 对符号集中各符号进行推导 FOLLOW 集 + + // 符号队列 + deque queue; + + // 初次遍历所有符号 生成初始的 FOLLOW 集 + + // 构建 start 的 FOLLOW 集 + follow[start] = this->symbol_infer_follow(start); + follow[start].push_back("$"); + queue.push_back(start); + + // 构建除 start 的 FOLLOW 集 + for (int i = 0; i < symbols.size(); i++) { + symbol = symbols[i]; + if (symbol == start) { + symbol.clear(); + continue; + } + follow[symbol] = this->symbol_infer_follow(symbol); + queue.push_back(symbol); + symbol.clear(); + } + + // 对 符号队列 进行进一步生成 + while (!queue.empty()) { + // 读取 符号队列 开头 + symbol = queue.front(); + queue.pop_front(); + + // 若 FOLLOW 集发生改变 + vector new_symbol_follow = this->symbol_infer_follow(symbol); + if (follow[symbol].size() < new_symbol_follow.size()) { + // 对依赖 该符号 的所有符号添加至 符号队列 + vector dep = depend[symbol]; + for (int i = 0; i < dep.size(); i++) { + queue.push_back(dep[i]); + } + follow[symbol] = new_symbol_follow; + } + symbol.clear(); + } + + +} + +void Grammar::print_grammar_set() +{ + // 打印符号在产生式的出现情况 + cout << "[left_appears]:" << endl; + for (int i = 0; i < symbols.size(); i++) { + cout << "LEFT( " << symbols[i] << " ) = {"; + for (int j = 0; j < left_appears[symbols[i]].size(); j++) { + cout << " " << left_appears[symbols[i]][j] << " "; + } + cout << "}" << endl; + } + cout << endl << endl; + + cout << "[right_appears]:" << endl; + for (int i = 0; i < symbols.size(); i++) { + cout << "RIGHT( " << symbols[i] << " ) = {"; + for (int j = 0; j < right_appears[symbols[i]].size(); j++) { + cout << " " << right_appears[symbols[i]][j] << " "; + } + cout << "}" << endl; + } + cout << endl << endl; + + // 打印 FOLLOW 集的依赖关系 + cout << "[depend]:" << endl; + for (int i = 0; i < symbols.size(); i++) { + cout << "DEPEND( " << symbols[i] << " ) = {"; + for (int j = 0; j < depend[symbols[i]].size(); j++) { + cout << " " << depend[symbols[i]][j] << " "; + } + cout << "}" << endl; + } + cout << endl << endl; + + + // 打印是否可以推导出 $ 空符号 + cout << "[infer_empty]:" << endl; + for (int i = 0; i < symbols.size(); i++) { + cout << symbols[i]<<" -> " << infer_empty[symbols[i]] << endl; + } + cout << endl << endl; + + // 打印 FIRST 集 + cout << "[FIRST]:" << endl; + for (int i = 0; i < symbols.size(); i++) { + cout << "FIRST( " << symbols[i] << " ) = {"; + for (int j = 0; j < first[symbols[i]].size(); j++) { + cout << " " << first[symbols[i]][j] << " "; + } + cout << "}" << endl; + } + cout << endl << endl; + + // 打印 FOLLOW 集 + cout << "[FOLLOW]:" << endl; + for (int i = 0; i < symbols.size(); i++) { + cout << "FOLLOW( " << symbols[i] << " ) = {"; + for (int j = 0; j < follow[symbols[i]].size(); j++) { + cout << " " << follow[symbols[i]][j] << " "; + } + cout << "}" << endl; + } + cout << endl << endl; + +} + +void Grammar::get_token_strings(vector& my_token_strings) +{ + token_strings.resize(my_token_strings.size()); + token_strings.assign(my_token_strings.begin(), my_token_strings.end()); + +} + +void Grammar::print_token_strings() +{ + for (int i = 0; i < token_strings.size(); i++) { + cout << token_strings[i] << endl; + } +} + +void Grammar::init_appears_depend() +{ + for (int k = 0; k < symbols.size(); k++) { + left_appears[symbols[k]] = vector(); + right_appears[symbols[k]] = vector(); + depend[symbols[k]] = vector(); + for (int i = 0; i < grammar_rules.size(); i++) { + if (grammar_rules[i].first == symbols[k]) { + // 产生式左侧相等 存入 left + left_appears[symbols[k]].push_back(i); + + // 对该产生式构建依赖关系 + for (int m = 0; m < grammar_rules[i].second.size(); m++) { + int n; + + // 判断该产生式右侧符号是否可以推导至 $ 空符号 + for (n = m + 1; n < grammar_rules[i].second.size(); n++) { + if (!infer_empty[grammar_rules[i].second[n]]) { + break; + } + } + // 若可以推导 按照入栈的方式依次加入 + if (n == grammar_rules[i].second.size()) { + if (symbols[k] != grammar_rules[i].second[m]) { + depend[symbols[k]].push_back(grammar_rules[i].second[m]); + } + } + + } + } + for (int j = 0; j < grammar_rules[i].second.size(); j++) { + // 产生式右侧相等 存入 left + if (grammar_rules[i].second[j] == symbols[k]) { + right_appears[symbols[k]].push_back(i); + break; + } + } + } + } + +} + +bool Grammar::symbol_infer_empty(const string& symbol) { + + // 已经进行推导过 + if (infer_empty.find(symbol) != infer_empty.end()) { + return infer_empty[symbol]; + } + + // 当符号为终结符时,当且仅当为 $ 可以推导出 $ + if (find(VTs.begin(), VTs.end(), symbol) != VTs.end()) { + infer_empty[symbol] = (symbol == "$") ; + return infer_empty[symbol]; + } + + // 当符号为非终结符时,通过产生式进行推导 + for (int i = 0; i < grammar_rules.size(); i++) { + // 当该符号为产生式左侧时 + if (grammar_rules[i].first == symbol) { + int j; + vector rule_right = grammar_rules[i].second; + for (j = 0; j < rule_right.size(); j++) { + // 递归推导 产生式右侧无法推导至 $ 时 + if (!(this->symbol_infer_empty(rule_right[j]))) { + break; + } + } + + // 当且仅当产生式右侧可以推导至 $ 时 + if (j == rule_right.size()) { + infer_empty[symbol] = true; + return infer_empty[symbol]; + } + } + } + + // 当各产生式都无法推导至 $ 时,则无法推导 + infer_empty[symbol] = false; + return infer_empty[symbol]; + +} + +vector Grammar::symbol_infer_first(const string& symbol) +{ + // 已经推导过 FIRST 集 + if (first.find(symbol) != first.end()) { + return first[symbol]; + } + + vector symbol_first; + + // 当符号为终结符时 FIRST 集为它本身 + if (find(VTs.begin(), VTs.end(), symbol) != VTs.end()) { + symbol_first.push_back(symbol); + first[symbol] = symbol_first; + return first[symbol]; + } + + // 当符号为非终结符时,通过产生式进行推导 + for (int i = 0; i < grammar_rules.size(); i++) { + // 当该符号为产生式左侧时 + if (grammar_rules[i].first == symbol) { + int j; + for (j = 0; j < grammar_rules[i].second.size(); j++) { + + // 依次添加所有产生式右侧的 + vector firsts = symbol_infer_first(grammar_rules[i].second[j]); + for (int k = 0; k < firsts.size(); k++) { + symbol_first.push_back(firsts[k]); + } + + // 若产生式右侧无法推导至 $ 空字符时 中断 + if (!infer_empty[grammar_rules[i].second[j]]) { + break; + } + + } + + // 当且仅当产生式右侧可以推导至 $ 时 将 $ 加入到 FIRST 集中 + if (j == grammar_rules[i].second.size()) { + symbol_first.push_back("$"); + } + } + } + + // 对当前 FIRST 集进行 去重 与 排序 + set ssymbol_first(symbol_first.begin(), symbol_first.end()); + symbol_first.clear(); + symbol_first.resize(ssymbol_first.size()); + symbol_first.assign(ssymbol_first.begin(), ssymbol_first.end()); + + sort(symbol_first.begin(), symbol_first.end()); + + // 返回非终结符的 FIRST 集 + first[symbol] = symbol_first; + return first[symbol]; +} + +vector Grammar::symbol_infer_follow(const string& symbol) +{ + vector symbol_follow; + + // 获取该符号出现在哪些产生式右侧 + vector right_appear = right_appears[symbol]; + for (int i = 0; i < right_appear.size(); i++) { + int cnt; + + // 获取该产生式右侧的符号 + vector rule_right = grammar_rules[right_appear[i]].second; + + // 依次遍历 该产生式右侧 至 该符号 后一位 + for (cnt = 0; cnt < rule_right.size(); cnt++) { + if (rule_right[cnt] == symbol) { + break; + } + } + cnt++; + + // 遍历 剩余产生式右侧 + for (; cnt < rule_right.size(); cnt++) { + + // 依次获取 后置元素 的 FIRST 集 + vector symbol_first = first[rule_right[cnt]]; + + // 将 该 FIRST 集 循环添加至 symbol_follow 中 + for (int j = 0; j < symbol_first.size(); j++) { + symbol_follow.push_back(symbol_first[j]); + } + + // 若不可达 $ 中断遍历 + if (!infer_empty[rule_right[cnt]]) { + break; + } + } + + // 当剩余产生式右侧均可到达 $ 时 + if (cnt == rule_right.size()) { + if (follow.find(grammar_rules[right_appear[i]].first) != follow.end()) { + + // 将产生式左侧的 FOLLOW 集 加入到 当前符号的 FOLLOW 集中 + vector first_follow = follow[grammar_rules[right_appear[i]].first]; + for (int j = 0; j < first_follow.size(); j++) { + symbol_follow.push_back(first_follow[j]); + } + + } + + } + + } + + // 删除不需要的 $ 空字符 + auto it = remove(symbol_follow.begin(), symbol_follow.end(), "$"); + auto it1 = symbol_follow.erase(it, symbol_follow.end()); + + + // 对当前 FOLLOW 集 进行去重排序 + set ssymbol_follow(symbol_follow.begin(), symbol_follow.end()); + symbol_follow.clear(); + symbol_follow.resize(ssymbol_follow.size()); + symbol_follow.assign(ssymbol_follow.begin(), ssymbol_follow.end()); + + sort(symbol_follow.begin(), symbol_follow.end()); + + + + return symbol_follow; +} + + + + + + + diff --git a/LL1/test/test_main.cpp b/LL1/test/test_main.cpp new file mode 100644 index 0000000..66e4445 --- /dev/null +++ b/LL1/test/test_main.cpp @@ -0,0 +1,73 @@ +#include +#include +#include + +#include "grammar.h" +#include "LL1.h" +using namespace std; + +string inputs[] = { + "tests/00.txt", + "tests/20.txt" +}; + +string outputs_lexical[] = { +"tests/00_my_lexical.txt", +"tests/20_my_lexical.txt" +}; + +string outputs_grammar[] = { +"tests/00_my_grammar.txt", +"tests/20_my_grammar.txt", +}; + +// 读取文件 +string readfile(const string& filename) +{ + ifstream file(filename); + string content((istreambuf_iterator(file)),istreambuf_iterator()); + return content; +} + +int main(int argc, char** argv) { + + try{ + int i=0; + for (auto input : inputs) { + LL1 ll; + //ll.print_grammar_set(); + + string content = readfile(input); + vector token_strings = recognize(minimizedDFA, content,outputs_lexical[i]); + + bool flag = ll.IsLL1(); + ll.build_LL1_predict(); + + + //ll.print_LL1_predict(); + ll.get_token_strings(token_strings); + + + ll.print_token_strings(); + ll.build_LL1_grammar(); + + + ll.fileout_LL1_grammar_log(outputs_grammar[i]); + + + //ll.print_LL1_grammar_log(); + cout << endl; + cout< token_strings = recognize(minimizedDFA, content,outputs_lexical[i]); - string outputs_grammar[6] = { - "tests/00/00_my_grammar.txt", - "tests/01/01_my_grammar.txt", - "tests/02/02_my_grammar.txt", - "tests/07/07_my_grammar.txt", - "tests/08_err/08_my_grammar.txt", - "tests/10_err/10_my_grammar.txt" - }; + bool flag = ll.IsLL1(); + ll.build_LL1_predict(); - int i = 0; - for (auto input : inputs) { - LL1 ll; - //ll.print_grammar_set(); - - string content = readfile(input); - vector token_strings = recognize(minimizedDFA, content,outputs_lexical[i]); - - bool flag = ll.IsLL1(); - ll.build_LL1_predict(); + //ll.print_LL1_predict(); + ll.get_token_strings(token_strings); - //ll.print_LL1_predict(); - ll.get_token_strings(token_strings); + ll.print_token_strings(); + ll.build_LL1_grammar(); - ll.print_token_strings(); - ll.build_LL1_grammar(); + ll.fileout_LL1_grammar_log(outputs_grammar[i]); - ll.fileout_LL1_grammar_log(outputs_grammar[i]); - - - //ll.print_LL1_grammar_log(); - cout << endl; - cout<<"end"<