From 2b373378cb102a2cf91f1f942c780ab2f4b020b8 Mon Sep 17 00:00:00 2001 From: LiuYuanchi Date: Fri, 10 May 2024 20:50:55 +0800 Subject: [PATCH] =?UTF-8?q?[add]=20=E6=97=A0=E9=94=99=E8=AF=AF=E8=AF=86?= =?UTF-8?q?=E5=88=AB=EF=BC=8C=E6=B5=AE=E7=82=B9=E5=8F=AF=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .vscode/settings.json | 5 +- CMakeLists.txt | 4 +- main/CMakeLists.txt | 14 ---- main/LL1.h | 32 -------- main/grammar.h | 55 ------------- main/nfa.h | 175 ------------------------------------------ nfa/CMakeLists.txt | 8 +- nfa/nfa.h | 28 ++++--- nfa/test_main.cpp | 27 +++++++ nfa/tool.cpp | 103 +++++++++++++------------ 10 files changed, 114 insertions(+), 337 deletions(-) delete mode 100644 main/CMakeLists.txt delete mode 100644 main/LL1.h delete mode 100644 main/grammar.h delete mode 100644 main/nfa.h create mode 100644 nfa/test_main.cpp diff --git a/.vscode/settings.json b/.vscode/settings.json index 62e3861..6cab0b3 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -3,6 +3,9 @@ "xstring": "cpp", "iterator": "cpp", "ostream": "cpp", - "vector": "cpp" + "vector": "cpp", + "*.tcc": "cpp", + "iostream": "cpp", + "map": "cpp" } } \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f4d0b4..d48c4c0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,14 +7,14 @@ file(GLOB SOURCES_NFA "nfa/*.cpp") file(GLOB SOURCES_MAIN "main/*.cpp") # 设置输出目录为 bin -set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) +# set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) # 创建静态链接库 add_library(LL STATIC ${SOURCES_LL}) add_library(nfa STATIC ${SOURCES_NFA}) # 添加头文件目录 -target_include_directories(LL PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/LL) +target_include_directories(LL PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/LL1) target_include_directories(nfa PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/nfa) # 添加可执行文件 diff --git a/main/CMakeLists.txt b/main/CMakeLists.txt deleted file mode 100644 index 3b9a75c..0000000 --- a/main/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -cmake_minimum_required(VERSION 3.10) -project(main) - -file(GLOB SOURCES "*.cpp") - -add_executable(main ${SOURCES}) - -# 链接静态库 -target_link_libraries(main PRIVATE ${CMAKE_BINARY_DIR}/../../bin/LL.lib) -target_link_libraries(main PRIVATE ${CMAKE_BINARY_DIR}/../../bin/nfa.lib) - - -# 添加头文件目录 -# target_include_directories(main PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/main/LL1.h b/main/LL1.h deleted file mode 100644 index 5eaba92..0000000 --- a/main/LL1.h +++ /dev/null @@ -1,32 +0,0 @@ -// LL1 语法分析器 -#ifndef LL1_H -#define LL1_H - -#include "grammar.h" - -using namespace std; - -class LL1:public Grammar{ -public: - LL1(); - ~LL1(); - - bool IsLL1(); // 判断该文法是否为 LL1 文法 - void build_LL1_predict(); // 构建 LL1 的预测分析表 - void print_LL1_predict(); // 打印 LL1 的预测分析表 - void build_LL1_grammar(); // 构建规约序列 - void print_LL1_grammar_log(); - void fileout_LL1_grammar_log(string file_name); - - -private: - unordered_map> select; // 计算符号的 SELECT 集合 - unordered_map> LL1_predict; // LL1 的预测分析表 - vector LL1_grammar_log; // 规约序列 - - int insert_rule(pair>& new_rule); // 增加新的规则 -}; - - - -#endif // !LL1_H diff --git a/main/grammar.h b/main/grammar.h deleted file mode 100644 index 95e91b1..0000000 --- a/main/grammar.h +++ /dev/null @@ -1,55 +0,0 @@ -// 语法生成器 -#ifndef GRAMMAR_H -#define GRAMMAR_H - - -#include -#include -#include -#include -#include - -using namespace std; - -class Grammar -{ -public: - const string grammar_file = "./tests/grammar.txt"; - - Grammar(); - ~Grammar(); - void read_grammar(); // 读取语法规则 - void print_grammar(); // 打印语法规则 - void expand_grammar(); // 拓展语法规则 - void init_grammar_set(); // 初始化语法相关集合 - void print_grammar_set(); // 打印语法相关集合 - void get_token_strings(vector &); // 获取 token_stirngs - void print_token_strings(); - -protected: - vector>> grammar_rules; // 产生式规则 - string start; // 起始字符 - vector symbols; // 符号 - vector VTs; // 终结符 - vector VNs; // 非终结符 - unordered_map> first; // FIRST 集 - unordered_map> follow; // FOLLOW 集 - unordered_map infer_empty; // 是否可以推导出 $ 空字符 - vector token_strings; - - -private: - unordered_map> left_appears; // 该符号出现在哪些产生式左侧 - unordered_map> right_appears; // 该符号出现在哪些产生式右侧 - unordered_map> depend; // FOLLOW 集的依赖关系 - - - void init_appears_depend(); // 获取 appear depend 集合 - bool symbol_infer_empty(const string& symbol); // 判断符号是否可以推导出 $ 空字符 - vector symbol_infer_first(const string& symbol);// 推导符号的 FIRST 集 - vector symbol_infer_follow(const string& symbol);// 推导符号的 FOLLOW 集 - -}; - - -#endif // !GRAMMAR_H \ No newline at end of file diff --git a/main/nfa.h b/main/nfa.h deleted file mode 100644 index d918a91..0000000 --- a/main/nfa.h +++ /dev/null @@ -1,175 +0,0 @@ -#pragma once -#ifndef __NFA__H__ -#define __NFA__H__ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -using namespace std; -//单词符号的类型,返回<待测代码中的单词符号,WordType> -typedef enum WordType { - //当识别成标识符后,先判断是不是保留字,让后再判断IDN - KW_INT = 0, // int - KW_VOID, // void - KW_RETURN, // return - KW_CONST, // const - KW_MAIN, //main - - OP_ADD, // + - OP_SUB, // - - OP_MUL, // * - OP_DIV, // / - OP_MOD, // % - OP_ASSIGN, // = - OP_GT, // > - OP_LT, // < - OP_EQ, // == - OP_LE, // <= - OP_GE, // >= - OP_NE, // != - OP_AND, // && - OP_OR, // || - - SE_LBRAC, // ( left backet - SE_RBRAC, // ) right bracket - SE_LCBRAC, // { left curly bracket - SE_RCBRAC, // } right curly bracket - SE_COMMA, // , - SE_SEMI, // ; - - IDN, // [a-zA-Z][a-zA-Z_0-9]* - INT_VAL, // -*[0-9]+ - UNKOWN -}WordType; -string getWordTypeName(WordType type); -//定义输入的字符类别 -typedef enum InputCharType { - LETTER = 0, // 字母 0 - UNDERLINE, // _ 1 - DIGIT, // 数字 2 当识别成功一个数字时,为了避免出现数字01的情况,返回前先进行一个判断,对GCC,01可以识别并等于1的 - //OP - ADD, // + 3 - SUB, // - 4 - MUL, // * 5 - DIV, // / 6 - MOD, // % 7 - EQ, // = 8 - GT, // > 9 - LT, // < 10 - NOT, // ! 11 - AND, // & 12 - OR, // | 13 - //SE - LBRACKET, // ( 14 - RBRACKET, // ) 15 - LCBRAC, // { 16 - RCBRAC, // } 17 - COMMA, // , 18 - SEMI, // ; 19 - - EPSILON, // 空字符 20 -}InputCharType; -string getInputChartypeName(InputCharType type); -enum class TokenType { - KW = 0, - OP, - SE, - IDN, - INT, - UNKNOWN -}; -TokenType getTokenType(WordType wordType,string buffer); -typedef struct Token { - string value; - TokenType type; -} Token; - -//定义函数判断输入的字符类别 -InputCharType getInputCharType(char c); -string getWordTypeName(WordType type,string buffer); -string getWordAttribute(WordType type,string buffer); - -//定义状态类 -class State { -public: - int id; // 状态编号 - map> transitions; // 转移函数映射表,记录每个输入字符类型对应的目标状态集合 - bool isFinalState; // 是否为最终状态 - WordType wordType; // 到达该状态时应该返回的词法单元类型 - State(int id) : id(id), isFinalState(false), wordType(UNKOWN) {} - void addTransition(InputCharType input, State* targetState) { - transitions[input].insert(targetState); - } - void setFinalState(bool isFinal, WordType type) { - isFinalState = isFinal; - wordType = type; - } - bool operator<(const State& other) const { - return id < other.id; - } -}; -//为了是set内部有序,定义排序结构体StatePtrCompare -struct StatePtrCompare { - bool operator()(const State* lhs, const State* rhs) const { - return lhs->id < rhs->id; - } -}; - -//定义NFA类 -class NFA { -public: - State* startState; // 起始状态 - set endStates; // 终止状态集合 - set states; // 状态集合 - NFA(State* startState, set endStates, set states) : - startState(startState), endStates(endStates), states(states) {} - // void printNFA(); -}; -NFA RexToNFA(); -void printNFA(const NFA& nfa); -NFA buildNFA(string filename); -NFA RexToNFA(); -set move(const set& states, InputCharType input); -set epsilonClosure(const set& states); - -class DFA { -public: - State* startState; // 起始状态 - set endStates; // 终止状态集合 - set states; // 状态集合 - DFA(State* startState, set endStates, set states) : - startState(startState), endStates(endStates), states(states) {} -}; -void removeUnreachableStates(DFA& dfa); -void printDFA(const DFA& dfa); -DFA nfaToDFA(const NFA& nfa); -void printDFA(const DFA& dfa); -struct SetComparator { - bool operator()(const set& a, const set& b) const { - if (a.size() != b.size()) { - return a.size() < b.size(); - } - - vector vecA(a.begin(), a.end()); - vector vecB(b.begin(), b.end()); - - sort(vecA.begin(), vecA.end(), [](const State* a, const State* b) { return a->id < b->id; }); - sort(vecB.begin(), vecB.end(), [](const State* a, const State* b) { return a->id < b->id; }); - - return vecA < vecB; - } -}; -string getGrammarName(WordType type, string buffer); -DFA minimizeDFA(const DFA& dfa); -vector recognize(const DFA& dfa, const string& input, const string& output); -string readfile(const string& filename); -#endif \ No newline at end of file diff --git a/nfa/CMakeLists.txt b/nfa/CMakeLists.txt index cfc094e..8a51dd4 100644 --- a/nfa/CMakeLists.txt +++ b/nfa/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10) project(nfa) # 收集所有的cpp源文件 -file(GLOB SOURCES "*.cpp") +file(GLOB SOURCES dfa.cpp nfa.cpp tool.cpp test_main.cpp) # 设置输出目录为 bin set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}) @@ -12,3 +12,9 @@ add_library(nfa STATIC ${SOURCES}) # 添加头文件目录 target_include_directories(nfa PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +# 添加可执行文件 +add_executable(test_nfa test_main.cpp nfa) + +# 链接目标库 +target_link_libraries(test_nfa nfa) \ No newline at end of file diff --git a/nfa/nfa.h b/nfa/nfa.h index f96323e..8ad82ac 100644 --- a/nfa/nfa.h +++ b/nfa/nfa.h @@ -18,15 +18,22 @@ using namespace std; //单词符号的类型,返回<待测代码中的单词符号,WordType> -// 保留关键字 +//当识别成标识符后,先判断是不是保留字,让后再判断IDN + +// Token 类型定义 typedef enum WordType { - //当识别成标识符后,先判断是不是保留字,让后再判断IDN + + //关键字 KW_INT = 0, // int KW_VOID, // void KW_RETURN, // return KW_CONST, // const - KW_MAIN, //main + KW_MAIN, // main + KW_IF, // if + KW_ELSE, // else + KW_FLOAT, // float + //操作符 OP_ADD, // + OP_SUB, // - OP_MUL, // * @@ -42,6 +49,8 @@ typedef enum WordType { OP_AND, // && OP_OR, // || + + //界符 SE_LBRAC, // ( left backet SE_RBRAC, // ) right bracket SE_LCBRAC, // { left curly bracket @@ -51,13 +60,13 @@ typedef enum WordType { IDN, // [a-zA-Z][a-zA-Z_0-9]* INT_VAL, // -*[0-9]+ + FLOAT_VAL, // -?[0-9]+\\.[0-9]+ + UNKOWN }WordType; string getWordTypeName(WordType type); // 定义输入的字符类别 -// 输入与实际不完全匹配 -// 注意:此处定义的;和,顺序与实验指导书中不同 typedef enum InputCharType { LETTER = 0, // 字母 0 UNDERLINE, // _ 1 @@ -81,12 +90,12 @@ typedef enum InputCharType { RCBRAC, // } 17 COMMA, // , 18 SEMI, // ; 19 + POINT, // . 20 如果浮点按整数缓存判断,则将小数点作为数字类型加载,最后在缓冲区内判断 - EPSILON, // 空字符 20 + EPSILON, // 空字符 21 }InputCharType; string getInputChartypeName(InputCharType type); - // 定义 token类型 enum class TokenType { KW = 0, @@ -94,11 +103,11 @@ enum class TokenType { SE, IDN, INT, + FLOAT, UNKNOWN }; -TokenType getTokenType(WordType wordType,string buffer); -// 定义最终返回的token的组成类型,包含值和类型两部分 +// 定义最终返回/输出的token的组成类型,包含值和类型两部分 typedef struct Token { string value; TokenType type; @@ -115,6 +124,7 @@ public: int id; // 状态编号 bool isFinalState; // 是否为最终状态 WordType wordType; // 到达该状态时应该返回的词法单元类型 + map> transitions; // 转移函数映射表,记录每个输入字符类型对应的目标状态集合 // 构造函数 diff --git a/nfa/test_main.cpp b/nfa/test_main.cpp new file mode 100644 index 0000000..061a31b --- /dev/null +++ b/nfa/test_main.cpp @@ -0,0 +1,27 @@ +#include +#include +#include + +#include "nfa.h" +using namespace std; + +int main(int argc, char** argv) { + + NFA nfa = RexToNFA(); + printNFA(nfa); + //cout<<"OK1"<= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { return LETTER; @@ -88,6 +92,8 @@ string getInputChartypeName(InputCharType type) { return ","; case SEMI: return ";"; + case POINT: //暂不使用 + return "."; case EPSILON: return "EPSILON"; default: @@ -95,6 +101,19 @@ string getInputChartypeName(InputCharType type) { } } +string judeFloat(string buffer){ + size_t firstDot = buffer.find('.'); + size_t lastDot = buffer.rfind('.'); + if (firstDot == lastDot && firstDot != std::string::npos) { + return "FLOAT"; + } + else if (firstDot == std::string::npos) + { + return "INT"; + } + else return "UNKNOWN"; +} + //根据关键字类型获取其所属的种别 string getWordTypeName(WordType type, string buffer) { switch (type) { @@ -126,16 +145,27 @@ string getWordTypeName(WordType type, string buffer) { // 标识符和关键字 case IDN: - if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")||!buffer.compare("main")){ + if (!buffer.compare("int") || + !buffer.compare("void") || + !buffer.compare("const") || + !buffer.compare("return")|| + !buffer.compare("if") || + !buffer.compare("else") || + !buffer.compare("float") + ){ return "KW"; } else { return "IDN"; } - // 整数 + // 整数(添加了浮点判断) case INT_VAL: - return "INT"; + return judeFloat(buffer); + + //浮点 + // case FLOAT_VAL: + // return "FLOAT"; //default default: @@ -188,6 +218,15 @@ string getWordAttribute(WordType type, string buffer) { else if (!buffer.compare("main")){ return "5"; } + else if (!buffer.compare("if")){ + return "6"; + } + else if (!buffer.compare("else")){ + return "7"; + } + else if (!buffer.compare("float")){ + return "8"; + } else { return buffer; } @@ -195,6 +234,10 @@ string getWordAttribute(WordType type, string buffer) { // 整数 case INT_VAL: return buffer; + + //浮点类型,理论不使用 + case FLOAT_VAL: + return buffer; //default default: @@ -202,9 +245,6 @@ string getWordAttribute(WordType type, string buffer) { } } - - - // 读取文件 string readfile(const string& filename) { @@ -222,48 +262,6 @@ string readfile(const string& filename) return content; } -// 获取关键字的Token种类 -TokenType getTokenType(WordType type,string buffer) { - switch (type) { - case OP_ADD: - case OP_SUB: - case OP_MUL: - case OP_DIV: - case OP_MOD: - case OP_ASSIGN: - case OP_GT: - case OP_LT: - case OP_EQ: - case OP_LE: - case OP_GE: - case OP_NE: - case OP_AND: - case OP_OR: - return TokenType::OP; - - case SE_LBRAC: - case SE_RBRAC: - case SE_LCBRAC: - case SE_RCBRAC: - case SE_COMMA: - case SE_SEMI: - return TokenType::SE; - - case IDN: - if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")|| !buffer.compare("main")) { - return TokenType::KW; - } - else { - return TokenType::IDN; - } - - case INT_VAL: - return TokenType::INT; - - default: - return TokenType::UNKNOWN; - } -} // 获取token名称 string getWordTypeName(WordType type) { @@ -278,6 +276,12 @@ string getWordTypeName(WordType type) { return "KW_CONST"; case KW_MAIN: return "KW_MAIN"; + case KW_IF: + return "KW_IF"; + case KW_ELSE: + return "KW_ELSE"; + case KW_FLOAT: + return "KW_FLOAT"; case OP_ADD: return "OP_ADD"; case OP_SUB: @@ -322,6 +326,8 @@ string getWordTypeName(WordType type) { return "IDN"; case INT_VAL: return "INT_VAL"; + case FLOAT_VAL: + return "FLOAT_VAL"; default: return "UNKNOWN"; } @@ -373,6 +379,7 @@ string getGrammarName(WordType type, string buffer) { return "IDN"; } case INT_VAL: return "INT"; + case FLOAT_VAL: return "FLOAT"; default: cerr << "Token Error: "<< type << endl; exit(-1); } } \ No newline at end of file