Compare commits
No commits in common. "c8f827a54ca772b221a0fcb49d19eb4a7907e915" and "1af61d2eeffd3bbfec15b97114a179f91df25af8" have entirely different histories.
c8f827a54c
...
1af61d2eef
|
|
@ -3,47 +3,6 @@
|
|||
"xstring": "cpp",
|
||||
"iterator": "cpp",
|
||||
"ostream": "cpp",
|
||||
"vector": "cpp",
|
||||
"*.tcc": "cpp",
|
||||
"iostream": "cpp",
|
||||
"map": "cpp",
|
||||
"array": "cpp",
|
||||
"atomic": "cpp",
|
||||
"cctype": "cpp",
|
||||
"clocale": "cpp",
|
||||
"cmath": "cpp",
|
||||
"cstdarg": "cpp",
|
||||
"cstddef": "cpp",
|
||||
"cstdint": "cpp",
|
||||
"cstdio": "cpp",
|
||||
"cstdlib": "cpp",
|
||||
"cstring": "cpp",
|
||||
"cwchar": "cpp",
|
||||
"cwctype": "cpp",
|
||||
"deque": "cpp",
|
||||
"unordered_map": "cpp",
|
||||
"unordered_set": "cpp",
|
||||
"exception": "cpp",
|
||||
"algorithm": "cpp",
|
||||
"memory": "cpp",
|
||||
"memory_resource": "cpp",
|
||||
"optional": "cpp",
|
||||
"set": "cpp",
|
||||
"string": "cpp",
|
||||
"string_view": "cpp",
|
||||
"system_error": "cpp",
|
||||
"tuple": "cpp",
|
||||
"type_traits": "cpp",
|
||||
"utility": "cpp",
|
||||
"fstream": "cpp",
|
||||
"initializer_list": "cpp",
|
||||
"iosfwd": "cpp",
|
||||
"istream": "cpp",
|
||||
"limits": "cpp",
|
||||
"new": "cpp",
|
||||
"sstream": "cpp",
|
||||
"stdexcept": "cpp",
|
||||
"streambuf": "cpp",
|
||||
"typeinfo": "cpp"
|
||||
"vector": "cpp"
|
||||
}
|
||||
}
|
||||
|
|
@ -3,19 +3,19 @@ project(compiler-bin)
|
|||
|
||||
# 收集所有的cpp源文件
|
||||
file(GLOB SOURCES_LL "LL1/*.cpp")
|
||||
file(GLOB SOURCES_NFA "nfa/src/*.cpp")
|
||||
file(GLOB SOURCES_NFA "nfa/*.cpp")
|
||||
file(GLOB SOURCES_MAIN "main/*.cpp")
|
||||
|
||||
# 设置输出目录为 bin
|
||||
# set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||
|
||||
# 创建静态链接库
|
||||
add_library(LL STATIC ${SOURCES_LL})
|
||||
add_library(nfa STATIC ${SOURCES_NFA})
|
||||
|
||||
# 添加头文件目录
|
||||
target_include_directories(LL PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/LL1)
|
||||
target_include_directories(nfa PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/nfa/include)
|
||||
target_include_directories(LL PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/LL)
|
||||
target_include_directories(nfa PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/nfa)
|
||||
|
||||
# 添加可执行文件
|
||||
add_executable(main ${SOURCES_MAIN})
|
||||
|
|
|
|||
|
|
@ -0,0 +1,14 @@
|
|||
cmake_minimum_required(VERSION 3.10)
|
||||
project(main)
|
||||
|
||||
file(GLOB SOURCES "*.cpp")
|
||||
|
||||
add_executable(main ${SOURCES})
|
||||
|
||||
# 链接静态库
|
||||
target_link_libraries(main PRIVATE ${CMAKE_BINARY_DIR}/../../bin/LL.lib)
|
||||
target_link_libraries(main PRIVATE ${CMAKE_BINARY_DIR}/../../bin/nfa.lib)
|
||||
|
||||
|
||||
# 添加头文件目录
|
||||
# target_include_directories(main PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
// LL1 语法分析器
|
||||
#ifndef LL1_H
|
||||
#define LL1_H
|
||||
|
||||
#include "grammar.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// LL(1) parser built on top of the Grammar base class.
class LL1 : public Grammar
{
public:
    LL1();
    ~LL1();

    // Reports whether the grammar is an LL(1) grammar.
    bool IsLL1();
    // Builds the LL(1) predictive parsing table.
    void build_LL1_predict();
    // Prints the LL(1) predictive parsing table.
    void print_LL1_predict();
    // Builds the reduction sequence.
    void build_LL1_grammar();
    void print_LL1_grammar_log();
    void fileout_LL1_grammar_log(std::string file_name);

private:
    // SELECT sets computed for the symbols.
    std::unordered_map<std::string, std::vector<std::string>> select;
    // LL(1) predictive parsing table.
    std::unordered_map<std::string, std::unordered_map<std::string, int>> LL1_predict;
    // Reduction sequence.
    std::vector<std::string> LL1_grammar_log;

    // Adds a new rule.
    int insert_rule(std::pair<std::string, std::vector<std::string>>& new_rule);
};
|
||||
|
||||
|
||||
|
||||
#endif // !LL1_H
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
// 语法生成器
|
||||
#ifndef GRAMMAR_H
|
||||
#define GRAMMAR_H
|
||||
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <unordered_set>
|
||||
#include <unordered_map>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Grammar holder: production rules plus the sets derived from them.
class Grammar
{
public:
    // File path stored for the grammar (presumably read by read_grammar — confirm).
    const std::string grammar_file = "./tests/grammar.txt";

    Grammar();
    ~Grammar();

    // Reads the grammar rules.
    void read_grammar();
    // Prints the grammar rules.
    void print_grammar();
    // Expands the grammar rules.
    void expand_grammar();
    // Initialises the grammar-related sets.
    void init_grammar_set();
    // Prints the grammar-related sets.
    void print_grammar_set();
    // Gets the token strings.
    void get_token_strings(std::vector<std::string> &);
    void print_token_strings();

protected:
    // Production rules.
    std::vector<std::pair<std::string, std::vector<std::string>>> grammar_rules;
    // Start symbol.
    std::string start;
    // All symbols.
    std::vector<std::string> symbols;
    // Terminal symbols.
    std::vector<std::string> VTs;
    // Non-terminal symbols.
    std::vector<std::string> VNs;
    // FIRST sets.
    std::unordered_map<std::string, std::vector<std::string>> first;
    // FOLLOW sets.
    std::unordered_map<std::string, std::vector<std::string>> follow;
    // Whether a symbol can derive the empty string `$`.
    std::unordered_map<std::string, bool> infer_empty;
    std::vector<std::string> token_strings;

private:
    // Productions in which the symbol appears on the left-hand side.
    std::unordered_map<std::string, std::vector<int>> left_appears;
    // Productions in which the symbol appears on the right-hand side.
    std::unordered_map<std::string, std::vector<int>> right_appears;
    // Dependency relation between FOLLOW sets.
    std::unordered_map<std::string, std::vector<std::string>> depend;

    // Fills the appears / depend collections.
    void init_appears_depend();
    // Decides whether a symbol can derive the empty string `$`.
    bool symbol_infer_empty(const std::string& symbol);
    // Derives the FIRST set of a symbol.
    std::vector<std::string> symbol_infer_first(const std::string& symbol);
    // Derives the FOLLOW set of a symbol.
    std::vector<std::string> symbol_infer_follow(const std::string& symbol);
};
|
||||
|
||||
|
||||
#endif // !GRAMMAR_H
|
||||
|
|
@ -0,0 +1,175 @@
|
|||
#pragma once
|
||||
#ifndef __NFA__H__
|
||||
#define __NFA__H__
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <deque>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <stdio.h>
|
||||
#include <sstream>
|
||||
#include <stack>
|
||||
#include <queue>
|
||||
#include <algorithm>
|
||||
using namespace std;
|
||||
//单词符号的类型,返回<待测代码中的单词符号,WordType>
|
||||
// Kind of a recognised word. In C++ the enum name is already a type name,
// so the C-style `typedef enum ... WordType;` wrapper is not needed.
enum WordType {
    // Once a lexeme is recognised as an identifier, reserved words are
    // checked first, and only then is it treated as IDN.
    KW_INT = 0, // int
    KW_VOID,    // void
    KW_RETURN,  // return
    KW_CONST,   // const
    KW_MAIN,    // main

    OP_ADD,     // +
    OP_SUB,     // -
    OP_MUL,     // *
    OP_DIV,     // /
    OP_MOD,     // %
    OP_ASSIGN,  // =
    OP_GT,      // >
    OP_LT,      // <
    OP_EQ,      // ==
    OP_LE,      // <=
    OP_GE,      // >=
    OP_NE,      // !=
    OP_AND,     // &&
    OP_OR,      // ||

    SE_LBRAC,   // ( left bracket
    SE_RBRAC,   // ) right bracket
    SE_LCBRAC,  // { left curly bracket
    SE_RCBRAC,  // } right curly bracket
    SE_COMMA,   // ,
    SE_SEMI,    // ;

    IDN,        // [a-zA-Z][a-zA-Z_0-9]*
    INT_VAL,    // -*[0-9]+
    UNKOWN      // spelling kept as-is: the enumerator name is used by other code
};
|
||||
string getWordTypeName(WordType type);
|
||||
//定义输入的字符类别
|
||||
// Category of a single input character. In C++ the enum name is already a
// type name, so the C-style `typedef enum ... InputCharType;` is not needed.
enum InputCharType {
    LETTER = 0, // letter 0
    UNDERLINE,  // _ 1
    DIGIT,      // digit 2; when a number has been recognised, a check is made
                // before returning to avoid forms such as 01 (GCC accepts 01
                // and treats it as 1)
    // OP
    ADD,        // + 3
    SUB,        // - 4
    MUL,        // * 5
    DIV,        // / 6
    MOD,        // % 7
    EQ,         // = 8
    GT,         // > 9
    LT,         // < 10
    NOT,        // ! 11
    AND,        // & 12
    OR,         // | 13
    // SE
    LBRACKET,   // ( 14
    RBRACKET,   // ) 15
    LCBRAC,     // { 16
    RCBRAC,     // } 17
    COMMA,      // , 18
    SEMI,       // ; 19

    EPSILON,    // empty character 20
};
|
||||
string getInputChartypeName(InputCharType type);
|
||||
enum class TokenType {
|
||||
KW = 0,
|
||||
OP,
|
||||
SE,
|
||||
IDN,
|
||||
INT,
|
||||
UNKNOWN
|
||||
};
|
||||
TokenType getTokenType(WordType wordType,string buffer);
|
||||
typedef struct Token {
|
||||
string value;
|
||||
TokenType type;
|
||||
} Token;
|
||||
|
||||
//定义函数判断输入的字符类别
|
||||
InputCharType getInputCharType(char c);
|
||||
string getWordTypeName(WordType type,string buffer);
|
||||
string getWordAttribute(WordType type,string buffer);
|
||||
|
||||
//定义状态类
|
||||
class State {
|
||||
public:
|
||||
int id; // 状态编号
|
||||
map<InputCharType, set<State*>> transitions; // 转移函数映射表,记录每个输入字符类型对应的目标状态集合
|
||||
bool isFinalState; // 是否为最终状态
|
||||
WordType wordType; // 到达该状态时应该返回的词法单元类型
|
||||
State(int id) : id(id), isFinalState(false), wordType(UNKOWN) {}
|
||||
void addTransition(InputCharType input, State* targetState) {
|
||||
transitions[input].insert(targetState);
|
||||
}
|
||||
void setFinalState(bool isFinal, WordType type) {
|
||||
isFinalState = isFinal;
|
||||
wordType = type;
|
||||
}
|
||||
bool operator<(const State& other) const {
|
||||
return id < other.id;
|
||||
}
|
||||
};
|
||||
//为了是set内部有序,定义排序结构体StatePtrCompare
|
||||
struct StatePtrCompare {
|
||||
bool operator()(const State* lhs, const State* rhs) const {
|
||||
return lhs->id < rhs->id;
|
||||
}
|
||||
};
|
||||
|
||||
//定义NFA类
|
||||
class NFA {
|
||||
public:
|
||||
State* startState; // 起始状态
|
||||
set<State*, StatePtrCompare> endStates; // 终止状态集合
|
||||
set<State*, StatePtrCompare> states; // 状态集合
|
||||
NFA(State* startState, set<State*, StatePtrCompare> endStates, set<State*, StatePtrCompare> states) :
|
||||
startState(startState), endStates(endStates), states(states) {}
|
||||
// void printNFA();
|
||||
};
|
||||
NFA RexToNFA();
|
||||
void printNFA(const NFA& nfa);
|
||||
NFA buildNFA(string filename);
|
||||
NFA RexToNFA();
|
||||
set<State*, StatePtrCompare> move(const set<State*, StatePtrCompare>& states, InputCharType input);
|
||||
set<State*, StatePtrCompare> epsilonClosure(const set<State*, StatePtrCompare>& states);
|
||||
|
||||
class DFA {
|
||||
public:
|
||||
State* startState; // 起始状态
|
||||
set<State*, StatePtrCompare> endStates; // 终止状态集合
|
||||
set<State*, StatePtrCompare> states; // 状态集合
|
||||
DFA(State* startState, set<State*, StatePtrCompare> endStates, set<State*, StatePtrCompare> states) :
|
||||
startState(startState), endStates(endStates), states(states) {}
|
||||
};
|
||||
void removeUnreachableStates(DFA& dfa);
|
||||
void printDFA(const DFA& dfa);
|
||||
DFA nfaToDFA(const NFA& nfa);
|
||||
void printDFA(const DFA& dfa);
|
||||
struct SetComparator {
|
||||
bool operator()(const set<State*, StatePtrCompare>& a, const set<State*, StatePtrCompare>& b) const {
|
||||
if (a.size() != b.size()) {
|
||||
return a.size() < b.size();
|
||||
}
|
||||
|
||||
vector<State*> vecA(a.begin(), a.end());
|
||||
vector<State*> vecB(b.begin(), b.end());
|
||||
|
||||
sort(vecA.begin(), vecA.end(), [](const State* a, const State* b) { return a->id < b->id; });
|
||||
sort(vecB.begin(), vecB.end(), [](const State* a, const State* b) { return a->id < b->id; });
|
||||
|
||||
return vecA < vecB;
|
||||
}
|
||||
};
|
||||
string getGrammarName(WordType type, string buffer);
|
||||
DFA minimizeDFA(const DFA& dfa);
|
||||
vector<string> recognize(const DFA& dfa, const string& input, const string& output);
|
||||
string readfile(const string& filename);
|
||||
#endif
|
||||
|
|
@ -2,14 +2,13 @@ cmake_minimum_required(VERSION 3.10)
|
|||
project(nfa)
|
||||
|
||||
# 收集所有的cpp源文件
|
||||
file(GLOB SOURCES "src/*.cpp")
|
||||
file(GLOB SOURCES "*.cpp")
|
||||
|
||||
# 设置输出目录为 bin
|
||||
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
|
||||
|
||||
# 创建静态链接库
|
||||
add_library(nfa STATIC ${SOURCES})
|
||||
|
||||
# Test:添加可执行文件并链接目标库
|
||||
add_executable(test_nfa test/test_main.cpp)
|
||||
target_link_libraries(test_nfa nfa)
|
||||
|
||||
# 添加头文件目录
|
||||
target_include_directories(nfa PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include)
|
||||
target_include_directories(nfa PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -84,7 +84,7 @@ DFA minimizeDFA(const DFA& dfa) {
|
|||
size_t oldSize;//分割集初始大小
|
||||
do {
|
||||
oldSize = partitions.size();
|
||||
for (InputCharType input = static_cast<InputCharType>(0); input < InputCharType::EPSILON; input = static_cast<InputCharType>(static_cast<int>(input) + 1)) {//类似于求Ia,Ib等
|
||||
for (InputCharType input = static_cast<InputCharType>(0); input < EPSILON; input = static_cast<InputCharType>(input + 1)) {//类似于求Ia,Ib等
|
||||
for (Partition* partition : set<Partition*>(partitions)) {//遍历现存分割的每一个割集,看是否可再分割
|
||||
if (partition->states.size() > 1) {//为1的集合不可再分割
|
||||
split(partition->states, input, partitions);//核心分割函数
|
||||
|
|
@ -1,4 +1,8 @@
|
|||
// 将正则表达式转换为非确定性有限自动机
|
||||
|
||||
|
||||
|
||||
|
||||
#include "nfa.h"
|
||||
|
||||
|
||||
|
|
@ -7,32 +11,12 @@
|
|||
NFA RexToNFA() {
|
||||
//由于里面存在||,所以不同正则间使用空格分隔代表| l代表letter,_代表下划线,0代表数字(也可以是d,但是为了使用已经有的函数),
|
||||
//[lu]代表l|u
|
||||
std::string rex = "+ - * / % = > < == <= >= != && || ( ) { } , ; [l_][l_0]* -?00*";
|
||||
|
||||
//正则对应的输出(终态)
|
||||
string rex = "+ - * / % = > < == <= >= != && || ( ) { } , ; [l_][l_0]* -?00*";
|
||||
|
||||
//下面给出正则对应的输出(终态)
|
||||
vector<WordType> finalState = {
|
||||
WordType::OP_ADD,
|
||||
WordType::OP_SUB,
|
||||
WordType::OP_MUL,
|
||||
WordType::OP_DIV,
|
||||
WordType::OP_MOD,
|
||||
WordType::OP_ASSIGN,
|
||||
WordType::OP_GT,
|
||||
WordType::OP_LT,
|
||||
WordType::OP_EQ,
|
||||
WordType::OP_LE,
|
||||
WordType::OP_GE,
|
||||
WordType::OP_NE,
|
||||
WordType::OP_AND,
|
||||
WordType::OP_OR,
|
||||
WordType::SE_LBRAC,
|
||||
WordType::SE_RBRAC,
|
||||
WordType::SE_LCBRAC,
|
||||
WordType::SE_RCBRAC,
|
||||
WordType::SE_COMMA,
|
||||
WordType::SE_SEMI,
|
||||
WordType::IDN,
|
||||
WordType::INT_VAL
|
||||
OP_ADD, OP_SUB,OP_MUL,OP_DIV,OP_MOD,OP_ASSIGN,OP_GT,OP_LT, OP_EQ,OP_LE,OP_GE,OP_NE, OP_AND, OP_OR,SE_LBRAC, SE_RBRAC,
|
||||
SE_LCBRAC,SE_RCBRAC,SE_COMMA,SE_SEMI,IDN,INT_VAL
|
||||
};
|
||||
|
||||
stringstream ss(rex);
|
||||
|
|
@ -59,7 +43,7 @@ NFA RexToNFA() {
|
|||
//[...]构成一种输入,查看]后面是否有?或者*,来判断当前状态的构成
|
||||
for (i=i+1; i < target.length() && target[i] != ']'; i++) {
|
||||
InputCharType input = getInputCharType(target[i]);
|
||||
if (input != InputCharType::EPSILON) {
|
||||
if (input != EPSILON) {
|
||||
// 添加转移函数,从当前状态向新状态转移
|
||||
currentState->addTransition(input, newState);
|
||||
}
|
||||
|
|
@ -74,8 +58,8 @@ NFA RexToNFA() {
|
|||
//创建EPSILON转移状态
|
||||
State* epsState = new State(stateIndex++);
|
||||
allStates.insert(epsState);
|
||||
currentState->addTransition(InputCharType::EPSILON, epsState);
|
||||
newState->addTransition(InputCharType::EPSILON, epsState);
|
||||
currentState->addTransition(EPSILON, epsState);
|
||||
newState->addTransition(EPSILON, epsState);
|
||||
currentState = epsState;
|
||||
// 跳过'?'字符
|
||||
i++;
|
||||
|
|
@ -83,9 +67,9 @@ NFA RexToNFA() {
|
|||
else if (i + 1 < target.length() && target[i + 1] == '*') {
|
||||
State* epsState = new State(stateIndex++);
|
||||
allStates.insert(epsState);
|
||||
currentState->addTransition(InputCharType::EPSILON, epsState);
|
||||
newState->addTransition(InputCharType::EPSILON, epsState);
|
||||
epsState->addTransition(InputCharType::EPSILON, currentState);
|
||||
currentState->addTransition(EPSILON, epsState);
|
||||
newState->addTransition(EPSILON, epsState);
|
||||
epsState->addTransition(EPSILON, currentState);
|
||||
currentState = epsState;
|
||||
// 跳过'*'字符
|
||||
i++;
|
||||
|
|
@ -105,6 +89,51 @@ NFA RexToNFA() {
|
|||
return NFA(startState, endStates, allStates);
|
||||
}
|
||||
|
||||
// 构造状态机
|
||||
// Builds an NFA from a text description in `filename`.
//
// Layout read from the file, all whitespace-separated:
//   stateNum inputNum
//   stateNum * inputNum cells, row = source state, column = input type index;
//     a cell is either "#" (no transition) or a comma-separated list of
//     target state ids
//   endStateNum
//   endStateNum pairs of: endStateID wordTypeID
//
// State 0 is the start state. The State objects are allocated with `new` and
// are not released here. If the file cannot be opened, or its contents are
// malformed (bad counts, missing cells, state ids outside [0, stateNum)), a
// message is written to cerr and the process exits with EXIT_FAILURE.
NFA buildNFA(std::string filename) {
    std::ifstream ifs(filename);
    if (!ifs) {
        std::cerr << "Cannot open file: " << filename << std::endl;
        exit(EXIT_FAILURE);
    }

    // Malformed content is reported the same way as an unreadable file.
    auto fail = [&filename](const std::string& reason) {
        std::cerr << "Invalid NFA file " << filename << ": " << reason << std::endl;
        exit(EXIT_FAILURE);
    };

    int stateNum = 0;
    int inputNum = 0;
    if (!(ifs >> stateNum >> inputNum) || stateNum <= 0 || inputNum < 0) {
        // stateNum must be positive because state 0 is used as the start state.
        fail("bad state/input counts");
    }

    std::vector<State*> states(stateNum);
    for (int i = 0; i < stateNum; i++) {
        states[i] = new State(i);
    }

    State* startState = states[0];
    std::set<State*, StatePtrCompare> endStates;
    for (int i = 0; i < stateNum; i++) {
        for (int j = 0; j < inputNum; j++) {
            std::string targetStateIDs;
            if (!(ifs >> targetStateIDs)) {
                fail("transition table is truncated");
            }
            if (targetStateIDs == "#") {
                continue;  // no transition for this (state, input) pair
            }

            std::stringstream ss(targetStateIDs);
            std::string targetStateIDStr;
            while (getline(ss, targetStateIDStr, ',')) {
                std::stringstream idStream(targetStateIDStr);
                int targetStateID = -1;
                if (!(idStream >> targetStateID) || targetStateID < 0 || targetStateID >= stateNum) {
                    fail("bad target state '" + targetStateIDStr + "'");
                }
                states[i]->addTransition(static_cast<InputCharType>(j), states[targetStateID]);
            }
        }
    }

    int endStateNum = 0;
    if (!(ifs >> endStateNum) || endStateNum < 0) {
        fail("bad end-state count");
    }
    for (int i = 0; i < endStateNum; i++) {
        int endStateID = -1;
        int wordTypeID = 0;
        if (!(ifs >> endStateID >> wordTypeID) || endStateID < 0 || endStateID >= stateNum) {
            fail("bad end-state entry");
        }
        states[endStateID]->setFinalState(true, static_cast<WordType>(wordTypeID));
        endStates.insert(states[endStateID]);
    }

    return NFA(startState, endStates, std::set<State*, StatePtrCompare>(states.begin(), states.end()));
}
|
||||
|
||||
void printNFA(const NFA& nfa) {
|
||||
cout << "Start state: " << nfa.startState->id << endl;
|
||||
cout << "End states: "<<endl;
|
||||
|
|
@ -151,7 +180,7 @@ set<State*, StatePtrCompare> epsilonClosure(const set<State*, StatePtrCompare>&
|
|||
while (!stateStack.empty()) {
|
||||
State* currentState = stateStack.top();
|
||||
stateStack.pop();
|
||||
auto it = currentState->transitions.find(InputCharType::EPSILON);
|
||||
auto it = currentState->transitions.find(EPSILON);
|
||||
if (it != currentState->transitions.end()) {
|
||||
for (State* nextState : it->second) {
|
||||
if (closure.find(nextState) == closure.end()) {//防止同一状态多次进栈,set自带去重
|
||||
|
|
@ -194,7 +223,7 @@ DFA nfaToDFA(const NFA& nfa) {
|
|||
}
|
||||
|
||||
// 遍历所有输入字符类型
|
||||
for (int i = 0; i < static_cast<int>(InputCharType::EPSILON); i++) {
|
||||
for (int i = 0; i < static_cast<int>(EPSILON); i++) {
|
||||
InputCharType inputCharType = static_cast<InputCharType>(i);
|
||||
set<State*, StatePtrCompare> nextNFAStates = epsilonClosure(move(currentNFAStates, inputCharType));
|
||||
if (nextNFAStates.empty()) {
|
||||
|
|
@ -1,4 +1,7 @@
|
|||
#pragma once
|
||||
#ifndef __NFA__H__
|
||||
#define __NFA__H__
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <deque>
|
||||
|
|
@ -11,29 +14,19 @@
|
|||
#include <stack>
|
||||
#include <queue>
|
||||
#include <algorithm>
|
||||
#include <unordered_map>
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
|
||||
//单词符号的类型,返回<待测代码中的单词符号,WordType>
|
||||
//当识别成标识符后,先判断是不是保留字,让后再判断IDN
|
||||
|
||||
// Token 类型定义
|
||||
enum class WordType {
|
||||
|
||||
//关键字
|
||||
// 保留关键字
|
||||
typedef enum WordType {
|
||||
//当识别成标识符后,先判断是不是保留字,让后再判断IDN
|
||||
KW_INT = 0, // int
|
||||
KW_VOID, // void
|
||||
KW_RETURN, // return
|
||||
KW_CONST, // const
|
||||
KW_MAIN, // main
|
||||
KW_IF, // if
|
||||
KW_ELSE, // else
|
||||
KW_FLOAT, // float
|
||||
KW_MAIN, //main
|
||||
|
||||
//操作符
|
||||
OP_ADD, // +
|
||||
OP_SUB, // -
|
||||
OP_MUL, // *
|
||||
|
|
@ -49,8 +42,6 @@ enum class WordType {
|
|||
OP_AND, // &&
|
||||
OP_OR, // ||
|
||||
|
||||
|
||||
//界符
|
||||
SE_LBRAC, // ( left backet
|
||||
SE_RBRAC, // ) right bracket
|
||||
SE_LCBRAC, // { left curly bracket
|
||||
|
|
@ -60,14 +51,14 @@ enum class WordType {
|
|||
|
||||
IDN, // [a-zA-Z][a-zA-Z_0-9]*
|
||||
INT_VAL, // -*[0-9]+
|
||||
FLOAT_VAL, // -?[0-9]+\\.[0-9]+
|
||||
|
||||
UNKOWN
|
||||
};
|
||||
extern std::unordered_map<WordType, std::string> WordTypeNames;
|
||||
}WordType;
|
||||
string getWordTypeName(WordType type);
|
||||
|
||||
// 定义输入的字符类别
|
||||
enum class InputCharType {
|
||||
// 输入与实际不完全匹配
|
||||
// 注意:此处定义的;和,顺序与实验指导书中不同
|
||||
typedef enum InputCharType {
|
||||
LETTER = 0, // 字母 0
|
||||
UNDERLINE, // _ 1
|
||||
DIGIT, // 数字 2 当识别成功一个数字时,为了避免出现数字01的情况,返回前先进行一个判断,对GCC,01可以识别并等于1的
|
||||
|
|
@ -90,12 +81,11 @@ enum class InputCharType {
|
|||
RCBRAC, // } 17
|
||||
COMMA, // , 18
|
||||
SEMI, // ; 19
|
||||
POINT, // . 20 如果浮点按整数缓存判断,则将小数点作为数字类型加载,最后在缓冲区内判断
|
||||
|
||||
EPSILON, // 空字符 21
|
||||
};
|
||||
EPSILON, // 空字符 20
|
||||
}InputCharType;
|
||||
string getInputChartypeName(InputCharType type);
|
||||
|
||||
extern std::unordered_map<InputCharType, std::string> CharTypeNames;
|
||||
|
||||
// 定义 token类型
|
||||
enum class TokenType {
|
||||
|
|
@ -104,21 +94,15 @@ enum class TokenType {
|
|||
SE,
|
||||
IDN,
|
||||
INT,
|
||||
FLOAT,
|
||||
UNKNOWN
|
||||
};
|
||||
TokenType getTokenType(WordType wordType,string buffer);
|
||||
|
||||
// 定义最终返回/输出的token的组成类型,包含值和类型两部分
|
||||
class Token {
|
||||
// 定义最终返回的token的组成类型,包含值和类型两部分
|
||||
typedef struct Token {
|
||||
string value;
|
||||
TokenType type;
|
||||
};
|
||||
|
||||
string getWordTypeName(WordType type);
|
||||
string getInputChartypeName(InputCharType type);
|
||||
|
||||
|
||||
|
||||
} Token;
|
||||
|
||||
// 定义函数判断输入的字符类别
|
||||
InputCharType getInputCharType(char c);
|
||||
|
|
@ -131,11 +115,10 @@ public:
|
|||
int id; // 状态编号
|
||||
bool isFinalState; // 是否为最终状态
|
||||
WordType wordType; // 到达该状态时应该返回的词法单元类型
|
||||
|
||||
map<InputCharType, set<State*>> transitions; // 转移函数映射表,记录每个输入字符类型对应的目标状态集合
|
||||
|
||||
// 构造函数
|
||||
State(int id) : id(id), isFinalState(false), wordType(WordType::UNKOWN) {}
|
||||
State(int id) : id(id), isFinalState(false), wordType(UNKOWN) {}
|
||||
|
||||
// 添加状态转移映射
|
||||
void addTransition(InputCharType input, State* targetState) {
|
||||
|
|
@ -177,6 +160,8 @@ public:
|
|||
// 正则表达式构建状态机
|
||||
NFA RexToNFA();
|
||||
void printNFA(const NFA& nfa);
|
||||
NFA buildNFA(string filename);
|
||||
NFA RexToNFA();
|
||||
set<State*, StatePtrCompare> move(const set<State*, StatePtrCompare>& states, InputCharType input);
|
||||
set<State*, StatePtrCompare> epsilonClosure(const set<State*, StatePtrCompare>& states);
|
||||
|
||||
|
|
@ -211,4 +196,4 @@ string getGrammarName(WordType type, string buffer);
|
|||
DFA minimizeDFA(const DFA& dfa);
|
||||
vector<string> recognize(const DFA& dfa, const string& input, const string& output);
|
||||
string readfile(const string& filename);
|
||||
|
||||
#endif
|
||||
309
nfa/src/tool.cpp
309
nfa/src/tool.cpp
|
|
@ -1,309 +0,0 @@
|
|||
#include "nfa.h"
|
||||
|
||||
std::unordered_map<InputCharType, std::string> CharTypeNames = {
|
||||
{InputCharType::UNDERLINE, "_"},
|
||||
{InputCharType::ADD, "+"},
|
||||
{InputCharType::SUB, "-"},
|
||||
{InputCharType::MUL, "*"},
|
||||
{InputCharType::DIV, "/"},
|
||||
{InputCharType::MOD, "%"},
|
||||
{InputCharType::EQ, "="},
|
||||
{InputCharType::GT, ">"},
|
||||
{InputCharType::LT, "<"},
|
||||
{InputCharType::NOT, "!"},
|
||||
{InputCharType::AND, "&"},
|
||||
{InputCharType::OR, "|"},
|
||||
{InputCharType::LBRACKET, "("},
|
||||
{InputCharType::RBRACKET, ")"},
|
||||
{InputCharType::LCBRAC, "{"},
|
||||
{InputCharType::RCBRAC, "}"},
|
||||
{InputCharType::COMMA, ","},
|
||||
{InputCharType::SEMI, ";"},
|
||||
{InputCharType::POINT, "."} // 小数点
|
||||
};
|
||||
|
||||
|
||||
std::unordered_map<WordType, std::string> WordTypeNames = {
|
||||
{WordType::KW_INT, "INT"}, {WordType::KW_VOID, "VOID"}, {WordType::KW_RETURN, "RETURN"},
|
||||
{WordType::KW_CONST, "CONST"}, {WordType::KW_MAIN, "MAIN"}, {WordType::KW_IF, "IF"},
|
||||
{WordType::KW_ELSE, "ELSE"}, {WordType::KW_FLOAT, "FLOAT"}, {WordType::OP_ADD, "+"},
|
||||
{WordType::OP_SUB, "-"}, {WordType::OP_MUL, "*"}, {WordType::OP_DIV, "/"},
|
||||
{WordType::OP_MOD, "%"}, {WordType::OP_ASSIGN, "="}, {WordType::OP_GT, ">"},
|
||||
{WordType::OP_LT, "<"}, {WordType::OP_EQ, "=="}, {WordType::OP_LE, "<="},
|
||||
{WordType::OP_GE, ">="}, {WordType::OP_NE, "!="}, {WordType::OP_AND, "&&"},
|
||||
{WordType::OP_OR, "||"}, {WordType::SE_LBRAC, "("}, {WordType::SE_RBRAC, ")"},
|
||||
{WordType::SE_LCBRAC, "{"}, {WordType::SE_RCBRAC, "}"}, {WordType::SE_COMMA, ","},
|
||||
{WordType::SE_SEMI, ";"}, {WordType::IDN, "IDENTIFIER"}, {WordType::INT_VAL, "INTEGER"},
|
||||
{WordType::FLOAT_VAL, "FLOAT"}, {WordType::UNKOWN, "UNKNOWN"}
|
||||
};
|
||||
//扫描,以字符的格式读入
|
||||
//对于界符和部分运算符,是单个组成,即可以单独代表一个状态
|
||||
// Classifies a single input character.
// ASCII letters map to LETTER; decimal digits and the decimal point map to
// DIGIT; the listed operator and separator characters map to their own type;
// every other character maps to EPSILON.
InputCharType getInputCharType(char c) {
    const bool isLowercase = (c >= 'a' && c <= 'z');
    const bool isUppercase = (c >= 'A' && c <= 'Z');
    if (isLowercase || isUppercase) {
        return InputCharType::LETTER;
    }

    // The decimal point is read in as a digit.
    if ((c >= '0' && c <= '9') || c == '.') {
        return InputCharType::DIGIT;
    }

    switch (c) {
        case '_': return InputCharType::UNDERLINE;
        case '+': return InputCharType::ADD;
        case '-': return InputCharType::SUB;
        case '*': return InputCharType::MUL;
        case '/': return InputCharType::DIV;
        case '%': return InputCharType::MOD;
        case '=': return InputCharType::EQ;
        case '>': return InputCharType::GT;
        case '<': return InputCharType::LT;
        case '!': return InputCharType::NOT;
        case '&': return InputCharType::AND;
        case '|': return InputCharType::OR;
        case '(': return InputCharType::LBRACKET;
        case ')': return InputCharType::RBRACKET;
        case '{': return InputCharType::LCBRAC;
        case '}': return InputCharType::RCBRAC;
        case ',': return InputCharType::COMMA;
        case ';': return InputCharType::SEMI;
        default:  return InputCharType::EPSILON;
    }
}
|
||||
|
||||
// 根据状态获取名称
|
||||
// Looks up the printable name of an input character type in CharTypeNames;
// returns "UNKNOWN" when the type has no entry.
std::string getInputChartypeName(InputCharType type) {
    const auto entry = CharTypeNames.find(type);
    const bool known = (entry != CharTypeNames.end());
    return known ? entry->second : std::string("UNKNOWN");
}
|
||||
|
||||
// Classifies a numeric lexeme by its decimal points.
//   no '.'                                   -> "INT"
//   exactly one '.', a digit on both sides   -> "FLOAT"
//   anything else                            -> "UNKNOWN"
// A dot with no digit directly before or after it (".", "1.", ".5") is
// rejected so the result agrees with the FLOAT_VAL pattern -?[0-9]+\.[0-9]+.
std::string judeFloat(std::string buffer){
    const size_t firstDot = buffer.find('.');
    if (firstDot == std::string::npos) {
        return "INT";
    }
    if (firstDot != buffer.rfind('.')) {
        return "UNKNOWN";  // more than one decimal point
    }

    const bool digitBefore = firstDot > 0 &&
        buffer[firstDot - 1] >= '0' && buffer[firstDot - 1] <= '9';
    const bool digitAfter = firstDot + 1 < buffer.size() &&
        buffer[firstDot + 1] >= '0' && buffer[firstDot + 1] <= '9';
    return (digitBefore && digitAfter) ? "FLOAT" : "UNKNOWN";
}
|
||||
|
||||
//根据关键字类型获取其所属的种别
|
||||
string getWordTypeName(WordType type, string buffer) {
|
||||
switch (type) {
|
||||
// 运算符
|
||||
case WordType::OP_ADD:
|
||||
case WordType::OP_SUB:
|
||||
case WordType::OP_MUL:
|
||||
case WordType::OP_DIV:
|
||||
case WordType::OP_MOD:
|
||||
case WordType::OP_ASSIGN:
|
||||
case WordType::OP_GT:
|
||||
case WordType::OP_LT:
|
||||
case WordType::OP_EQ:
|
||||
case WordType::OP_LE:
|
||||
case WordType::OP_GE:
|
||||
case WordType::OP_NE:
|
||||
case WordType::OP_AND:
|
||||
case WordType::OP_OR:
|
||||
return "OP";
|
||||
|
||||
// 界符
|
||||
case WordType::SE_LBRAC:
|
||||
case WordType::SE_RBRAC:
|
||||
case WordType::SE_LCBRAC:
|
||||
case WordType::SE_RCBRAC:
|
||||
case WordType::SE_COMMA:
|
||||
case WordType::SE_SEMI:
|
||||
return "SE";
|
||||
|
||||
// 标识符和关键字
|
||||
case WordType::IDN:
|
||||
if (!buffer.compare("int") ||
|
||||
!buffer.compare("void") ||
|
||||
!buffer.compare("const") ||
|
||||
!buffer.compare("return")||
|
||||
!buffer.compare("if") ||
|
||||
!buffer.compare("else") ||
|
||||
!buffer.compare("float")
|
||||
){
|
||||
return "KW";
|
||||
}
|
||||
else {
|
||||
return "IDN";
|
||||
}
|
||||
|
||||
// 整数(添加了浮点判断)
|
||||
case WordType::INT_VAL:
|
||||
return judeFloat(buffer);
|
||||
|
||||
//浮点
|
||||
// case FLOAT_VAL:
|
||||
// return "FLOAT";
|
||||
|
||||
//default
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
//根据关键字属性
|
||||
string getWordAttribute(WordType type, string buffer) {
|
||||
switch (type) {
|
||||
// 运算符
|
||||
case WordType::OP_ADD: return "6";
|
||||
case WordType::OP_SUB: return "7";
|
||||
case WordType::OP_MUL: return "8";
|
||||
case WordType::OP_DIV: return "9";
|
||||
case WordType::OP_MOD: return "10";
|
||||
case WordType::OP_ASSIGN: return "11";
|
||||
case WordType::OP_GT: return "12";
|
||||
case WordType::OP_LT: return "13";
|
||||
case WordType::OP_EQ: return "14";
|
||||
case WordType::OP_LE: return "15";
|
||||
case WordType::OP_GE: return "16";
|
||||
case WordType::OP_NE: return "17";
|
||||
case WordType::OP_AND:return "18";
|
||||
case WordType::OP_OR: return "19";
|
||||
|
||||
// 界符
|
||||
case WordType::SE_LBRAC: return "20";
|
||||
case WordType::SE_RBRAC: return "21";
|
||||
case WordType::SE_LCBRAC: return "22";
|
||||
case WordType::SE_RCBRAC: return "23";
|
||||
case WordType::SE_COMMA: return "25";
|
||||
case WordType::SE_SEMI: return "24";
|
||||
|
||||
|
||||
// 标识符和关键字
|
||||
case WordType::IDN:
|
||||
if (!buffer.compare("int")){
|
||||
return "1";
|
||||
}
|
||||
else if (!buffer.compare("void")){
|
||||
return "2";
|
||||
}
|
||||
else if (!buffer.compare("return")){
|
||||
return "3";
|
||||
}
|
||||
else if (!buffer.compare("const")){
|
||||
return "4";
|
||||
}
|
||||
else if (!buffer.compare("main")){
|
||||
return "5";
|
||||
}
|
||||
else if (!buffer.compare("if")){
|
||||
return "6";
|
||||
}
|
||||
else if (!buffer.compare("else")){
|
||||
return "7";
|
||||
}
|
||||
else if (!buffer.compare("float")){
|
||||
return "8";
|
||||
}
|
||||
else {
|
||||
return buffer;
|
||||
}
|
||||
|
||||
// 整数
|
||||
case WordType::INT_VAL:
|
||||
return buffer;
|
||||
|
||||
//浮点类型,理论不使用
|
||||
case WordType::FLOAT_VAL:
|
||||
return buffer;
|
||||
|
||||
//default
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
// 读取文件
|
||||
// Reads the whole file `filename` and returns its content as one string.
// Newlines are kept. When the file cannot be opened, a message is written to
// cerr and an empty string is returned, so that a missing file is not
// silently mistaken for an empty one.
std::string readfile(const std::string& filename)
{
    std::ifstream file(filename);
    if (!file) {
        std::cerr << "Cannot open file: " << filename << std::endl;
        return std::string();
    }

    // Copy the stream into the string character by character.
    return std::string(std::istreambuf_iterator<char>(file),
                       std::istreambuf_iterator<char>());
}
|
||||
|
||||
|
||||
// 获取token名称
|
||||
string getWordTypeName(WordType type) {
|
||||
auto it = WordTypeNames.find(type);
|
||||
if (it != WordTypeNames.end()) {
|
||||
return it->second;
|
||||
} else {
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
// 获取语法名称
|
||||
string getGrammarName(WordType type, string buffer) {
|
||||
switch (type) {
|
||||
|
||||
case WordType::OP_ADD: return "+";
|
||||
case WordType::OP_SUB: return "-";
|
||||
case WordType::OP_MUL: return "*";
|
||||
case WordType::OP_DIV: return "/";
|
||||
case WordType::OP_MOD: return "%";
|
||||
case WordType::OP_ASSIGN: return "=";
|
||||
case WordType::OP_GT: return ">";
|
||||
case WordType::OP_LT: return "<";
|
||||
case WordType::OP_EQ: return "==";
|
||||
case WordType::OP_LE: return "<=";
|
||||
case WordType::OP_GE: return ">=";
|
||||
case WordType::OP_NE: return "!=";
|
||||
case WordType::OP_AND: return "&&";
|
||||
case WordType::OP_OR: return "||";
|
||||
|
||||
case WordType::SE_LBRAC: return "(";
|
||||
case WordType::SE_RBRAC: return ")";
|
||||
case WordType::SE_LCBRAC: return "{";
|
||||
case WordType::SE_RCBRAC: return "}";
|
||||
case WordType::SE_COMMA: return ",";
|
||||
case WordType::SE_SEMI: return ";";
|
||||
|
||||
case WordType::IDN:
|
||||
if (!buffer.compare("int")) {
|
||||
return "int";
|
||||
}
|
||||
else if (!buffer.compare("void")) {
|
||||
return "void";
|
||||
}
|
||||
else if (!buffer.compare("return")) {
|
||||
return "return";
|
||||
}
|
||||
else if (!buffer.compare("const")) {
|
||||
return "const";
|
||||
}
|
||||
else if (!buffer.compare("main")) {
|
||||
return "main";
|
||||
}
|
||||
else {
|
||||
return "IDN";
|
||||
}
|
||||
case WordType::INT_VAL: return "INT";
|
||||
case WordType::FLOAT_VAL: return "FLOAT";
|
||||
default: cerr << "Token Error: "<< WordTypeNames.find(type)->second << endl; exit(-1);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,27 +0,0 @@
|
|||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cassert>
|
||||
|
||||
#include "nfa.h"
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
NFA nfa = RexToNFA();
|
||||
printNFA(nfa);
|
||||
//cout<<"OK1"<<endl;
|
||||
|
||||
DFA dfa = nfaToDFA(nfa);
|
||||
printDFA(dfa);
|
||||
//cout<<"OK2"<<endl;
|
||||
|
||||
system("pause");
|
||||
DFA minimizedDFA = minimizeDFA(minimizeDFA(dfa));
|
||||
removeUnreachableStates(minimizedDFA);
|
||||
printDFA(minimizedDFA);
|
||||
//cout<<"OK3"<<endl;
|
||||
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,378 @@
|
|||
#include "nfa.h"
|
||||
|
||||
/*
|
||||
扫描读入-->以字符的格式读入
|
||||
对于界符和部分运算符,显然是单个组成,即可以单独代表一个状态
|
||||
|
||||
|
||||
注意:
|
||||
1.字母需要区分大小写
|
||||
*/
|
||||
|
||||
// 获取输入串的类型
|
||||
// 单独一位的读入-->下一步标识终态
|
||||
/// Classifies a single raw input character.
///
/// @param c  character read from the source text
/// @return   LETTER for ASCII letters (upper and lower case alike), DIGIT for
///           '0'..'9', the dedicated category for each supported punctuation
///           character, and EPSILON for every other character.
InputCharType getInputCharType(char c) {
    // Range-based categories first; none of these characters appear in the
    // punctuation switch below, so the check order does not affect the result.
    const bool lowerCase = (c >= 'a' && c <= 'z');
    const bool upperCase = (c >= 'A' && c <= 'Z');
    if (lowerCase || upperCase) {
        return LETTER;
    }
    if (c >= '0' && c <= '9') {
        return DIGIT;
    }

    // Single-character symbols each have their own category.
    switch (c) {
        case '_': return UNDERLINE;
        case '+': return ADD;
        case '-': return SUB;
        case '*': return MUL;
        case '/': return DIV;
        case '%': return MOD;
        case '=': return EQ;
        case '>': return GT;
        case '<': return LT;
        case '!': return NOT;
        case '&': return AND;
        case '|': return OR;
        case '(': return LBRACKET;
        case ')': return RBRACKET;
        case '{': return LCBRAC;
        case '}': return RCBRAC;
        case ',': return COMMA;
        case ';': return SEMI;
        default:  break;
    }

    // Anything not recognised above is reported as EPSILON.
    return EPSILON;
}
|
||||
|
||||
// 根据状态获取名称
|
||||
/// Returns the printable name of an input-character category.
///
/// @param type  category to describe
/// @return      "LETTER", "UNDERLINE", "DIGIT", "EPSILON", the symbol itself
///              for punctuation categories, or "UNKOWN" for any other value.
string getInputChartypeName(InputCharType type) {
    // NOTE(review): the fallback text is spelled "UNKOWN" in the original and
    // is kept unchanged here because it is emitted at runtime.
    const char* label = "UNKOWN";

    switch (type) {
        case LETTER:    label = "LETTER";    break;
        case UNDERLINE: label = "UNDERLINE"; break;
        case DIGIT:     label = "DIGIT";     break;
        case ADD:       label = "+";         break;
        case SUB:       label = "-";         break;
        case MUL:       label = "*";         break;
        case DIV:       label = "/";         break;
        case MOD:       label = "%";         break;
        case EQ:        label = "=";         break;
        case GT:        label = ">";         break;
        case LT:        label = "<";         break;
        case NOT:       label = "!";         break;
        case AND:       label = "&";         break;
        case OR:        label = "|";         break;
        case LBRACKET:  label = "(";         break;
        case RBRACKET:  label = ")";         break;
        case LCBRAC:    label = "{";         break;
        case RCBRAC:    label = "}";         break;
        case COMMA:     label = ",";         break;
        case SEMI:      label = ";";         break;
        case EPSILON:   label = "EPSILON";   break;
        default:                             break;
    }

    return label;
}
|
||||
|
||||
//根据关键字类型获取其所属的种别
|
||||
/// Returns the coarse category name of a recognised word.
///
/// @param type    word type produced by the scanner
/// @param buffer  lexeme text; only consulted when type is IDN
/// @return        "OP" for operators, "SE" for separators, "KW" when an IDN
///                lexeme is one of int/void/const/return/main, "IDN" for any
///                other identifier, "INT" for INT_VAL, otherwise "UNKNOWN".
string getWordTypeName(WordType type, string buffer) {
    const char* category = "UNKNOWN";

    switch (type) {
        // Operators
        case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD:
        case OP_ASSIGN:
        case OP_GT: case OP_LT: case OP_EQ: case OP_LE: case OP_GE: case OP_NE:
        case OP_AND: case OP_OR:
            category = "OP";
            break;

        // Separators
        case SE_LBRAC: case SE_RBRAC:
        case SE_LCBRAC: case SE_RCBRAC:
        case SE_COMMA: case SE_SEMI:
            category = "SE";
            break;

        // Identifiers and keywords share the IDN word type; the lexeme
        // decides which of the two it is.
        case IDN: {
            const bool reserved = buffer == "int" || buffer == "void" ||
                                  buffer == "const" || buffer == "return" ||
                                  buffer == "main";
            category = reserved ? "KW" : "IDN";
            break;
        }

        // Integer literals
        case INT_VAL:
            category = "INT";
            break;

        default:
            break;
    }

    return category;
}
|
||||
|
||||
//根据关键字属性
|
||||
/// Returns the attribute string attached to a recognised word.
///
/// @param type    word type produced by the scanner
/// @param buffer  lexeme text; used for IDN and INT_VAL
/// @return        a fixed numeric code (as text) for operators, separators and
///                the keywords int/void/return/const/main; the lexeme itself
///                for other identifiers and for integer literals; "UNKNOWN"
///                for any other word type.
string getWordAttribute(WordType type, string buffer) {
    switch (type) {
        // Operators: codes 6..19
        case OP_ADD:    return "6";
        case OP_SUB:    return "7";
        case OP_MUL:    return "8";
        case OP_DIV:    return "9";
        case OP_MOD:    return "10";
        case OP_ASSIGN: return "11";
        case OP_GT:     return "12";
        case OP_LT:     return "13";
        case OP_EQ:     return "14";
        case OP_LE:     return "15";
        case OP_GE:     return "16";
        case OP_NE:     return "17";
        case OP_AND:    return "18";
        case OP_OR:     return "19";

        // Separators: codes 20..25 (semicolon is 24, comma is 25)
        case SE_LBRAC:  return "20";
        case SE_RBRAC:  return "21";
        case SE_LCBRAC: return "22";
        case SE_RCBRAC: return "23";
        case SE_SEMI:   return "24";
        case SE_COMMA:  return "25";

        // Keywords arrive as IDN; the lexeme selects codes 1..5. Any other
        // identifier carries its own text as the attribute.
        case IDN: {
            static const char* const kKeywordCodes[][2] = {
                {"int", "1"}, {"void", "2"}, {"return", "3"},
                {"const", "4"}, {"main", "5"},
            };
            for (const auto& entry : kKeywordCodes) {
                if (buffer == entry[0]) {
                    return entry[1];
                }
            }
            return buffer;
        }

        // Integer literals carry their own text.
        case INT_VAL:
            return buffer;

        default:
            return "UNKNOWN";
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
// 读取文件
|
||||
string readfile(const string& filename)
|
||||
{
|
||||
// 打开文件流并读取文件内容
|
||||
ifstream file(filename);
|
||||
|
||||
// 使用istreambuf_iterator类逐字符从file中读取到content中
|
||||
string content((istreambuf_iterator<char>(file)),istreambuf_iterator<char>());
|
||||
|
||||
// 去掉换行符
|
||||
//remove函数的作用是将字符串中的某个字符移动到字符串的末尾,并返回一个指向该字符后面位置的指针。
|
||||
//erase 函数的作用是删除字符串中指定区间内的所有字符,返回修改后的字符串
|
||||
//content.erase(remove(content.begin(), content.end(), '\n'), content.end());
|
||||
|
||||
return content;
|
||||
}
|
||||
|
||||
// 获取关键字的Token种类
|
||||
/// Maps a scanner word type to its token category.
///
/// @param type    word type produced by the scanner
/// @param buffer  lexeme text; only consulted when type is IDN
/// @return        TokenType::OP for operators, TokenType::SE for separators,
///                TokenType::KW when an IDN lexeme is one of
///                int/void/const/return/main, TokenType::IDN for any other
///                identifier, TokenType::INT for INT_VAL, otherwise
///                TokenType::UNKNOWN.
TokenType getTokenType(WordType type,string buffer) {
    TokenType category = TokenType::UNKNOWN;

    switch (type) {
        case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD:
        case OP_ASSIGN:
        case OP_GT: case OP_LT: case OP_EQ: case OP_LE: case OP_GE: case OP_NE:
        case OP_AND: case OP_OR:
            category = TokenType::OP;
            break;

        case SE_LBRAC: case SE_RBRAC:
        case SE_LCBRAC: case SE_RCBRAC:
        case SE_COMMA: case SE_SEMI:
            category = TokenType::SE;
            break;

        // Keywords are scanned as IDN; the lexeme tells them apart.
        case IDN: {
            const bool reserved = buffer == "int" || buffer == "void" ||
                                  buffer == "const" || buffer == "return" ||
                                  buffer == "main";
            category = reserved ? TokenType::KW : TokenType::IDN;
            break;
        }

        case INT_VAL:
            category = TokenType::INT;
            break;

        default:
            break;
    }

    return category;
}
|
||||
|
||||
// 获取token名称
|
||||
/// Returns the enumerator name of a word type as text.
///
/// @param type  word type to describe
/// @return      the enumerator's identifier (e.g. "KW_INT", "OP_ADD",
///              "SE_SEMI", "IDN", "INT_VAL"), or "UNKNOWN" for a value that
///              is not listed below.
string getWordTypeName(WordType type) {
    const char* label = "UNKNOWN";

    switch (type) {
        // Keywords
        case KW_INT:    label = "KW_INT";    break;
        case KW_VOID:   label = "KW_VOID";   break;
        case KW_RETURN: label = "KW_RETURN"; break;
        case KW_CONST:  label = "KW_CONST";  break;
        case KW_MAIN:   label = "KW_MAIN";   break;

        // Operators
        case OP_ADD:    label = "OP_ADD";    break;
        case OP_SUB:    label = "OP_SUB";    break;
        case OP_MUL:    label = "OP_MUL";    break;
        case OP_DIV:    label = "OP_DIV";    break;
        case OP_MOD:    label = "OP_MOD";    break;
        case OP_ASSIGN: label = "OP_ASSIGN"; break;
        case OP_GT:     label = "OP_GT";     break;
        case OP_LT:     label = "OP_LT";     break;
        case OP_EQ:     label = "OP_EQ";     break;
        case OP_LE:     label = "OP_LE";     break;
        case OP_GE:     label = "OP_GE";     break;
        case OP_NE:     label = "OP_NE";     break;
        case OP_AND:    label = "OP_AND";    break;
        case OP_OR:     label = "OP_OR";     break;

        // Separators
        case SE_LBRAC:  label = "SE_LBRAC";  break;
        case SE_RBRAC:  label = "SE_RBRAC";  break;
        case SE_LCBRAC: label = "SE_LCBRAC"; break;
        case SE_RCBRAC: label = "SE_RCBRAC"; break;
        case SE_COMMA:  label = "SE_COMMA";  break;
        case SE_SEMI:   label = "SE_SEMI";   break;

        // Identifiers and integer literals
        case IDN:       label = "IDN";       break;
        case INT_VAL:   label = "INT_VAL";   break;

        default:                             break;
    }

    return label;
}
|
||||
|
||||
// 获取语法名称
|
||||
/// Returns the grammar symbol text that corresponds to a recognised word.
///
/// @param type    word type produced by the scanner
/// @param buffer  lexeme text; only consulted when type is IDN
/// @return        the operator/separator spelling, the keyword itself when an
///                IDN lexeme is int/void/return/const/main, "IDN" for any
///                other identifier, and "INT" for INT_VAL.
///
/// For any other word type the function prints "Token Error: <type>" to
/// stderr and terminates the process with exit(-1); it does not return.
string getGrammarName(WordType type, string buffer) {
    switch (type) {
        // Operators
        case OP_ADD:    return "+";
        case OP_SUB:    return "-";
        case OP_MUL:    return "*";
        case OP_DIV:    return "/";
        case OP_MOD:    return "%";
        case OP_ASSIGN: return "=";
        case OP_GT:     return ">";
        case OP_LT:     return "<";
        case OP_EQ:     return "==";
        case OP_LE:     return "<=";
        case OP_GE:     return ">=";
        case OP_NE:     return "!=";
        case OP_AND:    return "&&";
        case OP_OR:     return "||";

        // Separators
        case SE_LBRAC:  return "(";
        case SE_RBRAC:  return ")";
        case SE_LCBRAC: return "{";
        case SE_RCBRAC: return "}";
        case SE_COMMA:  return ",";
        case SE_SEMI:   return ";";

        // A keyword lexeme is its own grammar symbol; every other identifier
        // collapses to the generic "IDN" symbol.
        case IDN: {
            static const char* const kKeywords[] = {
                "int", "void", "return", "const", "main",
            };
            for (const char* keyword : kKeywords) {
                if (buffer == keyword) {
                    return keyword;
                }
            }
            return "IDN";
        }

        case INT_VAL:   return "INT";

        default:        break;
    }

    // Unsupported word type: report it and abort.
    cerr << "Token Error: "<< type << endl;
    exit(-1);
}
|
||||
Loading…
Reference in New Issue