[add]fix vision
This commit is contained in:
parent
b4145e7fbf
commit
1af61d2eef
|
|
@ -0,0 +1,8 @@
|
||||||
|
{
|
||||||
|
"files.associations": {
|
||||||
|
"xstring": "cpp",
|
||||||
|
"iterator": "cpp",
|
||||||
|
"ostream": "cpp",
|
||||||
|
"vector": "cpp"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -10,7 +10,7 @@ using namespace std;
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
|
||||||
NFA nfa = RexToNFA();
|
NFA nfa = RexToNFA();
|
||||||
printNFA(nfa);
|
//printNFA(nfa);
|
||||||
|
|
||||||
DFA dfa = nfaToDFA(nfa);
|
DFA dfa = nfaToDFA(nfa);
|
||||||
//printDFA(dfa);
|
//printDFA(dfa);
|
||||||
|
|
@ -62,7 +62,7 @@ int main(int argc, char** argv) {
|
||||||
ll.get_token_strings(token_strings);
|
ll.get_token_strings(token_strings);
|
||||||
|
|
||||||
|
|
||||||
// ll.print_token_strings();
|
ll.print_token_strings();
|
||||||
ll.build_LL1_grammar();
|
ll.build_LL1_grammar();
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -71,6 +71,7 @@ int main(int argc, char** argv) {
|
||||||
|
|
||||||
//ll.print_LL1_grammar_log();
|
//ll.print_LL1_grammar_log();
|
||||||
cout << endl;
|
cout << endl;
|
||||||
|
cout<<"end"<<endl;
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,7 @@ typedef enum WordType {
|
||||||
KW_VOID, // void
|
KW_VOID, // void
|
||||||
KW_RETURN, // return
|
KW_RETURN, // return
|
||||||
KW_CONST, // const
|
KW_CONST, // const
|
||||||
|
KW_MAIN, //main
|
||||||
|
|
||||||
OP_ADD, // +
|
OP_ADD, // +
|
||||||
OP_SUB, // -
|
OP_SUB, // -
|
||||||
|
|
@ -95,6 +96,7 @@ typedef struct Token {
|
||||||
//定义函数判断输入的字符类别
|
//定义函数判断输入的字符类别
|
||||||
InputCharType getInputCharType(char c);
|
InputCharType getInputCharType(char c);
|
||||||
string getWordTypeName(WordType type,string buffer);
|
string getWordTypeName(WordType type,string buffer);
|
||||||
|
string getWordAttribute(WordType type,string buffer);
|
||||||
|
|
||||||
//定义状态类
|
//定义状态类
|
||||||
class State {
|
class State {
|
||||||
|
|
|
||||||
14
nfa/dfa.cpp
14
nfa/dfa.cpp
|
|
@ -142,6 +142,7 @@ DFA minimizeDFA(const DFA& dfa) {
|
||||||
}
|
}
|
||||||
return DFA(minimizedStartState, minimizedEndStates, minimizedStates);
|
return DFA(minimizedStartState, minimizedEndStates, minimizedStates);
|
||||||
}
|
}
|
||||||
|
|
||||||
void removeUnreachableStates(DFA& dfa) {
|
void removeUnreachableStates(DFA& dfa) {
|
||||||
set<State*> reachableStates; //可达状态集合
|
set<State*> reachableStates; //可达状态集合
|
||||||
queue<State*> statesQueue; //状态队列
|
queue<State*> statesQueue; //状态队列
|
||||||
|
|
@ -175,6 +176,7 @@ void removeUnreachableStates(DFA& dfa) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<string> recognize(const DFA& dfa, const string& input, const string& output) {
|
vector<string> recognize(const DFA& dfa, const string& input, const string& output) {
|
||||||
|
|
||||||
State* currentState = dfa.startState;
|
State* currentState = dfa.startState;
|
||||||
|
|
@ -206,8 +208,8 @@ vector<string> recognize(const DFA& dfa, const string& input, const string& outp
|
||||||
|
|
||||||
if (nextIt == nextState->transitions.end()) {// 如果没有更多匹配的转换
|
if (nextIt == nextState->transitions.end()) {// 如果没有更多匹配的转换
|
||||||
// 输出识别到的单词符号和对应的类型
|
// 输出识别到的单词符号和对应的类型
|
||||||
cout << buffer << "\t<" << getWordTypeName(nextState->wordType,buffer) << ">" << endl;
|
cout << buffer << "\t<" << getWordTypeName(nextState->wordType,buffer)<<","<<getWordAttribute(nextState->wordType,buffer) << ">" << endl;
|
||||||
file << buffer << "\t<" << getWordTypeName(nextState->wordType, buffer) << ">" << endl;
|
file << buffer << "\t<" << getWordTypeName(nextState->wordType, buffer)<<","<<getWordAttribute(nextState->wordType,buffer) << ">" << endl;
|
||||||
tokens.push_back(getGrammarName(nextState->wordType, buffer));
|
tokens.push_back(getGrammarName(nextState->wordType, buffer));
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
currentState = dfa.startState;
|
currentState = dfa.startState;
|
||||||
|
|
@ -223,8 +225,8 @@ vector<string> recognize(const DFA& dfa, const string& input, const string& outp
|
||||||
else {// 如果没有找到匹配的转换
|
else {// 如果没有找到匹配的转换
|
||||||
if (currentState->isFinalState) {// 如果当前状态是终止状态
|
if (currentState->isFinalState) {// 如果当前状态是终止状态
|
||||||
// 输出识别到的单词符号和对应的类型
|
// 输出识别到的单词符号和对应的类型
|
||||||
cout << buffer << "\t<" << getWordTypeName(currentState->wordType,buffer) << ">" << endl;
|
// Print "<lexeme>\t<kind,attribute>". This branch is guarded by currentState->isFinalState,
// so both the kind and the attribute must be derived from currentState (not nextState).
cout << buffer << "\t<" << getWordTypeName(currentState->wordType,buffer) <<","<<getWordAttribute(currentState->wordType,buffer) << ">" << endl;
|
||||||
file << buffer << "\t<" << getWordTypeName(currentState->wordType, buffer) << ">" << endl;
|
// Write the same "<lexeme>\t<kind,attribute>" record to the output file; the attribute is
// taken from currentState, the state whose isFinalState flag guards this branch.
file << buffer << "\t<" << getWordTypeName(currentState->wordType, buffer) << ","<<getWordAttribute(currentState->wordType,buffer) << ">" << endl;
|
||||||
tokens.push_back(getGrammarName(currentState->wordType, buffer) );
|
tokens.push_back(getGrammarName(currentState->wordType, buffer) );
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
}
|
}
|
||||||
|
|
@ -242,8 +244,8 @@ vector<string> recognize(const DFA& dfa, const string& input, const string& outp
|
||||||
}
|
}
|
||||||
// 处理最后一个字符,如果缓冲区不为空且当前状态是终止状态,对应第一个if里面的else
|
// 处理最后一个字符,如果缓冲区不为空且当前状态是终止状态,对应第一个if里面的else
|
||||||
if (!buffer.empty() && currentState->isFinalState) {
|
if (!buffer.empty() && currentState->isFinalState) {
|
||||||
cout << buffer << "\t<" << getWordTypeName(currentState->wordType,buffer) << ">" << endl;
|
// Flush the trailing lexeme: the enclosing condition checks currentState->isFinalState,
// so the attribute is looked up from currentState as well (not nextState).
cout << buffer << "\t<" << getWordTypeName(currentState->wordType,buffer) << ","<<getWordAttribute(currentState->wordType,buffer) << ">" << endl;
|
||||||
file << buffer << "\t<" << getWordTypeName(currentState->wordType, buffer) << ">" << endl;
|
// Write the trailing lexeme's "<kind,attribute>" record to the output file, using
// currentState for both fields to match the enclosing isFinalState check.
file << buffer << "\t<" << getWordTypeName(currentState->wordType, buffer) << ","<<getWordAttribute(currentState->wordType,buffer) << ">" << endl;
|
||||||
tokens.push_back(getGrammarName(currentState->wordType, buffer));
|
tokens.push_back(getGrammarName(currentState->wordType, buffer));
|
||||||
}
|
}
|
||||||
file.close();//关闭文件
|
file.close();//关闭文件
|
||||||
|
|
|
||||||
|
|
@ -12,11 +12,13 @@ NFA RexToNFA() {
|
||||||
//由于里面存在||,所以不同正则间使用空格分隔代表| l代表letter,_代表下划线,0代表数字(也可以是d,但是为了使用已经有的函数),
|
//由于里面存在||,所以不同正则间使用空格分隔代表| l代表letter,_代表下划线,0代表数字(也可以是d,但是为了使用已经有的函数),
|
||||||
//[lu]代表l|u
|
//[lu]代表l|u
|
||||||
string rex = "+ - * / % = > < == <= >= != && || ( ) { } , ; [l_][l_0]* -?00*";
|
string rex = "+ - * / % = > < == <= >= != && || ( ) { } , ; [l_][l_0]* -?00*";
|
||||||
|
|
||||||
//下面给出正则对应的输出(终态)
|
//下面给出正则对应的输出(终态)
|
||||||
vector<WordType> finalState = {
|
vector<WordType> finalState = {
|
||||||
OP_ADD, OP_SUB,OP_MUL,OP_DIV,OP_MOD,OP_ASSIGN,OP_GT,OP_LT, OP_EQ,OP_LE,OP_GE,OP_NE, OP_AND, OP_OR,SE_LBRAC, SE_RBRAC,
|
OP_ADD, OP_SUB,OP_MUL,OP_DIV,OP_MOD,OP_ASSIGN,OP_GT,OP_LT, OP_EQ,OP_LE,OP_GE,OP_NE, OP_AND, OP_OR,SE_LBRAC, SE_RBRAC,
|
||||||
SE_LCBRAC,SE_RCBRAC,SE_COMMA,SE_SEMI,IDN,INT_VAL
|
SE_LCBRAC,SE_RCBRAC,SE_COMMA,SE_SEMI,IDN,INT_VAL
|
||||||
};
|
};
|
||||||
|
|
||||||
stringstream ss(rex);
|
stringstream ss(rex);
|
||||||
string target;
|
string target;
|
||||||
|
|
||||||
|
|
@ -26,6 +28,8 @@ NFA RexToNFA() {
|
||||||
State* startState = new State(stateIndex++);
|
State* startState = new State(stateIndex++);
|
||||||
set<State*, StatePtrCompare> endStates;
|
set<State*, StatePtrCompare> endStates;
|
||||||
set<State*, StatePtrCompare> allStates = { startState };
|
set<State*, StatePtrCompare> allStates = { startState };
|
||||||
|
|
||||||
|
// 按空格分割
|
||||||
while (getline(ss, target,' ')) {
|
while (getline(ss, target,' ')) {
|
||||||
//如获得[l_][l_0]*
|
//如获得[l_][l_0]*
|
||||||
State* currentState = startState;
|
State* currentState = startState;
|
||||||
|
|
|
||||||
28
nfa/nfa.h
28
nfa/nfa.h
|
|
@ -15,13 +15,17 @@
|
||||||
#include <queue>
|
#include <queue>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
//单词符号的类型,返回<待测代码中的单词符号,WordType>
|
//单词符号的类型,返回<待测代码中的单词符号,WordType>
|
||||||
|
// 保留关键字
|
||||||
typedef enum WordType {
|
typedef enum WordType {
|
||||||
//当识别成标识符后,先判断是不是保留字,让后再判断IDN
|
//当识别成标识符后,先判断是不是保留字,让后再判断IDN
|
||||||
KW_INT = 0, // int
|
KW_INT = 0, // int
|
||||||
KW_VOID, // void
|
KW_VOID, // void
|
||||||
KW_RETURN, // return
|
KW_RETURN, // return
|
||||||
KW_CONST, // const
|
KW_CONST, // const
|
||||||
|
KW_MAIN, //main
|
||||||
|
|
||||||
OP_ADD, // +
|
OP_ADD, // +
|
||||||
OP_SUB, // -
|
OP_SUB, // -
|
||||||
|
|
@ -50,7 +54,10 @@ typedef enum WordType {
|
||||||
UNKOWN
|
UNKOWN
|
||||||
}WordType;
|
}WordType;
|
||||||
string getWordTypeName(WordType type);
|
string getWordTypeName(WordType type);
|
||||||
|
|
||||||
// 定义输入的字符类别
|
// 定义输入的字符类别
|
||||||
|
// 输入与实际不完全匹配
|
||||||
|
// 注意:此处定义的;和,顺序与实验指导书中不同
|
||||||
typedef enum InputCharType {
|
typedef enum InputCharType {
|
||||||
LETTER = 0, // 字母 0
|
LETTER = 0, // 字母 0
|
||||||
UNDERLINE, // _ 1
|
UNDERLINE, // _ 1
|
||||||
|
|
@ -78,6 +85,9 @@ typedef enum InputCharType {
|
||||||
EPSILON, // 空字符 20
|
EPSILON, // 空字符 20
|
||||||
}InputCharType;
|
}InputCharType;
|
||||||
string getInputChartypeName(InputCharType type);
|
string getInputChartypeName(InputCharType type);
|
||||||
|
|
||||||
|
|
||||||
|
// 定义 token类型
|
||||||
enum class TokenType {
|
enum class TokenType {
|
||||||
KW = 0,
|
KW = 0,
|
||||||
OP,
|
OP,
|
||||||
|
|
@ -87,6 +97,8 @@ enum class TokenType {
|
||||||
UNKNOWN
|
UNKNOWN
|
||||||
};
|
};
|
||||||
TokenType getTokenType(WordType wordType,string buffer);
|
TokenType getTokenType(WordType wordType,string buffer);
|
||||||
|
|
||||||
|
// 定义最终返回的token的组成类型,包含值和类型两部分
|
||||||
typedef struct Token {
|
typedef struct Token {
|
||||||
string value;
|
string value;
|
||||||
TokenType type;
|
TokenType type;
|
||||||
|
|
@ -95,26 +107,36 @@ typedef struct Token {
|
||||||
// 定义函数判断输入的字符类别
|
// 定义函数判断输入的字符类别
|
||||||
InputCharType getInputCharType(char c);
|
InputCharType getInputCharType(char c);
|
||||||
string getWordTypeName(WordType type,string buffer);
|
string getWordTypeName(WordType type,string buffer);
|
||||||
|
string getWordAttribute(WordType type,string buffer);
|
||||||
|
|
||||||
// 定义状态类
|
// 定义状态类
|
||||||
class State {
|
class State {
|
||||||
public:
|
public:
|
||||||
int id; // 状态编号
|
int id; // 状态编号
|
||||||
map<InputCharType, set<State*>> transitions; // 转移函数映射表,记录每个输入字符类型对应的目标状态集合
|
|
||||||
bool isFinalState; // 是否为最终状态
|
bool isFinalState; // 是否为最终状态
|
||||||
WordType wordType; // 到达该状态时应该返回的词法单元类型
|
WordType wordType; // 到达该状态时应该返回的词法单元类型
|
||||||
|
map<InputCharType, set<State*>> transitions; // 转移函数映射表,记录每个输入字符类型对应的目标状态集合
|
||||||
|
|
||||||
|
// 构造函数
|
||||||
State(int id) : id(id), isFinalState(false), wordType(UNKOWN) {}
|
State(int id) : id(id), isFinalState(false), wordType(UNKOWN) {}
|
||||||
|
|
||||||
|
// 添加状态转移映射
|
||||||
void addTransition(InputCharType input, State* targetState) {
|
void addTransition(InputCharType input, State* targetState) {
|
||||||
transitions[input].insert(targetState);
|
transitions[input].insert(targetState);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 转换终态
|
||||||
void setFinalState(bool isFinal, WordType type) {
|
void setFinalState(bool isFinal, WordType type) {
|
||||||
isFinalState = isFinal;
|
isFinalState = isFinal;
|
||||||
wordType = type;
|
wordType = type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 对象序号比较
|
||||||
bool operator<(const State& other) const {
|
bool operator<(const State& other) const {
|
||||||
return id < other.id;
|
return id < other.id;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
//为了是set内部有序,定义排序结构体StatePtrCompare
|
//为了是set内部有序,定义排序结构体StatePtrCompare
|
||||||
struct StatePtrCompare {
|
struct StatePtrCompare {
|
||||||
bool operator()(const State* lhs, const State* rhs) const {
|
bool operator()(const State* lhs, const State* rhs) const {
|
||||||
|
|
@ -128,10 +150,14 @@ public:
|
||||||
State* startState; // 起始状态
|
State* startState; // 起始状态
|
||||||
set<State*, StatePtrCompare> endStates; // 终止状态集合
|
set<State*, StatePtrCompare> endStates; // 终止状态集合
|
||||||
set<State*, StatePtrCompare> states; // 状态集合
|
set<State*, StatePtrCompare> states; // 状态集合
|
||||||
|
|
||||||
|
// 构造函数
|
||||||
NFA(State* startState, set<State*, StatePtrCompare> endStates, set<State*, StatePtrCompare> states) :
|
NFA(State* startState, set<State*, StatePtrCompare> endStates, set<State*, StatePtrCompare> states) :
|
||||||
startState(startState), endStates(endStates), states(states) {}
|
startState(startState), endStates(endStates), states(states) {}
|
||||||
// void printNFA();
|
// void printNFA();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// 正则表达式构建状态机
|
||||||
NFA RexToNFA();
|
NFA RexToNFA();
|
||||||
void printNFA(const NFA& nfa);
|
void printNFA(const NFA& nfa);
|
||||||
NFA buildNFA(string filename);
|
NFA buildNFA(string filename);
|
||||||
|
|
|
||||||
99
nfa/tool.cpp
99
nfa/tool.cpp
|
|
@ -1,6 +1,16 @@
|
||||||
#include "nfa.h"
|
#include "nfa.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
扫描读入-->以字符的格式读入
|
||||||
|
对于界符和部分运算符,显然是单个组成,即可以单独代表一个状态
|
||||||
|
|
||||||
|
|
||||||
|
注意:
|
||||||
|
1.字母需要区分大小写
|
||||||
|
*/
|
||||||
|
|
||||||
|
// 获取输入串的类型
|
||||||
|
// 单独一位的读入-->下一步标识终态
|
||||||
InputCharType getInputCharType(char c) {
|
InputCharType getInputCharType(char c) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '_': return UNDERLINE;
|
case '_': return UNDERLINE;
|
||||||
|
|
@ -33,6 +43,8 @@ InputCharType getInputCharType(char c) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 根据状态获取名称
|
||||||
string getInputChartypeName(InputCharType type) {
|
string getInputChartypeName(InputCharType type) {
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
|
|
@ -82,8 +94,11 @@ string getInputChartypeName(InputCharType type) {
|
||||||
return "UNKOWN";
|
return "UNKOWN";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//根据关键字类型获取其所属的种别
|
||||||
string getWordTypeName(WordType type, string buffer) {
|
string getWordTypeName(WordType type, string buffer) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
|
// 运算符
|
||||||
case OP_ADD:
|
case OP_ADD:
|
||||||
case OP_SUB:
|
case OP_SUB:
|
||||||
case OP_MUL:
|
case OP_MUL:
|
||||||
|
|
@ -100,6 +115,7 @@ string getWordTypeName(WordType type, string buffer) {
|
||||||
case OP_OR:
|
case OP_OR:
|
||||||
return "OP";
|
return "OP";
|
||||||
|
|
||||||
|
// 界符
|
||||||
case SE_LBRAC:
|
case SE_LBRAC:
|
||||||
case SE_RBRAC:
|
case SE_RBRAC:
|
||||||
case SE_LCBRAC:
|
case SE_LCBRAC:
|
||||||
|
|
@ -108,29 +124,95 @@ string getWordTypeName(WordType type, string buffer) {
|
||||||
case SE_SEMI:
|
case SE_SEMI:
|
||||||
return "SE";
|
return "SE";
|
||||||
|
|
||||||
|
// 标识符和关键字
|
||||||
case IDN:
|
case IDN:
|
||||||
if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")){
|
if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")||!buffer.compare("main")){
|
||||||
return "KW";
|
return "KW";
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return "IDN";
|
return "IDN";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 整数
|
||||||
case INT_VAL:
|
case INT_VAL:
|
||||||
return "INT";
|
return "INT";
|
||||||
|
|
||||||
|
//default
|
||||||
default:
|
default:
|
||||||
return "UNKNOWN";
|
return "UNKNOWN";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//根据关键字属性
|
||||||
|
// Returns the attribute string for a recognized word.
//   type   - the WordType attached to the accepting state
//   buffer - the matched text
// Operators and delimiters map to fixed numeric codes (as strings). For IDN, the
// reserved words int/void/return/const/main map to "1".."5" and any other spelling
// is returned unchanged. INT_VAL returns the matched text. Anything else yields
// "UNKNOWN".
string getWordAttribute(WordType type, string buffer) {
    // Identifiers and reserved words share the IDN type; distinguish them by spelling.
    if (type == IDN) {
        if (buffer == "int")    return "1";
        if (buffer == "void")   return "2";
        if (buffer == "return") return "3";
        if (buffer == "const")  return "4";
        if (buffer == "main")   return "5";
        return buffer;
    }

    // Integer literals carry their own text as the attribute.
    if (type == INT_VAL) {
        return buffer;
    }

    switch (type) {
    // Operators
    case OP_ADD:    return "6";
    case OP_SUB:    return "7";
    case OP_MUL:    return "8";
    case OP_DIV:    return "9";
    case OP_MOD:    return "10";
    case OP_ASSIGN: return "11";
    case OP_GT:     return "12";
    case OP_LT:     return "13";
    case OP_EQ:     return "14";
    case OP_LE:     return "15";
    case OP_GE:     return "16";
    case OP_NE:     return "17";
    case OP_AND:    return "18";
    case OP_OR:     return "19";

    // Delimiters (note: semicolon is 24 and comma is 25)
    case SE_LBRAC:  return "20";
    case SE_RBRAC:  return "21";
    case SE_LCBRAC: return "22";
    case SE_RCBRAC: return "23";
    case SE_SEMI:   return "24";
    case SE_COMMA:  return "25";

    // Every remaining type
    default:
        return "UNKNOWN";
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// 读取文件
|
||||||
string readfile(const string& filename)
|
string readfile(const string& filename)
|
||||||
{
|
{
|
||||||
// 打开文件流并读取文件内容
|
// 打开文件流并读取文件内容
|
||||||
ifstream file(filename);
|
ifstream file(filename);
|
||||||
|
|
||||||
string content((istreambuf_iterator<char>(file)),
|
// 使用istreambuf_iterator类逐字符从file中读取到content中
|
||||||
istreambuf_iterator<char>());
|
string content((istreambuf_iterator<char>(file)),istreambuf_iterator<char>());
|
||||||
|
|
||||||
// 去掉换行符
|
// 去掉换行符
|
||||||
//remove函数会把不等于指定字符的元素依次前移(保持相对顺序),并返回指向新的逻辑末尾的迭代器;该迭代器之后的元素值未指定,需要配合erase才能真正从字符串中删除。
|
//remove函数会把不等于指定字符的元素依次前移(保持相对顺序),并返回指向新的逻辑末尾的迭代器;该迭代器之后的元素值未指定,需要配合erase才能真正从字符串中删除。
|
||||||
|
|
@ -139,6 +221,8 @@ string readfile(const string& filename)
|
||||||
|
|
||||||
return content;
|
return content;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 获取关键字的Token种类
|
||||||
TokenType getTokenType(WordType type,string buffer) {
|
TokenType getTokenType(WordType type,string buffer) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case OP_ADD:
|
case OP_ADD:
|
||||||
|
|
@ -166,7 +250,7 @@ TokenType getTokenType(WordType type,string buffer) {
|
||||||
return TokenType::SE;
|
return TokenType::SE;
|
||||||
|
|
||||||
case IDN:
|
case IDN:
|
||||||
if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")) {
|
if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")|| !buffer.compare("main")) {
|
||||||
return TokenType::KW;
|
return TokenType::KW;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
@ -181,6 +265,7 @@ TokenType getTokenType(WordType type,string buffer) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 获取token名称
|
||||||
string getWordTypeName(WordType type) {
|
string getWordTypeName(WordType type) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case KW_INT:
|
case KW_INT:
|
||||||
|
|
@ -191,6 +276,8 @@ string getWordTypeName(WordType type) {
|
||||||
return "KW_RETURN";
|
return "KW_RETURN";
|
||||||
case KW_CONST:
|
case KW_CONST:
|
||||||
return "KW_CONST";
|
return "KW_CONST";
|
||||||
|
case KW_MAIN:
|
||||||
|
return "KW_MAIN";
|
||||||
case OP_ADD:
|
case OP_ADD:
|
||||||
return "OP_ADD";
|
return "OP_ADD";
|
||||||
case OP_SUB:
|
case OP_SUB:
|
||||||
|
|
@ -240,6 +327,7 @@ string getWordTypeName(WordType type) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 获取语法名称
|
||||||
string getGrammarName(WordType type, string buffer) {
|
string getGrammarName(WordType type, string buffer) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
|
|
||||||
|
|
@ -278,6 +366,9 @@ string getGrammarName(WordType type, string buffer) {
|
||||||
else if (!buffer.compare("const")) {
|
else if (!buffer.compare("const")) {
|
||||||
return "const";
|
return "const";
|
||||||
}
|
}
|
||||||
|
else if (!buffer.compare("main")) {
|
||||||
|
return "main";
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
return "IDN";
|
return "IDN";
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,63 @@
|
||||||
|
# 任务划分
|
||||||
|
- 两部分任务
|
||||||
|
1. 词法分析器
|
||||||
|
2. 语法分析器
|
||||||
|
|
||||||
|
## 词法分析器
|
||||||
|
### 流程
|
||||||
|
- 关键字的识别 --> 超前搜索
|
||||||
|
- 构建词法识别的状态转换图
|
||||||
|
```
|
||||||
|
1. 构造出关键字与种类编码的对应表
|
||||||
|
2. 根据对应表 构造状态转换图 --->先根据所做内容画出语法分析器
|
||||||
|
```
|
||||||
|
|
||||||
|
### 输出格式
|
||||||
|
[待测代码中的单词符号] [TAB] <[单词符号种别],[单词符号内容]>
|
||||||
|
- 其中单词符号种别为 KW(关键字)、OP(运算符)、SE(界符)、IDN(标识符)、INT(整型数)
|
||||||
|
- 单词符号内容第一个维度为其种别,第二个维度为其属性。
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# 代码结构
|
||||||
|
```
|
||||||
|
📁2024-compiler-mods
|
||||||
|
├─ 📄.gitignore
|
||||||
|
├─ 📄CMakeLists.txt
|
||||||
|
├─ 📄read.md
|
||||||
|
├─ 📄README.md
|
||||||
|
├─ 📁LL1
|
||||||
|
│ ├─ 📄CMakeLists.txt
|
||||||
|
│ ├─ 📄grammar.cpp
|
||||||
|
│ ├─ 📄grammar.h
|
||||||
|
│ ├─ 📄LL1.cpp
|
||||||
|
│ └─ 📄LL1.h
|
||||||
|
├─ 📁main
|
||||||
|
│ ├─ 📄CMakeLists.txt
|
||||||
|
│ ├─ 📄grammar.h
|
||||||
|
│ ├─ 📄LL1.h
|
||||||
|
│ ├─ 📄main.cpp
|
||||||
|
│ └─ 📄nfa.h
|
||||||
|
├─ 📁nfa
|
||||||
|
│ ├─ 📄CMakeLists.txt
|
||||||
|
│ ├─ 📄dfa.cpp
|
||||||
|
│ ├─ 📄nfa.cpp
|
||||||
|
│ ├─ 📄nfa.h
|
||||||
|
│ └─ 📄tool.cpp
|
||||||
|
├─ 📁.git
|
||||||
|
└─ 📁build
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
# 需做改动
|
||||||
|
1. 可视化部分做一做改动,原有代码中的比较可以用但最终考虑不提交,考虑添加DFA、NFA的画图可视化
|
||||||
|
2. 拆分词法分析器和语法分析器为两部分,对两部分测试独立进行,保证至少有一部分无误
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# 需考虑
|
||||||
|
1. 错误处理部分
|
||||||
|
2. 数字 01 情况
|
||||||
|
3. 标识符一定包含字母、数、下划线三部分
|
||||||
|
4. 测试部分复合的标识符定义读取
|
||||||
Loading…
Reference in New Issue