[add]fix vision
This commit is contained in:
parent
b4145e7fbf
commit
1af61d2eef
|
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"files.associations": {
|
||||
"xstring": "cpp",
|
||||
"iterator": "cpp",
|
||||
"ostream": "cpp",
|
||||
"vector": "cpp"
|
||||
}
|
||||
}
|
||||
|
|
@ -10,7 +10,7 @@ using namespace std;
|
|||
int main(int argc, char** argv) {
|
||||
|
||||
NFA nfa = RexToNFA();
|
||||
printNFA(nfa);
|
||||
//printNFA(nfa);
|
||||
|
||||
DFA dfa = nfaToDFA(nfa);
|
||||
//printDFA(dfa);
|
||||
|
|
@ -62,7 +62,7 @@ int main(int argc, char** argv) {
|
|||
ll.get_token_strings(token_strings);
|
||||
|
||||
|
||||
// ll.print_token_strings();
|
||||
ll.print_token_strings();
|
||||
ll.build_LL1_grammar();
|
||||
|
||||
|
||||
|
|
@ -71,6 +71,7 @@ int main(int argc, char** argv) {
|
|||
|
||||
//ll.print_LL1_grammar_log();
|
||||
cout << endl;
|
||||
cout<<"end"<<endl;
|
||||
i++;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ typedef enum WordType {
|
|||
KW_VOID, // void
|
||||
KW_RETURN, // return
|
||||
KW_CONST, // const
|
||||
KW_MAIN, //main
|
||||
|
||||
OP_ADD, // +
|
||||
OP_SUB, // -
|
||||
|
|
@ -95,6 +96,7 @@ typedef struct Token {
|
|||
//定义函数判断输入的字符类别
|
||||
InputCharType getInputCharType(char c);
|
||||
string getWordTypeName(WordType type,string buffer);
|
||||
string getWordAttribute(WordType type,string buffer);
|
||||
|
||||
//定义状态类
|
||||
class State {
|
||||
|
|
|
|||
14
nfa/dfa.cpp
14
nfa/dfa.cpp
|
|
@ -142,6 +142,7 @@ DFA minimizeDFA(const DFA& dfa) {
|
|||
}
|
||||
return DFA(minimizedStartState, minimizedEndStates, minimizedStates);
|
||||
}
|
||||
|
||||
void removeUnreachableStates(DFA& dfa) {
|
||||
set<State*> reachableStates; //可达状态集合
|
||||
queue<State*> statesQueue; //状态队列
|
||||
|
|
@ -175,6 +176,7 @@ void removeUnreachableStates(DFA& dfa) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
vector<string> recognize(const DFA& dfa, const string& input, const string& output) {
|
||||
|
||||
State* currentState = dfa.startState;
|
||||
|
|
@ -206,8 +208,8 @@ vector<string> recognize(const DFA& dfa, const string& input, const string& outp
|
|||
|
||||
if (nextIt == nextState->transitions.end()) {// 如果没有更多匹配的转换
|
||||
// 输出识别到的单词符号和对应的类型
|
||||
cout << buffer << "\t<" << getWordTypeName(nextState->wordType,buffer) << ">" << endl;
|
||||
file << buffer << "\t<" << getWordTypeName(nextState->wordType, buffer) << ">" << endl;
|
||||
cout << buffer << "\t<" << getWordTypeName(nextState->wordType,buffer)<<","<<getWordAttribute(nextState->wordType,buffer) << ">" << endl;
|
||||
file << buffer << "\t<" << getWordTypeName(nextState->wordType, buffer)<<","<<getWordAttribute(nextState->wordType,buffer) << ">" << endl;
|
||||
tokens.push_back(getGrammarName(nextState->wordType, buffer));
|
||||
buffer.clear();
|
||||
currentState = dfa.startState;
|
||||
|
|
@ -223,8 +225,8 @@ vector<string> recognize(const DFA& dfa, const string& input, const string& outp
|
|||
else {// 如果没有找到匹配的转换
|
||||
if (currentState->isFinalState) {// 如果当前状态是终止状态
|
||||
// 输出识别到的单词符号和对应的类型
|
||||
cout << buffer << "\t<" << getWordTypeName(currentState->wordType,buffer) << ">" << endl;
|
||||
file << buffer << "\t<" << getWordTypeName(currentState->wordType, buffer) << ">" << endl;
|
||||
cout << buffer << "\t<" << getWordTypeName(currentState->wordType,buffer) <<","<<getWordAttribute(nextState->wordType,buffer) << ">" << endl;
|
||||
file << buffer << "\t<" << getWordTypeName(currentState->wordType, buffer) << ","<<getWordAttribute(nextState->wordType,buffer) << ">" << endl;
|
||||
tokens.push_back(getGrammarName(currentState->wordType, buffer) );
|
||||
buffer.clear();
|
||||
}
|
||||
|
|
@ -242,8 +244,8 @@ vector<string> recognize(const DFA& dfa, const string& input, const string& outp
|
|||
}
|
||||
// 处理最后一个字符,如果缓冲区不为空且当前状态是终止状态,对应第一个if里面的else
|
||||
if (!buffer.empty() && currentState->isFinalState) {
|
||||
cout << buffer << "\t<" << getWordTypeName(currentState->wordType,buffer) << ">" << endl;
|
||||
file << buffer << "\t<" << getWordTypeName(currentState->wordType, buffer) << ">" << endl;
|
||||
cout << buffer << "\t<" << getWordTypeName(currentState->wordType,buffer) << ","<<getWordAttribute(nextState->wordType,buffer) << ">" << endl;
|
||||
file << buffer << "\t<" << getWordTypeName(currentState->wordType, buffer) << ","<<getWordAttribute(nextState->wordType,buffer) << ">" << endl;
|
||||
tokens.push_back(getGrammarName(currentState->wordType, buffer));
|
||||
}
|
||||
file.close();//关闭文件
|
||||
|
|
|
|||
|
|
@ -12,11 +12,13 @@ NFA RexToNFA() {
|
|||
//由于里面存在||,所以不同正则间使用空格分隔代表| l代表letter,_代表下划线,0代表数字(也可以是d,但是为了使用已经有的函数),
|
||||
//[lu]代表l|u
|
||||
string rex = "+ - * / % = > < == <= >= != && || ( ) { } , ; [l_][l_0]* -?00*";
|
||||
|
||||
//下面给出正则对应的输出(终态)
|
||||
vector<WordType> finalState = {
|
||||
OP_ADD, OP_SUB,OP_MUL,OP_DIV,OP_MOD,OP_ASSIGN,OP_GT,OP_LT, OP_EQ,OP_LE,OP_GE,OP_NE, OP_AND, OP_OR,SE_LBRAC, SE_RBRAC,
|
||||
SE_LCBRAC,SE_RCBRAC,SE_COMMA,SE_SEMI,IDN,INT_VAL
|
||||
};
|
||||
|
||||
stringstream ss(rex);
|
||||
string target;
|
||||
|
||||
|
|
@ -26,6 +28,8 @@ NFA RexToNFA() {
|
|||
State* startState = new State(stateIndex++);
|
||||
set<State*, StatePtrCompare> endStates;
|
||||
set<State*, StatePtrCompare> allStates = { startState };
|
||||
|
||||
// 按空格分割
|
||||
while (getline(ss, target,' ')) {
|
||||
//如获得[l_][l_0]*
|
||||
State* currentState = startState;
|
||||
|
|
|
|||
28
nfa/nfa.h
28
nfa/nfa.h
|
|
@ -15,13 +15,17 @@
|
|||
#include <queue>
|
||||
#include <algorithm>
|
||||
using namespace std;
|
||||
|
||||
|
||||
//单词符号的类型,返回<待测代码中的单词符号,WordType>
|
||||
// 保留关键字
|
||||
typedef enum WordType {
|
||||
//当识别成标识符后,先判断是不是保留字,然后再判断IDN
|
||||
KW_INT = 0, // int
|
||||
KW_VOID, // void
|
||||
KW_RETURN, // return
|
||||
KW_CONST, // const
|
||||
KW_MAIN, //main
|
||||
|
||||
OP_ADD, // +
|
||||
OP_SUB, // -
|
||||
|
|
@ -50,7 +54,10 @@ typedef enum WordType {
|
|||
UNKOWN
|
||||
}WordType;
|
||||
string getWordTypeName(WordType type);
|
||||
|
||||
// 定义输入的字符类别
|
||||
// 输入与实际不完全匹配
|
||||
// 注意:此处定义的;和,顺序与实验指导书中不同
|
||||
typedef enum InputCharType {
|
||||
LETTER = 0, // 字母 0
|
||||
UNDERLINE, // _ 1
|
||||
|
|
@ -78,6 +85,9 @@ typedef enum InputCharType {
|
|||
EPSILON, // 空字符 20
|
||||
}InputCharType;
|
||||
string getInputChartypeName(InputCharType type);
|
||||
|
||||
|
||||
// 定义 token类型
|
||||
enum class TokenType {
|
||||
KW = 0,
|
||||
OP,
|
||||
|
|
@ -87,6 +97,8 @@ enum class TokenType {
|
|||
UNKNOWN
|
||||
};
|
||||
TokenType getTokenType(WordType wordType,string buffer);
|
||||
|
||||
// 定义最终返回的token的组成类型,包含值和类型两部分
|
||||
typedef struct Token {
|
||||
string value;
|
||||
TokenType type;
|
||||
|
|
@ -95,26 +107,36 @@ typedef struct Token {
|
|||
// 定义函数判断输入的字符类别
|
||||
InputCharType getInputCharType(char c);
|
||||
string getWordTypeName(WordType type,string buffer);
|
||||
string getWordAttribute(WordType type,string buffer);
|
||||
|
||||
// 定义状态类
|
||||
class State {
|
||||
public:
|
||||
int id; // 状态编号
|
||||
map<InputCharType, set<State*>> transitions; // 转移函数映射表,记录每个输入字符类型对应的目标状态集合
|
||||
bool isFinalState; // 是否为最终状态
|
||||
WordType wordType; // 到达该状态时应该返回的词法单元类型
|
||||
map<InputCharType, set<State*>> transitions; // 转移函数映射表,记录每个输入字符类型对应的目标状态集合
|
||||
|
||||
// 构造函数
|
||||
State(int id) : id(id), isFinalState(false), wordType(UNKOWN) {}
|
||||
|
||||
// 添加状态转移映射
|
||||
void addTransition(InputCharType input, State* targetState) {
|
||||
transitions[input].insert(targetState);
|
||||
}
|
||||
|
||||
// 转换终态
|
||||
void setFinalState(bool isFinal, WordType type) {
|
||||
isFinalState = isFinal;
|
||||
wordType = type;
|
||||
}
|
||||
|
||||
// 对象序号比较
|
||||
bool operator<(const State& other) const {
|
||||
return id < other.id;
|
||||
}
|
||||
};
|
||||
|
||||
//为了使set内部有序,定义排序结构体StatePtrCompare
|
||||
struct StatePtrCompare {
|
||||
bool operator()(const State* lhs, const State* rhs) const {
|
||||
|
|
@ -128,10 +150,14 @@ public:
|
|||
State* startState; // 起始状态
|
||||
set<State*, StatePtrCompare> endStates; // 终止状态集合
|
||||
set<State*, StatePtrCompare> states; // 状态集合
|
||||
|
||||
// 构造函数
|
||||
NFA(State* startState, set<State*, StatePtrCompare> endStates, set<State*, StatePtrCompare> states) :
|
||||
startState(startState), endStates(endStates), states(states) {}
|
||||
// void printNFA();
|
||||
};
|
||||
|
||||
// 正则表达式构建状态机
|
||||
NFA RexToNFA();
|
||||
void printNFA(const NFA& nfa);
|
||||
NFA buildNFA(string filename);
|
||||
|
|
|
|||
99
nfa/tool.cpp
99
nfa/tool.cpp
|
|
@ -1,6 +1,16 @@
|
|||
#include "nfa.h"
|
||||
|
||||
/*
|
||||
扫描读入-->以字符的格式读入
|
||||
对于界符和部分运算符,显然是单个组成,即可以单独代表一个状态
|
||||
|
||||
|
||||
注意:
|
||||
1.字母需要区分大小写
|
||||
*/
|
||||
|
||||
// 获取输入串的类型
|
||||
// 单独一位的读入-->下一步标识终态
|
||||
InputCharType getInputCharType(char c) {
|
||||
switch (c) {
|
||||
case '_': return UNDERLINE;
|
||||
|
|
@ -33,6 +43,8 @@ InputCharType getInputCharType(char c) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 根据状态获取名称
|
||||
string getInputChartypeName(InputCharType type) {
|
||||
switch (type)
|
||||
{
|
||||
|
|
@ -82,8 +94,11 @@ string getInputChartypeName(InputCharType type) {
|
|||
return "UNKOWN";
|
||||
}
|
||||
}
|
||||
|
||||
//根据关键字类型获取其所属的种别
|
||||
string getWordTypeName(WordType type, string buffer) {
|
||||
switch (type) {
|
||||
// 运算符
|
||||
case OP_ADD:
|
||||
case OP_SUB:
|
||||
case OP_MUL:
|
||||
|
|
@ -100,6 +115,7 @@ string getWordTypeName(WordType type, string buffer) {
|
|||
case OP_OR:
|
||||
return "OP";
|
||||
|
||||
// 界符
|
||||
case SE_LBRAC:
|
||||
case SE_RBRAC:
|
||||
case SE_LCBRAC:
|
||||
|
|
@ -108,29 +124,95 @@ string getWordTypeName(WordType type, string buffer) {
|
|||
case SE_SEMI:
|
||||
return "SE";
|
||||
|
||||
// 标识符和关键字
|
||||
case IDN:
|
||||
if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")){
|
||||
if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")||!buffer.compare("main")){
|
||||
return "KW";
|
||||
}
|
||||
else {
|
||||
return "IDN";
|
||||
}
|
||||
|
||||
// 整数
|
||||
case INT_VAL:
|
||||
return "INT";
|
||||
|
||||
//default
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
//根据关键字属性
|
||||
string getWordAttribute(WordType type, string buffer) {
|
||||
switch (type) {
|
||||
// 运算符
|
||||
case OP_ADD: return "6";
|
||||
case OP_SUB: return "7";
|
||||
case OP_MUL: return "8";
|
||||
case OP_DIV: return "9";
|
||||
case OP_MOD: return "10";
|
||||
case OP_ASSIGN: return "11";
|
||||
case OP_GT: return "12";
|
||||
case OP_LT: return "13";
|
||||
case OP_EQ: return "14";
|
||||
case OP_LE: return "15";
|
||||
case OP_GE: return "16";
|
||||
case OP_NE: return "17";
|
||||
case OP_AND:return "18";
|
||||
case OP_OR: return "19";
|
||||
|
||||
// 界符
|
||||
case SE_LBRAC: return "20";
|
||||
case SE_RBRAC: return "21";
|
||||
case SE_LCBRAC: return "22";
|
||||
case SE_RCBRAC: return "23";
|
||||
case SE_COMMA: return "25";
|
||||
case SE_SEMI: return "24";
|
||||
|
||||
|
||||
// 标识符和关键字
|
||||
case IDN:
|
||||
if (!buffer.compare("int")){
|
||||
return "1";
|
||||
}
|
||||
else if (!buffer.compare("void")){
|
||||
return "2";
|
||||
}
|
||||
else if (!buffer.compare("return")){
|
||||
return "3";
|
||||
}
|
||||
else if (!buffer.compare("const")){
|
||||
return "4";
|
||||
}
|
||||
else if (!buffer.compare("main")){
|
||||
return "5";
|
||||
}
|
||||
else {
|
||||
return buffer;
|
||||
}
|
||||
|
||||
// 整数
|
||||
case INT_VAL:
|
||||
return buffer;
|
||||
|
||||
//default
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// 读取文件
|
||||
string readfile(const string& filename)
|
||||
{
|
||||
// 打开文件流并读取文件内容
|
||||
ifstream file(filename);
|
||||
|
||||
string content((istreambuf_iterator<char>(file)),
|
||||
istreambuf_iterator<char>());
|
||||
// 使用istreambuf_iterator类逐字符从file中读取到content中
|
||||
string content((istreambuf_iterator<char>(file)),istreambuf_iterator<char>());
|
||||
|
||||
// 去掉换行符
|
||||
//remove函数的作用是将字符串中的某个字符移动到字符串的末尾,并返回一个指向该字符后面位置的指针。
|
||||
|
|
@ -139,6 +221,8 @@ string readfile(const string& filename)
|
|||
|
||||
return content;
|
||||
}
|
||||
|
||||
// 获取关键字的Token种类
|
||||
TokenType getTokenType(WordType type,string buffer) {
|
||||
switch (type) {
|
||||
case OP_ADD:
|
||||
|
|
@ -166,7 +250,7 @@ TokenType getTokenType(WordType type,string buffer) {
|
|||
return TokenType::SE;
|
||||
|
||||
case IDN:
|
||||
if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")) {
|
||||
if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")|| !buffer.compare("main")) {
|
||||
return TokenType::KW;
|
||||
}
|
||||
else {
|
||||
|
|
@ -181,6 +265,7 @@ TokenType getTokenType(WordType type,string buffer) {
|
|||
}
|
||||
}
|
||||
|
||||
// 获取token名称
|
||||
string getWordTypeName(WordType type) {
|
||||
switch (type) {
|
||||
case KW_INT:
|
||||
|
|
@ -191,6 +276,8 @@ string getWordTypeName(WordType type) {
|
|||
return "KW_RETURN";
|
||||
case KW_CONST:
|
||||
return "KW_CONST";
|
||||
case KW_MAIN:
|
||||
return "KW_MAIN";
|
||||
case OP_ADD:
|
||||
return "OP_ADD";
|
||||
case OP_SUB:
|
||||
|
|
@ -240,6 +327,7 @@ string getWordTypeName(WordType type) {
|
|||
}
|
||||
}
|
||||
|
||||
// 获取语法名称
|
||||
string getGrammarName(WordType type, string buffer) {
|
||||
switch (type) {
|
||||
|
||||
|
|
@ -278,6 +366,9 @@ string getGrammarName(WordType type, string buffer) {
|
|||
else if (!buffer.compare("const")) {
|
||||
return "const";
|
||||
}
|
||||
else if (!buffer.compare("main")) {
|
||||
return "main";
|
||||
}
|
||||
else {
|
||||
return "IDN";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,63 @@
|
|||
# 任务划分
|
||||
- 两部分任务
|
||||
1. 词法分析器
|
||||
2. 语法分析器
|
||||
|
||||
## 词法分析器
|
||||
### 流程
|
||||
- 关键字的识别 --> 超前搜索
|
||||
- 构建词法识别的状态转换图
|
||||
```
|
||||
1. 构造出关键字与种类编码的对应表
|
||||
2. 根据对应表 构造状态转换图 --->先根据所做内容画出语法分析器
|
||||
```
|
||||
|
||||
### 输出格式
|
||||
[待测代码中的单词符号] [TAB] <[单词符号种别],[单词符号内容]>
|
||||
- 其中单词符号种别为 KW(关键字)、OP(运算符)、SE(界符)、IDN(标识符)、INT(整型数)
|
||||
- 单词符号内容第一个维度为其种别,第二个维度为其属性。
|
||||
|
||||
|
||||
|
||||
# 代码结构
|
||||
```
|
||||
📁2024-compiler-mods
|
||||
├─ 📄.gitignore
|
||||
├─ 📄CMakeLists.txt
|
||||
├─ 📄read.md
|
||||
├─ 📄README.md
|
||||
├─ 📁LL1
|
||||
│ ├─ 📄CMakeLists.txt
|
||||
│ ├─ 📄grammar.cpp
|
||||
│ ├─ 📄grammar.h
|
||||
│ ├─ 📄LL1.cpp
|
||||
│ └─ 📄LL1.h
|
||||
├─ 📁main
|
||||
│ ├─ 📄CMakeLists.txt
|
||||
│ ├─ 📄grammar.h
|
||||
│ ├─ 📄LL1.h
|
||||
│ ├─ 📄main.cpp
|
||||
│ └─ 📄nfa.h
|
||||
├─ 📁nfa
|
||||
│ ├─ 📄CMakeLists.txt
|
||||
│ ├─ 📄dfa.cpp
|
||||
│ ├─ 📄nfa.cpp
|
||||
│ ├─ 📄nfa.h
|
||||
│ └─ 📄tool.cpp
|
||||
├─ 📁.git
|
||||
└─ 📁build
|
||||
|
||||
```
|
||||
|
||||
|
||||
# 需做改动
|
||||
1. 可视化部分做一做改动,原有代码中的比较可以用但最终考虑不提交,考虑添加DFA、NFA的画图可视化
|
||||
2. 拆分词法分析器和语法分析器为两部分,对两部分测试独立进行,保证至少有一部分无误
|
||||
|
||||
|
||||
|
||||
# 需考虑
|
||||
1. 错误处理部分
|
||||
2. 数字 01 情况
|
||||
3. 标识符一定包含字母、数、下划线三部分
|
||||
4. 测试部分复合的标识符定义读取
|
||||
Loading…
Reference in New Issue