309 lines
9.1 KiB
C++
309 lines
9.1 KiB
C++
#include "nfa.h"
|
||
|
||
std::unordered_map<InputCharType, std::string> CharTypeNames = {
|
||
{InputCharType::UNDERLINE, "_"},
|
||
{InputCharType::ADD, "+"},
|
||
{InputCharType::SUB, "-"},
|
||
{InputCharType::MUL, "*"},
|
||
{InputCharType::DIV, "/"},
|
||
{InputCharType::MOD, "%"},
|
||
{InputCharType::EQ, "="},
|
||
{InputCharType::GT, ">"},
|
||
{InputCharType::LT, "<"},
|
||
{InputCharType::NOT, "!"},
|
||
{InputCharType::AND, "&"},
|
||
{InputCharType::OR, "|"},
|
||
{InputCharType::LBRACKET, "("},
|
||
{InputCharType::RBRACKET, ")"},
|
||
{InputCharType::LCBRAC, "{"},
|
||
{InputCharType::RCBRAC, "}"},
|
||
{InputCharType::COMMA, ","},
|
||
{InputCharType::SEMI, ";"},
|
||
{InputCharType::POINT, "."} // 小数点
|
||
};
|
||
|
||
|
||
std::unordered_map<WordType, std::string> WordTypeNames = {
|
||
{WordType::KW_INT, "INT"}, {WordType::KW_VOID, "VOID"}, {WordType::KW_RETURN, "RETURN"},
|
||
{WordType::KW_CONST, "CONST"}, {WordType::KW_MAIN, "MAIN"}, {WordType::KW_IF, "IF"},
|
||
{WordType::KW_ELSE, "ELSE"}, {WordType::KW_FLOAT, "FLOAT"}, {WordType::OP_ADD, "+"},
|
||
{WordType::OP_SUB, "-"}, {WordType::OP_MUL, "*"}, {WordType::OP_DIV, "/"},
|
||
{WordType::OP_MOD, "%"}, {WordType::OP_ASSIGN, "="}, {WordType::OP_GT, ">"},
|
||
{WordType::OP_LT, "<"}, {WordType::OP_EQ, "=="}, {WordType::OP_LE, "<="},
|
||
{WordType::OP_GE, ">="}, {WordType::OP_NE, "!="}, {WordType::OP_AND, "&&"},
|
||
{WordType::OP_OR, "||"}, {WordType::SE_LBRAC, "("}, {WordType::SE_RBRAC, ")"},
|
||
{WordType::SE_LCBRAC, "{"}, {WordType::SE_RCBRAC, "}"}, {WordType::SE_COMMA, ","},
|
||
{WordType::SE_SEMI, ";"}, {WordType::IDN, "IDENTIFIER"}, {WordType::INT_VAL, "INTEGER"},
|
||
{WordType::FLOAT_VAL, "FLOAT"}, {WordType::UNKOWN, "UNKNOWN"}
|
||
};
|
||
//扫描,以字符的格式读入
|
||
//对于界符和部分运算符,是单个组成,即可以单独代表一个状态
|
||
InputCharType getInputCharType(char c) {
|
||
switch (c) {
|
||
case '_': return InputCharType::UNDERLINE;
|
||
case '+': return InputCharType::ADD;
|
||
case '-': return InputCharType::SUB;
|
||
case '*': return InputCharType::MUL;
|
||
case '/': return InputCharType::DIV;
|
||
case '%': return InputCharType::MOD;
|
||
case '=': return InputCharType::EQ;
|
||
case '>': return InputCharType::GT;
|
||
case '<': return InputCharType::LT;
|
||
case '!': return InputCharType::NOT;
|
||
case '&': return InputCharType::AND;
|
||
case '|': return InputCharType::OR;
|
||
case '(': return InputCharType::LBRACKET;
|
||
case ')': return InputCharType::RBRACKET;
|
||
case '{': return InputCharType::LCBRAC;
|
||
case '}': return InputCharType::RCBRAC;
|
||
case ',': return InputCharType::COMMA;
|
||
case ';': return InputCharType::SEMI;
|
||
|
||
//小数点作为数字读入
|
||
case '.': return InputCharType::DIGIT;
|
||
|
||
default:
|
||
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
|
||
return InputCharType::LETTER;
|
||
}
|
||
else if (c >= '0' && c <= '9') {
|
||
return InputCharType::DIGIT;
|
||
}
|
||
else {
|
||
return InputCharType::EPSILON;
|
||
}
|
||
}
|
||
}
|
||
|
||
// 根据状态获取名称
|
||
std::string getInputChartypeName(InputCharType type) {
|
||
auto it = CharTypeNames.find(type);
|
||
if (it != CharTypeNames.end()) {
|
||
return it->second;
|
||
} else {
|
||
return "UNKNOWN";
|
||
}
|
||
}
|
||
|
||
// Decides which numeric category a scanned number buffer belongs to.
//
// @param buffer  Text of the number token. getInputCharType() classifies '.'
//                as a digit, so the buffer may contain any number of dots.
// @return "INT"     when the buffer has at least one digit and no dot,
//         "FLOAT"   when it has at least one digit and exactly one dot,
//         "UNKNOWN" otherwise (several dots, or no digit at all).
std::string judeFloat(std::string buffer) {
    // A number needs at least one digit: "" and a lone "." are not literals,
    // even though the dot-counting below would accept them.
    if (buffer.find_first_of("0123456789") == std::string::npos) {
        return "UNKNOWN";
    }

    const auto firstDot = buffer.find('.');
    if (firstDot == std::string::npos) {
        return "INT";
    }

    // Searching from both ends lands on the same position only when the
    // buffer contains a single dot.
    if (firstDot == buffer.rfind('.')) {
        return "FLOAT";
    }
    return "UNKNOWN";
}
|
||
|
||
//根据关键字类型获取其所属的种别
|
||
string getWordTypeName(WordType type, string buffer) {
|
||
switch (type) {
|
||
// 运算符
|
||
case WordType::OP_ADD:
|
||
case WordType::OP_SUB:
|
||
case WordType::OP_MUL:
|
||
case WordType::OP_DIV:
|
||
case WordType::OP_MOD:
|
||
case WordType::OP_ASSIGN:
|
||
case WordType::OP_GT:
|
||
case WordType::OP_LT:
|
||
case WordType::OP_EQ:
|
||
case WordType::OP_LE:
|
||
case WordType::OP_GE:
|
||
case WordType::OP_NE:
|
||
case WordType::OP_AND:
|
||
case WordType::OP_OR:
|
||
return "OP";
|
||
|
||
// 界符
|
||
case WordType::SE_LBRAC:
|
||
case WordType::SE_RBRAC:
|
||
case WordType::SE_LCBRAC:
|
||
case WordType::SE_RCBRAC:
|
||
case WordType::SE_COMMA:
|
||
case WordType::SE_SEMI:
|
||
return "SE";
|
||
|
||
// 标识符和关键字
|
||
case WordType::IDN:
|
||
if (!buffer.compare("int") ||
|
||
!buffer.compare("void") ||
|
||
!buffer.compare("const") ||
|
||
!buffer.compare("return")||
|
||
!buffer.compare("if") ||
|
||
!buffer.compare("else") ||
|
||
!buffer.compare("float")
|
||
){
|
||
return "KW";
|
||
}
|
||
else {
|
||
return "IDN";
|
||
}
|
||
|
||
// 整数(添加了浮点判断)
|
||
case WordType::INT_VAL:
|
||
return judeFloat(buffer);
|
||
|
||
//浮点
|
||
// case FLOAT_VAL:
|
||
// return "FLOAT";
|
||
|
||
//default
|
||
default:
|
||
return "UNKNOWN";
|
||
}
|
||
}
|
||
|
||
//根据关键字属性
|
||
string getWordAttribute(WordType type, string buffer) {
|
||
switch (type) {
|
||
// 运算符
|
||
case WordType::OP_ADD: return "6";
|
||
case WordType::OP_SUB: return "7";
|
||
case WordType::OP_MUL: return "8";
|
||
case WordType::OP_DIV: return "9";
|
||
case WordType::OP_MOD: return "10";
|
||
case WordType::OP_ASSIGN: return "11";
|
||
case WordType::OP_GT: return "12";
|
||
case WordType::OP_LT: return "13";
|
||
case WordType::OP_EQ: return "14";
|
||
case WordType::OP_LE: return "15";
|
||
case WordType::OP_GE: return "16";
|
||
case WordType::OP_NE: return "17";
|
||
case WordType::OP_AND:return "18";
|
||
case WordType::OP_OR: return "19";
|
||
|
||
// 界符
|
||
case WordType::SE_LBRAC: return "20";
|
||
case WordType::SE_RBRAC: return "21";
|
||
case WordType::SE_LCBRAC: return "22";
|
||
case WordType::SE_RCBRAC: return "23";
|
||
case WordType::SE_COMMA: return "25";
|
||
case WordType::SE_SEMI: return "24";
|
||
|
||
|
||
// 标识符和关键字
|
||
case WordType::IDN:
|
||
if (!buffer.compare("int")){
|
||
return "1";
|
||
}
|
||
else if (!buffer.compare("void")){
|
||
return "2";
|
||
}
|
||
else if (!buffer.compare("return")){
|
||
return "3";
|
||
}
|
||
else if (!buffer.compare("const")){
|
||
return "4";
|
||
}
|
||
else if (!buffer.compare("main")){
|
||
return "5";
|
||
}
|
||
else if (!buffer.compare("if")){
|
||
return "6";
|
||
}
|
||
else if (!buffer.compare("else")){
|
||
return "7";
|
||
}
|
||
else if (!buffer.compare("float")){
|
||
return "8";
|
||
}
|
||
else {
|
||
return buffer;
|
||
}
|
||
|
||
// 整数
|
||
case WordType::INT_VAL:
|
||
return buffer;
|
||
|
||
//浮点类型,理论不使用
|
||
case WordType::FLOAT_VAL:
|
||
return buffer;
|
||
|
||
//default
|
||
default:
|
||
return "UNKNOWN";
|
||
}
|
||
}
|
||
|
||
// Reads an entire file into a string.
//
// @param filename  Path of the file to read.
// @return The file's contents with line breaks kept as they are read, or an
//         empty string when the file cannot be opened; in that case a
//         diagnostic is written to std::cerr so the failure is not mistaken
//         for an empty file.
std::string readfile(const std::string& filename)
{
    std::ifstream file(filename);
    if (!file.is_open()) {
        std::cerr << "readfile: cannot open '" << filename << "'" << std::endl;
        return std::string();
    }

    // Copy the stream character by character until end of file.
    return std::string(std::istreambuf_iterator<char>(file),
                       std::istreambuf_iterator<char>());
}
|
||
|
||
|
||
// 获取token名称
|
||
string getWordTypeName(WordType type) {
|
||
auto it = WordTypeNames.find(type);
|
||
if (it != WordTypeNames.end()) {
|
||
return it->second;
|
||
} else {
|
||
return "UNKNOWN";
|
||
}
|
||
}
|
||
|
||
// 获取语法名称
|
||
string getGrammarName(WordType type, string buffer) {
|
||
switch (type) {
|
||
|
||
case WordType::OP_ADD: return "+";
|
||
case WordType::OP_SUB: return "-";
|
||
case WordType::OP_MUL: return "*";
|
||
case WordType::OP_DIV: return "/";
|
||
case WordType::OP_MOD: return "%";
|
||
case WordType::OP_ASSIGN: return "=";
|
||
case WordType::OP_GT: return ">";
|
||
case WordType::OP_LT: return "<";
|
||
case WordType::OP_EQ: return "==";
|
||
case WordType::OP_LE: return "<=";
|
||
case WordType::OP_GE: return ">=";
|
||
case WordType::OP_NE: return "!=";
|
||
case WordType::OP_AND: return "&&";
|
||
case WordType::OP_OR: return "||";
|
||
|
||
case WordType::SE_LBRAC: return "(";
|
||
case WordType::SE_RBRAC: return ")";
|
||
case WordType::SE_LCBRAC: return "{";
|
||
case WordType::SE_RCBRAC: return "}";
|
||
case WordType::SE_COMMA: return ",";
|
||
case WordType::SE_SEMI: return ";";
|
||
|
||
case WordType::IDN:
|
||
if (!buffer.compare("int")) {
|
||
return "int";
|
||
}
|
||
else if (!buffer.compare("void")) {
|
||
return "void";
|
||
}
|
||
else if (!buffer.compare("return")) {
|
||
return "return";
|
||
}
|
||
else if (!buffer.compare("const")) {
|
||
return "const";
|
||
}
|
||
else if (!buffer.compare("main")) {
|
||
return "main";
|
||
}
|
||
else {
|
||
return "IDN";
|
||
}
|
||
case WordType::INT_VAL: return "INT";
|
||
case WordType::FLOAT_VAL: return "FLOAT";
|
||
default: cerr << "Token Error: "<< WordTypeNames.find(type)->second << endl; exit(-1);
|
||
}
|
||
} |