#include "nfa.h"

// Printable text for each single-character input class.
// NOTE(review): the template arguments were missing in the reviewed text and are
// restored here as <InputCharType, std::string> -- confirm against nfa.h.
std::unordered_map<InputCharType, std::string> CharTypeNames = {
    {InputCharType::UNDERLINE, "_"},
    {InputCharType::ADD, "+"},
    {InputCharType::SUB, "-"},
    {InputCharType::MUL, "*"},
    {InputCharType::DIV, "/"},
    {InputCharType::MOD, "%"},
    {InputCharType::EQ, "="},
    {InputCharType::GT, ">"},
    {InputCharType::LT, "<"},
    {InputCharType::NOT, "!"},
    {InputCharType::AND, "&"},
    {InputCharType::OR, "|"},
    {InputCharType::LBRACKET, "("},
    {InputCharType::RBRACKET, ")"},
    {InputCharType::LCBRAC, "{"},
    {InputCharType::RCBRAC, "}"},
    {InputCharType::COMMA, ","},
    {InputCharType::SEMI, ";"},
    {InputCharType::POINT, "."}  // decimal point
};

// Printable name for each token type.
// NOTE(review): template arguments restored as <WordType, std::string> -- confirm
// against nfa.h.
std::unordered_map<WordType, std::string> WordTypeNames = {
    {WordType::KW_INT, "INT"},
    {WordType::KW_VOID, "VOID"},
    {WordType::KW_RETURN, "RETURN"},
    {WordType::KW_CONST, "CONST"},
    {WordType::KW_MAIN, "MAIN"},
    {WordType::KW_IF, "IF"},
    {WordType::KW_ELSE, "ELSE"},
    {WordType::KW_FLOAT, "FLOAT"},
    {WordType::OP_ADD, "+"},
    {WordType::OP_SUB, "-"},
    {WordType::OP_MUL, "*"},
    {WordType::OP_DIV, "/"},
    {WordType::OP_MOD, "%"},
    {WordType::OP_ASSIGN, "="},
    {WordType::OP_GT, ">"},
    {WordType::OP_LT, "<"},
    {WordType::OP_EQ, "=="},
    {WordType::OP_LE, "<="},
    {WordType::OP_GE, ">="},
    {WordType::OP_NE, "!="},
    {WordType::OP_AND, "&&"},
    {WordType::OP_OR, "||"},
    {WordType::SE_LBRAC, "("},
    {WordType::SE_RBRAC, ")"},
    {WordType::SE_LCBRAC, "{"},
    {WordType::SE_RCBRAC, "}"},
    {WordType::SE_COMMA, ","},
    {WordType::SE_SEMI, ";"},
    {WordType::IDN, "IDENTIFIER"},
    {WordType::INT_VAL, "INTEGER"},
    {WordType::FLOAT_VAL, "FLOAT"},
    {WordType::UNKOWN, "UNKNOWN"}
};

// Classifies one input character for the scanner.
// Delimiters and single-character operators each get their own class. Letters
// map to LETTER, decimal digits to DIGIT, and every other character to EPSILON.
InputCharType getInputCharType(char c) {
    switch (c) {
        case '_': return InputCharType::UNDERLINE;
        case '+': return InputCharType::ADD;
        case '-': return InputCharType::SUB;
        case '*': return InputCharType::MUL;
        case '/': return InputCharType::DIV;
        case '%': return InputCharType::MOD;
        case '=': return InputCharType::EQ;
        case '>': return InputCharType::GT;
        case '<': return InputCharType::LT;
        case '!': return InputCharType::NOT;
        case '&': return InputCharType::AND;
        case '|': return InputCharType::OR;
        case '(': return InputCharType::LBRACKET;
        case ')': return InputCharType::RBRACKET;
        case '{': return InputCharType::LCBRAC;
        case '}': return InputCharType::RCBRAC;
        case ',': return InputCharType::COMMA;
        case ';': return InputCharType::SEMI;
        // The decimal point is read as a digit, so a numeric literal containing
        // '.' stays a single token; judeFloat() later tells INT from FLOAT.
        case '.': return InputCharType::DIGIT;
        default:
            if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
                return InputCharType::LETTER;
            } else if (c >= '0' && c <= '9') {
                return InputCharType::DIGIT;
            } else {
                return InputCharType::EPSILON;
            }
    }
}

// Returns the printable text registered in CharTypeNames for `type`, or
// "UNKNOWN" when the class has no entry (e.g. LETTER, DIGIT, EPSILON).
std::string getInputChartypeName(InputCharType type) {
    auto it = CharTypeNames.find(type);
    if (it != CharTypeNames.end()) {
        return it->second;
    } else {
        return "UNKNOWN";
    }
}

// Classifies a numeric lexeme by the number of '.' characters it contains:
//   none        -> "INT"
//   exactly one -> "FLOAT"
//   two or more -> "UNKNOWN"
// Only the dots are inspected; a lexeme consisting of a single "." is
// therefore reported as "FLOAT".
string judeFloat(string buffer) {
    size_t firstDot = buffer.find('.');
    size_t lastDot = buffer.rfind('.');
    if (firstDot == lastDot && firstDot != std::string::npos) {
        return "FLOAT";
    } else if (firstDot == std::string::npos) {
        return "INT";
    } else {
        return "UNKNOWN";
    }
}

// Returns the category tag of a token: "OP", "SE", "KW", "IDN", "INT",
// "FLOAT" or "UNKNOWN".
//   type   - token type produced by the scanner
//   buffer - the token's lexeme; used to separate keywords from identifiers
//            and integers from floats
string getWordTypeName(WordType type, string buffer) {
    switch (type) {
        // Operators
        case WordType::OP_ADD:
        case WordType::OP_SUB:
        case WordType::OP_MUL:
        case WordType::OP_DIV:
        case WordType::OP_MOD:
        case WordType::OP_ASSIGN:
        case WordType::OP_GT:
        case WordType::OP_LT:
        case WordType::OP_EQ:
        case WordType::OP_LE:
        case WordType::OP_GE:
        case WordType::OP_NE:
        case WordType::OP_AND:
        case WordType::OP_OR:
            return "OP";

        // Delimiters
        case WordType::SE_LBRAC:
        case WordType::SE_RBRAC:
        case WordType::SE_LCBRAC:
        case WordType::SE_RCBRAC:
        case WordType::SE_COMMA:
        case WordType::SE_SEMI:
            return "SE";

        // Identifiers and keywords share one token type; the lexeme decides.
        // "main" is included so that the category agrees with
        // getWordAttribute(), which assigns it keyword code "5".
        case WordType::IDN:
            if (!buffer.compare("int") || !buffer.compare("void") ||
                !buffer.compare("const") || !buffer.compare("return") ||
                !buffer.compare("main") || !buffer.compare("if") ||
                !buffer.compare("else") || !buffer.compare("float")) {
                return "KW";
            } else {
                return "IDN";
            }

        // Numeric literal: may be an integer or a float, decided by its dots.
        case WordType::INT_VAL:
            return judeFloat(buffer);

        // Dedicated float token type; kept consistent with getWordAttribute().
        case WordType::FLOAT_VAL:
            return "FLOAT";

        default:
            return "UNKNOWN";
    }
}

// Returns the attribute string of a token.
// Operators and delimiters yield fixed numeric codes ("6".."19" and
// "20".."25"); keywords yield their keyword code; identifiers and numeric
// literals yield the lexeme itself; anything else yields "UNKNOWN".
// NOTE(review): the keyword codes for "if"/"else"/"float" ("6"/"7"/"8") coincide
// with the operator codes for + - * -- confirm this is intended.
string getWordAttribute(WordType type, string buffer) {
    switch (type) {
        // Operators
        case WordType::OP_ADD: return "6";
        case WordType::OP_SUB: return "7";
        case WordType::OP_MUL: return "8";
        case WordType::OP_DIV: return "9";
        case WordType::OP_MOD: return "10";
        case WordType::OP_ASSIGN: return "11";
        case WordType::OP_GT: return "12";
        case WordType::OP_LT: return "13";
        case WordType::OP_EQ: return "14";
        case WordType::OP_LE: return "15";
        case WordType::OP_GE: return "16";
        case WordType::OP_NE: return "17";
        case WordType::OP_AND: return "18";
        case WordType::OP_OR: return "19";

        // Delimiters (note: ';' is 24 and ',' is 25)
        case WordType::SE_LBRAC: return "20";
        case WordType::SE_RBRAC: return "21";
        case WordType::SE_LCBRAC: return "22";
        case WordType::SE_RCBRAC: return "23";
        case WordType::SE_COMMA: return "25";
        case WordType::SE_SEMI: return "24";

        // Identifiers and keywords
        case WordType::IDN:
            if (!buffer.compare("int")) {
                return "1";
            } else if (!buffer.compare("void")) {
                return "2";
            } else if (!buffer.compare("return")) {
                return "3";
            } else if (!buffer.compare("const")) {
                return "4";
            } else if (!buffer.compare("main")) {
                return "5";
            } else if (!buffer.compare("if")) {
                return "6";
            } else if (!buffer.compare("else")) {
                return "7";
            } else if (!buffer.compare("float")) {
                return "8";
            } else {
                return buffer;
            }

        // Numeric literal: the attribute is the lexeme itself.
        case WordType::INT_VAL:
            return buffer;

        // Float token type; per the original note it is not expected to occur.
        case WordType::FLOAT_VAL:
            return buffer;

        default:
            return "UNKNOWN";
    }
}

// Reads the whole file `filename` into a string and returns it.
// If the file cannot be opened, a message is written to stderr and an empty
// string is returned.
string readfile(const string& filename) {
    ifstream file(filename);
    if (!file) {
        cerr << "File Error: cannot open " << filename << endl;
        return string();
    }

    // Copy the stream character by character into `content`.
    string content((istreambuf_iterator<char>(file)), istreambuf_iterator<char>());

    // Newline stripping is currently disabled:
    // content.erase(remove(content.begin(), content.end(), '\n'), content.end());
    return content;
}

// Returns the printable name registered in WordTypeNames for `type`, or
// "UNKNOWN" when the type has no entry.
string getWordTypeName(WordType type) {
    auto it = WordTypeNames.find(type);
    if (it != WordTypeNames.end()) {
        return it->second;
    } else {
        return "UNKNOWN";
    }
}
// 获取语法名称 string getGrammarName(WordType type, string buffer) { switch (type) { case WordType::OP_ADD: return "+"; case WordType::OP_SUB: return "-"; case WordType::OP_MUL: return "*"; case WordType::OP_DIV: return "/"; case WordType::OP_MOD: return "%"; case WordType::OP_ASSIGN: return "="; case WordType::OP_GT: return ">"; case WordType::OP_LT: return "<"; case WordType::OP_EQ: return "=="; case WordType::OP_LE: return "<="; case WordType::OP_GE: return ">="; case WordType::OP_NE: return "!="; case WordType::OP_AND: return "&&"; case WordType::OP_OR: return "||"; case WordType::SE_LBRAC: return "("; case WordType::SE_RBRAC: return ")"; case WordType::SE_LCBRAC: return "{"; case WordType::SE_RCBRAC: return "}"; case WordType::SE_COMMA: return ","; case WordType::SE_SEMI: return ";"; case WordType::IDN: if (!buffer.compare("int")) { return "int"; } else if (!buffer.compare("void")) { return "void"; } else if (!buffer.compare("return")) { return "return"; } else if (!buffer.compare("const")) { return "const"; } else if (!buffer.compare("main")) { return "main"; } else { return "IDN"; } case WordType::INT_VAL: return "INT"; case WordType::FLOAT_VAL: return "FLOAT"; default: cerr << "Token Error: "<< WordTypeNames.find(type)->second << endl; exit(-1); } }