#include "nfa.h"

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>

/*
 * Scanner helpers.
 *
 * Input is read one character at a time. Delimiters and some operators are
 * made of a single character, so each of them can stand for a state on its
 * own.
 *
 * Note (from the original author):
 *   1. Letters are case-sensitive.
 */

namespace {

// A reserved word together with the attribute code reported for it.
struct ReservedWord {
    const char* text;
    const char* attribute;
};

// Single source of truth for the reserved words. Every function below that
// has to tell a keyword from a plain identifier consults this table, so the
// list cannot drift between them.
constexpr ReservedWord kReservedWords[] = {
    {"int", "1"},
    {"void", "2"},
    {"return", "3"},
    {"const", "4"},
    {"main", "5"},
};

// Returns the table entry whose text equals `word` exactly (case-sensitive),
// or nullptr when `word` is not a reserved word.
const ReservedWord* findReservedWord(const string& word) {
    for (const ReservedWord& entry : kReservedWords) {
        if (word == entry.text) {
            return &entry;
        }
    }
    return nullptr;
}

}  // namespace

// Classifies a single input character.
//
// Returns the dedicated InputCharType for '_' and for each operator or
// delimiter character handled below, LETTER for 'a'-'z' / 'A'-'Z', DIGIT for
// '0'-'9', and EPSILON for every other character.
InputCharType getInputCharType(char c) {
    switch (c) {
        case '_': return UNDERLINE;
        case '+': return ADD;
        case '-': return SUB;
        case '*': return MUL;
        case '/': return DIV;
        case '%': return MOD;
        case '=': return EQ;
        case '>': return GT;
        case '<': return LT;
        case '!': return NOT;
        case '&': return AND;
        case '|': return OR;
        case '(': return LBRACKET;
        case ')': return RBRACKET;
        case '{': return LCBRAC;
        case '}': return RCBRAC;
        case ',': return COMMA;
        case ';': return SEMI;
        default:
            if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
                return LETTER;
            } else if (c >= '0' && c <= '9') {
                return DIGIT;
            } else {
                return EPSILON;
            }
    }
}

// Returns a printable name for an input character type: the character itself
// for operators and delimiters, an upper-case label for LETTER, UNDERLINE,
// DIGIT and EPSILON, and "UNKOWN" (spelling kept as-is, it is runtime output)
// for any other value.
string getInputChartypeName(InputCharType type) {
    switch (type) {
        case LETTER: return "LETTER";
        case UNDERLINE: return "UNDERLINE";
        case DIGIT: return "DIGIT";
        case ADD: return "+";
        case SUB: return "-";
        case MUL: return "*";
        case DIV: return "/";
        case MOD: return "%";
        case EQ: return "=";
        case GT: return ">";
        case LT: return "<";
        case NOT: return "!";
        case AND: return "&";
        case OR: return "|";
        case LBRACKET: return "(";
        case RBRACKET: return ")";
        case LCBRAC: return "{";
        case RCBRAC: return "}";
        case COMMA: return ",";
        case SEMI: return ";";
        case EPSILON: return "EPSILON";
        default: return "UNKOWN";
    }
}

// Returns the category a word belongs to: "OP" for operators, "SE" for
// delimiters, "KW" or "IDN" for an IDN word depending on whether `buffer` is
// a reserved word, "INT" for integer values, and "UNKNOWN" otherwise.
string getWordTypeName(WordType type, string buffer) {
    switch (type) {
        // Operators
        case OP_ADD:
        case OP_SUB:
        case OP_MUL:
        case OP_DIV:
        case OP_MOD:
        case OP_ASSIGN:
        case OP_GT:
        case OP_LT:
        case OP_EQ:
        case OP_LE:
        case OP_GE:
        case OP_NE:
        case OP_AND:
        case OP_OR:
            return "OP";
        // Delimiters
        case SE_LBRAC:
        case SE_RBRAC:
        case SE_LCBRAC:
        case SE_RCBRAC:
        case SE_COMMA:
        case SE_SEMI:
            return "SE";
        // Identifiers and keywords share the IDN word type; the text decides.
        case IDN:
            return findReservedWord(buffer) != nullptr ? "KW" : "IDN";
        // Integers
        case INT_VAL:
            return "INT";
        default:
            return "UNKNOWN";
    }
}

// Returns the attribute string of a word: a fixed numeric code for operators,
// delimiters and reserved words, and the word's own text for plain
// identifiers and integer values. Any other type yields "UNKNOWN".
string getWordAttribute(WordType type, string buffer) {
    switch (type) {
        // Operators
        case OP_ADD: return "6";
        case OP_SUB: return "7";
        case OP_MUL: return "8";
        case OP_DIV: return "9";
        case OP_MOD: return "10";
        case OP_ASSIGN: return "11";
        case OP_GT: return "12";
        case OP_LT: return "13";
        case OP_EQ: return "14";
        case OP_LE: return "15";
        case OP_GE: return "16";
        case OP_NE: return "17";
        case OP_AND: return "18";
        case OP_OR: return "19";
        // Delimiters
        case SE_LBRAC: return "20";
        case SE_RBRAC: return "21";
        case SE_LCBRAC: return "22";
        case SE_RCBRAC: return "23";
        case SE_COMMA: return "25";
        case SE_SEMI: return "24";
        // Identifiers and keywords
        case IDN: {
            const ReservedWord* keyword = findReservedWord(buffer);
            return keyword != nullptr ? string(keyword->attribute) : buffer;
        }
        // Integers
        case INT_VAL:
            return buffer;
        default:
            return "UNKNOWN";
    }
}

// Reads the whole file `filename` and returns its contents unchanged.
//
// If the file cannot be opened, a diagnostic is written to cerr and an empty
// string is returned, so a missing file is no longer indistinguishable from
// an empty one on the console.
string readfile(const string& filename) {
    ifstream file(filename);
    if (!file) {
        cerr << "File Error: cannot open " << filename << endl;
        return string();
    }

    // Copy the stream character by character. The element type has to be
    // spelled out: a default-constructed end iterator has nothing to deduce
    // it from.
    string content((istreambuf_iterator<char>(file)), istreambuf_iterator<char>());

    // Newlines are deliberately kept. The disabled line below would strip
    // them (erase-remove idiom):
    // content.erase(remove(content.begin(), content.end(), '\n'), content.end());
    return content;
}

// Returns the token category of a word: OP for operators, SE for delimiters,
// KW or IDN for an IDN word depending on whether `buffer` is a reserved word,
// INT for integer values, and UNKNOWN otherwise.
TokenType getTokenType(WordType type, string buffer) {
    switch (type) {
        case OP_ADD:
        case OP_SUB:
        case OP_MUL:
        case OP_DIV:
        case OP_MOD:
        case OP_ASSIGN:
        case OP_GT:
        case OP_LT:
        case OP_EQ:
        case OP_LE:
        case OP_GE:
        case OP_NE:
        case OP_AND:
        case OP_OR:
            return TokenType::OP;
        case SE_LBRAC:
        case SE_RBRAC:
        case SE_LCBRAC:
        case SE_RCBRAC:
        case SE_COMMA:
        case SE_SEMI:
            return TokenType::SE;
        case IDN:
            return findReservedWord(buffer) != nullptr ? TokenType::KW : TokenType::IDN;
        case INT_VAL:
            return TokenType::INT;
        default:
            return TokenType::UNKNOWN;
    }
}

// Returns the enumerator name of a word type as text, or "UNKNOWN" for a
// value not listed below.
string getWordTypeName(WordType type) {
    switch (type) {
        case KW_INT: return "KW_INT";
        case KW_VOID: return "KW_VOID";
        case KW_RETURN: return "KW_RETURN";
        case KW_CONST: return "KW_CONST";
        case KW_MAIN: return "KW_MAIN";
        case OP_ADD: return "OP_ADD";
        case OP_SUB: return "OP_SUB";
        case OP_MUL: return "OP_MUL";
        case OP_DIV: return "OP_DIV";
        case OP_MOD: return "OP_MOD";
        case OP_ASSIGN: return "OP_ASSIGN";
        case OP_GT: return "OP_GT";
        case OP_LT: return "OP_LT";
        case OP_EQ: return "OP_EQ";
        case OP_LE: return "OP_LE";
        case OP_GE: return "OP_GE";
        case OP_NE: return "OP_NE";
        case OP_AND: return "OP_AND";
        case OP_OR: return "OP_OR";
        case SE_LBRAC: return "SE_LBRAC";
        case SE_RBRAC: return "SE_RBRAC";
        case SE_LCBRAC: return "SE_LCBRAC";
        case SE_RCBRAC: return "SE_RCBRAC";
        case SE_COMMA: return "SE_COMMA";
        case SE_SEMI: return "SE_SEMI";
        case IDN: return "IDN";
        case INT_VAL: return "INT_VAL";
        default: return "UNKNOWN";
    }
}

// Returns the grammar symbol for a word: the operator or delimiter spelling,
// the keyword text itself for reserved words, "IDN" for any other
// identifier, and "INT" for integer values.
//
// Any other word type is treated as fatal: the numeric type is reported on
// cerr and the process exits with status -1.
string getGrammarName(WordType type, string buffer) {
    switch (type) {
        case OP_ADD: return "+";
        case OP_SUB: return "-";
        case OP_MUL: return "*";
        case OP_DIV: return "/";
        case OP_MOD: return "%";
        case OP_ASSIGN: return "=";
        case OP_GT: return ">";
        case OP_LT: return "<";
        case OP_EQ: return "==";
        case OP_LE: return "<=";
        case OP_GE: return ">=";
        case OP_NE: return "!=";
        case OP_AND: return "&&";
        case OP_OR: return "||";
        case SE_LBRAC: return "(";
        case SE_RBRAC: return ")";
        case SE_LCBRAC: return "{";
        case SE_RCBRAC: return "}";
        case SE_COMMA: return ",";
        case SE_SEMI: return ";";
        case IDN: {
            const ReservedWord* keyword = findReservedWord(buffer);
            return keyword != nullptr ? keyword->text : "IDN";
        }
        case INT_VAL:
            return "INT";
        default:
            cerr << "Token Error: " << type << endl;
            exit(-1);
    }
}