383 lines
8.3 KiB
C++
383 lines
8.3 KiB
C++
#include "nfa.h"
|
||
|
||
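/*
 * Scanning: the input is read one character at a time.
 * Delimiters and some operators consist of a single character, so each of
 * them can represent a state on its own.
 *
 * Note:
 *   1. Letters are case-sensitive.
 */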
|
||
|
||
// 获取输入串的类型
|
||
// 单独一位的读入-->下一步标识终态
|
||
InputCharType getInputCharType(char c) {
|
||
switch (c) {
|
||
case '_': return UNDERLINE;
|
||
case '+': return ADD;
|
||
case '-': return SUB;
|
||
case '*': return MUL;
|
||
case '/': return DIV;
|
||
case '%': return MOD;
|
||
case '=': return EQ;
|
||
case '>': return GT;
|
||
case '<': return LT;
|
||
case '!': return NOT;
|
||
case '&': return AND;
|
||
case '|': return OR;
|
||
case '(': return LBRACKET;
|
||
case ')': return RBRACKET;
|
||
case '{': return LCBRAC;
|
||
case '}': return RCBRAC;
|
||
case ',': return COMMA;
|
||
case ';': return SEMI;
|
||
case '.': return POINT;
|
||
default:
|
||
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
|
||
return LETTER;
|
||
}
|
||
else if (c >= '0' && c <= '9') {
|
||
return DIGIT;
|
||
}
|
||
else {
|
||
return EPSILON;
|
||
}
|
||
}
|
||
}
|
||
|
||
// 根据状态获取名称
|
||
string getInputChartypeName(InputCharType type) {
|
||
switch (type)
|
||
{
|
||
case LETTER:
|
||
return "LETTER";
|
||
case UNDERLINE:
|
||
return "UNDERLINE";
|
||
case DIGIT:
|
||
return "DIGIT";
|
||
case ADD:
|
||
return "+";
|
||
case SUB:
|
||
return "-";
|
||
case MUL:
|
||
return "*";
|
||
case DIV:
|
||
return "/";
|
||
case MOD:
|
||
return "%";
|
||
case EQ:
|
||
return "=";
|
||
case GT:
|
||
return ">";
|
||
case LT:
|
||
return "<";
|
||
case NOT:
|
||
return "!";
|
||
case AND:
|
||
return "&";
|
||
case OR:
|
||
return "|";
|
||
case LBRACKET:
|
||
return "(";
|
||
case RBRACKET:
|
||
return ")";
|
||
case LCBRAC:
|
||
return "{";
|
||
case RCBRAC:
|
||
return "}";
|
||
case COMMA:
|
||
return ",";
|
||
case SEMI:
|
||
return ";";
|
||
case POINT:
|
||
return ".";
|
||
case EPSILON:
|
||
return "EPSILON";
|
||
default:
|
||
return "UNKOWN";
|
||
}
|
||
}
|
||
|
||
//根据关键字类型获取其所属的种别
|
||
string getWordTypeName(WordType type, string buffer) {
|
||
switch (type) {
|
||
// 运算符
|
||
case OP_ADD:
|
||
case OP_SUB:
|
||
case OP_MUL:
|
||
case OP_DIV:
|
||
case OP_MOD:
|
||
case OP_ASSIGN:
|
||
case OP_GT:
|
||
case OP_LT:
|
||
case OP_EQ:
|
||
case OP_LE:
|
||
case OP_GE:
|
||
case OP_NE:
|
||
case OP_AND:
|
||
case OP_OR:
|
||
return "OP";
|
||
|
||
// 界符
|
||
case SE_LBRAC:
|
||
case SE_RBRAC:
|
||
case SE_LCBRAC:
|
||
case SE_RCBRAC:
|
||
case SE_COMMA:
|
||
case SE_SEMI:
|
||
return "SE";
|
||
|
||
// 标识符和关键字
|
||
case IDN:
|
||
if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")||!buffer.compare("main")){
|
||
return "KW";
|
||
}
|
||
else {
|
||
return "IDN";
|
||
}
|
||
|
||
// 整数
|
||
case INT_VAL:
|
||
return "INT";
|
||
case FLOAT_CONST:
|
||
return "FLOAT";
|
||
|
||
//default
|
||
default:
|
||
return "UNKNOWN";
|
||
}
|
||
}
|
||
|
||
//根据关键字属性
|
||
string getWordAttribute(WordType type, string buffer) {
|
||
switch (type) {
|
||
// 运算符
|
||
case OP_ADD: return "6";
|
||
case OP_SUB: return "7";
|
||
case OP_MUL: return "8";
|
||
case OP_DIV: return "9";
|
||
case OP_MOD: return "10";
|
||
case OP_ASSIGN: return "11";
|
||
case OP_GT: return "12";
|
||
case OP_LT: return "13";
|
||
case OP_EQ: return "14";
|
||
case OP_LE: return "15";
|
||
case OP_GE: return "16";
|
||
case OP_NE: return "17";
|
||
case OP_AND:return "18";
|
||
case OP_OR: return "19";
|
||
|
||
// 界符
|
||
case SE_LBRAC: return "20";
|
||
case SE_RBRAC: return "21";
|
||
case SE_LCBRAC: return "22";
|
||
case SE_RCBRAC: return "23";
|
||
case SE_COMMA: return "25";
|
||
case SE_SEMI: return "24";
|
||
|
||
|
||
// 标识符和关键字
|
||
case IDN:
|
||
if (!buffer.compare("int")){
|
||
return "1";
|
||
}
|
||
else if (!buffer.compare("void")){
|
||
return "2";
|
||
}
|
||
else if (!buffer.compare("return")){
|
||
return "3";
|
||
}
|
||
else if (!buffer.compare("const")){
|
||
return "4";
|
||
}
|
||
else if (!buffer.compare("main")){
|
||
return "5";
|
||
}
|
||
else {
|
||
return buffer;
|
||
}
|
||
|
||
// 整数
|
||
case INT_VAL:
|
||
return buffer;
|
||
|
||
//default
|
||
default:
|
||
return "UNKNOWN";
|
||
}
|
||
}
|
||
|
||
|
||
|
||
|
||
// 读取文件
|
||
string readfile(const string& filename)
|
||
{
|
||
// 打开文件流并读取文件内容
|
||
ifstream file(filename);
|
||
|
||
// 使用istreambuf_iterator类逐字符从file中读取到content中
|
||
string content((istreambuf_iterator<char>(file)),istreambuf_iterator<char>());
|
||
|
||
// 去掉换行符
|
||
//remove函数的作用是将字符串中的某个字符移动到字符串的末尾,并返回一个指向该字符后面位置的指针。
|
||
//erase 函数的作用是删除字符串中指定区间内的所有字符,返回修改后的字符串
|
||
//content.erase(remove(content.begin(), content.end(), '\n'), content.end());
|
||
|
||
return content;
|
||
}
|
||
|
||
// 获取关键字的Token种类
|
||
TokenType getTokenType(WordType type,string buffer) {
|
||
switch (type) {
|
||
case OP_ADD:
|
||
case OP_SUB:
|
||
case OP_MUL:
|
||
case OP_DIV:
|
||
case OP_MOD:
|
||
case OP_ASSIGN:
|
||
case OP_GT:
|
||
case OP_LT:
|
||
case OP_EQ:
|
||
case OP_LE:
|
||
case OP_GE:
|
||
case OP_NE:
|
||
case OP_AND:
|
||
case OP_OR:
|
||
return TokenType::OP;
|
||
|
||
case SE_LBRAC:
|
||
case SE_RBRAC:
|
||
case SE_LCBRAC:
|
||
case SE_RCBRAC:
|
||
case SE_COMMA:
|
||
case SE_SEMI:
|
||
return TokenType::SE;
|
||
|
||
case IDN:
|
||
if (!buffer.compare("int") || !buffer.compare("void") || !buffer.compare("const") || !buffer.compare("return")|| !buffer.compare("main")) {
|
||
return TokenType::KW;
|
||
}
|
||
else {
|
||
return TokenType::IDN;
|
||
}
|
||
|
||
case INT_VAL:
|
||
return TokenType::INT;
|
||
|
||
default:
|
||
return TokenType::UNKNOWN;
|
||
}
|
||
}
|
||
|
||
// 获取token名称
|
||
string getWordTypeName(WordType type) {
|
||
switch (type) {
|
||
case KW_INT:
|
||
return "KW_INT";
|
||
case KW_VOID:
|
||
return "KW_VOID";
|
||
case KW_RETURN:
|
||
return "KW_RETURN";
|
||
case KW_CONST:
|
||
return "KW_CONST";
|
||
case KW_MAIN:
|
||
return "KW_MAIN";
|
||
case OP_ADD:
|
||
return "OP_ADD";
|
||
case OP_SUB:
|
||
return "OP_SUB";
|
||
case OP_MUL:
|
||
return "OP_MUL";
|
||
case OP_DIV:
|
||
return "OP_DIV";
|
||
case OP_MOD:
|
||
return "OP_MOD";
|
||
case OP_ASSIGN:
|
||
return "OP_ASSIGN";
|
||
case OP_GT:
|
||
return "OP_GT";
|
||
case OP_LT:
|
||
return "OP_LT";
|
||
case OP_EQ:
|
||
return "OP_EQ";
|
||
case OP_LE:
|
||
return "OP_LE";
|
||
case OP_GE:
|
||
return "OP_GE";
|
||
case OP_NE:
|
||
return "OP_NE";
|
||
case OP_AND:
|
||
return "OP_AND";
|
||
case OP_OR:
|
||
return "OP_OR";
|
||
case SE_LBRAC:
|
||
return "SE_LBRAC";
|
||
case SE_RBRAC:
|
||
return "SE_RBRAC";
|
||
case SE_LCBRAC:
|
||
return "SE_LCBRAC";
|
||
case SE_RCBRAC:
|
||
return "SE_RCBRAC";
|
||
case SE_COMMA:
|
||
return "SE_COMMA";
|
||
case SE_SEMI:
|
||
return "SE_SEMI";
|
||
case IDN:
|
||
return "IDN";
|
||
case INT_VAL:
|
||
return "INT_VAL";
|
||
default:
|
||
return "UNKNOWN";
|
||
}
|
||
}
|
||
|
||
// 获取语法名称
|
||
string getGrammarName(WordType type, string buffer) {
|
||
switch (type) {
|
||
|
||
case OP_ADD: return "+";
|
||
case OP_SUB: return "-";
|
||
case OP_MUL: return "*";
|
||
case OP_DIV: return "/";
|
||
case OP_MOD: return "%";
|
||
case OP_ASSIGN: return "=";
|
||
case OP_GT: return ">";
|
||
case OP_LT: return "<";
|
||
case OP_EQ: return "==";
|
||
case OP_LE: return "<=";
|
||
case OP_GE: return ">=";
|
||
case OP_NE: return "!=";
|
||
case OP_AND: return "&&";
|
||
case OP_OR: return "||";
|
||
|
||
case SE_LBRAC: return "(";
|
||
case SE_RBRAC: return ")";
|
||
case SE_LCBRAC: return "{";
|
||
case SE_RCBRAC: return "}";
|
||
case SE_COMMA: return ",";
|
||
case SE_SEMI: return ";";
|
||
|
||
case IDN:
|
||
if (!buffer.compare("int")) {
|
||
return "int";
|
||
}
|
||
else if (!buffer.compare("void")) {
|
||
return "void";
|
||
}
|
||
else if (!buffer.compare("return")) {
|
||
return "return";
|
||
}
|
||
else if (!buffer.compare("const")) {
|
||
return "const";
|
||
}
|
||
else if (!buffer.compare("main")) {
|
||
return "main";
|
||
}
|
||
else {
|
||
return "IDN";
|
||
}
|
||
case INT_VAL: return "INT";
|
||
default: cerr << "Token Error: "<< type << endl; exit(-1);
|
||
}
|
||
} |