// compiler-temp/nfa/tool.cpp

#include "nfa.h"

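/*
Scanning: the input is read one character at a time.
Delimiters and some of the operators consist of a single character, so each
of those characters can stand for a state on its own.

Notes:
1. Letters are case-sensitive.
*/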

// Classify one input character.
// ASCII letters (either case) yield LETTER and decimal digits yield DIGIT.
// '_' and each single-character operator/delimiter listed below yield their
// own enumerator. Any other character yields EPSILON.
InputCharType getInputCharType(char c) {
    const bool isLower = ('a' <= c && c <= 'z');
    const bool isUpper = ('A' <= c && c <= 'Z');
    if (isLower || isUpper) {
        return LETTER;
    }
    if ('0' <= c && c <= '9') {
        return DIGIT;
    }

    // Symbols that are recognised by themselves, paired with their class.
    struct SymbolClass {
        char symbol;
        InputCharType type;
    };
    static const SymbolClass kSymbols[] = {
        {'_', UNDERLINE},
        {'+', ADD}, {'-', SUB}, {'*', MUL}, {'/', DIV}, {'%', MOD},
        {'=', EQ},  {'>', GT},  {'<', LT},  {'!', NOT},
        {'&', AND}, {'|', OR},
        {'(', LBRACKET}, {')', RBRACKET},
        {'{', LCBRAC},   {'}', RCBRAC},
        {',', COMMA},    {';', SEMI},
    };
    for (const SymbolClass& entry : kSymbols) {
        if (entry.symbol == c) {
            return entry.type;
        }
    }

    // Not a letter, digit or known symbol.
    return EPSILON;
}

// Return a printable name for an input character class.
// LETTER, UNDERLINE, DIGIT and EPSILON are spelled out; every operator and
// delimiter class is rendered as the character it stands for.
// A value that is not handled here yields "UNKNOWN", the same fallback text
// used by the other name lookups in this file.
string getInputChartypeName(InputCharType type) {
    switch (type) {
    // Character classes without a single representative symbol.
    case LETTER:    return "LETTER";
    case UNDERLINE: return "UNDERLINE";
    case DIGIT:     return "DIGIT";
    case EPSILON:   return "EPSILON";

    // Arithmetic operators.
    case ADD: return "+";
    case SUB: return "-";
    case MUL: return "*";
    case DIV: return "/";
    case MOD: return "%";

    // Characters that start comparison / logical operators.
    case EQ:  return "=";
    case GT:  return ">";
    case LT:  return "<";
    case NOT: return "!";
    case AND: return "&";
    case OR:  return "|";

    // Delimiters.
    case LBRACKET: return "(";
    case RBRACKET: return ")";
    case LCBRAC:   return "{";
    case RCBRAC:   return "}";
    case COMMA:    return ",";
    case SEMI:     return ";";

    default:
        return "UNKNOWN";
    }
}

// Return the category label of a recognised word:
//   "OP"  for operators, "SE" for delimiters, "INT" for integer literals,
//   "KW"  for an IDN word whose text is int/void/const/return/main,
//   "IDN" for any other IDN word, and "UNKNOWN" for anything else.
// `buffer` is the matched text; it is only consulted for IDN words.
string getWordTypeName(WordType type, string buffer) {
    // Keywords are scanned as identifiers, so the text decides between them.
    if (type == IDN) {
        const bool reserved = buffer == "int"
                           || buffer == "void"
                           || buffer == "const"
                           || buffer == "return"
                           || buffer == "main";
        return reserved ? "KW" : "IDN";
    }

    if (type == INT_VAL) {
        return "INT";
    }

    switch (type) {
    case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD:
    case OP_ASSIGN:
    case OP_GT: case OP_LT: case OP_EQ: case OP_LE: case OP_GE: case OP_NE:
    case OP_AND: case OP_OR:
        return "OP";

    case SE_LBRAC: case SE_RBRAC:
    case SE_LCBRAC: case SE_RCBRAC:
    case SE_COMMA: case SE_SEMI:
        return "SE";

    default:
        return "UNKNOWN";
    }
}

// Return the attribute string of a recognised word.
// Keywords (IDN words spelled int/void/return/const/main) map to "1".."5",
// operators to "6".."19" and delimiters to "20".."25". Other IDN words and
// integer literals use their own text (`buffer`) as the attribute. Any other
// word type yields "UNKNOWN".
string getWordAttribute(WordType type, string buffer) {
    // Integer literals carry their text as the attribute.
    if (type == INT_VAL) {
        return buffer;
    }

    if (type == IDN) {
        // Reserved words and their codes, index-aligned.
        static const char* const kKeywords[] = {"int", "void", "return", "const", "main"};
        static const char* const kCodes[]    = {"1",   "2",    "3",      "4",     "5"};
        for (int i = 0; i < 5; ++i) {
            if (buffer == kKeywords[i]) {
                return kCodes[i];
            }
        }
        // A plain identifier: the attribute is its own name.
        return buffer;
    }

    switch (type) {
    // Operators
    case OP_ADD:
        return "6";
    case OP_SUB:
        return "7";
    case OP_MUL:
        return "8";
    case OP_DIV:
        return "9";
    case OP_MOD:
        return "10";
    case OP_ASSIGN:
        return "11";
    case OP_GT:
        return "12";
    case OP_LT:
        return "13";
    case OP_EQ:
        return "14";
    case OP_LE:
        return "15";
    case OP_GE:
        return "16";
    case OP_NE:
        return "17";
    case OP_AND:
        return "18";
    case OP_OR:
        return "19";

    // Delimiters (note: the semicolon is 24 and the comma is 25)
    case SE_LBRAC:
        return "20";
    case SE_RBRAC:
        return "21";
    case SE_LCBRAC:
        return "22";
    case SE_RCBRAC:
        return "23";
    case SE_SEMI:
        return "24";
    case SE_COMMA:
        return "25";

    default:
        return "UNKNOWN";
    }
}


// Read the whole file `filename` and return its contents as one string.
// Newline characters are kept as they are read.
// If the file cannot be opened, a diagnostic is written to cerr and an empty
// string is returned, so callers still receive a string either way.
string readfile(const string& filename)
{
    ifstream file(filename);
    if (!file.is_open()) {
        // Report the failure instead of silently handing back "" as if the
        // file had been empty.
        cerr << "readfile: cannot open file: " << filename << endl;
        return string();
    }

    // Copy every character from the stream buffer into the result.
    string content((istreambuf_iterator<char>(file)), istreambuf_iterator<char>());

    // Newline stripping is intentionally disabled. The erase/remove idiom
    // below would drop every '\n': remove() compacts the kept characters to
    // the front and returns the new logical end, erase() then cuts the tail.
    //content.erase(remove(content.begin(), content.end(), '\n'), content.end());

    return content;
}

// Map a recognised word to its token category.
// Operators give TokenType::OP, delimiters TokenType::SE and integer literals
// TokenType::INT. An IDN word gives TokenType::KW when its text (`buffer`) is
// int/void/const/return/main and TokenType::IDN otherwise. Any other word
// type gives TokenType::UNKNOWN.
TokenType getTokenType(WordType type,string buffer) {
    if (type == INT_VAL) {
        return TokenType::INT;
    }

    // Keywords are scanned as identifiers; the text tells them apart.
    if (type == IDN) {
        const bool reserved = buffer == "int"
                           || buffer == "void"
                           || buffer == "const"
                           || buffer == "return"
                           || buffer == "main";
        return reserved ? TokenType::KW : TokenType::IDN;
    }

    switch (type) {
    case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD:
    case OP_ASSIGN:
    case OP_GT: case OP_LT: case OP_EQ: case OP_LE: case OP_GE: case OP_NE:
    case OP_AND: case OP_OR:
        return TokenType::OP;

    case SE_LBRAC: case SE_RBRAC:
    case SE_LCBRAC: case SE_RCBRAC:
    case SE_COMMA: case SE_SEMI:
        return TokenType::SE;

    default:
        return TokenType::UNKNOWN;
    }
}

// Return the enumerator name of a word type as text (e.g. OP_ADD -> "OP_ADD").
// A value without a case below yields "UNKNOWN".
string getWordTypeName(WordType type) {
    switch (type) {
    // Keywords
    case KW_INT:    return "KW_INT";
    case KW_VOID:   return "KW_VOID";
    case KW_RETURN: return "KW_RETURN";
    case KW_CONST:  return "KW_CONST";
    case KW_MAIN:   return "KW_MAIN";

    // Operators
    case OP_ADD:    return "OP_ADD";
    case OP_SUB:    return "OP_SUB";
    case OP_MUL:    return "OP_MUL";
    case OP_DIV:    return "OP_DIV";
    case OP_MOD:    return "OP_MOD";
    case OP_ASSIGN: return "OP_ASSIGN";
    case OP_GT:     return "OP_GT";
    case OP_LT:     return "OP_LT";
    case OP_EQ:     return "OP_EQ";
    case OP_LE:     return "OP_LE";
    case OP_GE:     return "OP_GE";
    case OP_NE:     return "OP_NE";
    case OP_AND:    return "OP_AND";
    case OP_OR:     return "OP_OR";

    // Delimiters
    case SE_LBRAC:  return "SE_LBRAC";
    case SE_RBRAC:  return "SE_RBRAC";
    case SE_LCBRAC: return "SE_LCBRAC";
    case SE_RCBRAC: return "SE_RCBRAC";
    case SE_COMMA:  return "SE_COMMA";
    case SE_SEMI:   return "SE_SEMI";

    // Identifiers and integer literals
    case IDN:       return "IDN";
    case INT_VAL:   return "INT_VAL";

    default:        return "UNKNOWN";
    }
}

// Return the grammar symbol for a recognised word.
// Operators and delimiters map to their literal spelling, integer literals to
// "INT". An IDN word maps to its own spelling when it is one of
// int/void/return/const/main and to "IDN" otherwise.
// Any other word type is fatal: a message is written to cerr and the process
// exits with status -1.
string getGrammarName(WordType type, string buffer) {
    if (type == INT_VAL) {
        return "INT";
    }

    if (type == IDN) {
        // Reserved words are their own grammar symbol.
        static const char* const kKeywords[] = {"int", "void", "return", "const", "main"};
        for (const char* keyword : kKeywords) {
            if (buffer == keyword) {
                return keyword;
            }
        }
        return "IDN";
    }

    switch (type) {
    // Operators
    case OP_ADD:
        return "+";
    case OP_SUB:
        return "-";
    case OP_MUL:
        return "*";
    case OP_DIV:
        return "/";
    case OP_MOD:
        return "%";
    case OP_ASSIGN:
        return "=";
    case OP_GT:
        return ">";
    case OP_LT:
        return "<";
    case OP_EQ:
        return "==";
    case OP_LE:
        return "<=";
    case OP_GE:
        return ">=";
    case OP_NE:
        return "!=";
    case OP_AND:
        return "&&";
    case OP_OR:
        return "||";

    // Delimiters
    case SE_LBRAC:
        return "(";
    case SE_RBRAC:
        return ")";
    case SE_LCBRAC:
        return "{";
    case SE_RCBRAC:
        return "}";
    case SE_COMMA:
        return ",";
    case SE_SEMI:
        return ";";

    default:
        // No grammar symbol exists for this word type; abort.
        cerr << "Token Error: " << type << endl;
        exit(-1);
    }
}