compiler-temp/nfa/src/tool.cpp

309 lines
9.1 KiB
C++
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "nfa.h"
// Printable spelling of each punctuation/operator input class.
// getInputChartypeName() reports "UNKNOWN" for any class missing from this table.
std::unordered_map<InputCharType, std::string> CharTypeNames = {
    {InputCharType::UNDERLINE, "_"},
    // arithmetic symbols
    {InputCharType::ADD,       "+"},
    {InputCharType::SUB,       "-"},
    {InputCharType::MUL,       "*"},
    {InputCharType::DIV,       "/"},
    {InputCharType::MOD,       "%"},
    // comparison / logic symbols
    {InputCharType::EQ,        "="},
    {InputCharType::GT,        ">"},
    {InputCharType::LT,        "<"},
    {InputCharType::NOT,       "!"},
    {InputCharType::AND,       "&"},
    {InputCharType::OR,        "|"},
    // delimiters
    {InputCharType::LBRACKET,  "("},
    {InputCharType::RBRACKET,  ")"},
    {InputCharType::LCBRAC,    "{"},
    {InputCharType::RCBRAC,    "}"},
    {InputCharType::COMMA,     ","},
    {InputCharType::SEMI,      ";"},
    {InputCharType::POINT,     "."}  // decimal point
};
// Display name of every token type, used by getWordTypeName(WordType) and by
// the error message in getGrammarName().
std::unordered_map<WordType, std::string> WordTypeNames = {
    // keywords
    {WordType::KW_INT,    "INT"},
    {WordType::KW_VOID,   "VOID"},
    {WordType::KW_RETURN, "RETURN"},
    {WordType::KW_CONST,  "CONST"},
    {WordType::KW_MAIN,   "MAIN"},
    {WordType::KW_IF,     "IF"},
    {WordType::KW_ELSE,   "ELSE"},
    {WordType::KW_FLOAT,  "FLOAT"},
    // operators
    {WordType::OP_ADD,    "+"},
    {WordType::OP_SUB,    "-"},
    {WordType::OP_MUL,    "*"},
    {WordType::OP_DIV,    "/"},
    {WordType::OP_MOD,    "%"},
    {WordType::OP_ASSIGN, "="},
    {WordType::OP_GT,     ">"},
    {WordType::OP_LT,     "<"},
    {WordType::OP_EQ,     "=="},
    {WordType::OP_LE,     "<="},
    {WordType::OP_GE,     ">="},
    {WordType::OP_NE,     "!="},
    {WordType::OP_AND,    "&&"},
    {WordType::OP_OR,     "||"},
    // separators
    {WordType::SE_LBRAC,  "("},
    {WordType::SE_RBRAC,  ")"},
    {WordType::SE_LCBRAC, "{"},
    {WordType::SE_RCBRAC, "}"},
    {WordType::SE_COMMA,  ","},
    {WordType::SE_SEMI,   ";"},
    // identifiers, literals and the fallback type
    {WordType::IDN,       "IDENTIFIER"},
    {WordType::INT_VAL,   "INTEGER"},
    {WordType::FLOAT_VAL, "FLOAT"},
    {WordType::UNKOWN,    "UNKNOWN"}
};
// Scanning step: the input is read one character at a time and each character
// is mapped to an input class.
// Delimiters and the single-character operator symbols each form a class of
// their own, so one such character can stand for a state by itself.
//
// @param c  the character to classify
// @return   LETTER for ASCII letters, DIGIT for '0'-'9' and for '.', the
//           dedicated class for each recognised symbol, EPSILON for anything else
InputCharType getInputCharType(char c) {
    const bool isLowerCase = (c >= 'a' && c <= 'z');
    const bool isUpperCase = (c >= 'A' && c <= 'Z');
    if (isLowerCase || isUpperCase) {
        return InputCharType::LETTER;
    }

    // The decimal point is read in as a digit, so it shares the DIGIT class.
    const bool isDecimalDigit = (c >= '0' && c <= '9');
    if (isDecimalDigit || c == '.') {
        return InputCharType::DIGIT;
    }

    switch (c) {
        case '_': return InputCharType::UNDERLINE;
        case '+': return InputCharType::ADD;
        case '-': return InputCharType::SUB;
        case '*': return InputCharType::MUL;
        case '/': return InputCharType::DIV;
        case '%': return InputCharType::MOD;
        case '=': return InputCharType::EQ;
        case '>': return InputCharType::GT;
        case '<': return InputCharType::LT;
        case '!': return InputCharType::NOT;
        case '&': return InputCharType::AND;
        case '|': return InputCharType::OR;
        case '(': return InputCharType::LBRACKET;
        case ')': return InputCharType::RBRACKET;
        case '{': return InputCharType::LCBRAC;
        case '}': return InputCharType::RCBRAC;
        case ',': return InputCharType::COMMA;
        case ';': return InputCharType::SEMI;
        default:  return InputCharType::EPSILON;
    }
}
// Return the printable name of an input character class.
//
// @param type  the class to look up in CharTypeNames
// @return      the table entry, or "UNKNOWN" when the table has none
std::string getInputChartypeName(InputCharType type) {
    const auto entry = CharTypeNames.find(type);
    if (entry == CharTypeNames.end()) {
        return "UNKNOWN";
    }
    return entry->second;
}
// Decide whether the text of a numeric token is an integer or a float.
//
// @param buffer  the token text; may contain '.' because getInputCharType()
//                classifies the decimal point as a digit
// @return        "INT"     when the text has at least one digit and no '.',
//                "FLOAT"   when it has at least one digit and exactly one '.',
//                "UNKNOWN" otherwise (several dots, or no digit at all)
std::string judeFloat(std::string buffer) {
    // A token made only of dots (e.g. ".") or an empty token is not a number.
    if (buffer.find_first_of("0123456789") == std::string::npos) {
        return "UNKNOWN";
    }

    const std::size_t firstDot = buffer.find('.');
    if (firstDot == std::string::npos) {
        return "INT";
    }

    // Exactly one dot means the first and the last occurrence coincide.
    if (firstDot == buffer.rfind('.')) {
        return "FLOAT";
    }
    return "UNKNOWN";
}
// Return the category a token belongs to, given its type and its text.
//
// @param type    the token type
// @param buffer  the token text; used to tell keywords from identifiers and
//                integers from floats
// @return        "OP" for operators, "SE" for separators, "KW" or "IDN" for
//                IDN tokens, the result of judeFloat() for INT_VAL tokens,
//                and "UNKNOWN" for every other type (FLOAT_VAL included, since
//                floats are detected through the INT_VAL branch)
std::string getWordTypeName(WordType type, std::string buffer) {
    switch (type) {
        // operators
        case WordType::OP_ADD:
        case WordType::OP_SUB:
        case WordType::OP_MUL:
        case WordType::OP_DIV:
        case WordType::OP_MOD:
        case WordType::OP_ASSIGN:
        case WordType::OP_GT:
        case WordType::OP_LT:
        case WordType::OP_EQ:
        case WordType::OP_LE:
        case WordType::OP_GE:
        case WordType::OP_NE:
        case WordType::OP_AND:
        case WordType::OP_OR:
            return "OP";

        // separators
        case WordType::SE_LBRAC:
        case WordType::SE_RBRAC:
        case WordType::SE_LCBRAC:
        case WordType::SE_RCBRAC:
        case WordType::SE_COMMA:
        case WordType::SE_SEMI:
            return "SE";

        // identifiers and keywords
        case WordType::IDN: {
            // Same reserved words that getWordAttribute() maps to keyword
            // codes; "main" is included so both functions agree on it.
            static const char* const kKeywords[] = {
                "int", "void", "const", "return", "main", "if", "else", "float"
            };
            for (const char* keyword : kKeywords) {
                if (buffer == keyword) {
                    return "KW";
                }
            }
            return "IDN";
        }

        // numeric literal: the text decides between integer and float
        case WordType::INT_VAL:
            return judeFloat(buffer);

        default:
            return "UNKNOWN";
    }
}
// Return the attribute string of a token.
//
// @param type    the token type
// @param buffer  the token text
// @return        a numeric code (as text) for operators, separators and
//                keywords; the token text itself for other identifiers and
//                for numeric literals; "UNKNOWN" for any other type
std::string getWordAttribute(WordType type, std::string buffer) {
    switch (type) {
        // operators
        case WordType::OP_ADD:    return "6";
        case WordType::OP_SUB:    return "7";
        case WordType::OP_MUL:    return "8";
        case WordType::OP_DIV:    return "9";
        case WordType::OP_MOD:    return "10";
        case WordType::OP_ASSIGN: return "11";
        case WordType::OP_GT:     return "12";
        case WordType::OP_LT:     return "13";
        case WordType::OP_EQ:     return "14";
        case WordType::OP_LE:     return "15";
        case WordType::OP_GE:     return "16";
        case WordType::OP_NE:     return "17";
        case WordType::OP_AND:    return "18";
        case WordType::OP_OR:     return "19";

        // separators
        case WordType::SE_LBRAC:  return "20";
        case WordType::SE_RBRAC:  return "21";
        case WordType::SE_LCBRAC: return "22";
        case WordType::SE_RCBRAC: return "23";
        case WordType::SE_SEMI:   return "24";
        case WordType::SE_COMMA:  return "25";

        // identifiers and keywords
        case WordType::IDN: {
            struct KeywordCode {
                const char* word;
                const char* code;
            };
            static const KeywordCode kKeywordCodes[] = {
                {"int", "1"},  {"void", "2"}, {"return", "3"}, {"const", "4"},
                {"main", "5"}, {"if", "6"},   {"else", "7"},   {"float", "8"}
            };
            for (const KeywordCode& candidate : kKeywordCodes) {
                if (buffer == candidate.word) {
                    return candidate.code;
                }
            }
            // Not a reserved word: the identifier is its own attribute.
            return buffer;
        }

        // numeric literals carry their text; per the original note the
        // FLOAT_VAL type is not expected to be used in practice
        case WordType::INT_VAL:
        case WordType::FLOAT_VAL:
            return buffer;

        default:
            return "UNKNOWN";
    }
}
// Read a whole file into a string.
//
// @param filename  path of the file to read
// @return          the file content, character for character (line breaks are
//                  kept); an empty string when the file cannot be opened, in
//                  which case a diagnostic is also written to std::cerr
std::string readfile(const std::string& filename)
{
    std::ifstream file(filename);
    if (!file.is_open()) {
        // Without this message a missing file looks exactly like an empty one.
        std::cerr << "File Error: cannot open " << filename << std::endl;
        return std::string();
    }

    // istreambuf_iterator walks the stream buffer one character at a time
    // without skipping whitespace.
    return std::string(std::istreambuf_iterator<char>(file),
                       std::istreambuf_iterator<char>());
}
// Return the display name of a token type.
//
// @param type  the token type to look up in WordTypeNames
// @return      the table entry, or "UNKNOWN" when the table has none
std::string getWordTypeName(WordType type) {
    const auto entry = WordTypeNames.find(type);
    if (entry == WordTypeNames.end()) {
        return "UNKNOWN";
    }
    return entry->second;
}
// Return the grammar symbol name that corresponds to a token.
//
// @param type    the token type
// @param buffer  the token text; only consulted for IDN tokens
// @return        the operator/separator spelling, the reserved word itself for
//                "int", "void", "return", "const" and "main", "IDN" for any
//                other identifier, "INT" / "FLOAT" for numeric literals
//
// Any other token type is an error: a "Token Error" message is written to
// cerr and the process exits with status -1.
//
// NOTE(review): "if", "else" and "float" map to "IDN" here although
// getWordAttribute() gives them keyword codes — confirm whether the grammar
// expects them as separate terminals.
std::string getGrammarName(WordType type, std::string buffer) {
    switch (type) {
        // operators
        case WordType::OP_ADD:    return "+";
        case WordType::OP_SUB:    return "-";
        case WordType::OP_MUL:    return "*";
        case WordType::OP_DIV:    return "/";
        case WordType::OP_MOD:    return "%";
        case WordType::OP_ASSIGN: return "=";
        case WordType::OP_GT:     return ">";
        case WordType::OP_LT:     return "<";
        case WordType::OP_EQ:     return "==";
        case WordType::OP_LE:     return "<=";
        case WordType::OP_GE:     return ">=";
        case WordType::OP_NE:     return "!=";
        case WordType::OP_AND:    return "&&";
        case WordType::OP_OR:     return "||";

        // separators
        case WordType::SE_LBRAC:  return "(";
        case WordType::SE_RBRAC:  return ")";
        case WordType::SE_LCBRAC: return "{";
        case WordType::SE_RCBRAC: return "}";
        case WordType::SE_COMMA:  return ",";
        case WordType::SE_SEMI:   return ";";

        // identifiers: reserved words are their own grammar symbol
        case WordType::IDN:
            if (buffer == "int" || buffer == "void" || buffer == "return" ||
                buffer == "const" || buffer == "main") {
                return buffer;
            }
            return "IDN";

        // literals
        case WordType::INT_VAL:   return "INT";
        case WordType::FLOAT_VAL: return "FLOAT";

        default: {
            // Guard the lookup: dereferencing end() for a type that is
            // missing from WordTypeNames would be undefined behaviour.
            const auto entry = WordTypeNames.find(type);
            const std::string typeName =
                (entry != WordTypeNames.end()) ? entry->second : std::string("UNKNOWN");
            std::cerr << "Token Error: " << typeName << std::endl;
            exit(-1);
        }
    }
}