2024-compiler/tool.cpp

287 lines
6.1 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "nfa.h"
/// Maps a raw character to its InputCharType category.
///
/// ASCII letters (a-z, A-Z) become LETTER and ASCII digits become DIGIT.
/// Each recognised punctuation character has its own category. Every other
/// character is reported as EPSILON.
InputCharType getInputCharType(char c) {
    // Character classes first; they cannot overlap with the punctuation below.
    const bool isLowerCase = (c >= 'a' && c <= 'z');
    const bool isUpperCase = (c >= 'A' && c <= 'Z');
    if (isLowerCase || isUpperCase) {
        return LETTER;
    }
    if (c >= '0' && c <= '9') {
        return DIGIT;
    }

    switch (c) {
        // Identifier character
        case '_': return UNDERLINE;
        // Arithmetic
        case '+': return ADD;
        case '-': return SUB;
        case '*': return MUL;
        case '/': return DIV;
        case '%': return MOD;
        // Assignment / comparison / logic building blocks
        case '=': return EQ;
        case '>': return GT;
        case '<': return LT;
        case '!': return NOT;
        case '&': return AND;
        case '|': return OR;
        // Brackets and separators
        case '(': return LBRACKET;
        case ')': return RBRACKET;
        case '{': return LCBRAC;
        case '}': return RCBRAC;
        case ',': return COMMA;
        case ';': return SEMI;
        default:  break;
    }

    // Anything not matched above has no dedicated category.
    return EPSILON;
}
/// Returns a printable name for an InputCharType value.
///
/// LETTER, UNDERLINE, DIGIT and EPSILON are spelled out; every punctuation
/// category is rendered as the character it stands for.
///
/// @param type  Category to describe.
/// @return      The name, or "UNKNOWN" for a value not handled here. The
///              fallback was previously misspelled "UNKOWN"; it now matches
///              the "UNKNOWN" fallback of the getWordTypeName overloads.
string getInputChartypeName(InputCharType type) {
    switch (type) {
        // Character classes
        case LETTER:    return "LETTER";
        case UNDERLINE: return "UNDERLINE";
        case DIGIT:     return "DIGIT";
        // Arithmetic
        case ADD:       return "+";
        case SUB:       return "-";
        case MUL:       return "*";
        case DIV:       return "/";
        case MOD:       return "%";
        // Assignment / comparison / logic building blocks
        case EQ:        return "=";
        case GT:        return ">";
        case LT:        return "<";
        case NOT:       return "!";
        case AND:       return "&";
        case OR:        return "|";
        // Brackets and separators
        case LBRACKET:  return "(";
        case RBRACKET:  return ")";
        case LCBRAC:    return "{";
        case RCBRAC:    return "}";
        case COMMA:     return ",";
        case SEMI:      return ";";
        case EPSILON:   return "EPSILON";
        default:        return "UNKNOWN";
    }
}
/// Returns the coarse category label of a word.
///
/// @param type    Word type of the lexeme.
/// @param buffer  Lexeme text; only consulted when `type` is IDN.
/// @return "OP" for operators, "SE" for separators, "INT" for INT_VAL,
///         "KW" for an IDN whose text is int/void/const/return, "IDN" for
///         any other IDN, and "UNKNOWN" for everything else.
///
/// NOTE(review): the KW_* word types are not listed here and therefore fall
/// into the "UNKNOWN" branch — confirm no caller passes them to this overload.
string getWordTypeName(WordType type, string buffer) {
    switch (type) {
        // Operators
        case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD:
        case OP_ASSIGN:
        case OP_GT: case OP_LT: case OP_EQ: case OP_LE: case OP_GE: case OP_NE:
        case OP_AND: case OP_OR:
            return "OP";

        // Separators
        case SE_LBRAC: case SE_RBRAC:
        case SE_LCBRAC: case SE_RCBRAC:
        case SE_COMMA: case SE_SEMI:
            return "SE";

        case INT_VAL:
            return "INT";

        case IDN: {
            // Reserved words arrive typed as IDN and are told apart by text.
            const bool isReserved = buffer == "int" || buffer == "void"
                                 || buffer == "const" || buffer == "return";
            return isReserved ? "KW" : "IDN";
        }

        default:
            return "UNKNOWN";
    }
}
/// Reads the whole file at `filename` into a string.
///
/// The stream is opened in the default (text) mode and its content is
/// returned as delivered; newlines are preserved.
///
/// @param filename  Path of the file to read.
/// @return          The file content. If the file cannot be opened, a
///                  diagnostic is written to std::cerr and an empty string
///                  is returned, so a missing file is no longer silently
///                  indistinguishable from an empty one.
std::string readfile(const std::string& filename)
{
    std::ifstream file(filename);
    if (!file.is_open()) {
        std::cerr << "readfile: cannot open file '" << filename << "'" << std::endl;
        return std::string();
    }

    // Slurp everything from the current position up to end-of-stream.
    std::string content((std::istreambuf_iterator<char>(file)),
                        std::istreambuf_iterator<char>());

    // Newline removal is currently disabled. The erase/remove idiom below
    // (remove shifts the kept characters forward and returns the new logical
    // end; erase then drops the tail) is kept for reference only.
    //content.erase(remove(content.begin(), content.end(), '\n'), content.end());
    return content;
}
/// Classifies a word into its TokenType category.
///
/// @param type    Word type of the lexeme.
/// @param buffer  Lexeme text; only consulted when `type` is IDN.
/// @return TokenType::OP for operators, TokenType::SE for separators,
///         TokenType::INT for INT_VAL, TokenType::KW for an IDN whose text is
///         int/void/const/return, TokenType::IDN for any other IDN, and
///         TokenType::UNKNOWN otherwise.
TokenType getTokenType(WordType type,string buffer) {
    // Start from the fallback and overwrite it for every recognised type.
    TokenType category = TokenType::UNKNOWN;

    switch (type) {
        case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD:
        case OP_ASSIGN:
        case OP_GT: case OP_LT: case OP_EQ: case OP_LE: case OP_GE: case OP_NE:
        case OP_AND: case OP_OR:
            category = TokenType::OP;
            break;

        case SE_LBRAC: case SE_RBRAC:
        case SE_LCBRAC: case SE_RCBRAC:
        case SE_COMMA: case SE_SEMI:
            category = TokenType::SE;
            break;

        case IDN: {
            // Reserved words share the IDN word type; the text decides.
            const bool reserved = buffer == "int" || buffer == "void"
                               || buffer == "const" || buffer == "return";
            category = reserved ? TokenType::KW : TokenType::IDN;
            break;
        }

        case INT_VAL:
            category = TokenType::INT;
            break;

        default:
            break;
    }

    return category;
}
/// Returns the enumerator name of a WordType value as text.
///
/// @param type  Word type to describe.
/// @return      The enumerator's identifier (e.g. "OP_ADD"), or "UNKNOWN"
///              for a value that is not listed below.
string getWordTypeName(WordType type) {
    switch (type) {
        // Keywords
        case KW_INT:    return "KW_INT";
        case KW_VOID:   return "KW_VOID";
        case KW_RETURN: return "KW_RETURN";
        case KW_CONST:  return "KW_CONST";

        // Operators
        case OP_ADD:    return "OP_ADD";
        case OP_SUB:    return "OP_SUB";
        case OP_MUL:    return "OP_MUL";
        case OP_DIV:    return "OP_DIV";
        case OP_MOD:    return "OP_MOD";
        case OP_ASSIGN: return "OP_ASSIGN";
        case OP_GT:     return "OP_GT";
        case OP_LT:     return "OP_LT";
        case OP_EQ:     return "OP_EQ";
        case OP_LE:     return "OP_LE";
        case OP_GE:     return "OP_GE";
        case OP_NE:     return "OP_NE";
        case OP_AND:    return "OP_AND";
        case OP_OR:     return "OP_OR";

        // Separators
        case SE_LBRAC:  return "SE_LBRAC";
        case SE_RBRAC:  return "SE_RBRAC";
        case SE_LCBRAC: return "SE_LCBRAC";
        case SE_RCBRAC: return "SE_RCBRAC";
        case SE_COMMA:  return "SE_COMMA";
        case SE_SEMI:   return "SE_SEMI";

        // Identifiers and literals
        case IDN:       return "IDN";
        case INT_VAL:   return "INT_VAL";

        default:        return "UNKNOWN";
    }
}
/// Returns the symbol string associated with a word.
///
/// Operators and separators yield their literal spelling, INT_VAL yields
/// "INT", and an IDN yields its own text when that text is one of
/// int/void/return/const, otherwise "IDN".
///
/// @param type    Word type of the lexeme.
/// @param buffer  Lexeme text; only consulted when `type` is IDN.
/// @return        The symbol string described above.
///
/// For any other word type the function prints "Token Error: <type>" to cerr
/// and terminates the process via exit(-1); it does not return in that case.
string getGrammarName(WordType type, string buffer) {
    // Reserved words arrive typed as IDN; each one stands for itself.
    if (type == IDN) {
        static const char* const kReservedWords[] = {"int", "void", "return", "const"};
        for (const char* word : kReservedWords) {
            if (buffer == word) {
                return word;
            }
        }
        return "IDN";
    }

    switch (type) {
        // Operators
        case OP_ADD:    return "+";
        case OP_SUB:    return "-";
        case OP_MUL:    return "*";
        case OP_DIV:    return "/";
        case OP_MOD:    return "%";
        case OP_ASSIGN: return "=";
        case OP_GT:     return ">";
        case OP_LT:     return "<";
        case OP_EQ:     return "==";
        case OP_LE:     return "<=";
        case OP_GE:     return ">=";
        case OP_NE:     return "!=";
        case OP_AND:    return "&&";
        case OP_OR:     return "||";

        // Separators
        case SE_LBRAC:  return "(";
        case SE_RBRAC:  return ")";
        case SE_LCBRAC: return "{";
        case SE_RCBRAC: return "}";
        case SE_COMMA:  return ",";
        case SE_SEMI:   return ";";

        // Literals
        case INT_VAL:   return "INT";

        default:
            break;
    }

    // Unhandled word type: report it and abort.
    cerr << "Token Error: "<< type << endl;
    exit(-1);
}