compiler-temp/nfa/tool.cpp

378 lines
8.2 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "nfa.h"
/*
扫描读入-->以字符的格式读入
对于界符和部分运算符,显然是单个组成,即可以单独代表一个状态
注意:
1.字母需要区分大小写
*/
/// Maps a single input character to its InputCharType.
///
/// Upper- and lower-case ASCII letters both map to LETTER, the decimal
/// digits map to DIGIT, and each recognised punctuation character maps to
/// its own dedicated type. Every other character maps to EPSILON.
///
/// @param c the character to classify.
/// @return the InputCharType corresponding to `c`.
InputCharType getInputCharType(char c) {
    // Range-based classes first; none of the characters handled by the
    // switch below is a letter or a digit, so the order does not matter.
    const bool isLowerCase = (c >= 'a' && c <= 'z');
    const bool isUpperCase = (c >= 'A' && c <= 'Z');
    if (isLowerCase || isUpperCase) {
        return LETTER;
    }
    if (c >= '0' && c <= '9') {
        return DIGIT;
    }

    // Single characters that each have a type of their own.
    switch (c) {
        case '_': return UNDERLINE;
        case '+': return ADD;
        case '-': return SUB;
        case '*': return MUL;
        case '/': return DIV;
        case '%': return MOD;
        case '=': return EQ;
        case '>': return GT;
        case '<': return LT;
        case '!': return NOT;
        case '&': return AND;
        case '|': return OR;
        case '(': return LBRACKET;
        case ')': return RBRACKET;
        case '{': return LCBRAC;
        case '}': return RCBRAC;
        case ',': return COMMA;
        case ';': return SEMI;
        default:  break;
    }

    // Anything not recognised above.
    return EPSILON;
}
/// Returns a printable name for an InputCharType.
///
/// LETTER, UNDERLINE, DIGIT and EPSILON are rendered as their enumerator
/// names; every operator/delimiter type is rendered as the character it
/// stands for.
///
/// @param type the input character type to describe.
/// @return the printable name, or "UNKNOWN" for a value that matches no
///         case (same fallback spelling as the other name helpers in this
///         file).
string getInputChartypeName(InputCharType type) {
    switch (type) {
        // Character classes.
        case LETTER:    return "LETTER";
        case UNDERLINE: return "UNDERLINE";
        case DIGIT:     return "DIGIT";
        // Operator characters.
        case ADD:       return "+";
        case SUB:       return "-";
        case MUL:       return "*";
        case DIV:       return "/";
        case MOD:       return "%";
        case EQ:        return "=";
        case GT:        return ">";
        case LT:        return "<";
        case NOT:       return "!";
        case AND:       return "&";
        case OR:        return "|";
        // Delimiter characters.
        case LBRACKET:  return "(";
        case RBRACKET:  return ")";
        case LCBRAC:    return "{";
        case RCBRAC:    return "}";
        case COMMA:     return ",";
        case SEMI:      return ";";
        case EPSILON:   return "EPSILON";
        default:
            // Previously misspelled as "UNKOWN".
            return "UNKNOWN";
    }
}
/// Returns the category label of a recognised word.
///
/// @param type   the word type to categorise.
/// @param buffer the text of the word; only inspected when `type` is IDN,
///               to tell reserved words apart from ordinary identifiers.
/// @return "OP" for operators, "SE" for delimiters, "KW" for an IDN whose
///         text is int/void/const/return/main, "IDN" for any other IDN,
///         "INT" for INT_VAL and "UNKNOWN" for everything else.
string getWordTypeName(WordType type, string buffer) {
    // Identifiers and keywords share the IDN word type; the text decides.
    if (type == IDN) {
        const bool isReserved = buffer == "int"
                             || buffer == "void"
                             || buffer == "const"
                             || buffer == "return"
                             || buffer == "main";
        return isReserved ? "KW" : "IDN";
    }

    // Integer literals.
    if (type == INT_VAL) {
        return "INT";
    }

    switch (type) {
        // Operators.
        case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD:
        case OP_ASSIGN:
        case OP_GT: case OP_LT: case OP_EQ: case OP_LE: case OP_GE: case OP_NE:
        case OP_AND: case OP_OR:
            return "OP";
        // Delimiters.
        case SE_LBRAC: case SE_RBRAC:
        case SE_LCBRAC: case SE_RCBRAC:
        case SE_COMMA: case SE_SEMI:
            return "SE";
        default:
            return "UNKNOWN";
    }
}
/// Returns the attribute string of a recognised word.
///
/// Keywords, operators and delimiters are represented by a fixed numeric
/// code rendered as a string; identifiers and integer literals are
/// represented by their own text.
///
/// @param type   the word type.
/// @param buffer the text of the word; used for IDN and INT_VAL.
/// @return the code "1".."5" for the keywords int/void/return/const/main,
///         "6".."19" for operators, "20".."25" for delimiters, `buffer`
///         for other identifiers and for integers, and "UNKNOWN" for any
///         other word type.
string getWordAttribute(WordType type, string buffer) {
    switch (type) {
        // Identifiers and keywords: keywords carry a code, anything else
        // carries its own spelling.
        case IDN:
            if (buffer == "int")    return "1";
            if (buffer == "void")   return "2";
            if (buffer == "return") return "3";
            if (buffer == "const")  return "4";
            if (buffer == "main")   return "5";
            return buffer;

        // Integer literals carry their own text.
        case INT_VAL:   return buffer;

        // Operators.
        case OP_ADD:    return "6";
        case OP_SUB:    return "7";
        case OP_MUL:    return "8";
        case OP_DIV:    return "9";
        case OP_MOD:    return "10";
        case OP_ASSIGN: return "11";
        case OP_GT:     return "12";
        case OP_LT:     return "13";
        case OP_EQ:     return "14";
        case OP_LE:     return "15";
        case OP_GE:     return "16";
        case OP_NE:     return "17";
        case OP_AND:    return "18";
        case OP_OR:     return "19";

        // Delimiters. Note that the semicolon is 24 and the comma is 25.
        case SE_LBRAC:  return "20";
        case SE_RBRAC:  return "21";
        case SE_LCBRAC: return "22";
        case SE_RCBRAC: return "23";
        case SE_SEMI:   return "24";
        case SE_COMMA:  return "25";

        default:
            break;
    }
    return "UNKNOWN";
}
// 读取文件
string readfile(const string& filename)
{
// 打开文件流并读取文件内容
ifstream file(filename);
// 使用istreambuf_iterator类逐字符从file中读取到content中
string content((istreambuf_iterator<char>(file)),istreambuf_iterator<char>());
// 去掉换行符
//remove函数的作用是将字符串中的某个字符移动到字符串的末尾并返回一个指向该字符后面位置的指针。
//erase 函数的作用是删除字符串中指定区间内的所有字符,返回修改后的字符串
//content.erase(remove(content.begin(), content.end(), '\n'), content.end());
return content;
}
/// Returns the TokenType category of a recognised word.
///
/// @param type   the word type to categorise.
/// @param buffer the text of the word; only inspected when `type` is IDN,
///               to tell reserved words apart from ordinary identifiers.
/// @return TokenType::OP for operators, TokenType::SE for delimiters,
///         TokenType::KW for an IDN whose text is
///         int/void/const/return/main, TokenType::IDN for any other IDN,
///         TokenType::INT for INT_VAL and TokenType::UNKNOWN otherwise.
TokenType getTokenType(WordType type,string buffer) {
    // Identifiers and keywords share the IDN word type; the text decides.
    if (type == IDN) {
        const bool isReserved = buffer == "int"
                             || buffer == "void"
                             || buffer == "const"
                             || buffer == "return"
                             || buffer == "main";
        if (isReserved) {
            return TokenType::KW;
        }
        return TokenType::IDN;
    }

    // Integer literals.
    if (type == INT_VAL) {
        return TokenType::INT;
    }

    switch (type) {
        // Operators.
        case OP_ADD: case OP_SUB: case OP_MUL: case OP_DIV: case OP_MOD:
        case OP_ASSIGN:
        case OP_GT: case OP_LT: case OP_EQ: case OP_LE: case OP_GE: case OP_NE:
        case OP_AND: case OP_OR:
            return TokenType::OP;
        // Delimiters.
        case SE_LBRAC: case SE_RBRAC:
        case SE_LCBRAC: case SE_RCBRAC:
        case SE_COMMA: case SE_SEMI:
            return TokenType::SE;
        default:
            return TokenType::UNKNOWN;
    }
}
/// Returns the enumerator name of a WordType as a string.
///
/// @param type the word type to name.
/// @return the spelling of the matching enumerator (e.g. "OP_ADD"), or
///         "UNKNOWN" when `type` matches none of the listed enumerators.
string getWordTypeName(WordType type) {
    switch (type) {
        // Keywords.
        case KW_INT:    return "KW_INT";
        case KW_VOID:   return "KW_VOID";
        case KW_RETURN: return "KW_RETURN";
        case KW_CONST:  return "KW_CONST";
        case KW_MAIN:   return "KW_MAIN";

        // Operators.
        case OP_ADD:    return "OP_ADD";
        case OP_SUB:    return "OP_SUB";
        case OP_MUL:    return "OP_MUL";
        case OP_DIV:    return "OP_DIV";
        case OP_MOD:    return "OP_MOD";
        case OP_ASSIGN: return "OP_ASSIGN";
        case OP_GT:     return "OP_GT";
        case OP_LT:     return "OP_LT";
        case OP_EQ:     return "OP_EQ";
        case OP_LE:     return "OP_LE";
        case OP_GE:     return "OP_GE";
        case OP_NE:     return "OP_NE";
        case OP_AND:    return "OP_AND";
        case OP_OR:     return "OP_OR";

        // Delimiters.
        case SE_LBRAC:  return "SE_LBRAC";
        case SE_RBRAC:  return "SE_RBRAC";
        case SE_LCBRAC: return "SE_LCBRAC";
        case SE_RCBRAC: return "SE_RCBRAC";
        case SE_COMMA:  return "SE_COMMA";
        case SE_SEMI:   return "SE_SEMI";

        // Identifiers and integer literals.
        case IDN:       return "IDN";
        case INT_VAL:   return "INT_VAL";

        default:
            break;
    }
    return "UNKNOWN";
}
/// Returns the grammar symbol that stands for a recognised word.
///
/// Operators and delimiters are rendered as their literal spelling, the
/// keywords int/void/return/const/main as themselves, every other
/// identifier as "IDN" and integer literals as "INT".
///
/// @param type   the word type.
/// @param buffer the text of the word; only inspected when `type` is IDN.
/// @return the grammar symbol. For a word type that matches no case the
///         function does not return: it prints "Token Error: <type>" to
///         cerr and terminates the program with exit(-1).
string getGrammarName(WordType type, string buffer) {
    switch (type) {
        // Operators.
        case OP_ADD:    return "+";
        case OP_SUB:    return "-";
        case OP_MUL:    return "*";
        case OP_DIV:    return "/";
        case OP_MOD:    return "%";
        case OP_ASSIGN: return "=";
        case OP_GT:     return ">";
        case OP_LT:     return "<";
        case OP_EQ:     return "==";
        case OP_LE:     return "<=";
        case OP_GE:     return ">=";
        case OP_NE:     return "!=";
        case OP_AND:    return "&&";
        case OP_OR:     return "||";

        // Delimiters.
        case SE_LBRAC:  return "(";
        case SE_RBRAC:  return ")";
        case SE_LCBRAC: return "{";
        case SE_RCBRAC: return "}";
        case SE_COMMA:  return ",";
        case SE_SEMI:   return ";";

        // Keywords are their own grammar symbol; all other identifiers
        // collapse to "IDN".
        case IDN: {
            const bool isReserved = buffer == "int"
                                 || buffer == "void"
                                 || buffer == "return"
                                 || buffer == "const"
                                 || buffer == "main";
            if (isReserved) {
                return buffer;
            }
            return "IDN";
        }

        // Integer literals.
        case INT_VAL:   return "INT";

        default:
            break;
    }

    // Unrecognised word type: report it and abort.
    cerr << "Token Error: " << type << endl;
    exit(-1);
}