/// Returns the name of this token's class as text.
///
/// The result is one of "Keyword", "Identifier", "Constant",
/// "StringLiteral" or "Punctuator"; any other class value yields "None".
std::string Token::tokenClassString() const
{
    // Start from the fallback label and overwrite it for the known classes.
    const char* label = "None";

    switch (tokenClass()) {
    case TokenClass::Keyword:
        label = "Keyword";
        break;
    case TokenClass::Identifier:
        label = "Identifier";
        break;
    case TokenClass::Constant:
        label = "Constant";
        break;
    case TokenClass::StringLiteral:
        label = "StringLiteral";
        break;
    case TokenClass::Punctuator:
        label = "Punctuator";
        break;
    default:
        break;
    }

    return label;
}
/**
 * Scans the input and returns the next token.
 *
 * Characters are pulled one at a time from fileManager and fed through the
 * table-driven state machine stored in stateMatrix (indexed by current state
 * number and character code). Each table entry names the next state and an
 * action to perform; the loop runs until an ACCEPT action produces a token,
 * the end-of-file state is reached, or an error is reported.
 *
 * Accepted lexemes are length-checked (integers, strings, identifiers) and
 * identifiers are additionally matched, case-insensitively, against the
 * operator words ("or", "and", "div", "mod") and the keyword table.
 *
 * @return The recognised token. On end of input, and after every call to
 *         errorAndExit (in case that call returns), an EOF token
 *         (EOF_T, NONE_ST, "EOF") is returned.
 */
tokenClass scannerClass::getToken()
{
    int currentStateNum = 0;
    int c;
    string currentLexeme;

    do
    {
        // Being in state 0 means a new lexeme is starting: clear the buffer.
        if(currentStateNum == 0)
            currentLexeme = "";

        c = static_cast<int>(fileManager.getNextChar());

        // EOF (-1) is not a valid matrix index, so remap it to its dedicated column.
        if(c == EOF)
            c = EOF_INDEX;

        // Reject anything that cannot be used as a column index. The lower
        // bound guards against negative codes other than EOF, which would
        // otherwise index stateMatrix out of bounds.
        if(c < 0 || c >= MAX_CHAR)
        {
            // Build the message as a string; adding an int to a string
            // literal would be pointer arithmetic, not concatenation.
            errorAndExit(string("Illegal symbol: ") + static_cast<char>(c));
            return tokenClass(EOF_T,NONE_ST,"EOF");
        }

        State s = stateMatrix[currentStateNum][c]; // Transition for (state, char).

        // The character sequence read so far is not recognised by the matrix.
        if(s.nextStateNum == INVALID_STATE)
        {
            errorAndExit("Invalid char sequence: "+(currentLexeme+static_cast<char>(c)));
            return tokenClass(EOF_T,NONE_ST,"EOF");
        }

        // End of input reached: nothing else to do but report EOF.
        if(s.nextStateNum == EOF_INDEX)
            return tokenClass(EOF_T,NONE_ST,"EOF");

        switch(s.action)
        {
        case NO_ACTION:
            currentLexeme += static_cast<char>(c); // Store the current char.
            break;

        case ACCEPT:
        {
            // Either the char belongs to the lexeme, or it was only a
            // look-ahead and must be handed back to the file manager.
            if(!s.needPushBack)
                currentLexeme += static_cast<char>(c);
            else
                fileManager.pushBack();

            int type = s.token->type;

            // Integer length check.
            if(type == INTEGER_T && currentLexeme.length() > 4)
            {
                errorAndExit("Integer can have at most four digits: "+currentLexeme);
                return tokenClass(EOF_T,NONE_ST,"EOF");
            }

            // String length check.
            // NOTE(review): the limit is 52 while the message says fifty;
            // presumably the lexeme includes its two delimiters - confirm.
            if(type == STRING_T && currentLexeme.length() > 52)
            {
                errorAndExit("String can have at most fifty characters: "+currentLexeme);
                return tokenClass(EOF_T,NONE_ST,"EOF");
            }

            if(type == IDENTIFIER_T)
            {
                // Identifier length check.
                if(currentLexeme.length() > 12)
                {
                    errorAndExit("Identifier can have at most twelve characters: "+currentLexeme);
                    return tokenClass(EOF_T,NONE_ST,"EOF");
                }

                // Word operators and keywords are matched case-insensitively.
                const char* cString = currentLexeme.c_str();
                if(_strcmpi("or",cString)==0)
                    return tokenClass(ADDOP_T,OR_ST,currentLexeme);
                if(_strcmpi("and",cString)==0)
                    return tokenClass(MULOP_T,AND_ST,currentLexeme);
                if(_strcmpi("div",cString)==0)
                    return tokenClass(MULOP_T,DIV_ST,currentLexeme);
                if(_strcmpi("mod",cString)==0)
                    return tokenClass(MULOP_T,MOD_ST,currentLexeme);

                Keyword keywords[16]={PROGRAM_KEYWORD,FUNCTION_KEYWORD,BEGIN_KEYWORD,END_KEYWORD,
                                      IF_KEYWORD,THEN_KEYWORD,ELSE_KEYWORD,WHILE_KEYWORD,
                                      DO_KEYWORD,COUT_KEYWORD,CIN_KEYWORD,ENDL_KEYWORD,
                                      INT_KEYWORD,BOOLEAN_KEYWORD,TRUE_KEYWORD,FALSE_KEYWORD};
                for(int i = 0;i<16;i++)
                    if(_strcmpi(keywords[i].keyword,cString)==0)
                        return tokenClass(keywords[i].type,NONE_ST,currentLexeme);
            }

            // Not a reserved word: return the token described by the state.
            return tokenClass(s.token->type,s.token->subtype,currentLexeme);
        }

        case WARNING_ACTION:
            // Currently not used because no warning state is produced by the state matrix.
            cout<<"warning"<<endl;
            break;

        case ERROR_ACTION:
        {
            if(s.actionInfo != NULL)
                errorAndExit(*s.actionInfo);
            else // Double check only; normally an error message should be included.
                errorAndExit("Error");
            // Same as every other error path: do not fall through into the
            // next case if errorAndExit happens to return.
            return tokenClass(EOF_T,NONE_ST,"EOF");
        }

        case CLEAR_BUFFER_ACTION:
            currentLexeme = ""; // Clear buffer, for comments only.
            break;

        case CHECK_COMPILER_DIRECTIVE_ACTION:
            currentLexeme += static_cast<char>(c);
            if(currentLexeme == "{$p+}")
                fileManager.setPrintStatus(true);
            else if (currentLexeme == "{$p-}")
                fileManager.setPrintStatus(false);
            else if (currentLexeme == "{$e+}")
                expressionDebugging = true;
            else if (currentLexeme == "{$e-}")
                expressionDebugging = false;
            else if (currentLexeme == "{$s+}")
                cout<<symbolTable.toString()<<endl;
            else
                cout<<"Warning, compiler directive "+currentLexeme+" is undefined."<<endl;
            break;
        }

        currentStateNum = s.nextStateNum;
    } while(true);

    // This should be unreachable.
    return tokenClass(EMPTY_T,EMPTY_ST,EMPTY_LEXEME);
}