static void Classification (void) { lsymbol ntext = ltoken_getRawText (nextToken); if (ntext == ltoken_getText (endCommentCharToken) || ntext == ltoken_getText (idCharToken) || ntext == ltoken_getText (opCharToken) || ntext == ltoken_getText (extensionCharToken) || ntext == ltoken_getText (singleCharToken) || ntext == ltoken_getText (whiteCharToken)) { CharClass (); InitReduce (CLASSIFICATION1); } else if (ntext == ltoken_getText (quantifierSymToken) || ntext == ltoken_getText (logicalOpToken) || ntext == ltoken_getText (eqOpToken) || ntext == ltoken_getText (equationSymToken) || ntext == ltoken_getText (eqSepSymToken) || ntext == ltoken_getText (selectSymToken) || ntext == ltoken_getText (openSymToken) || ntext == ltoken_getText (sepSymToken) || ntext == ltoken_getText (closeSymToken) || ntext == ltoken_getText (simpleIdToken) || ntext == ltoken_getText (mapSymToken) || ntext == ltoken_getText (markerSymToken) || ntext == ltoken_getText (commentSymToken)) { TokenClass (); InitReduce (CLASSIFICATION2); } else if (ntext == ltoken_getText (synonymToken)) { SynClass (); InitReduce (CLASSIFICATION3); } else { llbug (message ("Expected character, token, or synonym classification: %s", ltoken_getRawString (nextToken))); /* pop off all tokens on this line */ } }
static void Classification (void) { if (ltoken_getRawText (nextToken) == ltoken_getText (endCommentCharToken) || ltoken_getRawText (nextToken) == ltoken_getText (idCharToken) || ltoken_getRawText (nextToken) == ltoken_getText (opCharToken) || ltoken_getRawText (nextToken) == ltoken_getText (extensionCharToken) || ltoken_getRawText (nextToken) == ltoken_getText (singleCharToken) || ltoken_getRawText (nextToken) == ltoken_getText (whiteCharToken)) { CharClass (); InitReduce (CLASSIFICATION1); } else if (ltoken_getRawText (nextToken) == ltoken_getText (quantifierSymToken) || ltoken_getRawText (nextToken) == ltoken_getText (logicalOpToken) || ltoken_getRawText (nextToken) == ltoken_getText (eqOpToken) || ltoken_getRawText (nextToken) == ltoken_getText (equationSymToken) || ltoken_getRawText (nextToken) == ltoken_getText (eqSepSymToken) || ltoken_getRawText (nextToken) == ltoken_getText (selectSymToken) || ltoken_getRawText (nextToken) == ltoken_getText (openSymToken) || ltoken_getRawText (nextToken) == ltoken_getText (sepSymToken) || ltoken_getRawText (nextToken) == ltoken_getText (closeSymToken) || ltoken_getRawText (nextToken) == ltoken_getText (simpleIdToken) || ltoken_getRawText (nextToken) == ltoken_getText (mapSymToken) || ltoken_getRawText (nextToken) == ltoken_getText (markerSymToken) || ltoken_getRawText (nextToken) == ltoken_getText (commentSymToken)) { TokenClass (); InitReduce (CLASSIFICATION2); } else if (ltoken_getRawText (nextToken) == ltoken_getText (synonymToken)) { SynClass (); InitReduce (CLASSIFICATION3); } else { LocalUserError (nextToken, "expected character, token, or synonym classification"); } }
/**
 * Scans and returns the next token by feeding characters from fileManager
 * through stateTable until a state entry accepts, reports an error, or
 * signals end of file.
 *
 * Returned tokens:
 *  - (EOF_T, NONE_ST, "EOF") when the table entry's next state is EOF_INDEX;
 *  - (EMPTY_T, EMPTY_ST, <message>) for an illegal character, an invalid
 *    character sequence, an ERROR action, or a lexeme that exceeds the
 *    length limit for integers, strings or identifiers;
 *  - for identifiers, "or" / "and" / "div" / "mod" (compared case-insensitively)
 *    become operator tokens and the listed keywords become KEYWORD_T;
 *  - otherwise the type/subtype stored in the accepting table entry.
 *
 * Side effect: an unrecognised compiler directive prints a warning to cout.
 */
TokenClass ScannerClass::getToken()
{
    int state = 0;
    string currentLexeme;

    do
    {
        // Being (back) in state 0 means a new token starts: drop any leftovers.
        if (state == 0)
            currentLexeme = "";

        // Read the next character; end of file is remapped to its own table column.
        int chr = static_cast<int>(fileManager.getNextChar());
        if (chr == EOF)
            chr = EOF_INDEX;

        // Reject anything that is not a valid column of the state table.
        // The chr < 0 test guards against a negative index, which could occur
        // if getNextChar() yields a signed char (its type is not visible here).
        if (chr < 0 || chr >= MAX_CHAR)
        {
            // Build the message as a string: adding an int to a string literal
            // is pointer arithmetic, not concatenation.
            string illegalMessage = "Illegal symbol: ";
            illegalMessage += static_cast<char>(chr);
            return TokenClass(EMPTY_T, EMPTY_ST, illegalMessage);
        }

        // Look up the transition for (current state, character).
        const State& s = stateTable[state][chr];

        if (s.state == ERROR_STATE)
            return TokenClass(EMPTY_T, EMPTY_ST,
                              "Invalid char sequence: " + (currentLexeme + static_cast<char>(chr)));

        // End of file reached: nothing else to do for this token.
        if (s.state == EOF_INDEX)
            return TokenClass(EOF_T, NONE_ST, "EOF");

        switch (s.action)
        {
        case NOTHING:
            // Still in the middle of a token: just keep the character.
            currentLexeme += static_cast<char>(chr);
            break;

        case ACCEPT:
        {
            // The final character either belongs to the lexeme or is handed
            // back to the file manager, as the table entry dictates.
            if (!s.pushBack)
                currentLexeme += static_cast<char>(chr);
            else
                fileManager.pushBack();

            int type = s.token->type;

            // Integer length limit.
            if (type == INTEGER_T && currentLexeme.length() > 4)
                return TokenClass(EMPTY_T, EMPTY_ST,
                                  "Integer can have at most four digits: " + currentLexeme);

            // String length limit.
            // NOTE(review): the limit is 52 while the message says fifty;
            // presumably the lexeme includes its two delimiters — confirm.
            if (type == STRING_T && currentLexeme.length() > 52)
                return TokenClass(EMPTY_T, EMPTY_ST,
                                  "String can have at most fifty characters: " + currentLexeme);

            if (type == IDENT_T)
            {
                // Identifier length limit.
                if (currentLexeme.length() > 12)
                    return TokenClass(EMPTY_T, EMPTY_ST,
                                      "Identifier can have at most twelve characters: " + currentLexeme);

                // C string view of the lexeme for the case-insensitive comparisons.
                const char* cString = currentLexeme.c_str();

                // Word-spelled operators.
                if (_strcmpi("or", cString) == 0)
                    return TokenClass(ADDOP_T, OR_ST, currentLexeme);
                if (_strcmpi("and", cString) == 0)
                    return TokenClass(MULOP_T, AND_ST, currentLexeme);
                if (_strcmpi("div", cString) == 0)
                    return TokenClass(MULOP_T, DIV_ST, currentLexeme);
                if (_strcmpi("mod", cString) == 0)
                    return TokenClass(MULOP_T, MOD_ST, currentLexeme);

                // Reserved words. Pointers are const because string literals
                // must not be bound to a plain char*.
                static const char* const keywords[] = {
                    "program", "function", "begin", "end",
                    "if", "then", "else", "while",
                    "do", "cout", "cin", "endl",
                    "int", "boolean", "true", "false"
                };
                const int keywordCount =
                    static_cast<int>(sizeof(keywords) / sizeof(keywords[0]));

                for (int i = 0; i < keywordCount; i++)
                    if (_strcmpi(keywords[i], cString) == 0)
                        return TokenClass(KEYWORD_T, NONE_ST, currentLexeme);
            }

            return TokenClass(s.token->type, s.token->subtype, currentLexeme);
        }

        case ERROR:
        {
            if (s.actionInfo != NULL)
                return TokenClass(EMPTY_T, EMPTY_ST, *s.actionInfo);
            else // In case the error message was forgotten in the table.
                return TokenClass(EMPTY_T, EMPTY_ST, "Error");
        }

        case CLEAR_BUFFER:
            // Clear buffer, for comments only.
            currentLexeme = "";
            break;

        case COMPILER_DIRECTIVE:
            currentLexeme += static_cast<char>(chr);
            if (currentLexeme != "{$p+}" && currentLexeme != "{$p-}")
                cout << "Warning, compiler directive " + currentLexeme + " is undefined." << endl;
            break;
        }

        // Move to the next state and continue with the next character.
        state = s.state;
    } while (true);

    // Not reachable (the loop only exits through return); kept as a
    // defensive fallback so every path visibly yields a token.
    return TokenClass(EMPTY_T, EMPTY_ST, EMPTY_LEXEME);
}