int GetNewToken( char *pcToken) /******************************************************************************/ /* get a new token from the input file. Don't check for a put back token */ /* characters are classified as: */ /* cr :== carriage return character */ /* lf :== line feed character */ /* tab :== the tab character */ /* dquote :== the " character */ /* digit :== 0123456789 */ /* bindigit :== 01 */ /* hexdigit :== 0123456789ABCDEF */ /* alpha :== _abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ */ /* any :== all prtble chars except cr, lf and dqoute */ /* tab :== the tab character */ /* alphanum :== alpha | digit */ /* */ /* <x> :== zero or more x */ /* [x] :== one or more x */ /* x :== exactly one x */ /* */ /* recognized tokens are : */ /* */ /* FLOAT_TOKEN :== <digit> '.' [digit] */ /* DEC_NUMBER_TOKEN :== [digit] */ /* HEX_NUMBER_TOKEN :== '0' 'x' [hexdigit] */ /* BIN_NUMBER_TOKEN :== '0' 'b' [bindigit] */ /* IDENTIFIER_TOKEN :== alpha <alphanum> */ /* STRING_TOKEN :== dquote <any> dquote */ /* OPERATOR_TOKEN :== '=' */ /* :== '=' '=' */ /* :== '<' */ /* :== '>' */ /* :== '<' '=' */ /* :== '>' '=' */ /* :== '<' '>' */ /* :== '+' */ /* :== '-' */ /* :== '*' */ /* :== '/' */ /* :== '<' '<' */ /* :== '>' '>' */ /* :== '&' */ /* :== '|' */ /* :== '[' ']' */ /* :== '[' */ /* :== ']' */ /* DOT_TOKEN :== '.' */ /* DOTDOT_TOKEN :== '.' '.' */ /* COMMA_TOKEN :== ',' */ /* SEMICOLON_TOKEN :== ';' */ /* COLON_TOKEN :== ':' */ /* LEFT_ARROW_TOKEN :== '<' '-' */ /* RIGHT_ARROW_TOKEN :== '-' '>' */ /* OPEN_PARENTHESE_TOKEN :== '(' */ /* CLOSE_PARENTHESE_TOKEN :== ')' */ /* PRECOMPILER_TOKEN :== '#' [alpha] */ /* DELIMITER_TOKEN :== ' ' */ /* :== tab */ /* :== cr */ /* :== lf */ /******************************************************************************/ { char cCharacter; int iResult; int iCurrentLine; boolean bEoLn; char *pcTokenWalker; pcTokenWalker = pcToken; cCharacter = GetChar(); /* first skip all delimiter chars */ while (chrpos((char) cCharacter, DELIMITER_CHARS) != NULL) { cCharacter = GetChar(); } if (isDigit(cCharacter)) { /* read a DEC_NUMBER_TOKEN */ /* or read a HEX_NUMBER_TOKEN */ /* or read a BIN_NUMBER_TOKEN */ iResult = GetNumberToken(cCharacter, pcToken); } else if (isAlpha(cCharacter)) { /* read an IDENTIFIER_TOKEN */ iResult = GetIdentifierToken(cCharacter, pcToken); } else if (cCharacter == '"') { /* read a STRING_TOKEN */ iResult = GetStringToken(cCharacter, pcToken); } else if (chrpos(cCharacter, "=<>+-*/&|[].") != NULL) { /* read an OPERATOR_TOKEN */ iResult = GetOperatorToken(cCharacter, pcToken); if (strcmp((char *) pcToken, "->") == 0) iResult = RIGHT_ARROW_TOKEN; else if (strcmp((char *) pcToken, "<-") == 0) iResult = LEFT_ARROW_TOKEN; else if (strcmp((char *) pcToken, "..") == 0) iResult = DOTDOT_TOKEN; else if (strcmp((char *) pcToken, "//") == 0) { iCurrentLine = CurrentLine(); /* bEoLn (EndOfLine) will be set to true if end of line is reached */ /* undetected because of end of file. (This will also happen in the */ /* unlikely situation that an inputfile contains something like: */ /* #define COMMENT //this is comment */ /* because while parsing a preprocessor macro, the scanner refuses to */ /* go to the next line. */ bEoLn = FALSE; while ((iCurrentLine == CurrentLine()) && (!bEoLn)) { cCharacter = GetChar(); if ((cCharacter != ILLEGAL_CHAR) && (cCharacter != '\0')) { *(pcToken++) = cCharacter; } else { bEoLn = TRUE; } } iResult = COMMENT_TOKEN; if (!bEoLn) { /* End of comment detected by reading a Cr/Lf character. This */ /* read character is added to pcToken. */ /* Put cr/lf back, only to adjust the line counter... */ /* (only needed if not in the middle of a macro.) */ UngetChar(); /* Delete cr or lf character from pcToken by overwriting it with */ /* the zero character (for terminating pcToken) */ *(--pcToken) = 0; } else { /* Terminate pcToken by adding a zero character */ *(pcToken) = 0; } } } else if (cCharacter == '#') { /* read a PRECOMPILER_TOKEN */ iResult = GetPrecompilerToken(cCharacter, pcToken); } else if (cCharacter == '{') { iResult = OPEN_BRACKETS_TOKEN; (*pcTokenWalker++) = cCharacter; *pcTokenWalker = 0; } else if (cCharacter == '}') { iResult = CLOSE_BRACKETS_TOKEN; (*pcTokenWalker++) = cCharacter; *pcTokenWalker = 0; } else if (cCharacter == '(') { iResult = OPEN_PARENTHESES_TOKEN; (*pcTokenWalker++) = cCharacter; *pcTokenWalker = 0; } else if (cCharacter == ')') { iResult = CLOSE_PARENTHESES_TOKEN; (*pcTokenWalker++) = cCharacter; *pcTokenWalker = 0; } else if (cCharacter == '.') { iResult = DOT_TOKEN; (*pcTokenWalker++) = cCharacter; cCharacter = GetChar(); while (isDigit(cCharacter)) { iResult = FLOAT_TOKEN; (*pcTokenWalker++) = cCharacter; cCharacter = GetChar(); } UngetChar(); *pcTokenWalker = 0; } else if (cCharacter == ',') { iResult = COMMA_TOKEN; (*pcTokenWalker++) = cCharacter; *pcTokenWalker = 0; } else if (cCharacter == ';') { iResult = SEMICOLON_TOKEN; (*pcTokenWalker++) = cCharacter; *pcTokenWalker = 0; } else if (cCharacter == ':') { iResult = COLON_TOKEN; (*pcTokenWalker++) = cCharacter; *pcTokenWalker = 0; } else if (cCharacter == '~') { iResult = TILDE_TOKEN; (*pcTokenWalker++) = cCharacter; *pcTokenWalker = 0; } else if (cCharacter == '\\') { iResult = BACKSLASH_TOKEN; (*pcTokenWalker++) = cCharacter; *pcTokenWalker = 0; } else if (cCharacter == 0) { /* End of file reached. */ iResult = END_OF_FILE; (*pcTokenWalker++) = cCharacter; } else { /* unexpected character read */ iResult = UNEXPECTED_CHAR_READ; (*pcTokenWalker++) = cCharacter; *pcTokenWalker = 0; } return iResult; }
bool kexLexer::Find(void) { char c = 0; int comment = COMMENT_NONE; ClearToken(); while(CheckState()) { c = GetChar(); if(comment == COMMENT_NONE) { if(c == '/') { char gc = GetChar(); if(gc != '/' && gc != '*') { Rewind(); } else { if(gc == '*') { comment = COMMENT_MULTILINE; } else { comment = COMMENT_SINGLELINE; } } } } else if(comment == COMMENT_MULTILINE) { if(c == '*') { char gc = GetChar(); if(gc != '/') { Rewind(); } else { comment = COMMENT_NONE; continue; } } } if(comment == COMMENT_NONE) { byte bc = ((byte)c); if(parser.CharCode()[bc] != CHAR_SPECIAL) { switch(parser.CharCode()[bc]) { case CHAR_NUMBER: GetNumberToken(c); return true; case CHAR_LETTER: GetLetterToken(c); return true; case CHAR_QUOTE: GetStringToken(); return true; case CHAR_SYMBOL: GetSymbolToken(c); return true; case CHAR_EOF: tokentype = TK_EOF; #ifdef SC_DEBUG SC_DebugPrintf("EOF token\n"); #endif return true; default: break; } } } if(c == '\n') { linepos++; rowpos = 1; if(comment == COMMENT_SINGLELINE) { comment = COMMENT_NONE; } } } return false; }