/**
 * Consumes whitespace and comments from the reader.
 *
 * Three comment forms are skipped: '#' to end of line, '//' to end of line
 * (both via skipToLineEnd()) and slash-star comments (via
 * skipMultilineComment()). The function returns either at end of input or
 * after pushing back the first character that is neither whitespace nor the
 * start of a comment, so the caller reads that character next.
 *
 * `pos` is incremented for every character read and decremented for every
 * character pushed back; a '\n' resets `pos` to 0 and increments `line`.
 */
void Lexal::skipSpaces()
{
    for (;;)
    {
        if (reader.isEof())
            return;

        wchar_t current = reader.getNextChar();
        ++pos;

        // Plain whitespace: keep the line/column bookkeeping up to date.
        if (isWhiteSpace(current))
        {
            if ('\n' == current)
            {
                pos = 0;
                ++line;
            }
            continue;
        }

        // '#' starts a comment that runs to the end of the line.
        if ('#' == current)
        {
            skipToLineEnd();
            continue;
        }

        // '/' may open a '//' or slash-star comment; one character of
        // lookahead is needed to decide.
        if ('/' == current && !reader.isEof())
        {
            wchar_t lookahead = reader.getNextChar();
            ++pos;

            if ('/' == lookahead)
            {
                skipToLineEnd();
                continue;
            }
            if ('*' == lookahead)
            {
                skipMultilineComment(line, pos);
                continue;
            }

            // Not a comment after all: give the lookahead character back
            // before giving back the '/' itself below.
            --pos;
            reader.ungetChar(lookahead);
        }

        // First significant character: push it back and stop.
        --pos;
        reader.ungetChar(current);
        return;
    }
}
/* Tokenizer::readToken * Reads the next 'token' from the text & moves past it *******************************************************************/ void Tokenizer::readToken(bool toeol) { token_current.clear(); bool ready = false; qstring = false; // Increment pointer to next token while (!ready) { ready = true; // Increment pointer until non-whitespace is found while (isWhitespace(current[0])) { // Return if end of text found if (!incrementCurrent()) return; } // Skip C-style comments if (comments & CCOMMENTS) { // Check if we have a line comment if (current + 1 < end && current[0] == '/' && current[1] == '/') { ready = false; // DECORATE //$ handling if (!decorate) skipLineComment(); else if (current + 2 < end && current[2] != '$') skipLineComment(); else ready = true; } // Check if we have a multiline comment if (current + 1 != end && current[0] == '/' && current[1] == '*') { skipMultilineComment(); // Skip it ready = false; } } // Skip '##' comments if (comments & DCOMMENTS) { if (current + 1 != end && current[0] == '#' && current[1] == '#') { skipLineComment(); // Skip it ready = false; } } // Skip '#' comments if (comments & HCOMMENTS) { if (current + 1 != end && current[0] == '#') { skipLineComment(); // Skip it ready = false; } } // Skip ';' comments if (comments & SCOMMENTS) { if (current[0] == ';') { skipLineComment(); // Skip it ready = false; } } // Check for end of text if (position == size) return; } // Init token delimiters t_start = position; t_end = position; // If we're at a special character, it's our token if (isSpecialCharacter(current[0])) { token_current += current[0]; t_end = position + 1; incrementCurrent(); return; } // Now read the token if (current[0] == '\"') // If we have a literal string (enclosed with "") { qstring = true; // Skip opening " incrementCurrent(); // Read literal string (include whitespace) while (current[0] != '\"') { //if (position < size - 1 && current[0] == '\\' && current[1] == '\"') if (current[0] == '\\') 
incrementCurrent(); token_current += current[0]; if (!incrementCurrent()) return; } // Skip closing " incrementCurrent(); } else { // Read token (don't include whitespace) while (!((!toeol && isWhitespace(current[0])) || current[0] == '\n')) { // Return if special character found if (!toeol && isSpecialCharacter(current[0])) return; // Add current character to the token token_current += current[0]; // Return if end of text found if (!incrementCurrent()) return; } } // Write token to log if debug mode enabled if (debug) wxLogMessage(token_current); // Return the token return; }
void Lexer::tokenize() { while (location.ptr < eof) { bool newLine = false; char c = *location.ptr; switch (c) { case ' ': case '\t': case '\r': storePreviousToken(); break; case '\n': newLine = true; storePreviousToken(); storeToken(Token::Newline); break; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case '_': switch (state) { case Idle: state = GettingIdentifier; start = location; break; case GettingIntegerNumber: case GettingFloatingPointNumber: storeToken(Token::Invalid); break; default: break; } break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': switch (state) { case Idle: state = GettingIntegerNumber; start = location; break; default: break; } break; case '/': storePreviousToken(); if (isNextChar('/', location.ptr)) { skipUntilNewline(); continue; } else if (isNextChar('*', location.ptr)) { skipMultilineComment(); continue; } else { makeOperatorToken(location); } break; case '=': case '!': case '+': case '-': case '*': case '%': case '.': case ',': case '>': case '<': case ':': case ';': case '?': case '|': case '&': case '^': case '~': case '(': case ')': case '{': case '}': case '[': case ']': if (c == '.' 
&& !isNextChar('.') && state == GettingIntegerNumber) { state = GettingFloatingPointNumber; } else { storePreviousToken(); makeOperatorToken(location); } break; case '"': storePreviousToken(); makeStringLiteral(location.ptr); continue; case '\'': storePreviousToken(); makeCharLiteral(); continue; default: storeToken(Token::Invalid); break; } if (newLine) { location.stepLine(); } else { location.stepColumn(); } } storeToken(Token::Eof); }
/*
 * removeComments
 *
 * Builds a copy of `source` with "//" line comments and slash-star block
 * comments left out. Every other character is appended to the result, one at
 * a time, through stradd().
 *
 * Parameters:
 *   source - NUL-terminated text to filter. It is only read, never modified.
 *
 * Returns:
 *   The filtered text as produced by successive stradd() calls. When nothing
 *   is appended (empty input, or input consisting only of comments) the
 *   initial string literal is returned, so the caller must not assume the
 *   result is writable or heap-allocated. Returns NULL when `source` is NULL.
 *
 * Notes:
 *   - A block comment whose end is not found (skipMultilineComment() returns
 *     -1) is not treated as a comment: its opening two characters are
 *     emitted and the text after them is processed normally.
 *   - String and character literals are not recognised, so a comment marker
 *     inside a literal is treated as a comment.
 *   - NOTE(review): stradd() is assumed to return a string holding the old
 *     contents plus the new character without freeing or writing to its
 *     argument (the first call receives a string literal) - confirm.
 *   - NOTE(review): skipSingleLineComment()/skipMultilineComment() are
 *     assumed to return the index of the last character they skipped, since
 *     the loop increment runs after `i` is assigned - confirm.
 */
char * removeComments(char* source)
{
    const int DEBUG = 0;
    const char FIRST_COMMENT_SYMBOL = '/';
    const char SINGLELINE = '/';
    const char MULTILINE = '*';

    char* result = "\0";
    int pendingSlash = 0; /* previous character was a '/' not yet emitted */
    int sourceLen = 0;
    int i = 0;

    if (source == NULL)
    {
        return NULL;
    }

    if (DEBUG)
    {
        printf("ORIGINAL:[%s]\n", source);
    }

    sourceLen = (int)strlen(source);

    for (i = 0; i < sourceLen; i++)
    {
        char curChar = source[i];

        if (pendingSlash)
        {
            pendingSlash = 0;

            if (curChar == SINGLELINE)
            {
                i = skipSingleLineComment(i + 1, sourceLen, source);
            }
            else if (curChar == MULTILINE)
            {
                int rez = skipMultilineComment(i + 1, sourceLen, source);

                if (rez == -1)
                {
                    /* No terminator: put back the two characters that
                       looked like a comment opener. */
                    result = stradd(result, FIRST_COMMENT_SYMBOL);
                    result = stradd(result, MULTILINE);
                }
                else
                {
                    i = rez;
                }
            }
            else
            {
                /* The '/' did not start a comment. Emit it AND the
                   character that follows it; the latter used to be lost. */
                result = stradd(result, FIRST_COMMENT_SYMBOL);
                result = stradd(result, curChar);
            }
        }
        else if (curChar == FIRST_COMMENT_SYMBOL)
        {
            /* Hold the '/' back until the next character shows whether
               it opens a comment. */
            pendingSlash = 1;
        }
        else
        {
            result = stradd(result, curChar);
        }

        if (DEBUG == 2)
        {
            printf("RESULT:[%s]\n", result);
        }
    }

    /* A '/' that was the very last character cannot open a comment;
       emit it instead of dropping it. */
    if (pendingSlash)
    {
        result = stradd(result, FIRST_COMMENT_SYMBOL);
    }

    if (DEBUG)
    {
        printf("RESULT:[%s]", result);
    }

    return result;
}