// ---------------------------------------------------------------------------- // Tokenizer::tokenizeUnknown // // Process the current unknown character // ---------------------------------------------------------------------------- void Tokenizer::tokenizeUnknown() { // Whitespace if (isWhitespace(data_[state_.position])) { state_.state = TokenizeState::State::Whitespace; ++state_.position; return; } // Comment state_.comment_type = checkCommentBegin(); if (state_.comment_type > 0) { state_.state = TokenizeState::State::Comment; if (state_.comment_type == Hash || state_.comment_type == Shell) ++state_.position; else state_.position += 2; return; } // Special character if (isSpecialCharacter(data_[state_.position])) { // End token state_.current_token.line_no = state_.current_line; state_.current_token.quoted_string = false; state_.current_token.pos_start = state_.position; ++state_.position; state_.done = true; return; } // Quoted string if (data_[state_.position] == '\"') { // Skip " ++state_.position; // Begin token state_.current_token.line_no = state_.current_line; state_.current_token.quoted_string = true; state_.current_token.pos_start = state_.position; state_.state = TokenizeState::State::Token; return; } // Token state_.current_token.line_no = state_.current_line; state_.current_token.quoted_string = false; state_.current_token.pos_start = state_.position; state_.state = TokenizeState::State::Token; }
/* Extracts the word found at the current cursor position (line, column) of
   the buffer and returns it, leaving 'column' on the first character that
   does not belong to the word.

   A word is a run of characters that ends at a line end, a space, a special
   character or the end of the line's text. An empty string is returned when
   the cursor is already on one of those delimiters or past the end of the
   line.

   NOTE(review): 'line' is used to index 'buffer' without a range check here;
   callers are presumably expected to keep it valid. */
QString SchemaParser::getWord(void)
{
	QString word;

	//Gets the current line buffer
	const QString current_line=buffer[line];
	const int line_len=current_line.size();

	/* Extract the word while the cursor is inside the line and the current
	   character is not end of line, special character or space. The length
	   check keeps the scan from running past the text of a line that has no
	   terminating delimiter. A leading special character stops the loop
	   immediately, so no separate test for it is needed. */
	while(static_cast<int>(column) < line_len &&
				current_line[column]!=CHR_LINE_END &&
				!isSpecialCharacter(current_line[column].toAscii()) &&
				current_line[column]!=CHR_SPACE)
	{
		word+=current_line[column];
		column++;
	}

	return(word);
}
// ---------------------------------------------------------------------------- // Tokenizer::tokenizeToken // // Process the current token character // ---------------------------------------------------------------------------- void Tokenizer::tokenizeToken() { // Quoted string if (state_.current_token.quoted_string) { // Check for closing " if (data_[state_.position] == '\"') { // Skip to character after closing " and end token state_.state = TokenizeState::State::Unknown; state_.done = true; return; } // Escape backslash if (data_[state_.position] == '\\') ++state_.position; // Continue token ++state_.position; return; } // Check for end of token if (isWhitespace(data_[state_.position]) || // Whitespace isSpecialCharacter(data_[state_.position]) || // Special character checkCommentBegin() > 0) // Comment { // End token state_.state = TokenizeState::State::Unknown; state_.done = true; return; } // Continue token ++state_.position; }
/* Tokenizer::readToken * Reads the next 'token' from the text & moves past it *******************************************************************/ void Tokenizer::readToken(bool toeol) { token_current.clear(); bool ready = false; qstring = false; // Increment pointer to next token while (!ready) { ready = true; // Increment pointer until non-whitespace is found while (isWhitespace(current[0])) { // Return if end of text found if (!incrementCurrent()) return; } // Skip C-style comments if (comments & CCOMMENTS) { // Check if we have a line comment if (current + 1 < end && current[0] == '/' && current[1] == '/') { ready = false; // DECORATE //$ handling if (!decorate) skipLineComment(); else if (current + 2 < end && current[2] != '$') skipLineComment(); else ready = true; } // Check if we have a multiline comment if (current + 1 != end && current[0] == '/' && current[1] == '*') { skipMultilineComment(); // Skip it ready = false; } } // Skip '##' comments if (comments & DCOMMENTS) { if (current + 1 != end && current[0] == '#' && current[1] == '#') { skipLineComment(); // Skip it ready = false; } } // Skip '#' comments if (comments & HCOMMENTS) { if (current + 1 != end && current[0] == '#') { skipLineComment(); // Skip it ready = false; } } // Skip ';' comments if (comments & SCOMMENTS) { if (current[0] == ';') { skipLineComment(); // Skip it ready = false; } } // Check for end of text if (position == size) return; } // Init token delimiters t_start = position; t_end = position; // If we're at a special character, it's our token if (isSpecialCharacter(current[0])) { token_current += current[0]; t_end = position + 1; incrementCurrent(); return; } // Now read the token if (current[0] == '\"') // If we have a literal string (enclosed with "") { qstring = true; // Skip opening " incrementCurrent(); // Read literal string (include whitespace) while (current[0] != '\"') { //if (position < size - 1 && current[0] == '\\' && current[1] == '\"') if (current[0] == '\\') 
incrementCurrent(); token_current += current[0]; if (!incrementCurrent()) return; } // Skip closing " incrementCurrent(); } else { // Read token (don't include whitespace) while (!((!toeol && isWhitespace(current[0])) || current[0] == '\n')) { // Return if special character found if (!toeol && isSpecialCharacter(current[0])) return; // Add current character to the token token_current += current[0]; // Return if end of text found if (!incrementCurrent()) return; } } // Write token to log if debug mode enabled if (debug) wxLogMessage(token_current); // Return the token return; }
// Reads the next word from 'buffer', starting at 'lexemeBegin', and returns it.
//
// The first character decides the kind of word: if isDigitOrChar() accepts it,
// the word is a run of such characters, otherwise it is a run of characters
// that isDigitOrChar() rejects. A special character met while the word is
// still empty is returned as a one-character word. 'line' is incremented for
// every '\n' read, and 'lexemeBegin' / 'forward' are advanced as characters
// are consumed.
//
// NOTE(review): 'ch' is a plain char compared against EOF; whether that can
// ever match depends on the signedness of char and the buffer contents.
string Lexer::GetNextWord()
{
	string word;

	char ch = buffer[lexemeBegin];
	if (ch == '\n')
	{
		line++;
	}

	// Character class of the word being read, fixed by its first character
	const bool wordIsAlnum = isDigitOrChar(ch);

	while (ch != EOF && !isEndOfFile(ch))
	{
		// A special character on its own forms the whole word; the cursor
		// members are left untouched in this case
		if (isSpecialCharacter(ch) && word.empty())
		{
			word.push_back(ch);
			break;
		}

		// An empty character ends a word that has already been started
		if (isEmptyCharacter(ch) && !word.empty())
		{
			break;
		}

		// Character of the other class: step the cursor back one position
		// (when possible) and undo the line count if that lands on a newline
		if (isDigitOrChar(ch) != wordIsAlnum)
		{
			if (lexemeBegin > 0)
			{
				--lexemeBegin;
				--forward;
			}

			const char previous = buffer[forward];
			if (previous == '\n')
			{
				line--;
			}
			break;
		}

		word.push_back(ch);

		// Advance to the next character
		lexemeBegin = forward;
		forward++;
		ch = buffer[lexemeBegin];
		if (ch == '\n')
		{
			line++;
		}
	}

	return word;
}