void Tokenizer::tokenize(const string& line) { size_t start = 0; for (size_t i=0; i<line.length(); i++) { was_in_string = in_string; if (blank(in_string, line[i])) { continue; } if (isStartOfToken(in_string, line, i)) { onTokenStart(start, i, line[i]); } if (isEndOfToken(in_string && was_in_string, line, i)) { onTokenEnd(line.substr(start, i - start + 1)); } } if (in_string) { leftover_string += line.substr(start, line.length() - start + 1); } }
// Skips whitespace and extracts the next token from `codestr`, recording it
// via setLastToken() and advancing the stored scan position.
//
// Returns true when a token was parsed and more input remains (per isEOF()),
// false at end of input or when a malformed token is encountered (the error
// is reported on stderr).
//
// NOTE(review): on the error path neither setCurrentPosition() nor
// setLastToken() is called, so the parser state is left at the previous
// token — presumably callers treat a false return as "stop"; confirm.
bool parseForNextToken() {
    size_t pos = getCurrentPosition();
    assert( pos < codestr.size() );
    //
    // Consume whitespace
    //
    for ( ; pos < codestr.size() && isWhitespace(codestr[pos]); ++pos ) {
        // do nothing, just consume space.
    }
    //
    // End of the line?
    //
    if ( pos == codestr.size() ) {
        setLastToken( "", TOK_EOF );
        return false;
    }
    size_t startPos = pos;
    //
    // Search for the next token
    //
    TokenType type = TOK_UNKNOWN;
    bool done = false;
    bool error = false;
    // Scan one character at a time. The loop ends when a single-character
    // token has been recognized (done), whitespace is reached, or — once at
    // least one character has been consumed (pos != startPos) — a
    // token-terminating character (per isEndOfToken) is hit. Note that when
    // `done` is set, the loop's ++pos still runs, so `pos` always ends up
    // one past the last character of the token.
    for ( ; (!done) && pos < codestr.size() && (!isWhitespace(codestr[pos]))
            && ((pos == startPos) || !isEndOfToken(codestr[pos]) ); ++pos ) {
        char c = codestr[pos];
        switch ( type ) {
            case TOK_UNKNOWN:
                // First character decides the token class; '(' ')' ',' are
                // complete one-character tokens.
                if ( c == '(' ) {
                    type = TOK_OPEN;
                    done = true;
                } else if ( c == ')' ) {
                    type = TOK_CLOSE;
                    done = true;
                } else if ( c == ',' ) {
                    type = TOK_SEP;
                    done = true;
                } else if ( isNumeric( c ) ) {
                    type = TOK_NUMERIC;
                } else if ( isIdent( c ) ) {
                    type = TOK_IDENT;
                } else {
                    error = true;
                }
                break;
            case TOK_NUMERIC:
                // A numeric token must remain numeric to its end; a trailing
                // identifier character (e.g. "12x") is rejected here.
                if ( isNumeric( c ) == false ) {
                    error = true;
                }
                break;
            case TOK_IDENT:
                if ( isIdent( c ) == false ) {
                    error = true;
                }
                break;
            default:
                // Unreachable for well-formed state transitions above.
                error = true;
        }
        //
        // Was there an error while parsing the token?
        //
        if ( error ) {
            // `pos` has not been incremented past the offending character
            // yet, so the +1 makes the printed value include it.
            std::cerr << "Error while parsing token. "
                      << "start=" << startPos << ", "
                      << "pos=" << pos << ", "
                      << "type=" << toString(type) << ", "
                      << "value: " << codestr.substr( startPos, pos - startPos+1 )
                      << std::endl;
            return false;
        }
    }
    // `pos` is one past the token's last character (see loop note above).
    setLastToken( codestr.substr(startPos, pos - startPos), type );
    setCurrentPosition( pos );
    return !isEOF();
}