void TestScanner() { TestBegin("Scanner"); Scanner s; Token tok; s.From( " \t\n" " \ta.b = 1;\n" " > < == <= >= != =~ && || . , : ; [ ] ( ) { } = + += -= *\r\n" "123 0123 0xafbDDdd00\r" "01233999 00. 1E9 1E-9 1e+9 1.1e900\n" "\'fasdfa/#@$@@@\\n\'\n" "\"fjalksdfjal#@$@#$@#\\n\"\n" "_abcd\r" ); while( s.NextToken(&tok) && tok.Rep() != TOK_eof ){ tok.Print(); std::putchar('\n'); } TestEnd(); }
/**
 * Returns the next token of the input.
 *
 * If a token was pushed back earlier (UnGetToken is non-NULL) that token is
 * returned and the push-back slot is cleared; this path returns immediately
 * and does not emit the optional "Token read:" trace.
 *
 * Otherwise whitespace is skipped and a fresh token is produced:
 *   - end of input            -> a new Token(EOFSYM)
 *   - letter or '_'           -> GetIdent()       (identifier or reserved word)
 *   - '"'                     -> GetQuotedIdent()
 *   - digit, '-' or '.'       -> GetScalar()
 *   - anything else           -> GetPunct()
 *
 * @return a non-NULL pointer to the token.
 * @throws ParserFatalException if none of the sub-scanners produced a token.
 *
 * NOTE(review): the EOF token is allocated with `new` here; ownership of the
 * returned pointer presumably passes to the caller -- confirm against callers.
 */
Token* Tokenizer::GetNext()
{
    // A previously un-got token takes priority over scanning new input.
    if (UnGetToken != NULL) {
        Token* pushedBack = UnGetToken;
        UnGetToken = NULL;
        return pushedBack;
    }

    // Otherwise, crank up the scanner and get a new token.
    SkipWhiteSpace();

    Token* T = NULL;

    if (buffer.isEOF()) {
        T = new Token(EOFSYM);
    } else {
        // Remember where the symbol starts so that nicer error messages
        // can be produced.
        TokenColumn = buffer.CurColumn();

        // The <cctype> classifiers require a value representable as
        // unsigned char (or EOF); passing a negative plain char is undefined
        // behaviour, so normalise the character before classifying it.
        const int classCh = static_cast<unsigned char>(CurrentCh);

        // Note that _'s are allowed in identifiers.
        if (isalpha(classCh) || '_' == CurrentCh) {
            // Identifier or reserved word.
            T = GetIdent();
        } else if ('"' == CurrentCh) {
            T = GetQuotedIdent();
        } else if (isdigit(classCh) || '-' == CurrentCh || '.' == CurrentCh) {
            T = GetScalar();
        } else {
            // Everything else is treated as punctuation.
            T = GetPunct();
        }
    }

    if (T == NULL) {
        throw ParserFatalException("didn't get a token");
    }

    // Optional trace of every freshly scanned token.
    if (_printTokens) {
        std::cout << "Token read: ";
        T->Print();
        std::cout << std::endl;
    }

    return T;
}