예제 #1
0
void TestScanner()
{
  TestBegin("Scanner");

  Scanner s;
  Token tok;

  s.From(
    "    \t\n"
    "  \ta.b = 1;\n"
    "  > < == <= >=  != =~ && || . , : ; [ ] ( ) { } = + += -= *\r\n"
    "123 0123 0xafbDDdd00\r"
    "01233999 00. 1E9 1E-9 1e+9 1.1e900\n"
    "\'fasdfa/#@$@@@\\n\'\n"
    "\"fjalksdfjal#@$@#$@#\\n\"\n"
    "_abcd\r"
  );

  while( s.NextToken(&tok) && tok.Rep() != TOK_eof ){
    tok.Print();
    std::putchar('\n');
  }

  TestEnd();
}
// Returns the next token of the input.
//
// If a token was pushed back (UnGetToken is non-NULL), that token is returned
// and the push-back slot is cleared; nothing is scanned and the _printTokens
// trace is not emitted on that path.
//
// Otherwise whitespace is skipped and exactly one token is produced:
//   - end of input          -> a newly allocated Token(EOFSYM)
//   - letter or '_'         -> GetIdent()   (identifier or reserved word)
//   - '"'                   -> GetQuotedIdent()
//   - digit, '-' or '.'     -> GetScalar()
//   - anything else         -> GetPunct()
//
// Throws ParserFatalException if the selected helper returned NULL.
// NOTE(review): the EOF token is allocated with `new` here; who is expected
// to delete returned tokens is not visible from this function -- confirm.
Token* Tokenizer::GetNext() {
  // A pushed-back token takes priority over scanning.
  if (UnGetToken != NULL) {
    Token* pending = UnGetToken;
    UnGetToken = NULL;
    return pending;
  }

  // Otherwise, crank up the scanner and get a new token.

  // Get rid of any whitespace
  SkipWhiteSpace();

  Token* T = NULL;

  // test for end of file
  if (buffer.isEOF()) {
    T = new Token(EOFSYM);

  } else {

    // Save the starting position of the symbol in a variable,
    // so that nicer error messages can be produced.
    TokenColumn = buffer.CurColumn();

    // The <ctype.h> classifiers have undefined behavior when handed a
    // negative value other than EOF, which is what a plain (signed) char
    // holding a byte >= 0x80 would be. Classify through unsigned char so
    // such bytes are handled safely; literal comparisons below still use
    // CurrentCh itself.
    const unsigned char classifyCh = static_cast<unsigned char>(CurrentCh);

    // Check kind of current character

    // Note that _'s are now allowed in identifiers.
    if (isalpha(classifyCh) || '_' == CurrentCh) {
      // grab identifier or reserved word
      T = GetIdent();
    } else if ('"' == CurrentCh) {
      T = GetQuotedIdent();
    } else if (isdigit(classifyCh) || '-' == CurrentCh || '.' == CurrentCh) {
      T = GetScalar();
    } else {
      //
      // Check for other tokens
      //

      T = GetPunct();
    }
  }

  // Every path above is expected to yield a token; a NULL here means one of
  // the helpers failed to produce one.
  if (T == NULL) {
    throw ParserFatalException("didn't get a token");
  }

  // Optional trace of each freshly scanned token.
  if (_printTokens) {
    std::cout << "Token read: ";
    T->Print();
    std::cout << std::endl;
  }

  return T;
}