void tokenize(char *input){ // TODO: support operators char *start; while(1){ while(*input == ' '){ input++; } start = input; if(isalpha(*input)){ while(isalpha(*input)){ input++; } emit_token(start, input, TOK_IDENTIFIER); } else if(isdigit(*input)){ while(isdigit(*input)){ input++; } emit_token(start, input, TOK_INTEGER); } else if(*input == '\0'){ return; } else { input++; emit_token(start, input, TOK_UNKNOWN); } } }
bool VLogLexer::next_token() { bool whitespace; bool kwid_char; bool seen_match; unsigned int current_match; while(true) { switch(lex_state) { case LEX_KWID: seen_match = false; for(int i = 0;i < num_token_mappings; ++i) { if(live_token_mappings[i]) { char token_char = token_mapping_table[i].str[token_buf_pos]; if(current_char == token_char) { if(token_mapping_table[i].str[token_buf_pos + 1] == 0) { seen_match = true; current_match = i; } } else { live_token_mappings[i] = false; } } } break; case LEX_NON_KWID: switch(current_char) { case '@': emit_token(TOKEN_AT); break; case '(': emit_token(TOKEN_LBRACKET); break; case ')': emit_token(TOKEN_RBRACKET); break; case '[': emit_token(TOKEN_LSQBRACKET); break; case ']': emit_token(TOKEN_RSQBRACKET); break; case '{': emit_token(TOKEN_LBRACE); break; case '}': emit_token(TOKEN_RBRACE); break; case '>': lex_state = LEX_GT; break; case '<': lex_state = LEX_LT; break; case ',': emit_token(TOKEN_COMMA); break; case ';': emit_token(TOKEN_SEMICOLON); break; case '=': emit_token(TOKEN_EQ); break; default: throw TokenErr("Unexpected character", line_char, current_line); break; } break; } if(lex_state == LEX_KWID) { token_buffer[token_buf_pos] = current_char; token_buf_pos++; } whitespace = consume_char(); kwid_char = is_kwid_char(current_char); if(lex_state == LEX_KWID) { if(whitespace || !kwid_char) { if(seen_match) { emit_token(token_mapping_table[current_match].type); } else { emit_token(TOKEN_ID); } lex_state = kwid_char ? LEX_KWID : LEX_NON_KWID; return !eof; } } else { lex_state = kwid_char ? LEX_KWID : LEX_NON_KWID; return !eof; } } }