Example #1
0
// Splits the NUL-terminated string `input` into tokens and reports each one
// through emit_token(first, one_past_last, type).
//
// Token classes recognised:
//   - TOK_IDENTIFIER: a maximal run of characters accepted by isalpha()
//   - TOK_INTEGER:    a maximal run of characters accepted by isdigit()
//   - TOK_UNKNOWN:    any other single character (emitted one at a time)
//
// Only the space character ' ' is skipped between tokens; other whitespace
// such as tabs or newlines is reported as TOK_UNKNOWN. Scanning stops, with
// no token emitted, when the terminating '\0' is reached.
void tokenize(char *input){
	// TODO: support operators
	char *start;

	while(1){
		// Skip the spaces separating tokens.
		while(*input == ' '){
			input++;
		}
		start = input;
		// The <ctype.h> classifiers require an argument representable as
		// unsigned char (or EOF). Plain char may be signed, so bytes >= 0x80
		// would be passed as negative values, which is undefined behaviour;
		// hence the casts below.
		if(isalpha((unsigned char)*input)){
			while(isalpha((unsigned char)*input)){
				input++;
			}
			emit_token(start, input, TOK_IDENTIFIER);
		} else if(isdigit((unsigned char)*input)){
			while(isdigit((unsigned char)*input)){
				input++;
			}
			emit_token(start, input, TOK_INTEGER);
		} else if(*input == '\0'){
			// End of input: nothing left to emit.
			return;
		} else {
			// Unrecognised character: consume exactly one and report it.
			input++;
			emit_token(start, input, TOK_UNKNOWN);
		}
	}
}
Example #2
0
bool VLogLexer::next_token() {
    bool         whitespace;
    bool         kwid_char;
    bool         seen_match;
    unsigned int current_match;

    while(true) {
        switch(lex_state) {
            case LEX_KWID:
                seen_match = false;

                for(int i = 0;i < num_token_mappings; ++i) {
                    if(live_token_mappings[i]) {
                        char token_char = token_mapping_table[i].str[token_buf_pos];

                        if(current_char == token_char) {
                            if(token_mapping_table[i].str[token_buf_pos + 1] == 0) {
                                seen_match    = true;
                                current_match = i;
                            }
                        } else {
                            live_token_mappings[i] = false;
                        }
                    }
                }
                break;
            case LEX_NON_KWID:
                switch(current_char) {
                    case '@': 
                        emit_token(TOKEN_AT);
                        break;
                    case '(':
                        emit_token(TOKEN_LBRACKET);
                        break;
                    case ')':
                        emit_token(TOKEN_RBRACKET);
                        break;
                    case '[':
                        emit_token(TOKEN_LSQBRACKET);
                        break;
                    case ']':
                        emit_token(TOKEN_RSQBRACKET);
                        break;
                    case '{':
                        emit_token(TOKEN_LBRACE);
                        break;
                    case '}':
                        emit_token(TOKEN_RBRACE);
                        break;
                    case '>':
                        lex_state = LEX_GT;
                        break;
                    case '<':
                        lex_state = LEX_LT;
                        break;
                    case ',':
                        emit_token(TOKEN_COMMA);
                        break;
                    case ';':
                        emit_token(TOKEN_SEMICOLON);
                        break;
                    case '=':
                        emit_token(TOKEN_EQ);
                        break;
                    default:
                        throw TokenErr("Unexpected character", line_char, current_line);
                        break;
                }
                break;
        }

        if(lex_state == LEX_KWID) {
            token_buffer[token_buf_pos] = current_char;
            token_buf_pos++;
        }

        whitespace = consume_char();
        kwid_char  = is_kwid_char(current_char);

        if(lex_state == LEX_KWID) {
            if(whitespace || !kwid_char) {
                if(seen_match) {
                    emit_token(token_mapping_table[current_match].type);
                } else {
                    emit_token(TOKEN_ID);
                }
                
                lex_state = kwid_char ? LEX_KWID : LEX_NON_KWID;

                return !eof;
            }
        } else {
            lex_state = kwid_char ? LEX_KWID : LEX_NON_KWID;

            return  !eof;
        }

    }
}