END_TEST


/*
 * Token_free() must release a token together with the heap-allocated
 * name string it owns (set here via safe_strdup).
 */
START_TEST (test_Token_free)
{
  Token_t *token = Token_create();

  token->type       = TT_NAME;
  token->value.name = safe_strdup("foo");

  Token_free(token);
}
/*
 * Returns all tokens delimited by the given beginning and ending delimiters.
 *
 * Scans parser->token_list; each time a token equal to beg.value is found,
 * copies every token from that position onward into out[group_index] until
 * the number of beg matches equals the number of end matches seen so far,
 * then advances to the next output group.
 *
 * NOTE(review): the outer loop resumes at i+1, so a beg token nested inside
 * an already-copied group starts a new (overlapping) group — TODO confirm
 * this is intended.  open_count/close_count are cumulative across groups;
 * they balance out at each group boundary, so the comparison still works.
 *
 * @param parser  parser whose token_list is scanned (NULL is a no-op)
 * @param beg     beginning delimiter symbol
 * @param end     ending delimiter symbol
 * @param out     array of destination lists, one per group (NULL is a no-op);
 *                caller must provide enough lists for all groups found
 */
static void get_delimited_tokens(const Parser *parser, const Symbol beg,
                                 const Symbol end, List **out)
{
    if (parser == NULL || out == NULL) {
        return;
    }

    Token *token;
    token = NULL;

    unsigned int i, open_count, close_count, group_index;
    i = 0;
    open_count = 0;
    close_count = 0;
    group_index = 0;

    while ((token = List_get_data(parser->token_list, i)) != NULL) {
        if (strcmp(token->value, beg.value) == 0) {
            /* Found a group opener: copy tokens until delimiters balance. */
            unsigned int j;
            j = i;

            Token *temp;
            temp = NULL;

            while ((temp = List_get_data(parser->token_list, j)) != NULL) {
                /* Each output token is a fresh copy owned by the out list. */
                Token *match;
                match = Token_create(temp->value, temp->begin, temp->end,
                                     temp->type);
                List_push_back(out[group_index], match);

                if (strcmp(temp->value, beg.value) == 0) {
                    ++open_count;
                }
                if (strcmp(temp->value, end.value) == 0) {
                    ++close_count;
                }
                /* Balanced delimiters close the current group. */
                if (open_count == close_count) {
                    ++group_index;
                    break;
                }
                ++j;
            }
        }
        ++i;
    }
}
/**
 * @return the next token in the formula string.  If no more tokens are
 * available, the token type will be TT_END.
 */
LIBSBML_EXTERN
Token_t *
FormulaTokenizer_nextToken (FormulaTokenizer_t *ft)
{
  Token_t *t = Token_create();
  char     c = ft->formula[ ft->pos ];

  /* Skip leading whitespace. */
  while (isspace(c))
  {
    c = ft->formula[ ++ft->pos ];
  }

  switch (c)
  {
    case '\0':
      /* End of formula: do not advance past the terminator. */
      t->type     = TT_END;
      t->value.ch = c;
      break;

    /* Single-character operators and punctuation: the token type is the
     * character itself. */
    case '+': case '-': case '*': case '/':
    case '^': case '(': case ')': case ',':
      t->type     = (TokenType_t) c;
      t->value.ch = c;
      ft->pos++;
      break;

    default:
      if (isalpha(c) || c == '_')
      {
        /* Identifier / function name. */
        FormulaTokenizer_getName(ft, t);
      }
      else if (c == '.' || isdigit(c))
      {
        /* Integer or real number (possibly starting with a dot). */
        FormulaTokenizer_getNumber(ft, t);
      }
      else
      {
        /* Anything else is reported as a single unknown character. */
        t->type     = TT_UNKNOWN;
        t->value.ch = c;
        ft->pos++;
      }
      break;
  }

  /* Names spelling NaN/Inf are normalized to their numeric token forms. */
  if (t->type == TT_NAME)
  {
    Token_convertNaNInf(t);
  }

  return t;
}
void __CUT__Token_create( void ) { CORD data = CORD_from_char_star("100"); Token *tk = Token_create(TK_INT, data, NULL); ASSERT(tk->id == TK_INT, "wrong id."); ASSERT(tk->value == 100, "wrong value."); tk = Token_create(TK_CHR, CORD_from_char_star("'A'"), NULL); ASSERT(tk->id == TK_CHR, "wrong id."); ASSERT(tk->value == 'A', "wrong value."); tk = Token_create(TK_FLOAT, CORD_from_char_star("1.1"), NULL); ASSERT(tk->id == TK_FLOAT, "wrong id."); ASSERT(tk->value <= 2.0f && tk->value >= 0.0f, "wrong value"); tk = Token_create(TK_HEX, CORD_from_char_star("0xAE"), NULL); ASSERT(tk->id == TK_HEX, "wrong id."); ASSERT(tk->value == 0xAE, "wrong value"); tk = Token_create(TK_REG, CORD_from_char_star("R0"), NULL); ASSERT(tk->id = TK_REG, "wrong id."); ASSERT(tk->value == 0, "wrong value."); }
END_TEST


/*
 * Token_create() must return a token whose type is TT_UNKNOWN and whose
 * value and exponent fields all read as zero.
 *
 * NOTE(review): ch/name/integer/real all live under t->value, which looks
 * like a union; these checks rely on every member of a zero-initialized
 * union reading as zero — TODO confirm against Token_t's declaration.
 */
START_TEST (test_Token_create)
{
  Token_t *t = Token_create();

  fail_unless( t->type == TT_UNKNOWN );
  fail_unless( t->value.ch == '\0' );
  fail_unless( t->value.name == NULL );
  fail_unless( t->value.integer == 0 );
  fail_unless( t->value.real == 0.0 );
  fail_unless( t->exponent == 0 );

  Token_free(t);
}
/*
 * Tokenize and parse a command-line argument string into cmd.
 *
 * Resets cmd's error/token state, runs the params tokenizer over args, and
 * — if fewer than two tokens were produced — injects DEFAULT_COMMAND as an
 * identifier token before handing off to Command_parse().
 *
 * @param args  raw argument string to tokenize
 * @param cmd   output command; cmd->error is set to 1 on failure
 * @return Command_parse()'s result on success, -1 on tokenizer error
 */
int cli_params_parse_args(bstring args, Command *cmd)
{
    struct params params;

    cmd->error = 0;
    cmd->token_count = 0;

    /* BUG FIX: the four `&params` arguments below had been corrupted into
     * the mojibake `¶ms` (HTML-entity mangling of "&para"+"ms"), which
     * does not compile.  Restored the address-of expressions. */
    cli_params_init(&params);
    cli_params_execute(&params, args);
    int rc = cli_params_finish(&params);
    check(rc == 1, "error processing arguments: %d", rc);

    /* Too few tokens: fall back to the default command identifier. */
    if(params.token_count < 2) {
        params.tokens[params.token_count++] = Token_create(
                TKIDENT, bdata(&DEFAULT_COMMAND), blength(&DEFAULT_COMMAND));
    }

    return Command_parse(&params, cmd);

error:
    cmd->error = 1;
    return -1;
}
CK_CPPSTART
#endif


/**
 * The following are private and used only within FormulaParser.c; however,
 * I don't know how else to make them "public" only for testing than to put
 * them here.
 */
#define START_STATE   0
#define ACCEPT_STATE  0
#define ERROR_STATE  27
#define NUM_STATES   27


/**
 * The Action[] table contains 144 entries.  To test them all would be
 * laborious and silly.  Instead, for a few token types, test the first,
 * last and middle entry in each token "run".  Also, test some error
 * states.
 */
START_TEST (test_FormulaParser_getAction)
{
  int i;
  Token_t *t = Token_create();

  /* TT_NAME: shift action 6 in the operand-expecting states. */
  t->type = TT_NAME;
  fail_unless( FormulaParser_getAction( 0, t) == 6, NULL );
  fail_unless( FormulaParser_getAction(10, t) == 6, NULL );
  fail_unless( FormulaParser_getAction(25, t) == 6, NULL );
  fail_unless( FormulaParser_getAction( 1, t) == ERROR_STATE, NULL );

  /* TT_INTEGER: shift action 1 in the same states. */
  t->type = TT_INTEGER;
  fail_unless( FormulaParser_getAction( 0, t) == 1, NULL );
  fail_unless( FormulaParser_getAction(10, t) == 1, NULL );
  fail_unless( FormulaParser_getAction(25, t) == 1, NULL );
  fail_unless( FormulaParser_getAction( 1, t) == ERROR_STATE, NULL );

  /* TT_PLUS: negative values are reduce actions. */
  t->type = TT_PLUS;
  fail_unless( FormulaParser_getAction( 1, t) == -9, NULL );
  fail_unless( FormulaParser_getAction(16, t) == -2, NULL );
  fail_unless( FormulaParser_getAction(24, t) == -11, NULL );
  fail_unless( FormulaParser_getAction( 2, t) == ERROR_STATE, NULL );

  /* TT_MINUS: both shift (unary minus, action 5) and reduce entries. */
  t->type = TT_MINUS;
  fail_unless( FormulaParser_getAction( 0, t) == 5, NULL );
  fail_unless( FormulaParser_getAction(16, t) == -2, NULL );
  fail_unless( FormulaParser_getAction(25, t) == 5, NULL );
  fail_unless( FormulaParser_getAction( 2, t) == ERROR_STATE, NULL );

  /* TT_END: reduce entries at end of input. */
  t->type = TT_END;
  fail_unless( FormulaParser_getAction( 1, t) == -9, NULL );
  fail_unless( FormulaParser_getAction(17, t) == -5, NULL );
  fail_unless( FormulaParser_getAction(24, t) == -11, NULL );
  fail_unless( FormulaParser_getAction( 3, t) == ERROR_STATE, NULL );

  /*
   * TT_UNKNOWN should always yield an error state.
   */
  t->type = TT_UNKNOWN;
  for (i = 0; i < NUM_STATES; i++)
  {
    fail_unless( FormulaParser_getAction(i, t) == ERROR_STATE, NULL );
  }

  Token_free(t);
}
/*
 * Match high level parser-tokens using lexer-tokens.
 *
 * Walks the raw lexer tokens and appends higher-level tokens to
 * parser->token_list: escape sequences (TOKEN_ESCAPE_CHAR), single
 * symbols (TOKEN_SYMBOL) and runs of everything else (TOKEN_LITERAL).
 *
 * NOTE(review): symbol_buffer/literal_buffer are sized token_count + 1,
 * which only holds all tokens if each lexer token's value is a single
 * character — TODO confirm that invariant; otherwise the strcat calls can
 * overflow.  sprintf into the fixed 1024-byte match buffers is likewise
 * unbounded.
 *
 * @param parser       destination parser (NULL is a no-op); receives new
 *                     tokens on parser->token_list
 * @param tokens       lexer token array (NULL is a no-op)
 * @param token_count  number of entries in tokens
 */
static void init_tokens(Parser *parser, const Token *tokens,
                        const size_t token_count)
{
    if (parser == NULL || tokens == NULL) {
        return;
    }

    /* Two-character escape strings ("<escape><symbol>") for every symbol
     * except index 0, which is left as the empty string. */
    char escape_chars[parser->symbol_count][3];
    {
        unsigned int i;
        for (i = 0; i < parser->symbol_count; ++i) {
            if (i == 0) {
                escape_chars[i][0] = '\0';
            } else {
                escape_chars[i][0] = parser->symbols[SYMBOL_ESCAPE].value[0];
                escape_chars[i][1] = parser->symbols[i].value[0];
                escape_chars[i][2] = '\0';
            }
        }
    }

    /* Per-input-token classification: 0 = literal (default), 1 = symbol,
     * 2 = half of an escape sequence. */
    unsigned int indices[token_count];
    {
        unsigned int i;
        for (i = 0; i < token_count; ++i) {
            indices[i] = 0;
        }
    }

    /* Accumulation buffers for the candidate symbol text and the pending
     * literal run, plus the currently matched spans. */
    char symbol_buffer[token_count + 1];
    char literal_buffer[token_count + 1];
    char symbol_match[1024];
    char literal_match[1024];
    symbol_buffer[0] = '\0';
    literal_buffer[0] = '\0';
    symbol_match[0] = '\0';
    literal_match[0] = '\0';

    unsigned int symbol_match_begin, symbol_match_end;
    unsigned int literal_match_begin, literal_match_end;
    TOKEN_TYPE symbol_match_type, literal_match_type;
    symbol_match_begin = 0;
    symbol_match_end = 0;
    literal_match_begin = 0;
    literal_match_end = 0;
    symbol_match_type = TOKEN_UNKNOWN;
    literal_match_type = TOKEN_UNKNOWN;

    unsigned int i;
    for (i = 0; i < token_count; ++i) {
        unsigned int j, increment;
        /* increment becomes 1 when an escape sequence consumes token i+1. */
        increment = 0;

        for (j = i; j < token_count; ++j) {
            strcat(symbol_buffer, tokens[j].value);
            unsigned int k;

            /* Exactly two chars starting with the escape char: try to
             * match a known escape sequence. */
            if (strlen(symbol_buffer) == 2 &&
                symbol_buffer[0] == parser->symbols[SYMBOL_ESCAPE].value[0] ) {
                unsigned int match;
                match = 0;
                for (k = 1; k < parser->symbol_count; ++k) {
                    if (strcmp(symbol_buffer, escape_chars[k]) == 0) {
                        sprintf(symbol_match, "%s", symbol_buffer);
                        symbol_match_begin = i;
                        symbol_match_end = i + 1;
                        symbol_match_type = TOKEN_ESCAPE_CHAR;
                        indices[i] = 2;
                        if (i < token_count - 1) {
                            indices[i + 1] = 2;
                            increment = 1;
                        }
                        match = 1;
                        break;
                    }
                }
                if (match) {
                    break;
                }
            }

            /* Try to match the accumulated buffer against a plain symbol.
             * NOTE(review): a match breaks only the k-loop, so the j-loop
             * keeps extending symbol_buffer afterwards — presumably to
             * prefer longer matches; TODO confirm. */
            for (k = 1; k < parser->symbol_count; ++k) {
                if (strcmp(symbol_buffer, parser->symbols[k].value) == 0) {
                    sprintf(symbol_match, "%s", symbol_buffer);
                    symbol_match_begin = i;
                    symbol_match_end = i;
                    symbol_match_type = TOKEN_SYMBOL;
                    indices[i] = 1;
                    break;
                }
            }
        }
        symbol_buffer[0] = '\0';

        if (indices[i] != 1 && indices[i] != 2) {
            /* Literal token: extend the pending literal run. */
            strcat(literal_buffer, tokens[i].value);
        } else {
            /* A symbol/escape at i terminates any pending literal run;
             * emit its span (begin/end are inclusive token indices). */
            if (strlen(literal_buffer) == 1) {
                sprintf(literal_match, "%s", literal_buffer);
                literal_match_begin =
                    i - (unsigned int) strlen(literal_buffer);
                literal_match_end = literal_match_begin;
                literal_match_type = TOKEN_LITERAL;
            } else if (strlen(literal_buffer) > 1) {
                sprintf(literal_match, "%s", literal_buffer);
                literal_match_begin =
                    i - (unsigned int) strlen(literal_buffer);
                literal_match_end = i - 1;
                literal_match_type = TOKEN_LITERAL;
            }
            literal_buffer[0] = '\0';
        }

        /* Flush the literal match (if any) before the symbol match so the
         * token list stays in source order. */
        {
            if (literal_match[0] != '\0') {
                Token *token;
                token = Token_create(literal_match, literal_match_begin,
                                     literal_match_end, literal_match_type);
                List_push_back(parser->token_list, token);
            }
        }
        {
            if (symbol_match[0] != '\0') {
                Token *token;
                token = Token_create(symbol_match, symbol_match_begin,
                                     symbol_match_end, symbol_match_type);
                List_push_back(parser->token_list, token);
            }
        }
        symbol_match[0] = '\0';
        literal_match[0] = '\0';

        /* Skip the second half of a consumed escape sequence. */
        i += increment;
    }
}
/*
 * Scan the next lexeme from the parse state and return it as a Token.
 *
 * Accumulates significant characters into `word`, tracking the line/column
 * of the first one, and stops at word boundaries (line/statement ends,
 * syntax characters, method selectors, colons) or at the close of a
 * bookended region (string/comment/regex).
 *
 * @param state  lexer state; advanced past the consumed characters
 * @return a newly created Token, or NULL at end of input / empty word /
 *         allocation failure.
 *
 * NOTE(review): on the check_mem failure path `word` is not freed — whether
 * that leaks depends on whether Token_create takes ownership even when it
 * fails; TODO confirm.
 */
Token *lex_get_next_lexeme(ParseState *state)
{
  String *word = String_create("");
  int starting_index = 0; // used to track progress against comments
  int column = 0;
  int line = 0;
  char c = lex_state_current_char(state);
  Boolean should_continue = true;

  while ( lex_state_in_progress(state) && should_continue ) {
    // strings, comments, regex, etc ...
    if ( string_empty(word) && lex_state_opens_at_current(state) ) {
      starting_index = lex_state_current(state);
      lex_state_lexical_bookend(state) = lex_state_closer(state);
    }

    // record position of the first significant char, then accumulate
    if ( lex_state_current_is_significant(state) ) {
      starting_index = starting_index ? starting_index : (lex_state_current(state));
      column = column ? column : (lex_state_column(state));
      line = line ? line : (lex_state_line(state));
      string_push(word, c);
    }

    // update lex state for new line
    if ( char_is_line_end(c) ) {
      lex_state_start_new_line(state);
    }

    // check for termination of strings and other bookends that may contain spaces
    if ( lex_state_is_open(state) && starting_index < lex_state_current(state) ) {
      // regexes are special, because there can be characters after the ending '/'
      // so we have to switch the state
      lex_state_transition_regex_if_needed(state);

      if ( lex_state_will_close(state) ) {
        lex_state_close(state);
        should_continue = false;
      }
    } else if ( lex_state_current_is_significant(state) &&
                ( char_is_line_end(c) ||
                  char_is_statement_end(c) ||              // line ends usually significant of a statement end
                  lex_state_end_of_word(state) ||          // end of normal word sequence
                  word_is_method_selector(word, c) ||      // '.'
                  char_is_syntax(c) ||                     // '(' ')' ','
                  char_is_colon(lex_state_next_char(state)) || // : appearing after first char breaks the word
                  lex_state_will_end_word_by_dot(state, word)) ) {
      // next char is a dot, and word is not a number
      should_continue = false;
    }

    // move to next character
    lex_state_advance(state);
    c = lex_state_current_char(state);
  }

  // nothing significant consumed: no lexeme at this position
  if ( string_empty(word) ) {
    string_free(word);
    return NULL;
  }

  Token *lexeme = Token_create(word, line, column);
  check_mem(lexeme);

  return lexeme;

error:
  return NULL;
}