void test_parse_paren() { struct token_list *tkl = make_token_list(); struct token *tk0 = make_token(tok_punc, "(", 0.0, 0); struct token *tk1 = make_token(tok_number, NULL, 0.0, 42); struct token *tk2 = make_token(tok_punc, "+", 0.0, 0); struct token *tk3 = make_token(tok_number, NULL, 0.0, 24); struct token *tk4 = make_token(tok_punc, ")", 0.0, 0); append_token_list(tkl, tk0); append_token_list(tkl, tk1); append_token_list(tkl, tk2); append_token_list(tkl, tk3); append_token_list(tkl, tk4); struct ast_node *result = parse_paren(tkl); EXPECT_EQ(result->val, tk2); EXPECT_EQ(result->num_children, 2); EXPECT_EQ(result->children[0]->val, tk1); EXPECT_EQ(result->children[1]->val, tk3); EXPECT_EQ(result->children[0]->num_children, 0); EXPECT_EQ(result->children[1]->num_children, 0); destroy_token_list(tkl); delete_node(result); }
void test_parse_identifier() { struct token_list *tkl = make_token_list(); struct token *tk = make_token(tok_identifier, "_identifier123", 0.0, 0); append_token_list(tkl, tk); struct ast_node *result = parse_identifier(tkl); EXPECT_EQ(result->val, tk); EXPECT_EQ(result->num_children, 0); destroy_token_list(tkl); delete_node(result); struct token *tk0, *tk1, *tk2, *tk3, *tk4, *tk5; tkl = make_token_list(); tk0 = make_token(tok_identifier, "_function123", 0.0, 0); append_token_list(tkl, tk0); tk1 = make_token(tok_punc, "(", 0.0, 0); append_token_list(tkl, tk1); tk2 = make_token(tok_identifier, "a", 0.0, 0); append_token_list(tkl, tk2); tk3 = make_token(tok_punc, ",", 0.0, 0); append_token_list(tkl, tk3); tk4 = make_token(tok_identifier, "b", 0.0, 0); append_token_list(tkl, tk4); tk5 = make_token(tok_punc, ")", 0.0, 0); append_token_list(tkl, tk5); result = parse_identifier(tkl); EXPECT_EQ(result->val, tk0); EXPECT_EQ(result->num_children, 2); EXPECT_EQ(result->children[0]->val, tk2); EXPECT_EQ(result->children[1]->val, tk4); EXPECT_EQ(result->children[0]->num_children, 0); EXPECT_EQ(result->children[1]->num_children, 0); destroy_token_list(tkl); delete_node(result); tkl = make_token_list(); tk0 = make_token(tok_identifier, "a_function_with_1_arg", 0.0, 0); append_token_list(tkl, tk0); tk1 = make_token(tok_punc, "(", 0.0, 0); append_token_list(tkl, tk1); tk2 = make_token(tok_identifier, "a", 0.0, 0); append_token_list(tkl, tk2); tk3 = make_token(tok_punc, ")", 0.0, 0); append_token_list(tkl, tk3); result = parse_identifier(tkl); EXPECT_EQ(result->val, tk0); EXPECT_EQ(result->num_children, 1); EXPECT_EQ(result->children[0]->val, tk2); EXPECT_EQ(result->children[0]->num_children, 0); destroy_token_list(tkl); delete_node(result); tkl = make_token_list(); tk0 = make_token(tok_identifier, "a_function_with_1_arg", 0.0, 0); append_token_list(tkl, tk0); tk1 = make_token(tok_punc, "(", 0.0, 0); append_token_list(tkl, tk1); tk2 = make_token(tok_punc, ")", 0.0, 0); append_token_list(tkl, tk2); result = parse_identifier(tkl); EXPECT_EQ(result->val, tk0); EXPECT_EQ(result->num_children, 0); destroy_token_list(tkl); delete_node(result); }
/* Lexer FSM main loop */ TokenList *lexer_split(const char *src) { LexerState state = ST_WHITESPACE; const size_t length = strlen(src); Buffer *tokens = buffer_new(); size_t i; /* terminating zero is handled like a normal character */ for (i=0; state!=ST_ERROR && i<length+1; i++) { const char c = src[i]; switch (state) { case ST_WHITESPACE: if (c=='"') /* start a quoted string, new token */ state = ST_QUOTE; else if (c=='\\') /* start escape-sequence, new token */ state = ST_ESCAPE; else if (c!='\0' && !isspace(c)) /* start a new token */ { state = ST_WORD; buffer_putchar(tokens, c); } /* else: whitespace -> ignore */ break; case ST_QUOTE: if (c=='"') /* end a quoted string */ state = ST_WORD; else if (c=='\\') /* start escape-sequence */ state = ST_QUOTE_ESCAPE; else if (c!='\0') /* append a char from inside quotes to token */ buffer_putchar(tokens, c); else /* '\0' -> set error flag */ state = ST_ERROR; break; case ST_WORD: if (c=='\0' || isspace(c)) /* end a token */ { state = ST_WHITESPACE; buffer_putchar(tokens, '\0'); } else if (c=='"') /* start a quoted string */ state = ST_QUOTE; else if (c=='\\') /* start escape-sequence */ state = ST_ESCAPE; else /* append a char to token */ buffer_putchar(tokens, c); break; case ST_ESCAPE: if (c!='\0') { state = ST_WORD; buffer_putchar(tokens, c); } else /* '\0' -> set error flag */ state = ST_ERROR; break; case ST_QUOTE_ESCAPE: if (c!='\0') { state = ST_QUOTE; buffer_putchar(tokens, c); } else /* '\0' -> set error flag */ state = ST_ERROR; break; default: break; } } /* Detect lexing errors */ if (state == ST_ERROR) { buffer_delete(tokens); return NULL; } else { TokenList *tl = (TokenList *) malloc(sizeof(TokenList)); tl->buf = tokens; tl->tokens = make_token_list(tokens); return tl; } }