/* exercise scanner behaviour at both ends of the buffer (EOF and BOF) */
void test_eof_bof(void) {
  const char *buf = "test";
  scanner_t *sc = scanner_init(buf);

  /* walk forward through "test" */
  assert(scanner_peek(sc) == 't');
  assert(scanner_advance(sc) == 't');
  assert(scanner_advance(sc) == 'e');
  assert(scanner_peek(sc) == 's');
  assert(scanner_advance(sc) == 's');
  assert(scanner_advance(sc) == 't');

  /* at EOF: peek returns 0, current still reports the last char read,
   * and repeated advances keep yielding 0 without moving */
  assert(scanner_peek(sc) == 0);
  assert(scanner_current(sc) == 't');
  assert(scanner_advance(sc) == 0);
  assert(scanner_advance(sc) == 0);
  assert(scanner_advance(sc) == 0);
  assert(scanner_advance(sc) == 0);
  assert(scanner_current(sc) == 0);

  /* back up all the way to BOF; backing past the start yields 0 */
  assert(scanner_backup(sc) == 't');
  assert(scanner_backup(sc) == 's');
  assert(scanner_peek(sc) == 't');
  assert(scanner_backup(sc) == 'e');
  assert(scanner_backup(sc) == 't');
  assert(scanner_backup(sc) == 0);
  assert(scanner_current(sc) == 0);
  assert(scanner_peek(sc) == 't');

  scanner_destroy(sc);
}
void test_buffer_walk(void) { const char *b = "this is a small test buffer"; scanner_t *s = scanner_init(b); assert(scanner_peek(s) == 't'); assert(scanner_advance(s) == 't'); assert(scanner_advance(s) == 'h'); assert(scanner_advance(s) == 'i'); assert(scanner_advance(s) == 's'); assert(scanner_peek(s) == ' '); assert(scanner_current(s) == 's'); assert(scanner_advance(s) == ' '); assert(scanner_backup(s) == 's'); char *xthis = (char*)scanner_accept(s, (acceptfn)test_accept_fn); assert(strcmp(xthis, "this") == 0); free(xthis); size_t i = 4; char c; while ((c = scanner_advance(s))) assert(c == b[i++]); assert(strlen(b) == i); scanner_destroy(s); }
/* state: consume the exponent of a number, [+-]?[0-9]+ */
static statefn exponent(scanner_t *s) {
  /* an optional sign may precede the digits */
  char sign = scanner_peek(s);
  if (sign == '+' || sign == '-')
    scanner_advance(s);

  /* at least one digit is mandatory */
  if (!is_num(scanner_peek(s))) {
    fprintf(stderr, "lexer error: expected exponent\n");
    return error;
  }

  /* swallow all digits, then push back the first non-digit */
  while (is_num(scanner_advance(s)))
    ;
  scanner_backup(s);
  return done;
}
/* produce the next token from the scanner's stream.
 * Returns a tokStackEmpty token at end of input and a tokNoMatch
 * token when no tokenizer recognizes the upcoming characters;
 * the caller owns the returned token. */
token_t *lexer_nextitem(scanner_t *s) {
  /* dispatch table, ordered so longer matches are attempted first */
  static lexcomp_t (*tokenizers[])(scanner_t*) = {
    tokenize_text,
    tokenize_identifier,
    tokenize_number,
    tokenize_bitops,
    tokenize_relops,
    tokenize_mathops,
    tokenize_miscops,
  };
  const size_t ntokenizers = sizeof(tokenizers) / sizeof(tokenizers[0]);

  /* skip leading whitespace and drop it from the pending lexeme */
  while (is_white(scanner_advance(s)))
    ;
  scanner_backup(s);
  scanner_ignore(s);

  /* nothing left to read */
  if (scanner_peek(s) == 0)
    return token_init(tokStackEmpty, "");

  for (size_t idx = 0; idx < ntokenizers; idx++) {
    lexcomp_t match = tokenizers[idx](s);
    if (match != tokNoMatch) {
      /* wrap the accepted lexeme into a token and tag it */
      token_t *tok = (token_t*)scanner_accept(s, (acceptfn)tok_maker);
      tok->lexcomp = match;
      return tok;
    }
  }
  return token_init(tokNoMatch, "");
}
/* lex text: "[^"]*"
 * Returns tokText on a complete quoted string, tokNoMatch when no
 * opening quote is present, or when the string is never closed
 * before end of input.
 * Fix: the original `while (scanner_advance(s) != '"');` spins
 * forever on an unterminated string, since scanner_advance() keeps
 * returning 0 at end of buffer and 0 != '"' is always true. */
lexcomp_t tokenize_text(scanner_t *s) {
  if (scanner_peek(s) != '"')
    return tokNoMatch;
  scanner_advance(s); /* consume the opening quote */

  char c;
  while ((c = scanner_advance(s)) != '"') {
    if (c == 0) {
      /* hit end of buffer with the string still open */
      fprintf(stderr, "lexer error: unterminated string\n");
      return tokNoMatch;
    }
  }
  return tokText;
}
/* lex a number by running the integer/fractional/exponent state machine
 * until it reaches the 'done' or 'error' sink state.
 * NOTE(review): 'state' is a statefn stored as a nextstate so each step
 * can return the handler for the next step; the casts round-trip
 * function pointers of the same shape — confirm statefn/nextstate are
 * compatible function-pointer typedefs. */
lexcomp_t tokenize_number(scanner_t *s) { if (!is_num(scanner_peek(s))) return tokNoMatch; nextstate state = integer; while ((statefn)state != done && (statefn)state != error) state = (nextstate)(*state)(s); return ((statefn)state == error ? tokNoMatch : tokNumber); }
/* state: consume the fractional digits of a number, handing off to the
 * exponent state when an 'e'/'E' marker follows the digits */
static statefn fractional(scanner_t *s) {
  /* at least one digit must follow the decimal point */
  if (!is_num(scanner_peek(s))) {
    fprintf(stderr, "lexer error: expected fractional part\n");
    return error;
  }

  /* eat digits until a non-digit shows up */
  while (is_num(scanner_advance(s)))
    ;

  char stop = scanner_current(s);
  if (stop == 'e' || stop == 'E')
    return (statefn)exponent; /* marker already consumed; don't back up */

  scanner_backup(s);
  return done;
}
/* lex variable and function names:
 *   id:   [a-zA-Z_][a-zA-Z0-9_]*
 *   func: [a-zA-Z_][a-zA-Z0-9_]*(
 */
lexcomp_t tokenize_identifier(scanner_t *s) {
  if (!is_alpha(scanner_peek(s)))
    return tokNoMatch;

  /* swallow the identifier body, then push back the terminator */
  char ch;
  do {
    ch = scanner_advance(s);
  } while (is_alpha(ch) || is_num(ch));
  scanner_backup(s);

  /* reserved words take precedence over plain identifiers */
  lexcomp_t keyword = reserved_word(s);
  if (keyword != tokNoMatch)
    return keyword;

  /* a trailing '(' marks a function name rather than a variable */
  if (scanner_advance(s) == '(')
    return tokFunction;
  scanner_backup(s);
  return tokId;
}
/* return the parser's current token without consuming it.
 * NOTE(review): elsewhere in this file scanner_peek() yields a char
 * (compared against character literals in the tests), yet here its
 * result is returned as a Token*. Presumably the Parser's scn is a
 * different, token-level scanner with its own scanner_peek — confirm
 * this is not an implicit int-to-pointer conversion. */
static Token *peek(Parser *p) { return scanner_peek(p->scn); }