static token charLit(int n, int indLine, int indCol) { if (ch == '\'') error(indLine, indCol, "empty literal"); if (ch == '\\') { next_ch(); if (ch == 'n') n = '\n'; else if (ch == '\\') n = '\\'; else error(indLine, indCol, "unknow esacape char"); } next_ch(); if (ch != '\'') error(indLine, indCol, "it is not character literal"); next_ch(); return (token){tk_Integer, indLine, indCol, n, NULL}; }
static token identOrInt(int indLine, int indCol) { int n; int position = 0; int isNumber = TRUE; static char id[MAXIDENT]; while (isalnum(ch) || ch == '_') { id[position++] = ch; if (!isdigit(ch)) isNumber = FALSE; next_ch(); } if (position == 0) error(indLine, indCol, "unrecognized character (%d) '%c'\n", ch, ch); id[position++] = '\0'; if (isdigit(id[0])) { if (!isNumber) error(indLine, indCol, "invalid number: %s\n", id); n = strtol(id, NULL, 0); if (n == LONG_MAX && errno == ERANGE) error(indLine, indCol, "Number out of limits"); return (token){tk_Integer, indLine, indCol, n, NULL}; } return (token){getIdentType(id, indLine, indCol), indLine, indCol, 0, id}; }
static token stringLit(char start, int indLine, int indCol) { static char buf[MAXSTRING]; int position = 0; next_ch(); while (ch != start) { if (ch == '\n') error(indLine, indCol, "EOL in string"); if (ch == EOF) error(indLine, indCol, "EOF in string"); buf[position++] = ch; next_ch(); } buf[position++] = '\0'; next_ch(); return (token){tk_String, indLine, indCol, 0, buf}; }
/*
 * Decide between a two-character operator and its one-character prefix.
 * The prefix has already been consumed; `ch` is the character after it.
 *
 * expect   - character that completes the two-character operator
 * ifyes    - token type produced when `ch` equals `expect` (the character
 *            is then consumed)
 * ifno     - token type produced otherwise; passing tk_EOI means the prefix
 *            is not valid on its own, and a diagnostic is issued
 * indLine, indCol - source position for diagnostics and the returned token
 */
static token follow(int expect, TokenType ifyes, TokenType ifno, int indLine, int indCol) {
    TokenType kind = ifno;

    if (ch != expect) {
        if (ifno == tk_EOI)
            error(indLine, indCol, "follow: unrecognized character '%c' (%d)\n", ch, ch);
    } else {
        next_ch();
        kind = ifyes;
    }

    return (token){kind, indLine, indCol, 0, NULL};
}
static token divOrCmt(int indLine, int indCol) { if (ch == '/') { next_ch(); while (ch != EOF && ch != '\n') next_ch(); if (ch == EOF) return (token){tk_EOI, indLine, indCol, 0, NULL}; if (ch == '\n') { next_ch(); return getTok(); } } if (ch != '*') return (token){tk_Div, indLine, indCol, 0, NULL}; /* comment detected */ while (TRUE) { switch (ch) { case '*': next_ch(); if (ch == '/') { next_ch(); return getTok(); } break; case EOF: error(indLine, indCol, "EOF in comment"); } next_ch(); } }
/*
 * Scan the next lexical symbol from the scanner state `o` and store its
 * kind in o->sym.
 *
 * Reads o->ch (the current character) and advances with next_ch(o).
 * Depending on the symbol it also fills:
 *   o->ival - value of an integer literal (sym == INT)
 *   o->id   - text of a variable, atom or keyword
 *   o->lc   - incremented once per newline skipped
 *
 * Keywords are looked up in the NULL-terminated table words[]; on a match
 * o->sym is the index of the matching entry.
 *
 * Errors are reported through the RECOVER macro.
 * NOTE(review): RECOVER's control-flow effect (return / longjmp / other) is
 * not visible here - confirm before restructuring this function.
 */
static void next_sym(prolog_obj_t *o) {
again:
    switch(o->ch) {
    /* blanks: skip and rescan */
    case ' ': case '\t': next_ch(o); goto again;
    /* newline: count it, then skip and rescan */
    case '\n': o->lc++; next_ch(o); goto again;
    /* end of input: the character is NOT consumed */
    case '\0': case EOF: o->sym = EOI; break;
    /* '#' is handed to comment(); presumably it skips a comment - confirm */
    case '#': comment(o); next_ch(o); goto again;
    /* single-character punctuation */
    case '(': o->sym = LPAR; next_ch(o); break;
    case ')': o->sym = RPAR; next_ch(o); break;
    case '[': o->sym = LSPAR; next_ch(o); break;
    case ']': o->sym = RSPAR; next_ch(o); break;
    case '.': o->sym = PERIOD; next_ch(o); break;
    case ',': o->sym = COMMA; next_ch(o); break;
    /* two-character symbols: '?' and ':' must each be followed by '-' */
    case '?': next_ch(o); if(o->ch != '-') RECOVER(o,"expected '-'"); o->sym = QUERY; next_ch(o); break;
    case ':': next_ch(o); if(o->ch != '-') RECOVER(o, "expected '-'"); o->sym = ASSIGN; next_ch(o); break;
    default:
        if(isdigit(o->ch)) { /* integer literal: decimal digits, no overflow check */
            o->ival = 0;
            while(isdigit(o->ch)) {
                o->ival = o->ival*10 + (o->ch - '0');
                next_ch(o);
            }
            o->sym = INT;
        } else if(isalpha(o->ch)) { /* variable, atom or keyword */
            size_t i = 0;
            while(isalnum(o->ch)) {
                /*
                 * NOTE(review): this test lets ID_MAX + 1 characters be
                 * stored, and the terminator below can land at index
                 * ID_MAX + 1. That is in bounds only if o->id has at least
                 * ID_MAX + 2 elements - confirm against its declaration.
                 */
                if(i > ID_MAX) RECOVER(o, "identifier too longer");
                o->id[i++] = o->ch;
                next_ch(o);
            }
            o->id[i] = '\0';
            if(isupper(o->id[0])) { /* leading upper-case letter: variable */
                o->sym = VARLEX;
                break;
            } else { /* atom or keyword */
                /* linear search; o->sym doubles as the index into words[] */
                o->sym = 0;
                while(words[o->sym] && strcmp(words[o->sym], o->id)) o->sym++;
                if(!words[o->sym]) /* reached the NULL sentinel: plain id */ o->sym = ID;
                break;
            }
        } else {
            RECOVER(o, "invalid char");
        }
        break;
    }
}
token getTok() { /* skip white space */ while (isspace(ch)) next_ch(); int indLine = line, indCol = col; switch (ch) { case '{': next_ch(); return (token){tk_Lbrace, indLine, indCol, 0, NULL}; case '}': next_ch(); return (token){tk_Rbrace, indLine, indCol, 0, NULL}; case '(': next_ch(); return (token){tk_Lparen, indLine, indCol, 0, NULL}; case ')': next_ch(); return (token){tk_Rparen, indLine, indCol, 0, NULL}; case '+': next_ch(); return (token){tk_Add, indLine, indCol, 0, NULL}; case '-': next_ch(); return (token){tk_Sub, indLine, indCol, 0, NULL}; case '*': next_ch(); return (token){tk_Mul, indLine, indCol, 0, NULL}; case '%': next_ch(); return (token){tk_Mod, indLine, indCol, 0, NULL}; case ';': next_ch(); return (token){tk_Semi, indLine, indCol, 0, NULL}; case ',': next_ch(); return (token){tk_Comma, indLine, indCol, 0, NULL}; case '/': next_ch(); return divOrCmt(indLine, indCol); case '\'': next_ch(); return charLit(ch, indLine, indCol); case '<': next_ch(); return follow('=', tk_Leq, tk_Lss, indLine, indCol); case '>': next_ch(); return follow('=', tk_Geq, tk_Gtr, indLine, indCol); case '=': next_ch(); return follow('=', tk_Eq, tk_Assign, indLine, indCol); case '!': next_ch(); return follow('=', tk_Neq, tk_Not, indLine, indCol); case '&': next_ch(); return follow('&', tk_And, tk_EOI, indLine, indCol); case '|': next_ch(); return follow('|', tk_Or, tk_EOI, indLine, indCol); case '"' : return stringLit(ch, indLine, indCol); case EOF: return (token){tk_EOI, indLine, indCol, 0, NULL}; default: return identOrInt(indLine, indCol); } }