// Reads the body of a delimited token into m_string.
//
// Characters are consumed until `delimiter` or EOF is seen; at that point
// m_string is NUL-terminated, m_id is pointed at its beginning and `result`
// is returned. If the scanner leaves the ok state after a read, the current
// m_state is returned instead. Every '\n' read at the top of the loop bumps
// m_line.
//
// A backslash is copied to m_string and the character that follows it is
// copied verbatim without being examined, so an escaped delimiter does not
// terminate the token.
scanner::token scanner::read_string(char delimiter, token result) {
    m_string.reset();
    m_params.reset();

    for (;;) {
        char c = read_char();

        if (!state_ok()) {
            return m_state;
        }

        if (c == '\n') {
            ++m_line;
        }

        const bool terminated = (c == delimiter) || (c == EOF);
        if (terminated) {
            m_string.push_back(0);
            m_id = m_string.begin();
            return result;
        }

        if (c == '\\') {
            // Keep the backslash, then take the next character as-is.
            m_string.push_back('\\');
            c = read_char();
        }
        m_string.push_back(c);
    }
}
// Primitive: skip a run of whitespace/control characters (newlines included).
//
// Advances `pos` over every input element whose value is non-zero and not
// greater than 32, then succeeds at the resulting position with a tlNull
// value. It never fails; an empty run simply succeeds at `pos`.
// `ignored` is not consulted.
//
// NOTE(review): if p->input is a plain (signed) char buffer, bytes >= 0x80
// read as negative values and are skipped too -- confirm this is intended.
static State prim_wsnl(Parser* p, int pos, bool ignored) {
    int c;
    while ((c = p->input[pos]) != 0 && c <= 32) {
        pos++;
    }
    return state_ok(pos, tlNull);
}
// Skips the remainder of a comment.
//
// Consumes characters while the scanner is in the ok state, stopping right
// after `delimiter` or after a character equal to -1 has been read. Each
// '\n' consumed increments m_line (including one that is the delimiter).
void scanner::comment(char delimiter) {
    while (state_ok()) {
        const char c = read_char();

        if (c == '\n') {
            ++m_line;
        }

        const bool finished = (c == delimiter) || (c == -1);
        if (finished) {
            return;
        }
    }
}
// Primitive: match one input character against a set.
//
// `chars` is a NUL-terminated list of accepted characters. When the input
// element at `pos` is non-zero and equals one of them, the result is a
// success at pos + 1 carrying tlINT(c); otherwise it is a failure at `pos`.
// `ignored` is not consulted.
static State prim_char(Parser* p, int pos, const char* chars, bool ignored) {
    //print("CHAR: %s", chars);
    const int c = p->input[pos];
    if (c == 0) {
        return state_fail(pos);
    }

    for (const char* candidate = chars; *candidate; candidate++) {
        if (*candidate == c) {
            return state_ok(pos + 1, tlINT(c));
        }
    }
    return state_fail(pos);
}
// Primitive: match the literal NUL-terminated string `chars` at `pos`.
//
// On a mismatch, or when the input's zero terminator is reached first, the
// result is a failure at the starting position. On a match the result is a
// success just past the literal. Its value is tlNull when `ignored` is set
// or NO_VALUE is defined; otherwise it is a string built by
// tlStringFromTake() from a freshly malloc'ed, NUL-terminated copy of the
// matched text (presumably the callee takes ownership of the buffer --
// TODO confirm).
//
// If that copy cannot be allocated the primitive reports a failure at the
// starting position instead of passing a NULL buffer to memcpy().
static State prim_text(Parser* p, int pos, const char* chars, bool ignored) {
    //print("TEXT: %s", chars);
    const int start = pos;

    while (*chars) {
        const int c = p->input[pos];
        // *chars is non-zero here, so this test also rejects the input's
        // zero terminator (c == 0).
        if (*chars != c) return state_fail(start);
        pos++;
        chars++;
    }

    if (ignored) return state_ok(pos, tlNull);

#ifndef NO_VALUE
    const int len = pos - start;
    char* buf = malloc(len + 1);
    if (!buf) return state_fail(start);  // out of memory: do not touch NULL
    memcpy(buf, p->input + start, len);
    buf[len] = 0;
    return state_ok(pos, tlStringFromTake(buf, len));
#else
    return state_ok(pos, tlNull);
#endif
}
bool scanner::read_params() { unsigned param_num = 0; while (state_ok()) { char ch = read_char(); switch (m_normalized[(unsigned char) ch]) { case '0': param_num = 10*param_num + (ch - '0'); break; case ']': m_params.push_back(parameter(param_num)); return true; case ':': m_params.push_back(parameter(param_num)); param_num = 0; break; default: m_string.reset(); m_string.push_back(ch); while (true) { ch = read_char(); if (ch == ':' || ch == ']') { m_string.push_back(0); m_params.push_back(parameter(symbol(m_string.c_ptr()))); param_num = 0; if (ch == ':') { unread_char(); } else { return true; } break; } if (ch == EOF) { // TODO: use error reporting m_err << "ERROR: unexpected character: '" << ((int)ch) << " " << ch << "'.\n"; m_state = ERROR_TOKEN; break; } m_string.push_back(ch); } break; } } return false; }
// Reads an identifier whose first character, `first_char`, has already been
// consumed by the caller.
//
// Special cases decided after looking at the next character:
//   * outside SMT2 mode, '-' followed by a character classified as '0' is
//     handed to read_number() (in SMT2 "-20" is an identifier);
//   * a leading '|' is handed to read_symbol().
//
// Otherwise characters are appended to m_string while they belong to the
// identifier, based on their m_normalized class:
//   * class '+' : accepted when the identifier started with a '+'-class
//     character, or when it started with an 'a'-class character and the
//     character is '-' (strings can have hyphens);
//   * classes 'a', ':', '.', '0' : accepted unless the identifier started
//     with a '+'-class character;
//   * class '[' : terminates the name and reads a parameter list, returning
//     ID_TOKEN on success and m_state otherwise;
//   * anything else ends the identifier.
// When the identifier ends, the offending character is unread, m_string is
// NUL-terminated, m_id points at it and ID_TOKEN is returned. If the scanner
// leaves the ok state, m_state is returned.
scanner::token scanner::read_id(char first_char) {
    m_string.reset();
    m_params.reset();
    m_string.push_back(first_char);

    const bool arith_start = (m_normalized[(unsigned char) first_char] == '+');
    const bool alpha_start = (m_normalized[(unsigned char) first_char] == 'a');

    char next = read_char();

    // In SMT2 "-20" is an identifier.
    if (!m_smt2 && state_ok() && first_char == '-' && m_normalized[(unsigned char) next] == '0') {
        return read_number(next, false);
    }

    if (state_ok() && first_char == '|') {
        return read_symbol(next);
    }

    while (state_ok()) {
        bool belongs = false;

        switch (m_normalized[(unsigned char) next]) {
        case '+':
            // strings can have hyphens.
            belongs = arith_start || (alpha_start && next == '-');
            break;
        case 'a':
        case ':':
        case '.':
        case '0':
            belongs = !arith_start;
            break;
        case '[':
            m_string.push_back(0);
            m_id = m_string.begin();
            if (read_params()) {
                return ID_TOKEN;
            }
            return m_state;
        default:
            break;
        }

        if (!belongs) {
            // End of identifier: hand the character back to the input.
            m_string.push_back(0);
            m_id = m_string.begin();
            unread_char();
            return ID_TOKEN;
        }

        m_string.push_back(next);
        next = read_char();
    }
    return m_state;
}
// Primitive: match any single input element.
//
// Fails at `pos` when the element there is zero (the input terminator);
// otherwise succeeds at pos + 1 carrying tlINT(c). `ignored` is not
// consulted.
static State prim_any(Parser* p, int pos, bool ignored) {
    const int c = p->input[pos];
    if (c == 0) {
        return state_fail(pos);
    }
    return state_ok(pos + 1, tlINT(c));
}
// Primitive: consume nothing and yield the current position.
//
// Always succeeds at `pos` with tlINT(pos) as its value. Neither `p` nor
// `ignored` is consulted.
static State prim_pos(Parser* p, int pos, bool ignored) {
    const int here = pos;
    return state_ok(here, tlINT(here));
}
static State state_ok(int pos, tlHandle value) { #ifndef NO_VALUE return (State){.ok=1,.pos=pos,.value=value}; #else return (State){.ok=1,.pos=pos}; #endif } typedef State(Rule)(Parser*,int); static State parser_enter(Parser* p, const char* name, int pos) { //print(">> enter: %s %d '%c'", name, pos, p->input[pos]); return (State){}; } static const char* parser_set_anchor(Parser* p, const char* anchor) { const char* prev = p->anchor; p->anchor = anchor; //print("SET ANCHOR %s", anchor); return prev; } static State parser_error(Parser* p, const char* name, int begin, int end) { const int pos = end; if (p->error_msg) return state_fail(pos); if (end < begin) end = begin; p->error_rule = name; p->error_msg = p->anchor; p->error_pos_begin = begin; p->error_pos_end = end; parser_process_error(p); return state_fail(pos); } static State parser_fail(Parser* p, const char* name, int pos) { p->backtracking = true; return state_fail(pos); } static void parser_commit(Parser* p, int pos, const char* name, int step) { //print(">> commit: %s@%d %d", name, step, pos); if (pos > p->upto) { p->backtracking = false; } if (!p->backtracking && pos >= p->upto) { //print(">> COMMIT: %s@%d %d", name, step, pos); p->upto = pos; p->last_rule = name; p->last_step = step; } } static State parser_pass(Parser* p, const char* name, int start, State state, int cache_token) { //print("<< pass: %s (cache: %d) %d -- %s", name, cache_token, state.pos, tl_repr(state.value)); if (!cache_token) return state; for (int i = p->cache_at - 1; i >= 0; i--) { if (p->cache[i].start == start && p->cache[i].token == cache_token) { assert(p->cache[i].state.ok); if (p->cache[i].state.pos == state.pos) { assert(p->cache[i].state.value == state.value); return state; } //print("CACHE UPDATE: %s %d(%d), %d - %d(was: %d)", name, i, p->cache_at, start, state.pos, p->cache[i].state.pos); p->cache[i].start = start; p->cache[i].token = cache_token; p->cache[i].state = state; return state; } } if (p->cache_len <= p->cache_at) 
{ if (p->cache_len == 0) p->cache_len = 1024; else p->cache_len *= 2; p->cache = realloc(p->cache, p->cache_len * sizeof(CacheState)); //print("resizing cache: %d", p->cache_len); } //print("CACHE INSERT: %s %d, %d - %d", name, p->cache_at, start, state.pos); p->cache[p->cache_at].start = start; p->cache[p->cache_at].token = cache_token; p->cache[p->cache_at].state = state; p->cache_at++; return state; } static State cached(Parser* p, const char* name, int pos, int cache_token) { for (int i = p->cache_at - 1; i >= 0; i--) { if (p->cache[i].start == pos && p->cache[i].token == cache_token) { //print("CACHE HIT: %s %d(%d), %d - %d", name, i, p->cache_at, pos, p->cache[i].state.pos); assert(p->cache[i].state.ok); return p->cache[i].state; } } //print("CACHE MISS: %s %d %d", name, pos, p->cache_at); return state_fail(pos); } static State prim_end(Parser* p, int pos, bool ignored) { int c = p->input[pos]; if (c) return state_fail(pos); return state_ok(pos, tlNull); }