/*
 * Handle the character `cur` as the start of a symbol-like token.
 *
 * Dispatch summary:
 *   - digits are passed to number(), quotes to scanner_string() and '{' to
 *     scanner_body();
 *   - '#', "--" and slash-star hand over to the comment skippers; the value
 *     they return is passed on to tokenize();
 *   - operator characters are emitted through scanner_token().  Operators
 *     made of several characters are recognised by reading ahead with
 *     scanner_getc(); characters that turn out not to belong to the
 *     operator are pushed back with utf8_putchar().
 *
 * lc->started is set to 1 for operator characters, to 0 after ';', and is
 * put back to the value it had on entry before a comment is skipped.
 *
 * Returns whatever the helper that produced the token returns, EOF when a
 * comment skipper reports end of input, or LEX_ERROR (after reporting the
 * character through sql_error()) when `cur` matches none of the cases.
 */
static int
scanner_symbol(mvc * c, int cur)
{
    struct scanner *lc = &c->scanner;
    const int started_on_entry = lc->started;
    int second = 0;     /* first look-ahead character */
    int third = 0;      /* second look-ahead character */
    int second_is_digit = 0;

    /* Numeric literals starting with a digit. */
    if (cur >= '0' && cur <= '9')
        return number(c, cur);

    switch (cur) {
    case '/':
        lc->started = 1;
        second = scanner_getc(lc);
        if (second != '*') {
            /* Plain '/': give the look-ahead back. */
            utf8_putchar(lc, second);
            return scanner_token(lc, cur);
        }
        /* Slash-star: skipping a comment must not alter `started`. */
        lc->started = started_on_entry;
        cur = skip_c_comment(lc);
        if (cur < 0)
            return EOF;
        return tokenize(c, cur);

    case '#':
        cur = skip_sql_comment(lc);
        if (cur == EOF)
            return cur;
        return tokenize(c, cur);

    case '\'':
    case '"':
        return scanner_string(c, cur);

    case '{':
        return scanner_body(c);

    case '-':
        lc->started = 1;
        second = scanner_getc(lc);
        if (second == '-') {
            /* "--": skipping a comment must not alter `started`. */
            lc->started = started_on_entry;
            cur = skip_sql_comment(lc);
            if (cur == EOF)
                return cur;
            return tokenize(c, cur);
        }
        /* Plain '-': the flag is assigned again after the read-ahead. */
        lc->started = 1;
        utf8_putchar(lc, second);
        return scanner_token(lc, cur);

    case '~':   /* binary not */
    case '^':   /* binary xor */
    case '*':
    case '?':
    case '%':
    case '+':
    case '(':
    case ')':
    case ',':
    case '=':
    case '[':
    case ']':
        /* Single-character tokens: the character is its own token value. */
        lc->started = 1;
        return scanner_token(lc, cur);

    case '&':
        lc->started = 1;
        second = scanner_getc(lc);
        switch (second) {
        case '<':
            third = scanner_getc(lc);
            if (third == '|')
                return scanner_token(lc, GEOM_OVERLAP_OR_BELOW);    /* &<| */
            utf8_putchar(lc, third);
            return scanner_token(lc, GEOM_OVERLAP_OR_LEFT);         /* &<  */
        case '>':
            return scanner_token(lc, GEOM_OVERLAP_OR_RIGHT);        /* &>  */
        case '&':
            return scanner_token(lc, GEOM_OVERLAP);                 /* &&  */
        default:
            /* binary and */
            utf8_putchar(lc, second);
            return scanner_token(lc, '&');
        }

    case '@':
        lc->started = 1;
        return scanner_token(lc, AT);

    case ';':
        lc->started = 0;
        return scanner_token(lc, SCOLON);

    case '<':
        lc->started = 1;
        second = scanner_getc(lc);
        switch (second) {
        case '=':   /* <= */
        case '>':   /* <> */
            return scanner_token(lc, COMPARISON);
        case '<':
            third = scanner_getc(lc);
            if (third == '=')
                return scanner_token(lc, LEFT_SHIFT_ASSIGN);        /* <<= */
            if (third == '|')
                return scanner_token(lc, GEOM_BELOW);               /* <<| */
            utf8_putchar(lc, third);
            return scanner_token(lc, LEFT_SHIFT);                   /* <<  */
        case '-':
            third = scanner_getc(lc);
            if (third == '>')
                return scanner_token(lc, GEOM_DIST);                /* <-> */
            /* Not "<->": return both look-ahead characters, last read first. */
            utf8_putchar(lc, third);
            utf8_putchar(lc, second);
            return scanner_token(lc, COMPARISON);
        default:
            utf8_putchar(lc, second);
            return scanner_token(lc, COMPARISON);                   /* <   */
        }

    case '>':
        lc->started = 1;
        second = scanner_getc(lc);
        if (second == '=')
            return scanner_token(lc, COMPARISON);                   /* >=  */
        if (second != '>') {
            utf8_putchar(lc, second);
            return scanner_token(lc, COMPARISON);                   /* >   */
        }
        third = scanner_getc(lc);
        if (third == '=')
            return scanner_token(lc, RIGHT_SHIFT_ASSIGN);           /* >>= */
        utf8_putchar(lc, third);
        return scanner_token(lc, RIGHT_SHIFT);                      /* >>  */

    case '.':
        lc->started = 1;
        second = scanner_getc(lc);
        second_is_digit = iswdigit(second);
        /* The look-ahead is returned to the input in either case. */
        utf8_putchar(lc, second);
        if (second_is_digit)
            return number(c, '.');
        return scanner_token(lc, '.');

    case '|':   /* binary or or string concat */
        lc->started = 1;
        second = scanner_getc(lc);
        switch (second) {
        case '|':
            return scanner_token(lc, CONCATSTRING);                 /* ||  */
        case '&':
            third = scanner_getc(lc);
            if (third == '>')
                return scanner_token(lc, GEOM_OVERLAP_OR_ABOVE);    /* |&> */
            break;
        case '>':
            third = scanner_getc(lc);
            if (third == '>')
                return scanner_token(lc, GEOM_ABOVE);               /* |>> */
            break;
        default:
            utf8_putchar(lc, second);
            return scanner_token(lc, '|');
        }
        /* Incomplete three-character operator: undo both reads, emit '|'. */
        utf8_putchar(lc, third);
        utf8_putchar(lc, second);
        return scanner_token(lc, '|');

    default:
        break;
    }

    (void)sql_error(c, 3, "unexpected symbol (%lc)", (wint_t) cur);
    return LEX_ERROR;
}
/*
 * Fetch the next token from `tokz` into `tok`.
 *
 * `tok` is released with tok_free() first.  If tokz->ungettok holds a valid
 * token, that token is copied into `tok`, the slot is marked TOK_INVALID and
 * no input is read.  Otherwise whitespace other than '\n' is skipped and the
 * first remaining character selects what happens:
 *
 *   EOF       -> OP_EOF
 *   '\n'      -> OP_NEXTLINE, or skipped when TOKZ_IGNORE_NEXTLINE is set
 *   '\\'      -> the rest of the line is consumed and scanning restarts
 *   '#'       -> line comment, scanned into `tok` when TOKZ_READ_COMMENTS is
 *                set, otherwise skipped
 *   '/'       -> OP_AS_DIV for "/=", a C-style comment for slash-star,
 *                OP_DIV otherwise
 *   '"', '\'' -> scan_string() / scan_char()
 *   digit, '-', '+' -> scan_number()
 *   other     -> scan_identifier() if START_IDENT() accepts it, else scan_op()
 *
 * Returns TRUE when `tok` was filled in.  Returns FALSE when a scanner or
 * skipper reports a non-zero error code (reported through tokz_warn_error()),
 * when a non-whitespace character follows a backslash and
 * TOKZ_ERROR_TOLERANT is not set, or when EOF is reached while consuming a
 * backslash continuation (in that case `tok` is set to OP_EOF).
 */
bool tokz_get_token(Tokenizer *tokz, Token *tok)
{
    int ch;
    int peek;
    int err;

    if (!(tokz->flags&TOKZ_READ_FROM_BUFFER))
        assert(tokz->file!=NULL);

    tok_free(tok);

    /* Hand out the token stored in the unget slot before reading input. */
    if (!TOK_IS_INVALID(&(tokz->ungettok))) {
        *tok = tokz->ungettok;
        tokz->ungettok.type = TOK_INVALID;
        return TRUE;
    }

    for (;;) {
        err = 0;

        /* Skip whitespace; newline and EOF are handled by the switch. */
        for (;;) {
            ch = GETCH();
            if (ch == '\n' || ch == EOF || !isspace(ch))
                break;
        }

        tok->line = tokz->line;

        switch (ch) {
        case EOF:
            TOK_SET_OP(tok, OP_EOF);
            return TRUE;

        case '\n':
            INC_LINE();
            if (tokz->flags & TOKZ_IGNORE_NEXTLINE)
                continue;
            TOK_SET_OP(tok, OP_NEXTLINE);
            return TRUE;

        case '\\':
            /* Consume up to and including the newline after the backslash. */
            for (;;) {
                ch = GETCH();
                if (ch == EOF) {
                    TOK_SET_OP(tok, OP_EOF);
                    return FALSE;
                }
                /* Only the first stray character on the line is reported. */
                if (!isspace(ch) && err == 0) {
                    err = E_TOKZ_EOL_EXPECTED;
                    tokz_warn_error(tokz, tokz->line, err);
                    if (!(tokz->flags & TOKZ_ERROR_TOLERANT))
                        return FALSE;
                }
                if (ch == '\n')
                    break;
            }
            INC_LINE();
            continue;

        case '#':
            if (tokz->flags & TOKZ_READ_COMMENTS) {
                err = scan_line_comment(tok, tokz);
                break;
            }
            err = skip_line_comment(tokz);
            if (err)
                break;
            /* Comment skipped without error: look for the next token. */
            continue;

        case '/':
            peek = GETCH();
            if (peek == '=') {
                TOK_SET_OP(tok, OP_AS_DIV);
                return TRUE;
            }
            if (peek != '*') {
                UNGETCH(peek);
                TOK_SET_OP(tok, OP_DIV);
                return TRUE;
            }
            /* Slash-star: start of a C-style comment. */
            if (tokz->flags & TOKZ_READ_COMMENTS) {
                err = scan_c_comment(tok, tokz);
                break;
            }
            err = skip_c_comment(tokz);
            if (err)
                break;
            /* Comment skipped without error: look for the next token. */
            continue;

        case '\"':
            err = scan_string(tok, tokz, TRUE);
            break;

        case '\'':
            err = scan_char(tok, tokz);
            break;

        default:
            if (('0' <= ch && ch <= '9') || ch == '-' || ch == '+')
                err = scan_number(tok, tokz, ch);
            else if (START_IDENT(ch))
                err = scan_identifier(tok, tokz, ch);
            else
                err = scan_op(tok, tokz, ch);
            break;
        }

        /* Reached only via `break` above: a scanner ran and set `err`. */
        if (err) {
            tokz_warn_error(tokz, tokz->line, err);
            return FALSE;
        }
        return TRUE;
    }
}