Example #1
0
/*
 * Scan a token that begins with the non-identifier character `cur`.
 *
 * The function dispatches on `cur`:
 *   - "/" followed by "*", "--" and "#" open comments; the comment is
 *     skipped and the character returned by the skipper is passed to
 *     tokenize();
 *   - digits, and "." followed by a digit, are handed to number();
 *   - quotes are handed to scanner_string(), "{" to scanner_body();
 *   - everything else is matched as a one-, two- or three-character
 *     operator and emitted through scanner_token().
 * A lookahead character that does not extend the operator is pushed back
 * with utf8_putchar().
 *
 * The scanner's `started` flag is set to 1 for operator tokens, cleared on
 * ';', and put back to its value on entry when the characters read turn
 * out to open a comment.
 *
 * Returns the result of the helper that handled the token, EOF when a
 * comment skipper reports EOF (or a negative value for C comments), or
 * LEX_ERROR after reporting through sql_error() when `cur` matches no case.
 */
static
int scanner_symbol(mvc * c, int cur)
{
	struct scanner *scan = &c->scanner;
	const int started_on_entry = scan->started;
	int second = 0;		/* first lookahead character */
	int third = 0;		/* second lookahead character */

	switch (cur) {
	case '/':
		scan->started = 1;
		second = scanner_getc(scan);
		if (second != '*') {
			/* not a comment opener: emit '/' itself */
			utf8_putchar(scan, second);
			return scanner_token(scan, cur);
		}
		/* comment opener: undo the change to the started flag */
		scan->started = started_on_entry;
		cur = skip_c_comment(scan);
		return (cur < 0) ? EOF : tokenize(c, cur);

	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return number(c, cur);

	case '#':
		cur = skip_sql_comment(scan);
		if (cur == EOF)
			return cur;
		return tokenize(c, cur);

	case '\'':
	case '"':
		return scanner_string(c, cur);

	case '{':
		return scanner_body(c);

	case '-':
		scan->started = 1;
		second = scanner_getc(scan);
		if (second == '-') {
			/* "--" comment: undo the change to the started flag */
			scan->started = started_on_entry;
			cur = skip_sql_comment(scan);
			if (cur == EOF)
				return cur;
			return tokenize(c, cur);
		}
		scan->started = 1;
		utf8_putchar(scan, second);
		return scanner_token(scan, cur);

	/* operators that are always a single character */
	case '~':		/* binary not */
	case '^':		/* binary xor */
	case '*':
	case '?':
	case '%':
	case '+':
	case '(':
	case ')':
	case ',':
	case '=':
	case '[':
	case ']':
		scan->started = 1;
		return scanner_token(scan, cur);

	case '&':
		scan->started = 1;
		second = scanner_getc(scan);
		switch (second) {
		case '<':
			/* "&<|" or "&<" */
			third = scanner_getc(scan);
			if (third == '|')
				return scanner_token(scan, GEOM_OVERLAP_OR_BELOW);
			utf8_putchar(scan, third);
			return scanner_token(scan, GEOM_OVERLAP_OR_LEFT);
		case '>':
			return scanner_token(scan, GEOM_OVERLAP_OR_RIGHT);
		case '&':
			return scanner_token(scan, GEOM_OVERLAP);
		default:
			/* binary and */
			utf8_putchar(scan, second);
			return scanner_token(scan, '&');
		}

	case '@':
		scan->started = 1;
		return scanner_token(scan, AT);

	case ';':
		scan->started = 0;
		return scanner_token(scan, SCOLON);

	case '<':
		scan->started = 1;
		second = scanner_getc(scan);
		switch (second) {
		case '=':
		case '>':
			/* "<=" and "<>" */
			return scanner_token(scan, COMPARISON);
		case '<':
			/* "<<=", "<<|" or "<<" */
			third = scanner_getc(scan);
			if (third == '=')
				return scanner_token(scan, LEFT_SHIFT_ASSIGN);
			if (third == '|')
				return scanner_token(scan, GEOM_BELOW);
			utf8_putchar(scan, third);
			return scanner_token(scan, LEFT_SHIFT);
		case '-':
			/* "<->", otherwise a bare '<' with both lookaheads returned */
			third = scanner_getc(scan);
			if (third == '>')
				return scanner_token(scan, GEOM_DIST);
			utf8_putchar(scan, third);
			utf8_putchar(scan, second);
			return scanner_token(scan, COMPARISON);
		default:
			utf8_putchar(scan, second);
			return scanner_token(scan, COMPARISON);
		}

	case '>':
		scan->started = 1;
		second = scanner_getc(scan);
		if (second == '=')
			return scanner_token(scan, COMPARISON);
		if (second != '>') {
			/* bare '>' */
			utf8_putchar(scan, second);
			return scanner_token(scan, COMPARISON);
		}
		/* ">>=" or ">>" */
		third = scanner_getc(scan);
		if (third == '=')
			return scanner_token(scan, RIGHT_SHIFT_ASSIGN);
		utf8_putchar(scan, third);
		return scanner_token(scan, RIGHT_SHIFT);

	case '.':
		scan->started = 1;
		second = scanner_getc(scan);
		/* the lookahead is pushed back on both paths */
		utf8_putchar(scan, second);
		if (iswdigit(second))
			return number(c, '.');
		return scanner_token(scan, '.');

	case '|':		/* binary or, or string concat */
		scan->started = 1;
		second = scanner_getc(scan);
		switch (second) {
		case '|':
			return scanner_token(scan, CONCATSTRING);
		case '&':
			/* "|&>", otherwise a bare '|' */
			third = scanner_getc(scan);
			if (third == '>')
				return scanner_token(scan, GEOM_OVERLAP_OR_ABOVE);
			utf8_putchar(scan, third);
			utf8_putchar(scan, second);
			return scanner_token(scan, '|');
		case '>':
			/* "|>>", otherwise a bare '|' */
			third = scanner_getc(scan);
			if (third == '>')
				return scanner_token(scan, GEOM_ABOVE);
			utf8_putchar(scan, third);
			utf8_putchar(scan, second);
			return scanner_token(scan, '|');
		default:
			utf8_putchar(scan, second);
			return scanner_token(scan, '|');
		}

	default:
		break;
	}

	/* no case matched: `cur` still holds the character passed in */
	(void)sql_error( c, 3, "unexpected symbol (%lc)", (wint_t) cur);
	return LEX_ERROR;
}
Example #2
0
/*
 * Fetch the next token from `tokz` into `tok`.
 *
 * `tok` is first released with tok_free().  If a token is pending in
 * tokz->ungettok it is moved into `tok` and the slot is marked invalid;
 * otherwise characters are read until a token can be produced.  Whitespace
 * other than newline is skipped, a backslash consumes the rest of the line
 * as a continuation, and comments are either scanned into a token or
 * skipped depending on TOKZ_READ_COMMENTS.
 *
 * Returns TRUE when `tok` has been filled in.  Returns FALSE when a scan
 * helper reports an error (after tokz_warn_error()), when non-whitespace
 * follows a backslash and TOKZ_ERROR_TOLERANT is not set, or when EOF is
 * hit while consuming a backslash continuation.
 */
bool tokz_get_token(Tokenizer *tokz, Token *tok)
{
    int c, c2, e;

    if(!(tokz->flags&TOKZ_READ_FROM_BUFFER)){
        assert(tokz->file!=NULL);
    }

    tok_free(tok);

    /* A pushed-back token takes priority over new input. */
    if(!TOK_IS_INVALID(&(tokz->ungettok))){
        *tok=tokz->ungettok;
        tokz->ungettok.type=TOK_INVALID;
        return TRUE;
    }

    for(;;){
        e=0;

        /* Skip whitespace, but stop on newline and EOF: both are handled
         * by the switch below. */
        for(;;){
            c=GETCH();
            if(c=='\n' || c==EOF || !isspace(c))
                break;
        }

        tok->line=tokz->line;

        switch(c){
        case EOF:
            TOK_SET_OP(tok, OP_EOF);
            return TRUE;

        case '\n':
            INC_LINE();

            if(!(tokz->flags&TOKZ_IGNORE_NEXTLINE)){
                TOK_SET_OP(tok, OP_NEXTLINE);
                return TRUE;
            }

            continue;

        case '\\':
            /* Line continuation: consume through the newline.  The first
             * non-whitespace character seen is reported once. */
            for(;;){
                c=GETCH();

                if(c==EOF){
                    TOK_SET_OP(tok, OP_EOF);
                    return FALSE;
                }

                if(e==0 && !isspace(c)){
                    e=E_TOKZ_EOL_EXPECTED;
                    tokz_warn_error(tokz, tokz->line, e);
                    if(!(tokz->flags&TOKZ_ERROR_TOLERANT))
                        return FALSE;
                }

                if(c=='\n')
                    break;
            }

            INC_LINE();
            continue;

        case '\"':
            e=scan_string(tok, tokz, TRUE);
            break;

        case '\'':
            e=scan_char(tok, tokz);
            break;

        case '#':
            if(tokz->flags&TOKZ_READ_COMMENTS){
                e=scan_line_comment(tok, tokz);
                break;
            }

            e=skip_line_comment(tokz);
            if(e==0)
                continue;

            break;

        case '/':
            c2=GETCH();

            if(c2=='*'){
                if(tokz->flags&TOKZ_READ_COMMENTS){
                    e=scan_c_comment(tok, tokz);
                    break;
                }

                e=skip_c_comment(tokz);
                if(e==0)
                    continue;

                break;
            }

            if(c2=='='){
                TOK_SET_OP(tok, OP_AS_DIV);
            }else{
                /* Plain division: the lookahead is not part of it. */
                UNGETCH(c2);
                TOK_SET_OP(tok, OP_DIV);
            }
            return TRUE;

        default:
            if(c=='-' || c=='+' || ('0'<=c && c<='9')){
                e=scan_number(tok, tokz, c);
            }else if(START_IDENT(c)){
                e=scan_identifier(tok, tokz, c);
            }else{
                e=scan_op(tok, tokz, c);
            }
            break;
        }

        /* Reached only by the cases that ran a scan/skip helper. */
        if(e){
            tokz_warn_error(tokz, tokz->line, e);
            return FALSE;
        }

        return TRUE;
    }
}