Exemple #1
0
static void
S_compose_inner_queries(QueryParser *self, Vector *elems,
                        String *default_field) {
    const int32_t default_occur = QParser_IVARS(self)->default_occur;

    // Generate all queries.  Apply any fields.
    for (uint32_t i = Vec_Get_Size(elems); i--;) {
        String *field = default_field;
        ParserElem *elem = (ParserElem*)Vec_Fetch(elems, i);

        // Apply field.
        if (i > 0) {
            // Field specifier must immediately precede any query.
            ParserElem* maybe_field_elem
                = (ParserElem*)Vec_Fetch(elems, i - 1);
            if (ParserElem_Get_Type(maybe_field_elem) == TOKEN_FIELD) {
                field = (String*)ParserElem_As(maybe_field_elem, STRING);
            }
        }

        if (ParserElem_Get_Type(elem) == TOKEN_STRING) {
            String *text = (String*)ParserElem_As(elem, STRING);
            LeafQuery *query = LeafQuery_new(field, text);
            ParserElem *new_elem
                = ParserElem_new(TOKEN_QUERY, (Obj*)query);
            if (default_occur == MUST) {
                ParserElem_Require(new_elem);
            }
            Vec_Store(elems, i, (Obj*)new_elem);
        }
    }
}
Exemple #2
0
static ParserElem*
S_consume_text(StringIterator *iter) {
    StringIterator *temp = StrIter_Clone(iter);

    while (1) {
        int32_t code_point = StrIter_Next(temp);
        if (code_point == '\\') {
            code_point = StrIter_Next(temp);
            if (code_point == STR_OOB) {
                break;
            }
        }
        else if (code_point == STR_OOB) {
            break;
        }
        else if (StrHelp_is_whitespace(code_point)
            || code_point == '"'
            || code_point == '('
            || code_point == ')'
           ) {
            StrIter_Recede(temp, 1);
            break;
        }
    }

    String *text = StrIter_crop(iter, temp);
    StrIter_Assign(iter, temp);
    DECREF(temp);
    return ParserElem_new(TOKEN_STRING, (Obj*)text);
}
Exemple #3
0
static void
S_balance_parens(QueryParser *self, Vector *elems) {
    UNUSED_VAR(self);
    // Count paren balance, eliminate unbalanced right parens.
    int64_t paren_depth = 0;
    size_t i = 0;
    while (i < Vec_Get_Size(elems)) {
        ParserElem *elem = (ParserElem*)Vec_Fetch(elems, i);
        if (ParserElem_Get_Type(elem) == TOKEN_OPEN_PAREN) {
            paren_depth++;
        }
        else if (ParserElem_Get_Type(elem) == TOKEN_CLOSE_PAREN) {
            if (paren_depth > 0) {
                paren_depth--;
            }
            else {
                Vec_Excise(elems, i, 1);
                continue;
            }
        }
        i++;
    }

    // Insert implicit parens.
    while (paren_depth--) {
        ParserElem *elem = ParserElem_new(TOKEN_CLOSE_PAREN, NULL);
        Vec_Push(elems, (Obj*)elem);
    }
}
Exemple #4
0
static ParserElem*
S_consume_keyword(StringIterator *iter, const char *keyword,
                  size_t keyword_len, int type) {
    if (!StrIter_Starts_With_Utf8(iter, keyword, keyword_len)) {
        return NULL;
    }
    StringIterator *temp = StrIter_Clone(iter);
    StrIter_Advance(temp, keyword_len);
    int32_t lookahead = StrIter_Next(temp);
    if (lookahead == STR_OOB) {
        DECREF(temp);
        return NULL;
    }
    if (StrHelp_is_whitespace(lookahead)
        || lookahead == '"'
        || lookahead == '('
        || lookahead == ')'
        || lookahead == '+'
        || lookahead == '-'
       ) {
        StrIter_Recede(temp, 1);
        StrIter_Assign(iter, temp);
        DECREF(temp);
        return ParserElem_new(type, NULL);
    }
    DECREF(temp);
    return NULL;
}
Exemple #5
0
static void
S_parse_subqueries(QueryParser *self, Vector *elems) {
    const int32_t default_occur = QParser_IVARS(self)->default_occur;
    while (1) {
        // Work from the inside out, starting with the leftmost innermost
        // paren group.
        size_t left = SIZE_MAX;
        size_t right = SIZE_MAX;
        String *field = NULL;
        for (size_t i = 0, max = Vec_Get_Size(elems); i < max; i++) {
            ParserElem *elem = (ParserElem*)Vec_Fetch(elems, i);
            uint32_t type = ParserElem_Get_Type(elem);
            if (type == TOKEN_OPEN_PAREN) {
                left = i;
            }
            else if (type == TOKEN_CLOSE_PAREN) {
                right = i;
                break;
            }
            else if (type == TOKEN_FIELD && i < max - 1) {
                // If a field applies to an enclosing paren, pass it along.
                ParserElem *next_elem = (ParserElem*)Vec_Fetch(elems, i + 1);
                uint32_t next_type = ParserElem_Get_Type(next_elem);
                if (next_type == TOKEN_OPEN_PAREN) {
                    field = (String*)ParserElem_As(elem, STRING);
                }
            }
        }

        // Break out of loop when there are no parens left.
        if (right == SIZE_MAX) {
            break;
        }

        // Create the subquery.
        Vector *sub_elems = Vec_Slice(elems, left + 1, right - left - 1);
        Query *subquery = S_parse_subquery(self, sub_elems, field, true);
        ParserElem *new_elem = ParserElem_new(TOKEN_QUERY, (Obj*)subquery);
        if (default_occur == MUST) {
            ParserElem_Require(new_elem);
        }
        DECREF(sub_elems);

        // Replace the elements used to create the subquery with the subquery
        // itself.
        if (left > 0) {
            ParserElem *maybe_field = (ParserElem*)Vec_Fetch(elems, left - 1);
            uint32_t maybe_field_type = ParserElem_Get_Type(maybe_field);
            if (maybe_field_type == TOKEN_FIELD) {
                left -= 1;
            }
        }
        Vec_Excise(elems, left + 1, right - left);
        Vec_Store(elems, left, (Obj*)new_elem);
    }
}
Exemple #6
0
static ParserElem*
S_consume_field(StringIterator *iter) {
    StringIterator *temp = StrIter_Clone(iter);

    // Field names constructs must start with a letter or underscore.
    int32_t code_point = StrIter_Next(temp);
    if (code_point == STR_OOB) {
        DECREF(temp);
        return NULL;
    }
    if (!(isalpha(code_point) || code_point == '_')) {
        DECREF(temp);
        return NULL;
    }

    // Only alphanumerics and underscores are allowed in field names.
    while (':' != (code_point = StrIter_Next(temp))) {
        if (code_point == STR_OOB) {
            DECREF(temp);
            return NULL;
        }
        if (!(isalnum(code_point) || code_point == '_')) {
            DECREF(temp);
            return NULL;
        }
    }

    // Field name constructs must be followed by something sensible.
    int32_t lookahead = StrIter_Next(temp);
    if (lookahead == STR_OOB) {
        DECREF(temp);
        return NULL;
    }
    if (!(isalnum(lookahead)
          || lookahead == '_'
          || lookahead > 127
          || lookahead == '"'
          || lookahead == '('
         )
       ) {
        DECREF(temp);
        return NULL;
    }

    // Consume string data.
    StrIter_Recede(temp, 2); // Back up over lookahead and colon.
    String *field = StrIter_crop(iter, temp);
    StrIter_Advance(temp, 1); // Skip colon.
    StrIter_Assign(iter, temp);
    DECREF(temp);
    return ParserElem_new(TOKEN_FIELD, (Obj*)field);
}
Exemple #7
0
static ParserElem*
S_consume_quoted_string(StringIterator *iter) {
    StringIterator *temp = StrIter_Clone(iter);

    if (StrIter_Next(temp) != '"') {
        THROW(ERR, "Internal error: expected a quote");
    }

    while (1) {
        int32_t code_point = StrIter_Next(temp);
        if (code_point == STR_OOB || code_point == '"') {
            break;
        }
        else if (code_point == '\\') {
            StrIter_Next(temp);
        }
    }

    String *text = StrIter_crop(iter, temp);
    StrIter_Assign(iter, temp);
    DECREF(temp);
    return ParserElem_new(TOKEN_STRING, (Obj*)text);
}
Exemple #8
0
Vector*
QueryLexer_Tokenize_IMP(QueryLexer *self, String *query_string) {
    QueryLexerIVARS *const ivars = QueryLexer_IVARS(self);

    Vector *elems = Vec_new(0);
    if (!query_string) { return elems; }

    StringIterator *iter = Str_Top(query_string);

    while (StrIter_Has_Next(iter)) {
        ParserElem *elem = NULL;

        if (StrIter_Skip_Whitespace(iter)) {
            // Fast-forward past whitespace.
            continue;
        }

        if (ivars->heed_colons) {
            ParserElem *elem = S_consume_field(iter);
            if (elem) {
                Vec_Push(elems, (Obj*)elem);
            }
        }

        int32_t code_point = StrIter_Next(iter);
        switch (code_point) {
            case '(':
                elem = ParserElem_new(TOKEN_OPEN_PAREN, NULL);
                break;
            case ')':
                elem = ParserElem_new(TOKEN_CLOSE_PAREN, NULL);
                break;
            case '+':
                if (StrIter_Has_Next(iter)
                    && !StrIter_Skip_Whitespace(iter)
                   ) {
                    elem = ParserElem_new(TOKEN_PLUS, NULL);
                }
                else {
                    elem = ParserElem_new(TOKEN_STRING, (Obj*)Str_newf("+"));
                }
                break;
            case '-':
                if (StrIter_Has_Next(iter)
                    && !StrIter_Skip_Whitespace(iter)
                   ) {
                    elem = ParserElem_new(TOKEN_MINUS, NULL);
                }
                else {
                    elem = ParserElem_new(TOKEN_STRING, (Obj*)Str_newf("-"));
                }
                break;
            case '"':
                StrIter_Recede(iter, 1);
                elem = S_consume_quoted_string(iter);
                break;
            case 'O':
                StrIter_Recede(iter, 1);
                elem = S_consume_keyword(iter, "OR", 2, TOKEN_OR);
                if (!elem) {
                    elem = S_consume_text(iter);
                }
                break;
            case 'A':
                StrIter_Recede(iter, 1);
                elem = S_consume_keyword(iter, "AND", 3, TOKEN_AND);
                if (!elem) {
                    elem = S_consume_text(iter);
                }
                break;
            case 'N':
                StrIter_Recede(iter, 1);
                elem = S_consume_keyword(iter, "NOT", 3, TOKEN_NOT);
                if (!elem) {
                    elem = S_consume_text(iter);
                }
                break;
            default:
                StrIter_Recede(iter, 1);
                elem = S_consume_text(iter);
                break;
        }
        Vec_Push(elems, (Obj*)elem);
    }

    DECREF(iter);
    return elems;
}