Example #1
0
static String*
S_unescape(QueryParser *self, String *orig, CharBuf *buf) {
    StringIterator *iter = Str_Top(orig);
    int32_t code_point;
    UNUSED_VAR(self);

    CB_Set_Size(buf, 0);
    CB_Grow(buf, Str_Get_Size(orig) + 4);

    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point == '\\') {
            int32_t next_code_point = StrIter_Next(iter);
            if (next_code_point == ':'
                || next_code_point == '"'
                || next_code_point == '\\'
               ) {
                CB_Cat_Char(buf, next_code_point);
            }
            else {
                CB_Cat_Char(buf, code_point);
                if (next_code_point != STRITER_DONE) {
                    CB_Cat_Char(buf, next_code_point);
                }
            }
        }
        else {
            CB_Cat_Char(buf, code_point);
        }
    }

    DECREF(iter);
    return CB_To_String(buf);
}
Example #2
0
static bool
S_cb_equals(CharBuf *cb, String *other) {
    String *string = CB_To_String(cb);
    bool retval = Str_Equals(string, (Obj*)other);
    DECREF(string);
    return retval;
}
Example #3
0
Obj*
TextTermStepper_Get_Value_IMP(TextTermStepper *self) {
    TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
    if (ivars->string == NULL) {
        ivars->string = CB_To_String((CharBuf*)ivars->value);
    }
    return (Obj*)ivars->string;
}
Example #4
0
static String*
S_encode_entities(String *text, CharBuf *buf) {
    StringIterator *iter = Str_Top(text);
    size_t space = 0;
    const int MAX_ENTITY_BYTES = 9; // &#dddddd;

    // Scan first so that we only allocate once.
    int32_t code_point;
    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point > 127
                || (!isgraph(code_point) && !isspace(code_point))
                || code_point == '<'
                || code_point == '>'
                || code_point == '&'
                || code_point == '"'
           ) {
            space += MAX_ENTITY_BYTES;
        }
        else {
            space += 1;
        }
    }

    CB_Grow(buf, space);
    CB_Set_Size(buf, 0);
    DECREF(iter);
    iter = Str_Top(text);
    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point > 127
                || (!isgraph(code_point) && !isspace(code_point))
           ) {
            CB_catf(buf, "&#%u32;", code_point);
        }
        else if (code_point == '<') {
            CB_Cat_Trusted_Utf8(buf, "&lt;", 4);
        }
        else if (code_point == '>') {
            CB_Cat_Trusted_Utf8(buf, "&gt;", 4);
        }
        else if (code_point == '&') {
            CB_Cat_Trusted_Utf8(buf, "&amp;", 5);
        }
        else if (code_point == '"') {
            CB_Cat_Trusted_Utf8(buf, "&quot;", 6);
        }
        else {
            CB_Cat_Char(buf, code_point);
        }
    }

    DECREF(iter);
    return CB_To_String(buf);
}
Example #5
0
// Surround a smiley with lots of whitespace.
static String*
S_smiley_with_whitespace(int *num_spaces_ptr) {
    int32_t spaces[] = {
        ' ',    '\t',   '\r',   '\n',   0x000B, 0x000C, 0x000D, 0x0085,
        0x00A0, 0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
        0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029, 0x202F,
        0x205F, 0x3000
    };
    int num_spaces = sizeof(spaces) / sizeof(uint32_t);

    CharBuf *buf = CB_new(0);
    for (int i = 0; i < num_spaces; i++) { CB_Cat_Char(buf, spaces[i]); }
    CB_Cat_Char(buf, smiley_cp);
    for (int i = 0; i < num_spaces; i++) { CB_Cat_Char(buf, spaces[i]); }

    String *retval = CB_To_String(buf);
    if (num_spaces_ptr) { *num_spaces_ptr = num_spaces; }

    DECREF(buf);
    return retval;
}
Example #6
0
static Hash*
S_extract_tv_cache(ByteBuf *field_buf) {
    Hash       *tv_cache  = Hash_new(0);
    const char *tv_string = BB_Get_Buf(field_buf);
    int32_t     num_terms = NumUtil_decode_c32(&tv_string);
    CharBuf    *text_buf  = CB_new(0);

    // Read the number of highlightable terms in the field.
    for (int32_t i = 0; i < num_terms; i++) {
        size_t   overlap = NumUtil_decode_c32(&tv_string);
        size_t   len     = NumUtil_decode_c32(&tv_string);

        // Decompress the term text.
        CB_Set_Size(text_buf, overlap);
        CB_Cat_Trusted_Utf8(text_buf, tv_string, len);
        tv_string += len;

        // Get positions & offsets string.
        const char *bookmark_ptr  = tv_string;
        int32_t     num_positions = NumUtil_decode_c32(&tv_string);
        while (num_positions--) {
            // Leave nums compressed to save a little mem.
            NumUtil_skip_cint(&tv_string);
            NumUtil_skip_cint(&tv_string);
            NumUtil_skip_cint(&tv_string);
        }
        len = tv_string - bookmark_ptr;

        // Store the $text => $posdata pair in the output hash.
        String *text = CB_To_String(text_buf);
        Hash_Store(tv_cache, (Obj*)text,
                   (Obj*)BB_new_bytes(bookmark_ptr, len));
        DECREF(text);
    }
    DECREF(text_buf);

    return tv_cache;
}
Example #7
0
static void
test_iterator(TestBatchRunner *runner) {
    static const int32_t code_points[] = {
        0x41,
        0x7F,
        0x80,
        0x7FF,
        0x800,
        0xFFFF,
        0x10000,
        0x10FFFF
    };
    static size_t num_code_points
        = sizeof(code_points) / sizeof(code_points[0]);

    CharBuf *buf = CB_new(0);
    for (size_t i = 0; i < num_code_points; ++i) {
        CB_Cat_Char(buf, code_points[i]);
    }
    String *string = CB_To_String(buf);

    {
        StringIterator *iter = Str_Top(string);

        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)iter),
                  "StringIterator equal to self");
        TEST_FALSE(runner, StrIter_Equals(iter, (Obj*)CFISH_TRUE),
                   "StringIterator not equal non-iterators");

        DECREF(iter);
    }

    {
        StringIterator *top  = Str_Top(string);
        StringIterator *tail = Str_Tail(string);

        TEST_INT_EQ(runner, StrIter_Compare_To(top, (Obj*)tail), -1,
                    "Compare_To top < tail");
        TEST_INT_EQ(runner, StrIter_Compare_To(tail, (Obj*)top), 1,
                    "Compare_To tail > top");
        TEST_INT_EQ(runner, StrIter_Compare_To(top, (Obj*)top), 0,
                    "Compare_To top == top");

        StringIterator *clone = StrIter_Clone(top);
        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)top), "Clone");

        StrIter_Assign(clone, tail);
        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)tail), "Assign");

        String *other = Str_newf("Other string");
        StringIterator *other_iter = Str_Top(other);
        TEST_FALSE(runner, StrIter_Equals(other_iter, (Obj*)tail),
                   "Equals returns false for different strings");
        StrIter_Assign(clone, other_iter);
        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)other_iter),
                  "Assign iterator with different string");

        DECREF(other);
        DECREF(other_iter);
        DECREF(clone);
        DECREF(top);
        DECREF(tail);
    }

    {
        StringIterator *iter = Str_Top(string);

        for (size_t i = 0; i < num_code_points; ++i) {
            TEST_TRUE(runner, StrIter_Has_Next(iter), "Has_Next %d", i);
            int32_t code_point = StrIter_Next(iter);
            TEST_INT_EQ(runner, code_point, code_points[i], "Next %d", i);
        }

        TEST_TRUE(runner, !StrIter_Has_Next(iter),
                  "Has_Next at end of string");
        TEST_INT_EQ(runner, StrIter_Next(iter), STR_OOB,
                    "Next at end of string");

        StringIterator *tail = Str_Tail(string);
        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)tail), "Equals tail");

        DECREF(tail);
        DECREF(iter);
    }

    {
        StringIterator *iter = Str_Tail(string);

        for (size_t i = num_code_points; i--;) {
            TEST_TRUE(runner, StrIter_Has_Prev(iter), "Has_Prev %d", i);
            int32_t code_point = StrIter_Prev(iter);
            TEST_INT_EQ(runner, code_point, code_points[i], "Prev %d", i);
        }

        TEST_TRUE(runner, !StrIter_Has_Prev(iter),
                  "Has_Prev at end of string");
        TEST_INT_EQ(runner, StrIter_Prev(iter), STR_OOB,
                    "Prev at start of string");

        StringIterator *top = Str_Top(string);
        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)top), "Equals top");

        DECREF(top);
        DECREF(iter);
    }

    {
        StringIterator *iter = Str_Top(string);

        StrIter_Next(iter);
        TEST_INT_EQ(runner, StrIter_Advance(iter, 2), 2,
                    "Advance returns number of code points");
        TEST_INT_EQ(runner, StrIter_Next(iter), code_points[3],
                    "Advance works");
        TEST_INT_EQ(runner,
                    StrIter_Advance(iter, 1000000), num_code_points - 4,
                    "Advance past end of string");

        StrIter_Prev(iter);
        TEST_INT_EQ(runner, StrIter_Recede(iter, 2), 2,
                    "Recede returns number of code points");
        TEST_INT_EQ(runner, StrIter_Prev(iter), code_points[num_code_points-4],
                    "Recede works");
        TEST_INT_EQ(runner, StrIter_Recede(iter, 1000000), num_code_points - 4,
                    "Recede past start of string");

        DECREF(iter);
    }

    DECREF(string);
    DECREF(buf);
}