示例#1
0
文件: QueryParser.c 项目: kidaa/lucy
/* Copy `orig` into `buf`, dropping the backslash from the escape sequences
 * `\:`, `\"` and `\\`.  A backslash followed by any other code point is
 * copied through together with that code point; a trailing lone backslash
 * is copied as-is.  Any previous content of `buf` is discarded.  Returns the
 * result of CB_To_String() on `buf`.
 */
static String*
S_unescape(QueryParser *self, String *orig, CharBuf *buf) {
    StringIterator *cursor = Str_Top(orig);
    int32_t cp;
    UNUSED_VAR(self);

    // Reset the scratch buffer and reserve room up front.
    CB_Set_Size(buf, 0);
    CB_Grow(buf, Str_Get_Size(orig) + 4);

    for (cp = StrIter_Next(cursor);
         cp != STRITER_DONE;
         cp = StrIter_Next(cursor)
        ) {
        if (cp != '\\') {
            // Ordinary code point: copy verbatim.
            CB_Cat_Char(buf, cp);
        }
        else {
            // Look at whatever follows the backslash.
            int32_t escaped = StrIter_Next(cursor);
            switch (escaped) {
                case ':':
                case '"':
                case '\\':
                    // Recognized escape: keep only the escaped character.
                    CB_Cat_Char(buf, escaped);
                    break;
                default:
                    // Not an escape we know: keep the backslash, and the
                    // following code point unless the string ended.
                    CB_Cat_Char(buf, cp);
                    if (escaped != STRITER_DONE) {
                        CB_Cat_Char(buf, escaped);
                    }
                    break;
            }
        }
    }

    DECREF(cursor);
    return CB_To_String(buf);
}
示例#2
0
/* Exercise the CharBuf concatenation methods.  Every case must end up with a
 * buffer that compares equal to "a" followed by `smiley`.
 */
static void
test_Cat(TestBatchRunner *runner) {
    String  *expected = Str_newf("a%s", smiley);
    CharBuf *cb;

    // Cat: append the whole expected String to an empty buffer.
    cb = S_get_cb("");
    CB_Cat(cb, expected);
    TEST_TRUE(runner, S_cb_equals(cb, expected), "Cat");
    DECREF(cb);

    // Cat_Char: append a single code point to "a".
    cb = S_get_cb("a");
    CB_Cat_Char(cb, 0x263A);
    TEST_TRUE(runner, S_cb_equals(cb, expected), "Cat_Char");
    DECREF(cb);

    // Cat_Utf8: append the smiley's UTF-8 bytes to "a".
    cb = S_get_cb("a");
    CB_Cat_Utf8(cb, smiley, smiley_len);
    TEST_TRUE(runner, S_cb_equals(cb, expected), "Cat_Utf8");
    DECREF(cb);

    // Cat_Trusted_Utf8: same bytes through the "trusted" entry point.
    cb = S_get_cb("a");
    CB_Cat_Trusted_Utf8(cb, smiley, smiley_len);
    TEST_TRUE(runner, S_cb_equals(cb, expected), "Cat_Trusted_Utf8");
    DECREF(cb);

    DECREF(expected);
}
示例#3
0
/* Exercise the CharBuf concatenation methods.  Every case must end up with a
 * buffer that compares equal to "a" followed by `smiley`.
 */
static void
test_Cat(TestBatch *batch) {
    CharBuf *expected = CB_newf("a%s", smiley);
    CharBuf *cb;

    // Cat: append the whole expected CharBuf to an empty buffer.
    cb = S_get_cb("");
    CB_Cat(cb, expected);
    TEST_TRUE(batch, CB_Equals(expected, (Obj*)cb), "Cat");
    DECREF(cb);

    // Cat_Char: append a single code point to "a".
    cb = S_get_cb("a");
    CB_Cat_Char(cb, 0x263A);
    TEST_TRUE(batch, CB_Equals(expected, (Obj*)cb), "Cat_Char");
    DECREF(cb);

    // Cat_Str: append the smiley's bytes to "a".
    cb = S_get_cb("a");
    CB_Cat_Str(cb, smiley, smiley_len);
    TEST_TRUE(batch, CB_Equals(expected, (Obj*)cb), "Cat_Str");
    DECREF(cb);

    // Cat_Trusted_Str: same bytes through the "trusted" entry point.
    cb = S_get_cb("a");
    CB_Cat_Trusted_Str(cb, smiley, smiley_len);
    TEST_TRUE(batch, CB_Equals(expected, (Obj*)cb), "Cat_Trusted_Str");
    DECREF(cb);

    DECREF(expected);
}
示例#4
0
/* Build a String made of `length` code points, each one supplied by
 * S_random_code_point().  The temporary CharBuf is released before
 * returning; the String yielded from it is returned to the caller.
 */
String*
TestUtils_random_string(size_t length) {
    CharBuf *scratch = CB_new(length);
    for (size_t remaining = length; remaining > 0; remaining--) {
        CB_Cat_Char(scratch, S_random_code_point());
    }
    String *result = CB_Yield_String(scratch);
    DECREF(scratch);
    return result;
}
示例#5
0
/* Build a String consisting of `smiley_cp` surrounded on both sides by every
 * code point in the whitespace table below.
 *
 * @param num_spaces_ptr If non-NULL, receives the number of whitespace code
 * points placed on EACH side of the smiley.
 * @return the String produced by CB_To_String() on the assembled buffer.
 */
static String*
S_smiley_with_whitespace(int *num_spaces_ptr) {
    static const int32_t spaces[] = {
        ' ',    '\t',   '\r',   '\n',   0x000B, 0x000C, 0x000D, 0x0085,
        0x00A0, 0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
        0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029, 0x202F,
        0x205F, 0x3000
    };
    // Derive the count from the array's own element type so that it stays
    // correct if the element type ever changes.
    int num_spaces = (int)(sizeof(spaces) / sizeof(spaces[0]));

    CharBuf *buf = CB_new(0);
    for (int i = 0; i < num_spaces; i++) { CB_Cat_Char(buf, spaces[i]); }
    CB_Cat_Char(buf, smiley_cp);
    for (int i = 0; i < num_spaces; i++) { CB_Cat_Char(buf, spaces[i]); }

    String *retval = CB_To_String(buf);
    if (num_spaces_ptr) { *num_spaces_ptr = num_spaces; }

    DECREF(buf);
    return retval;
}
示例#6
0
/* Encode `text` for inclusion in markup.
 *
 * Code points above 127, and ASCII code points that are neither graphic nor
 * whitespace, are written as numeric entities via the "&#%u32;" pattern;
 * '<', '>', '&' and '"' are written as "&lt;", "&gt;", "&amp;" and "&quot;".
 * Everything else is copied through unchanged.
 *
 * @param text The String to encode.
 * @param buf Scratch CharBuf; its previous content is discarded.
 * @return the result of CB_To_String() on `buf`.
 */
static String*
S_encode_entities(String *text, CharBuf *buf) {
    StringIterator *iter = Str_Top(text);
    size_t space = 0;
    // Worst case for a single code point.  The largest Unicode code point,
    // U+10FFFF, is 1114111 in decimal (7 digits), so the longest numeric
    // entity "&#1114111;" occupies 10 bytes.  (Assumes %u32 renders in
    // decimal.)  The named entities are all shorter than this.
    const int MAX_ENTITY_BYTES = 10; // &#ddddddd;

    // Scan first so that we only allocate once.
    int32_t code_point;
    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point > 127
                || (!isgraph(code_point) && !isspace(code_point))
                || code_point == '<'
                || code_point == '>'
                || code_point == '&'
                || code_point == '"'
           ) {
            space += MAX_ENTITY_BYTES;
        }
        else {
            space += 1;
        }
    }

    CB_Grow(buf, space);
    CB_Set_Size(buf, 0);

    // Second pass: restart from the top and emit the encoded text.
    DECREF(iter);
    iter = Str_Top(text);
    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point > 127
                || (!isgraph(code_point) && !isspace(code_point))
           ) {
            // The ctype calls above only ever see values <= 127 because the
            // `> 127` test short-circuits first.
            CB_catf(buf, "&#%u32;", (uint32_t)code_point);
        }
        else if (code_point == '<') {
            CB_Cat_Trusted_Utf8(buf, "&lt;", 4);
        }
        else if (code_point == '>') {
            CB_Cat_Trusted_Utf8(buf, "&gt;", 4);
        }
        else if (code_point == '&') {
            CB_Cat_Trusted_Utf8(buf, "&amp;", 5);
        }
        else if (code_point == '"') {
            CB_Cat_Trusted_Utf8(buf, "&quot;", 6);
        }
        else {
            CB_Cat_Char(buf, code_point);
        }
    }

    DECREF(iter);
    return CB_To_String(buf);
}
示例#7
0
/* Return a String holding this Method's name with every underscore removed.
 */
String*
Method_Host_Name_IMP(Method *self) {
    CharBuf        *stripped = CB_new(Str_Get_Size(self->name));
    StringIterator *cursor   = StrIter_new(self->name, 0);

    for (;;) {
        int32_t cp = StrIter_Next(cursor);
        if (cp == STR_OOB) { break; }   // Ran off the end of the name.
        if (cp == '_')     { continue; }
        CB_Cat_Char(stripped, cp);
    }

    String *result = CB_Yield_String(stripped);
    DECREF(stripped);
    DECREF(cursor);
    return result;
}
示例#8
0
/* Round-trip tests for JSON string escaping.
 *
 * The first loop walks `control_escapes` (NULL-terminated; index i holds the
 * expected escape for code point i).  The second loop walks the parallel
 * NULL-terminated arrays `quote_escapes_source` / `quote_escapes_json`.  In
 * both loops the source string is encoded with Json_to_json(), compared to
 * the expected quoted form, then decoded with Json_from_json() and compared
 * to the original.
 */
static void
test_escapes(TestBatch *batch) {
    CharBuf *string      = CB_new(10);
    CharBuf *json_wanted = CB_new(10);

    for (int i = 0; control_escapes[i] != NULL; i++) {
        CB_Truncate(string, 0);
        CB_Cat_Char(string, i);
        const char *escaped = control_escapes[i];
        CharBuf    *json    = Json_to_json((Obj*)string);
        CharBuf    *decoded = (CharBuf*)Json_from_json(json);

        CB_setf(json_wanted, "\"%s\"", escaped);
        // NOTE(review): json is used here before the NULL check below.
        CB_Trim(json);
        TEST_TRUE(batch, json != NULL && CB_Equals(json_wanted, (Obj*)json),
                  "encode control escape: %s", escaped);

        TEST_TRUE(batch, decoded != NULL && CB_Equals(string, (Obj*)decoded),
                  "decode control escape: %s", escaped);

        DECREF(json);
        DECREF(decoded);
    }

    for (int i = 0; quote_escapes_source[i] != NULL; i++) {
        const char *source  = quote_escapes_source[i];
        const char *escaped = quote_escapes_json[i];
        // Never use test data as the format string itself: pass it as the
        // argument to an explicit "%s" so that the content is copied
        // literally and no stray variadic argument is supplied.
        CB_setf(string, "%s", source);
        CharBuf *json    = Json_to_json((Obj*)string);
        CharBuf *decoded = (CharBuf*)Json_from_json(json);

        CB_setf(json_wanted, "\"%s\"", escaped);
        // NOTE(review): json is used here before the NULL check below.
        CB_Trim(json);
        TEST_TRUE(batch, json != NULL && CB_Equals(json_wanted, (Obj*)json),
                  "encode quote/backslash escapes: %s", source);

        TEST_TRUE(batch, decoded != NULL && CB_Equals(string, (Obj*)decoded),
                  "decode quote/backslash escapes: %s", source);

        DECREF(json);
        DECREF(decoded);
    }

    DECREF(json_wanted);
    DECREF(string);
}
示例#9
0
/* Append one stack-frame line to this Err's message.
 *
 * The existing message is copied, terminated with a newline if it does not
 * already end with one, and followed by a tab-indented line naming `file`
 * and `line` (prefixed by `func` when it is non-NULL).  The old message is
 * released and replaced by the new String.
 */
void
Err_Add_Frame_IMP(Err *self, const char *file, int line, const char *func) {
    CharBuf *trace = CB_new(0);
    CB_Cat(trace, self->mess);

    // Make sure the frame starts on its own line.
    bool has_newline = Str_Ends_With_Utf8(self->mess, "\n", 1);
    if (!has_newline) {
        CB_Cat_Char(trace, '\n');
    }

    if (func == NULL) {
        CB_catf(trace, "\tat %s line %i32\n", file, (int32_t)line);
    }
    else {
        CB_catf(trace, "\t%s at %s line %i32\n", func, file, (int32_t)line);
    }

    // Swap the old message for the extended one.
    DECREF(self->mess);
    self->mess = CB_Yield_String(trace);
    DECREF(trace);
}
示例#10
0
/* Verify CB_Trim_Top, CB_Trim_Tail and CB_Trim on a smiley (code point
 * 0x263A) padded on both sides with every code point in the whitespace table
 * below.  Each call is expected to return true the first time and false when
 * repeated on the already-trimmed buffer.
 */
static void
test_Trim(TestBatch *batch) {
    uint32_t spaces[] = {
        ' ',    '\t',   '\r',   '\n',   0x000B, 0x000C, 0x000D, 0x0085,
        0x00A0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004,
        0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029,
        0x202F, 0x205F, 0x3000
    };
    uint32_t space_count = sizeof(spaces) / sizeof(spaces[0]);
    uint32_t k;
    CharBuf *padded = CB_new(0);

    // Leading whitespace, smiley, trailing whitespace.
    for (k = 0; k < space_count; k++) {
        CB_Cat_Char(padded, spaces[k]);
    }
    CB_Cat_Char(padded, 0x263A);
    for (k = 0; k < space_count; k++) {
        CB_Cat_Char(padded, spaces[k]);
    }

    // Trim each end separately.
    TEST_TRUE(batch, CB_Trim_Top(padded), "Trim_Top returns true on success");
    TEST_FALSE(batch, CB_Trim_Top(padded),
               "Trim_Top returns false on failure");
    TEST_TRUE(batch, CB_Trim_Tail(padded),
              "Trim_Tail returns true on success");
    TEST_FALSE(batch, CB_Trim_Tail(padded),
               "Trim_Tail returns false on failure");
    TEST_TRUE(batch, CB_Equals_Str(padded, smiley, smiley_len),
              "Trim_Top and Trim_Tail worked");

    // Rebuild the padded smiley from scratch and trim both ends at once.
    CB_Truncate(padded, 0);
    for (k = 0; k < space_count; k++) {
        CB_Cat_Char(padded, spaces[k]);
    }
    CB_Cat_Char(padded, 0x263A);
    for (k = 0; k < space_count; k++) {
        CB_Cat_Char(padded, spaces[k]);
    }

    TEST_TRUE(batch, CB_Trim(padded), "Trim returns true on success");
    TEST_FALSE(batch, CB_Trim(padded), "Trim returns false on failure");
    TEST_TRUE(batch, CB_Equals_Str(padded, smiley, smiley_len),
              "Trim worked");

    DECREF(padded);
}
示例#11
0
文件: Json.c 项目: pavansondur/lucy
/* Append `dump` (treated as a CharBuf) to `json` as a double-quoted JSON
 * string, applying the escapes the JSON spec makes mandatory.
 */
static void
S_append_json_string(Obj *dump, CharBuf *json) {
    // Opening quote.
    CB_Cat_Trusted_Str(json, "\"", 1);

    // Consume the source one code point at a time.
    ZombieCharBuf *iterator = ZCB_WRAP((CharBuf*)dump);
    while (ZCB_Get_Size(iterator)) {
        uint32_t code_point = ZCB_Nip_One(iterator);

        if (code_point > 127) {
            // High characters -- including those above the BMP -- are never
            // escaped; the destination channel is assumed to cope with
            // arbitrary UTF-8.
            CB_Cat_Char(json, code_point);
            continue;
        }

        // Characters with a dedicated two-character escape.  The spec makes
        // escaping the forward slash optional; we choose not to.
        const char *short_escape = NULL;
        switch (code_point) {
            case '\b': short_escape = "\\b";  break;
            case '\t': short_escape = "\\t";  break;
            case '\n': short_escape = "\\n";  break;
            case '\f': short_escape = "\\f";  break;
            case '\r': short_escape = "\\r";  break;
            case '\\': short_escape = "\\\\"; break;
            case '\"': short_escape = "\\\""; break;
            default:                          break;
        }

        if (short_escape != NULL) {
            CB_Cat_Trusted_Str(json, short_escape, 2);
        }
        else if (code_point < 0x20) {
            // Remaining control characters: six-character \uXXXX form.
            char buffer[7];
            sprintf(buffer, "\\u%04x", (unsigned)code_point);
            CB_Cat_Trusted_Str(json, buffer, 6);
        }
        else {
            // Ordinary printable ASCII.
            char literal = (char)code_point;
            CB_Cat_Trusted_Str(json, &literal, 1);
        }
    }

    // Closing quote.
    CB_Cat_Trusted_Str(json, "\"", 1);
}
示例#12
0
/* Exercise StringIterator against a String built from a fixed table of code
 * points: equality and comparison, Clone/Assign, forward and backward
 * traversal, and Advance/Recede including attempts to move past either end
 * of the string.
 */
static void
test_iterator(TestBatchRunner *runner) {
    static const int32_t code_points[] = {
        0x41,
        0x7F,
        0x80,
        0x7FF,
        0x800,
        0xFFFF,
        0x10000,
        0x10FFFF
    };
    static const size_t num_code_points
        = sizeof(code_points) / sizeof(code_points[0]);

    CharBuf *buf = CB_new(0);
    for (size_t i = 0; i < num_code_points; ++i) {
        CB_Cat_Char(buf, code_points[i]);
    }
    String *string = CB_To_String(buf);

    // Equals.
    {
        StringIterator *iter = Str_Top(string);

        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)iter),
                  "StringIterator equal to self");
        TEST_FALSE(runner, StrIter_Equals(iter, (Obj*)CFISH_TRUE),
                   "StringIterator not equal non-iterators");

        DECREF(iter);
    }

    // Compare_To, Clone and Assign.
    {
        StringIterator *top  = Str_Top(string);
        StringIterator *tail = Str_Tail(string);

        TEST_INT_EQ(runner, StrIter_Compare_To(top, (Obj*)tail), -1,
                    "Compare_To top < tail");
        TEST_INT_EQ(runner, StrIter_Compare_To(tail, (Obj*)top), 1,
                    "Compare_To tail > top");
        TEST_INT_EQ(runner, StrIter_Compare_To(top, (Obj*)top), 0,
                    "Compare_To top == top");

        StringIterator *clone = StrIter_Clone(top);
        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)top), "Clone");

        StrIter_Assign(clone, tail);
        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)tail), "Assign");

        String *other = Str_newf("Other string");
        StringIterator *other_iter = Str_Top(other);
        TEST_FALSE(runner, StrIter_Equals(other_iter, (Obj*)tail),
                   "Equals returns false for different strings");
        StrIter_Assign(clone, other_iter);
        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)other_iter),
                  "Assign iterator with different string");

        DECREF(other);
        DECREF(other_iter);
        DECREF(clone);
        DECREF(top);
        DECREF(tail);
    }

    // Forward traversal with Has_Next / Next.
    {
        StringIterator *iter = Str_Top(string);

        for (size_t i = 0; i < num_code_points; ++i) {
            // The index is a size_t; convert it explicitly so that it
            // matches the `%d` conversion in the message.
            TEST_TRUE(runner, StrIter_Has_Next(iter), "Has_Next %d", (int)i);
            int32_t code_point = StrIter_Next(iter);
            TEST_INT_EQ(runner, code_point, code_points[i], "Next %d",
                        (int)i);
        }

        TEST_TRUE(runner, !StrIter_Has_Next(iter),
                  "Has_Next at end of string");
        TEST_INT_EQ(runner, StrIter_Next(iter), STR_OOB,
                    "Next at end of string");

        StringIterator *tail = Str_Tail(string);
        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)tail), "Equals tail");

        DECREF(tail);
        DECREF(iter);
    }

    // Backward traversal with Has_Prev / Prev.
    {
        StringIterator *iter = Str_Tail(string);

        for (size_t i = num_code_points; i--;) {
            TEST_TRUE(runner, StrIter_Has_Prev(iter), "Has_Prev %d", (int)i);
            int32_t code_point = StrIter_Prev(iter);
            TEST_INT_EQ(runner, code_point, code_points[i], "Prev %d",
                        (int)i);
        }

        TEST_TRUE(runner, !StrIter_Has_Prev(iter),
                  "Has_Prev at start of string");
        TEST_INT_EQ(runner, StrIter_Prev(iter), STR_OOB,
                    "Prev at start of string");

        StringIterator *top = Str_Top(string);
        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)top), "Equals top");

        DECREF(top);
        DECREF(iter);
    }

    // Advance and Recede.
    {
        StringIterator *iter = Str_Top(string);

        // One Next plus Advance(2) leaves the iterator before
        // code_points[3].
        StrIter_Next(iter);
        TEST_INT_EQ(runner, StrIter_Advance(iter, 2), 2,
                    "Advance returns number of code points");
        TEST_INT_EQ(runner, StrIter_Next(iter), code_points[3],
                    "Advance works");
        // Four code points have been consumed so far; only the rest can be
        // skipped.
        TEST_INT_EQ(runner,
                    StrIter_Advance(iter, 1000000), num_code_points - 4,
                    "Advance past end of string");

        // Mirror image of the above, starting from the end.
        StrIter_Prev(iter);
        TEST_INT_EQ(runner, StrIter_Recede(iter, 2), 2,
                    "Recede returns number of code points");
        TEST_INT_EQ(runner, StrIter_Prev(iter), code_points[num_code_points-4],
                    "Recede works");
        TEST_INT_EQ(runner, StrIter_Recede(iter, 1000000), num_code_points - 4,
                    "Recede past start of string");

        DECREF(iter);
    }

    DECREF(string);
    DECREF(buf);
}
示例#13
0
/* Extract an excerpt of `field_val` positioned around the location reported
 * by S_hottest() for `heat_map`.
 *
 * The excerpt's start and end are adjusted by S_find_starting_boundary() and
 * S_find_ending_boundary(); when either reports that no boundary was found,
 * an ellipsis is added on that side.
 *
 * @param field_val The text to excerpt from.
 * @param start_ptr Receives the computed start offset of the excerpt
 * (decreased by 2 when a leading ellipsis and space were prepended).
 * @param heat_map Passed to S_hottest() to pick the location to center on.
 * @return a new String; empty when the computed excerpt length is zero.
 */
String*
Highlighter_Raw_Excerpt_IMP(Highlighter *self, String *field_val,
                            int32_t *start_ptr, HeatMap *heat_map) {
    HighlighterIVARS *const ivars = Highlighter_IVARS(self);

    // Find start of excerpt.

    StringIterator *top = Str_Top(field_val);

    int32_t  best_location = S_hottest(heat_map);
    int32_t  start;
    uint32_t max_skip;

    if ((uint32_t)best_location <= ivars->slop) {
        // If the beginning of the string falls within the window centered
        // around the hottest point in the field, start the fragment at the
        // beginning.
        start    = 0;
        max_skip = best_location;
    }
    else {
        // Otherwise begin `slop` code points before the hottest point.
        start    = best_location - ivars->slop;
        max_skip = ivars->slop;
        StrIter_Advance(top, start);
    }

    // Let the helper move `top` by up to max_skip; it reports how far it
    // moved through num_skipped.
    uint32_t num_skipped;
    bool found_starting_edge
        = S_find_starting_boundary(top, max_skip, &num_skipped);
    start += num_skipped;

    // Find end of excerpt.

    StringIterator *tail = StrIter_Clone(top);

    uint32_t max_len = ivars->excerpt_length;
    if (!found_starting_edge) {
        // Leave space for starting ellipsis and space character.
        // NOTE(review): max_len is unsigned, so this wraps around if
        // excerpt_length is less than 2 -- confirm callers prevent that.
        max_len -= 2;
    }

    bool found_ending_edge = true;
    uint32_t excerpt_len = StrIter_Advance(tail, max_len);

    // Skip up to slop code points but keep at least max_len - slop.
    // NOTE(review): if slop is unsigned and larger than max_len, the
    // subtraction below wraps -- confirm against the ivars declaration.
    if (excerpt_len > max_len - ivars->slop) {
        max_skip = excerpt_len - (max_len - ivars->slop);
        found_ending_edge
            = S_find_ending_boundary(tail, max_skip, &num_skipped);
        // Clamp at zero rather than letting the unsigned length wrap.
        if (num_skipped >= excerpt_len) {
            excerpt_len = 0;
        }
        else {
            excerpt_len -= num_skipped;
        }
    }

    // Extract excerpt.

    String *raw_excerpt;

    if (!excerpt_len) {
        raw_excerpt = Str_new_from_trusted_utf8("", 0);
    }
    else {
        String  *substring = StrIter_substring(top, tail);
        CharBuf *buf       = CB_new(Str_Get_Size(substring) + 8);

        // If not starting on a sentence boundary, prepend an ellipsis.
        if (!found_starting_edge) {
            CB_Cat_Char(buf, ELLIPSIS_CODE_POINT);
            CB_Cat_Char(buf, ' ');
            // Account for the two code points just prepended.
            start -= 2;
        }

        CB_Cat(buf, substring);

        // If not ending on a sentence boundary, append an ellipsis.
        if (!found_ending_edge) {
            CB_Cat_Char(buf, ELLIPSIS_CODE_POINT);
        }

        raw_excerpt = CB_Yield_String(buf);

        DECREF(buf);
        DECREF(substring);
    }

    // Report the excerpt's start offset to the caller.
    *start_ptr = start;

    DECREF(top);
    DECREF(tail);
    return raw_excerpt;
}