Пример #1
0
CharBuf*
Json_to_json(Obj *dump) {
    // Validate object type, only allowing hashes and arrays per JSON spec.
    if (!dump || !(Obj_Is_A(dump, HASH) || Obj_Is_A(dump, VARRAY))) {
        if (!tolerant) {
            CharBuf *class_name = dump ? Obj_Get_Class_Name(dump) : NULL;
            CharBuf *mess = MAKE_MESS("Illegal top-level object type: %o",
                                      class_name);
            Err_set_error(Err_new(mess));
            return NULL;
        }
    }

    // Encode.
    CharBuf *json = CB_new(31);
    if (!S_to_json(dump, json, 0)) {
        DECREF(json);
        ERR_ADD_FRAME(Err_get_error());
        json = NULL;
    }
    else {
        // Append newline.
        CB_Cat_Trusted_Str(json, "\n", 1);
    }

    return json;
}
Пример #2
0
static void
test_Cat(TestBatch *batch) {
    CharBuf *wanted = CB_newf("a%s", smiley);
    CharBuf *got    = S_get_cb("");

    CB_Cat(got, wanted);
    TEST_TRUE(batch, CB_Equals(wanted, (Obj*)got), "Cat");
    DECREF(got);

    got = S_get_cb("a");
    CB_Cat_Char(got, 0x263A);
    TEST_TRUE(batch, CB_Equals(wanted, (Obj*)got), "Cat_Char");
    DECREF(got);

    got = S_get_cb("a");
    CB_Cat_Str(got, smiley, smiley_len);
    TEST_TRUE(batch, CB_Equals(wanted, (Obj*)got), "Cat_Str");
    DECREF(got);

    got = S_get_cb("a");
    CB_Cat_Trusted_Str(got, smiley, smiley_len);
    TEST_TRUE(batch, CB_Equals(wanted, (Obj*)got), "Cat_Trusted_Str");
    DECREF(got);

    DECREF(wanted);
}
Пример #3
0
void
Indexer_prepare_commit(Indexer *self)
{
    VArray   *seg_readers     = PolyReader_Get_Seg_Readers(self->polyreader);
    uint32_t  num_seg_readers = VA_Get_Size(seg_readers);
    bool_t    merge_happened  = false;

    if ( !self->write_lock || self->prepared ) {
        THROW(ERR, "Can't call Prepare_Commit() more than once");
    }

    // Merge existing index data. 
    if (num_seg_readers) { 
        merge_happened = S_maybe_merge(self, seg_readers);
    }

    // Add a new segment and write a new snapshot file if... 
    if (   Seg_Get_Count(self->segment)      // Docs/segs added. 
        || merge_happened                        // Some segs merged. 
        || !Snapshot_Num_Entries(self->snapshot) // Initializing index. 
        || DelWriter_Updated(self->del_writer) 
    ) {
        Folder   *folder   = self->folder;
        Schema   *schema   = self->schema;
        Snapshot *snapshot = self->snapshot;
        CharBuf  *old_schema_name = S_find_schema_file(snapshot);
        uint64_t  schema_gen = old_schema_name
                             ? IxFileNames_extract_gen(old_schema_name) + 1
                             : 1;
        char      base36[StrHelp_MAX_BASE36_BYTES];
        CharBuf  *new_schema_name;
        
        StrHelp_to_base36(schema_gen, &base36);
        new_schema_name = CB_newf("schema_%s.json", base36);

        // Finish the segment, write schema file. 
        SegWriter_Finish(self->seg_writer);
        Schema_Write(schema, folder, new_schema_name);
        if (old_schema_name) {
            Snapshot_Delete_Entry(snapshot, old_schema_name);
        }
        Snapshot_Add_Entry(snapshot, new_schema_name);
        DECREF(new_schema_name);

        // Write temporary snapshot file. 
        DECREF(self->snapfile);
        self->snapfile = IxManager_Make_Snapshot_Filename(self->manager);
        CB_Cat_Trusted_Str(self->snapfile, ".temp", 5);
        Folder_Delete(folder, self->snapfile);
        Snapshot_Write_File(snapshot, folder, self->snapfile);

        self->needs_commit = true;
    }

    // Close reader, so that we can delete its files if appropriate. 
    PolyReader_Close(self->polyreader);

    self->prepared = true;
}
Пример #4
0
CharBuf*
Compiler_to_string(Compiler *self) {
    CharBuf *stringified_query = Query_To_String(self->parent);
    CharBuf *string = CB_new_from_trusted_utf8("compiler(", 9);
    CB_Cat(string, stringified_query);
    CB_Cat_Trusted_Str(string, ")", 1);
    DECREF(stringified_query);
    return string;
}
Пример #5
0
CharBuf*
PhraseQuery_to_string(PhraseQuery *self)
{
    uint32_t i;
    uint32_t num_terms = VA_Get_Size(self->terms);
    CharBuf *retval = CB_Clone(self->field);
    CB_Cat_Trusted_Str(retval, ":\"", 2);
    for (i = 0; i < num_terms; i++) {
        Obj *term = VA_Fetch(self->terms, i);
        CharBuf *term_string = Obj_To_String(term);
        CB_Cat(retval, term_string);
        DECREF(term_string);
        if (i < num_terms - 1) {
            CB_Cat_Trusted_Str(retval, " ",  1);
        }
    }
    CB_Cat_Trusted_Str(retval, "\"", 1);
    return retval;
}
Пример #6
0
static Hash*
S_extract_tv_cache(ByteBuf *field_buf) {
    Hash    *tv_cache  = Hash_new(0);
    char    *tv_string = BB_Get_Buf(field_buf);
    int32_t  num_terms = NumUtil_decode_c32(&tv_string);
    CharBuf *text      = CB_new(0);

    // Read the number of highlightable terms in the field.
    for (int32_t i = 0; i < num_terms; i++) {
        size_t   overlap = NumUtil_decode_c32(&tv_string);
        size_t   len     = NumUtil_decode_c32(&tv_string);

        // Decompress the term text.
        CB_Set_Size(text, overlap);
        CB_Cat_Trusted_Str(text, tv_string, len);
        tv_string += len;

        // Get positions & offsets string.
        char *bookmark_ptr      = tv_string;
        int32_t num_positions   = NumUtil_decode_c32(&tv_string);
        while (num_positions--) {
            // Leave nums compressed to save a little mem.
            NumUtil_skip_cint(&tv_string);
            NumUtil_skip_cint(&tv_string);
            NumUtil_skip_cint(&tv_string);
        }
        len = tv_string - bookmark_ptr;

        // Store the $text => $posdata pair in the output hash.
        Hash_Store(tv_cache, (Obj*)text,
                   (Obj*)BB_new_bytes(bookmark_ptr, len));
    }
    DECREF(text);

    return tv_cache;
}
Пример #7
0
void
CB_vcatf(CharBuf *self, const char *pattern, va_list args) {
    size_t      pattern_len   = strlen(pattern);
    const char *pattern_start = pattern;
    const char *pattern_end   = pattern + pattern_len;
    char        buf[64];

    for (; pattern < pattern_end; pattern++) {
        const char *slice_end = pattern;

        // Consume all characters leading up to a '%'.
        while (slice_end < pattern_end && *slice_end != '%') { slice_end++; }
        if (pattern != slice_end) {
            size_t size = slice_end - pattern;
            CB_Cat_Trusted_Str(self, pattern, size);
            pattern = slice_end;
        }

        if (pattern < pattern_end) {
            pattern++; // Move past '%'.

            switch (*pattern) {
                case '%': {
                        CB_Cat_Trusted_Str(self, "%", 1);
                    }
                    break;
                case 'o': {
                        Obj *obj = va_arg(args, Obj*);
                        if (!obj) {
                            CB_Cat_Trusted_Str(self, "[NULL]", 6);
                        }
                        else if (Obj_Is_A(obj, CHARBUF)) {
                            CB_Cat(self, (CharBuf*)obj);
                        }
                        else {
                            CharBuf *string = Obj_To_String(obj);
                            CB_Cat(self, string);
                            DECREF(string);
                        }
                    }
                    break;
                case 'i': {
                        int64_t val = 0;
                        size_t size;
                        if (pattern[1] == '8') {
                            val = va_arg(args, int32_t);
                            pattern++;
                        }
                        else if (pattern[1] == '3' && pattern[2] == '2') {
                            val = va_arg(args, int32_t);
                            pattern += 2;
                        }
                        else if (pattern[1] == '6' && pattern[2] == '4') {
                            val = va_arg(args, int64_t);
                            pattern += 2;
                        }
                        else {
                            S_die_invalid_pattern(pattern_start);
                        }
                        size = sprintf(buf, "%" I64P, val);
                        CB_Cat_Trusted_Str(self, buf, size);
                    }
                    break;
                case 'u': {
                        uint64_t val = 0;
                        size_t size;
                        if (pattern[1] == '8') {
                            val = va_arg(args, uint32_t);
                            pattern += 1;
                        }
                        else if (pattern[1] == '3' && pattern[2] == '2') {
                            val = va_arg(args, uint32_t);
                            pattern += 2;
                        }
                        else if (pattern[1] == '6' && pattern[2] == '4') {
                            val = va_arg(args, uint64_t);
                            pattern += 2;
                        }
                        else {
                            S_die_invalid_pattern(pattern_start);
                        }
                        size = sprintf(buf, "%" U64P, val);
                        CB_Cat_Trusted_Str(self, buf, size);
                    }
                    break;
                case 'f': {
                        if (pattern[1] == '6' && pattern[2] == '4') {
                            double num  = va_arg(args, double);
                            char bigbuf[512];
                            size_t size = sprintf(bigbuf, "%g", num);
                            CB_Cat_Trusted_Str(self, bigbuf, size);
                            pattern += 2;
                        }
                        else {
                            S_die_invalid_pattern(pattern_start);
                        }
                    }
                    break;
                case 'x': {
                        if (pattern[1] == '3' && pattern[2] == '2') {
                            unsigned long val = va_arg(args, uint32_t);
                            size_t size = sprintf(buf, "%.8lx", val);
                            CB_Cat_Trusted_Str(self, buf, size);
                            pattern += 2;
                        }
                        else {
                            S_die_invalid_pattern(pattern_start);
                        }
                    }
                    break;
                case 's': {
                        char *string = va_arg(args, char*);
                        if (string == NULL) {
                            CB_Cat_Trusted_Str(self, "[NULL]", 6);
                        }
                        else {
                            size_t size = strlen(string);
                            if (StrHelp_utf8_valid(string, size)) {
                                CB_Cat_Trusted_Str(self, string, size);
                            }
                            else {
                                CB_Cat_Trusted_Str(self, "[INVALID UTF8]", 14);
                            }
                        }
                    }
                    break;
                default: {
                        // Assume NULL-terminated pattern string, which
                        // eliminates the need for bounds checking if '%' is
                        // the last visible character.
                        S_die_invalid_pattern(pattern_start);
                    }
            }
Пример #8
0
static bool_t
S_to_json(Obj *dump, CharBuf *json, int32_t depth) {
    // Guard against infinite recursion in self-referencing data structures.
    if (depth > MAX_DEPTH) {
        CharBuf *mess = MAKE_MESS("Exceeded max depth of %i32", MAX_DEPTH);
        Err_set_error(Err_new(mess));
        return false;
    }

    if (!dump) {
        CB_Cat_Trusted_Str(json, "null", 4);
    }
    else if (dump == (Obj*)CFISH_TRUE) {
        CB_Cat_Trusted_Str(json, "true", 4);
    }
    else if (dump == (Obj*)CFISH_FALSE) {
        CB_Cat_Trusted_Str(json, "false", 5);
    }
    else if (Obj_Is_A(dump, CHARBUF)) {
        S_append_json_string(dump, json);
    }
    else if (Obj_Is_A(dump, INTNUM)) {
        CB_catf(json, "%i64", Obj_To_I64(dump));
    }
    else if (Obj_Is_A(dump, FLOATNUM)) {
        CB_catf(json, "%f64", Obj_To_F64(dump));
    }
    else if (Obj_Is_A(dump, VARRAY)) {
        VArray *array = (VArray*)dump;
        size_t size = VA_Get_Size(array);
        if (size == 0) {
            // Put empty array on single line.
            CB_Cat_Trusted_Str(json, "[]", 2);
            return true;
        }
        else if (size == 1) {
            Obj *elem = VA_Fetch(array, 0);
            if (!(Obj_Is_A(elem, HASH) || Obj_Is_A(elem, VARRAY))) {
                // Put array containing single scalar element on one line.
                CB_Cat_Trusted_Str(json, "[", 1);
                if (!S_to_json(elem, json, depth + 1)) {
                    return false;
                }
                CB_Cat_Trusted_Str(json, "]", 1);
                return true;
            }
        }
        // Fall back to spreading elements across multiple lines.
        CB_Cat_Trusted_Str(json, "[", 1);
        for (size_t i = 0; i < size; i++) {
            CB_Cat_Trusted_Str(json, "\n", 1);
            S_cat_whitespace(json, depth + 1);
            if (!S_to_json(VA_Fetch(array, i), json, depth + 1)) {
                return false;
            }
            if (i + 1 < size) {
                CB_Cat_Trusted_Str(json, ",", 1);
            }
        }
        CB_Cat_Trusted_Str(json, "\n", 1);
        S_cat_whitespace(json, depth);
        CB_Cat_Trusted_Str(json, "]", 1);
    }
    else if (Obj_Is_A(dump, HASH)) {
        Hash *hash = (Hash*)dump;
        size_t size = Hash_Get_Size(hash);

        // Put empty hash on single line.
        if (size == 0) {
            CB_Cat_Trusted_Str(json, "{}", 2);
            return true;
        }

        // Validate that all keys are strings, then sort.
        VArray *keys = Hash_Keys(hash);
        for (size_t i = 0; i < size; i++) {
            Obj *key = VA_Fetch(keys, i);
            if (!key || !Obj_Is_A(key, CHARBUF)) {
                DECREF(keys);
                CharBuf *key_class = key ? Obj_Get_Class_Name(key) : NULL;
                CharBuf *mess = MAKE_MESS("Illegal key type: %o", key_class);
                Err_set_error(Err_new(mess));
                return false;
            }
        }
        VA_Sort(keys, NULL, NULL);

        // Spread pairs across multiple lines.
        CB_Cat_Trusted_Str(json, "{", 1);
        for (size_t i = 0; i < size; i++) {
            Obj *key = VA_Fetch(keys, i);
            CB_Cat_Trusted_Str(json, "\n", 1);
            S_cat_whitespace(json, depth + 1);
            S_append_json_string(key, json);
            CB_Cat_Trusted_Str(json, ": ", 2);
            if (!S_to_json(Hash_Fetch(hash, key), json, depth + 1)) {
                DECREF(keys);
                return false;
            }
            if (i + 1 < size) {
                CB_Cat_Trusted_Str(json, ",", 1);
            }
        }
        CB_Cat_Trusted_Str(json, "\n", 1);
        S_cat_whitespace(json, depth);
        CB_Cat_Trusted_Str(json, "}", 1);

        DECREF(keys);
    }

    return true;
}
Пример #9
0
static void
S_cat_whitespace(CharBuf *json, int32_t depth) {
    while (depth--) {
        CB_Cat_Trusted_Str(json, indentation, INDENTATION_LEN);
    }
}
Пример #10
0
static void
S_append_json_string(Obj *dump, CharBuf *json) {
    // Append opening quote.
    CB_Cat_Trusted_Str(json, "\"", 1);

    // Process string data.
    ZombieCharBuf *iterator = ZCB_WRAP((CharBuf*)dump);
    while (ZCB_Get_Size(iterator)) {
        uint32_t code_point = ZCB_Nip_One(iterator);
        if (code_point > 127) {
            // There is no need to escape any high characters, including those
            // above the BMP, as we assume that the destination channel can
            // handle arbitrary UTF-8 data.
            CB_Cat_Char(json, code_point);
        }
        else {
            char buffer[7];
            size_t len;
            switch (code_point & 127) {
                    // Perform all mandatory escapes enumerated in the JSON spec.
                    // Note that the spec makes escaping forward slash optional;
                    // we choose not to.
                case 0x00: case 0x01: case 0x02: case 0x03:
                case 0x04: case 0x05: case 0x06: case 0x07:
                case 0x0b: case 0x0e: case 0x0f:
                case 0x10: case 0x11: case 0x12: case 0x13:
                case 0x14: case 0x15: case 0x16: case 0x17:
                case 0x18: case 0x19: case 0x1a: case 0x1b:
                case 0x1c: case 0x1d: case 0x1e: case 0x1f: {
                        sprintf(buffer, "\\u%04x", (unsigned)code_point);
                        len = 6;
                        break;
                    }
                case '\b':
                    memcpy(buffer, "\\b", 2);
                    len = 2;
                    break;
                case '\t':
                    memcpy(buffer, "\\t", 2);
                    len = 2;
                    break;
                case '\n':
                    memcpy(buffer, "\\n", 2);
                    len = 2;
                    break;
                case '\f':
                    memcpy(buffer, "\\f", 2);
                    len = 2;
                    break;
                case '\r':
                    memcpy(buffer, "\\r", 2);
                    len = 2;
                    break;
                case '\\':
                    memcpy(buffer, "\\\\", 2);
                    len = 2;
                    break;
                case '\"':
                    memcpy(buffer, "\\\"", 2);
                    len = 2;
                    break;

                    // Ordinary printable ASCII.
                default:
                    buffer[0] = (char)code_point;
                    len = 1;
            }
            CB_Cat_Trusted_Str(json, buffer, len);
        }
    }

    // Append closing quote.
    CB_Cat_Trusted_Str(json, "\"", 1);
}