Beispiel #1
0
/* Initialize a LexIndex over the "lexicon-N.ix" / "lexicon-N.ixix" files
 * belonging to `field` within `segment`.
 *
 * Throws (after releasing the file names and `self`) when the schema has no
 * type for `field` or when either file cannot be opened.  Returns `self` on
 * success.
 */
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder,
              Segment *segment, const CharBuf *field)
{
    const i32_t   num       = Seg_Field_Num(segment, field);
    CharBuf      *seg_dir   = Seg_Get_Name(segment);
    CharBuf      *ixix_path = CB_newf("%o/lexicon-%i32.ixix", seg_dir, num);
    CharBuf      *ix_path   = CB_newf("%o/lexicon-%i32.ix", seg_dir, num);
    Architecture *arch      = Schema_Get_Architecture(schema);
    CharBuf      *failure   = NULL;
    i64_t         ixix_len;
    i64_t         ix_len;

    /* Start from an empty term / term info. */
    self->term  = ViewCB_new_from_trusted_utf8(NULL, 0);
    self->tinfo = TInfo_new(0,0,0,0);
    self->tick  = 0;

    /* Resolve the field type, then open both streams.  Any failure is
     * recorded as a message and handled in one place below. */
    self->field_type = Schema_Fetch_Type(schema, field);
    if (self->field_type) {
        INCREF(self->field_type);
        self->ixix_in = Folder_Open_In(folder, ixix_path);
        self->ix_in   = Folder_Open_In(folder, ix_path);
        if (!self->ixix_in || !self->ix_in) {
            failure =
                MAKE_MESS("Can't open either %o or %o", ix_path, ixix_path);
        }
    }
    else {
        failure = MAKE_MESS("Unknown field: '%o'", field);
    }
    if (failure) {
        DECREF(ix_path);
        DECREF(ixix_path);
        DECREF(self);
        Err_throw_mess(failure);
    }

    self->index_interval = Arch_Index_Interval(arch);
    self->skip_interval  = Arch_Skip_Interval(arch);

    /* The .ixix file is treated as an array of 64-bit offsets. */
    ixix_len      = InStream_Length(self->ixix_in);
    self->size    = (i32_t)(ixix_len / sizeof(i64_t));
    self->offsets = (i64_t*)InStream_Buf(self->ixix_in, (size_t)ixix_len);

    /* The .ix file is exposed as a raw byte range [data, limit). */
    ix_len      = InStream_Length(self->ix_in);
    self->data  = InStream_Buf(self->ix_in, ix_len);
    self->limit = self->data + ix_len;

    DECREF(ixix_path);
    DECREF(ix_path);

    return self;
}
Beispiel #2
0
// Serialize `dump` to a newline-terminated JSON string.
//
// Unless the `tolerant` flag (defined outside this function) is set, only a
// Hash or a VArray is accepted as the top-level object; anything else sets
// the global error and yields NULL.  NULL is also returned, with a frame
// added to the existing error, when encoding itself fails.
CharBuf*
Json_to_json(Obj *dump) {
    const int is_container
        = dump != NULL
          && (Obj_Is_A(dump, HASH) || Obj_Is_A(dump, VARRAY));

    if (!is_container && !tolerant) {
        CharBuf *class_name = dump ? Obj_Get_Class_Name(dump) : NULL;
        CharBuf *mess = MAKE_MESS("Illegal top-level object type: %o",
                                  class_name);
        Err_set_error(Err_new(mess));
        return NULL;
    }

    CharBuf *json = CB_new(31);
    if (S_to_json(dump, json, 0)) {
        // Finish the document with a trailing newline.
        CB_Cat_Trusted_Str(json, "\n", 1);
        return json;
    }

    // Encoding failed: discard the partial output and annotate the error.
    DECREF(json);
    ERR_ADD_FRAME(Err_get_error());
    return NULL;
}
Beispiel #3
0
// Remove the lock file at `path` when it belongs to this lock and its
// recorded pid is either this process (`delete_mine`) or a pid that is no
// longer active (`delete_other`).
//
// Returns true only if the file was actually deleted.  Throws if the file
// qualified for deletion but Folder_Delete failed.
bool
LFLock_Maybe_Delete_File_IMP(LockFileLock *self, String *path,
                             bool delete_mine, bool delete_other) {
    LockFileLockIVARS *const ivars  = LFLock_IVARS(self);
    Folder            *const folder = ivars->folder;

    // The path must look like "locks" + one separator char + our lock name.
    if (!Str_Starts_With_Utf8(path, "locks", 5)) {
        return false;
    }
    StringIterator *cursor = Str_Top(path);
    StrIter_Advance(cursor, 5 + 1);
    const bool ours = StrIter_Starts_With(cursor, ivars->name);
    DECREF(cursor);
    if (!ours) {
        return false;
    }

    // Nothing to do when the file is already gone.
    if (!Folder_Exists(folder, path)) {
        return false;
    }

    bool  deleted   = false;
    Hash *lock_data = (Hash*)Json_slurp_json(folder, path);
    if (lock_data != NULL && Obj_Is_A((Obj*)lock_data, HASH)) {
        String *pid_str   = (String*)Hash_Fetch_Utf8(lock_data, "pid", 3);
        String *lock_host = (String*)Hash_Fetch_Utf8(lock_data, "host", 4);
        String *lock_name = (String*)Hash_Fetch_Utf8(lock_data, "name", 4);

        // Each stage is only evaluated when the previous one passed.
        const bool host_ok = lock_host != NULL
                             && Str_Is_A(lock_host, STRING)
                             && Str_Equals(lock_host, (Obj*)ivars->host);
        const bool name_ok = host_ok
                             && lock_name != NULL
                             && Str_Is_A(lock_name, STRING)
                             && Str_Equals(lock_name, (Obj*)ivars->name);
        const bool pid_ok  = name_ok
                             && pid_str != NULL
                             && Str_Is_A(pid_str, STRING);

        if (pid_ok) {
            const int pid = (int)Str_To_I64(pid_str);

            // Eligible if the pid is this process, or else a dead process.
            bool eligible = delete_mine && pid == PID_getpid();
            if (!eligible) {
                eligible = delete_other && !PID_active(pid);
            }

            if (eligible) {
                if (!Folder_Delete(folder, path)) {
                    String *mess = MAKE_MESS("Can't delete '%o'", path);
                    DECREF(lock_data);
                    Err_throw_mess(ERR, mess);
                }
                else {
                    deleted = true;
                }
            }
        }
    }
    DECREF(lock_data);

    return deleted;
}
Beispiel #4
0
// Remove the lock file at `path` when it belongs to this lock and its
// recorded pid is either this process (`delete_mine`) or a pid that is no
// longer active (`delete_other`).
//
// Returns true only if the file was actually deleted.  Throws if the file
// qualified for deletion but Folder_Delete failed.
bool_t
LFLock_maybe_delete_file(LockFileLock *self, const CharBuf *path,
                         bool_t delete_mine, bool_t delete_other) {
    Folder *const  folder    = self->folder;
    ZombieCharBuf *remainder = ZCB_WRAP(path);
    CharBuf       *locks_dir = (CharBuf*)ZCB_WRAP_STR("locks", 5);

    // The path must look like "locks" + one separator char + our lock name.
    if (!ZCB_Starts_With(remainder, locks_dir)) {
        return false;
    }
    ZCB_Nip(remainder, CB_Get_Size(locks_dir) + 1);
    if (!ZCB_Starts_With(remainder, self->name)) {
        return false;
    }

    // Nothing to do when the file is already gone.
    if (!Folder_Exists(folder, path)) {
        return false;
    }

    bool_t deleted   = false;
    Hash  *lock_data = (Hash*)Json_slurp_json(folder, path);
    if (lock_data != NULL && Obj_Is_A((Obj*)lock_data, HASH)) {
        CharBuf *pid_str   = (CharBuf*)Hash_Fetch_Str(lock_data, "pid", 3);
        CharBuf *lock_host = (CharBuf*)Hash_Fetch_Str(lock_data, "host", 4);
        CharBuf *lock_name = (CharBuf*)Hash_Fetch_Str(lock_data, "name", 4);

        // Each stage is only evaluated when the previous one passed.
        const bool_t host_ok = lock_host != NULL
                               && CB_Equals(lock_host, (Obj*)self->host);
        const bool_t name_ok = host_ok
                               && lock_name != NULL
                               && CB_Equals(lock_name, (Obj*)self->name);
        const bool_t pid_ok  = name_ok && pid_str != NULL;

        if (pid_ok) {
            const int pid = (int)CB_To_I64(pid_str);

            // Eligible if the pid is this process, or else a dead process.
            bool_t eligible = delete_mine && pid == PID_getpid();
            if (!eligible) {
                eligible = delete_other && !PID_active(pid);
            }

            if (eligible) {
                if (!Folder_Delete(folder, path)) {
                    CharBuf *mess = MAKE_MESS("Can't delete '%o'", path);
                    DECREF(lock_data);
                    Err_throw_mess(ERR, mess);
                }
                else {
                    deleted = true;
                }
            }
        }
    }
    DECREF(lock_data);

    return deleted;
}
Beispiel #5
0
// Parse `size` bytes of JSON at `text` using a freshly allocated parser.
//
// Returns whatever S_do_parse_json produces, or NULL with the global error
// set when the parser itself could not be allocated.
static Obj*
S_parse_json(char *text, size_t size) {
    Obj  *result = NULL;
    void *parser = LucyParseJsonAlloc(lucy_Memory_wrapped_malloc);

    if (parser != NULL) {
        result = S_do_parse_json(parser, text, size);
        LucyParseJsonFree(parser, lucy_Memory_wrapped_free);
    }
    else {
        CharBuf *mess = MAKE_MESS("Failed to allocate JSON parser");
        Err_set_error(Err_new(mess));
    }

    return result;
}
Beispiel #6
0
// Initialize a LexIndex over the "lexicon-N.ix" / "lexicon-N.ixix" files
// belonging to `field` within `segment`.
//
// Throws when the schema has no type for `field`; rethrows the Folder error
// when either file cannot be opened.  In both cases the file names and
// `self` are released first.  Returns `self` on success.
LexIndex*
LexIndex_init(LexIndex *self, Schema *schema, Folder *folder,
              Segment *segment, String *field) {
    const int32_t num       = Seg_Field_Num(segment, field);
    String       *seg_dir   = Seg_Get_Name(segment);
    String       *ixix_path = Str_newf("%o/lexicon-%i32.ixix", seg_dir, num);
    String       *ix_path   = Str_newf("%o/lexicon-%i32.ix", seg_dir, num);
    Architecture *arch      = Schema_Get_Architecture(schema);

    // Base-class init, then our own scalar state.
    Lex_init((Lexicon*)self, field);
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    ivars->tinfo = TInfo_new(0);
    ivars->tick  = 0;

    // Resolve the field type.
    ivars->field_type = Schema_Fetch_Type(schema, field);
    if (!ivars->field_type) {
        String *mess = MAKE_MESS("Unknown field: '%o'", field);
        DECREF(ix_path);
        DECREF(ixix_path);
        DECREF(self);
        Err_throw_mess(ERR, mess);
    }
    ivars->field_type   = (FieldType*)INCREF(ivars->field_type);
    ivars->term_stepper = FType_Make_Term_Stepper(ivars->field_type);

    // Open .ixix first and .ix only if that worked; either failure is
    // captured so the cleanup/rethrow happens in one place.
    Err *open_error = NULL;
    ivars->ixix_in = Folder_Open_In(folder, ixix_path);
    if (!ivars->ixix_in) {
        open_error = (Err*)INCREF(Err_get_error());
    }
    else {
        ivars->ix_in = Folder_Open_In(folder, ix_path);
        if (!ivars->ix_in) {
            open_error = (Err*)INCREF(Err_get_error());
        }
    }
    if (open_error) {
        DECREF(ix_path);
        DECREF(ixix_path);
        DECREF(self);
        RETHROW(open_error);
    }

    ivars->index_interval = Arch_Index_Interval(arch);
    ivars->skip_interval  = Arch_Skip_Interval(arch);

    // The .ixix file is treated as an array of 64-bit offsets.
    const int64_t ixix_len = InStream_Length(ivars->ixix_in);
    ivars->size    = (int32_t)(ixix_len / sizeof(int64_t));
    ivars->offsets = (const int64_t*)InStream_Buf(ivars->ixix_in,
                                                  (size_t)ixix_len);

    DECREF(ixix_path);
    DECREF(ix_path);

    return self;
}
Beispiel #7
0
/* Initialize a DefaultDocReader for the segment selected by `seg_tick`.
 *
 * When the segment carries "documents" metadata, its "format" entry must
 * equal DocWriter_current_file_format; otherwise this function throws.  If
 * the segment's documents.ix file exists, both documents.ix and
 * documents.dat are opened, and a failure to open either one throws after
 * releasing the file names and `self`.
 *
 * Returns `self`.
 */
DefaultDocReader*
DefDocReader_init(DefaultDocReader *self, Schema *schema, Folder *folder, 
                  Snapshot *snapshot, VArray *segments, i32_t seg_tick)
{
    Hash *metadata; 
    Segment *segment;
    DocReader_init((DocReader*)self, schema, folder, snapshot, segments,
        seg_tick);
    segment = DefDocReader_Get_Segment(self);
    metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "documents", 9);

    if (metadata) {
        Obj     *format = Hash_Fetch_Str(metadata, "format", 6);
        CharBuf *seg_name;
        CharBuf *ix_file;
        CharBuf *dat_file;

        /* Check format.  This happens before the file names are allocated
         * so that none of the THROWs below can leak them. */
        if (!format) { THROW("Missing 'format' var"); }
        else {
            i64_t format_val = Obj_To_I64(format);
            if (format_val < DocWriter_current_file_format) {
                THROW("Obsolete doc storage format %i64; "
                    "Index regeneration is required", format_val);
            }
            else if (format_val != DocWriter_current_file_format) {
                THROW("Unsupported doc storage format: %i64", format_val);
            }
        }

        /* Build file names. */
        seg_name = Seg_Get_Name(segment);
        ix_file  = CB_newf("%o/documents.ix", seg_name);
        dat_file = CB_newf("%o/documents.dat", seg_name);

        /* Get streams. */
        if (Folder_Exists(folder, ix_file)) {
            self->ix_in  = Folder_Open_In(folder, ix_file);
            self->dat_in = Folder_Open_In(folder, dat_file);
            if (!self->ix_in || !self->dat_in) {
                CharBuf *mess = MAKE_MESS("Can't open either %o or %o",
                    ix_file, dat_file);
                DECREF(ix_file);
                DECREF(dat_file);
                DECREF(self);
                Err_throw_mess(mess);
            }
        }
        DECREF(ix_file);
        DECREF(dat_file);
    }
    
    return self;
}
Beispiel #8
0
// Parse a JSON string literal.
//
// On entry `*json_ptr` points at the opening double quote; `limit` is one
// past the last readable byte.  On success `*json_ptr` is advanced to just
// beyond the closing quote and a new CharBuf holding the string's content
// is returned.  Returns NULL with the global error set when the string is
// unterminated, contains invalid UTF-8, or (via S_unescape_text) contains a
// bad escape.
static CharBuf*
S_parse_string(char **json_ptr, char *const limit) {
    // Find terminating double quote, determine whether there are any escapes.
    char *top = *json_ptr + 1;
    char *end = NULL;
    bool_t saw_backslash = false;
    for (char *text = top; text < limit; text++) {
        if (*text == '"') {
            end = text;
            break;
        }
        else if (*text == '\\') {
            saw_backslash = true;
            if (text + 1 < limit && text[1] == 'u') {
                // A \uXXXX escape spans 6 bytes.  If it cannot fit before
                // `limit`, stop scanning instead of moving the pointer
                // beyond one-past-the-end of the buffer (undefined
                // behavior); the string is reported as unterminated below.
                if (limit - text <= 5) {
                    break;
                }
                text += 5;
            }
            else {
                // Skip the escaped character so an escaped quote is not
                // mistaken for the terminator.
                text += 1;
            }
        }
    }
    if (!end) {
        SET_ERROR(CB_newf("Unterminated string"), *json_ptr, limit);
        return NULL;
    }

    // Advance the text buffer to just beyond the closing quote.
    *json_ptr = end + 1;

    if (saw_backslash) {
        return S_unescape_text(top, end);
    }
    else {
        // Optimize common case where there are no escapes.
        size_t len = end - top;
        if (!StrHelp_utf8_valid(top, len)) {
            CharBuf *mess = MAKE_MESS("Bad UTF-8 in JSON");
            Err_set_error(Err_new(mess));
            return NULL;
        }
        return CB_new_from_trusted_utf8(top, len);
    }
}
Beispiel #9
0
// Try to obtain the lock by creating its lock file.
//
// Returns true when the lock file was created.  Returns false with a
// LockErr set as the global error when the lock file already exists, the
// "locks" directory cannot be created, the temporary file cannot be
// written, or the hard link fails.  Throws if the temporary file was
// written but could not be deleted afterwards.
bool_t
LFLock_request(LockFileLock *self) {
    if (Folder_Exists(self->folder, self->lock_path)) {
        CharBuf *mess = CB_newf("Can't obtain lock: '%o' exists",
                                self->lock_path);
        Err_set_error((Err*)LockErr_new(mess));
        return false;
    }

    // Create the "locks" subdirectory if necessary.
    CharBuf *locks_dir = (CharBuf*)ZCB_WRAP_STR("locks", 5);
    if (!Folder_Exists(self->folder, locks_dir)
        && !Folder_MkDir(self->folder, locks_dir)
       ) {
        Err     *mkdir_err = (Err*)CERTIFY(Err_get_error(), ERR);
        CharBuf *mess      = CB_newf("Can't create 'locks' directory: %o",
                                     Err_Get_Mess(mkdir_err));
        LockErr *lock_err  = LockErr_new(mess);

        // If the directory is there after all, another process presumably
        // created it in the meantime; otherwise give up.
        if (!Folder_Find_Folder(self->folder, locks_dir)) {
            Err_set_error((Err*)lock_err);
            return false;
        }
        DECREF(lock_err);
    }

    // The lock file content: pid, host and lock name, serialized as JSON.
    Hash *lock_info = Hash_new(3);
    Hash_Store_Str(lock_info, "pid", 3,
                   (Obj*)CB_newf("%i32", (int32_t)PID_getpid()));
    Hash_Store_Str(lock_info, "host", 4, INCREF(self->host));
    Hash_Store_Str(lock_info, "name", 4, INCREF(self->name));

    // Write to a temporary file first; the lock file proper is then created
    // by hard-linking it, which either yields the complete file or fails
    // without touching an existing one.
    if (!Json_spew_json((Obj*)lock_info, self->folder, self->link_path)) {
        Err     *spew_err = (Err*)CERTIFY(Err_get_error(), ERR);
        CharBuf *mess     = CB_newf("Failed to obtain lock at '%o': %o",
                                    self->lock_path,
                                    Err_Get_Mess(spew_err));
        Err_set_error((Err*)LockErr_new(mess));
        DECREF(lock_info);
        return false;
    }

    const bool_t linked = Folder_Hard_Link(self->folder, self->link_path,
                                           self->lock_path);
    if (!linked) {
        Err     *link_err = (Err*)CERTIFY(Err_get_error(), ERR);
        CharBuf *mess     = CB_newf("Failed to obtain lock at '%o': %o",
                                    self->lock_path,
                                    Err_Get_Mess(link_err));
        Err_set_error((Err*)LockErr_new(mess));
    }

    // The temporary file must go away whether or not the link succeeded.
    const bool_t temp_removed = Folder_Delete(self->folder, self->link_path);
    DECREF(lock_info);
    if (!temp_removed) {
        CharBuf *mess = MAKE_MESS("Failed to delete '%o'", self->link_path);
        Err_throw_mess(ERR, mess);
    }

    return linked;
}
Beispiel #10
0
// Try to obtain the lock by creating its lock file.
//
// Returns true when the lock file was created.  Returns false with the
// global error set when the lock file already exists, the "locks" directory
// cannot be created, the temporary file cannot be opened or written, or the
// hard link fails.  Throws if the temporary file cannot be deleted at the
// end.
bool
LFLock_Request_IMP(LockFileLock *self) {
    LockFileLockIVARS *const ivars = LFLock_IVARS(self);

    if (Folder_Exists(ivars->folder, ivars->lock_path)) {
        String *mess = Str_newf("Can't obtain lock: '%o' exists",
                                ivars->lock_path);
        Err_set_error((Err*)LockErr_new(mess));
        return false;
    }

    // Create the "locks" subdirectory if necessary.
    String *locks_dir = (String*)SSTR_WRAP_UTF8("locks", 5);
    if (!Folder_Exists(ivars->folder, locks_dir)
        && !Folder_MkDir(ivars->folder, locks_dir)
       ) {
        Err     *mkdir_err = (Err*)CERTIFY(Err_get_error(), ERR);
        String  *mess      = Str_newf("Can't create 'locks' directory: %o",
                                      Err_Get_Mess(mkdir_err));
        LockErr *lock_err  = LockErr_new(mess);

        // If the directory is there after all, another process presumably
        // created it in the meantime; otherwise give up.
        if (!Folder_Find_Folder(ivars->folder, locks_dir)) {
            Err_set_error((Err*)lock_err);
            return false;
        }
        DECREF(lock_err);
    }

    // The lock file content: pid, host and lock name, serialized as JSON.
    Hash *lock_info = Hash_new(3);
    Hash_Store_Utf8(lock_info, "pid", 3,
                    (Obj*)Str_newf("%i32", (int32_t)PID_getpid()));
    Hash_Store_Utf8(lock_info, "host", 4, INCREF(ivars->host));
    Hash_Store_Utf8(lock_info, "name", 4, INCREF(ivars->name));
    String *json = Json_to_json((Obj*)lock_info);
    DECREF(lock_info);

    // Write to a temporary file first; the lock file proper is then created
    // by hard-linking it, which either yields the complete file or fails
    // without touching an existing one.
    OutStream *temp_out = Folder_Open_Out(ivars->folder, ivars->link_path);
    if (!temp_out) {
        ERR_ADD_FRAME(Err_get_error());
        DECREF(json);
        return false;
    }

    // The write runs under Err_trap so an exception becomes a return value.
    struct lockfile_context context;
    context.outstream = temp_out;
    context.json      = json;
    Err *write_err = Err_trap(S_write_lockfile_json, &context);
    DECREF(temp_out);
    DECREF(json);

    bool obtained = false;
    if (write_err) {
        String *mess = Str_newf("Failed to obtain lock at '%o': %o",
                                ivars->lock_path,
                                Err_Get_Mess(write_err));
        Err_set_error((Err*)LockErr_new(mess));
        DECREF(write_err);
    }
    else {
        obtained = Folder_Hard_Link(ivars->folder, ivars->link_path,
                                    ivars->lock_path);
        if (!obtained) {
            Err    *link_err = (Err*)CERTIFY(Err_get_error(), ERR);
            String *mess     = Str_newf("Failed to obtain lock at '%o': %o",
                                        ivars->lock_path,
                                        Err_Get_Mess(link_err));
            Err_set_error((Err*)LockErr_new(mess));
        }
    }

    // The temporary file must go away on every path that reaches here.
    if (!Folder_Delete(ivars->folder, ivars->link_path)) {
        String *mess = MAKE_MESS("Failed to delete '%o'", ivars->link_path);
        Err_throw_mess(ERR, mess);
    }

    return obtained;
}
Beispiel #11
0
// Load the schema and open one SegReader per segment listed in the
// PolyReader's snapshot.
//
// `context` must point to a `struct try_open_elements_context`; its `self`
// member is the PolyReader to populate and its `seg_readers` member receives
// the resulting array.  Every failure is reported by throwing, so this is
// presumably meant to be invoked through Err_trap (NOTE(review): confirm
// against the caller).
//
// On success: `ivars->schema` has been replaced with the schema loaded from
// the highest-generation "schema_*.json" entry, and `args->seg_readers`
// holds one reader per segment.  If opening a SegReader fails,
// `args->seg_readers` is released, reset to NULL, and the error rethrown.
void
S_try_open_elements(void *context) {
    struct try_open_elements_context *args
        = (struct try_open_elements_context*)context;
    PolyReader *self              = args->self;
    PolyReaderIVARS *const ivars  = PolyReader_IVARS(self);
    VArray     *files             = Snapshot_List(ivars->snapshot);
    Folder     *folder            = PolyReader_Get_Folder(self);
    uint32_t    num_segs          = 0;
    uint64_t    latest_schema_gen = 0;
    CharBuf    *schema_file       = NULL;

    // Find schema file, count segments.
    for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
        CharBuf *entry = (CharBuf*)VA_Fetch(files, i);

        if (Seg_valid_seg_name(entry)) {
            num_segs++;
        }
        else if (CB_Starts_With_Str(entry, "schema_", 7)
                 && CB_Ends_With_Str(entry, ".json", 5)
                ) {
            // Keep only the schema entry with the highest generation seen
            // so far.  `schema_file` is our own copy: cloned on first match,
            // overwritten in place on later ones.
            uint64_t gen = IxFileNames_extract_gen(entry);
            if (gen > latest_schema_gen) {
                latest_schema_gen = gen;
                if (!schema_file) { schema_file = CB_Clone(entry); }
                else { CB_Mimic(schema_file, (Obj*)entry); }
            }
        }
    }

    // Read Schema.
    if (!schema_file) {
        DECREF(files);
        THROW(ERR, "Can't find a schema file.");
    }
    else {
        Hash *dump = (Hash*)Json_slurp_json(folder, schema_file);
        if (dump) { // read file successfully
            // Swap in the freshly loaded schema; CERTIFY checks that the
            // loaded object really is a SCHEMA.
            DECREF(ivars->schema);
            ivars->schema = (Schema*)CERTIFY(
                               VTable_Load_Obj(SCHEMA, (Obj*)dump), SCHEMA);
            DECREF(dump);
            DECREF(schema_file);
            schema_file = NULL;
        }
        else {
            // Build the message before releasing schema_file, which it
            // references.
            CharBuf *mess = MAKE_MESS("Failed to parse %o", schema_file);
            DECREF(schema_file);
            DECREF(files);
            Err_throw_mess(ERR, mess);
        }
    }

    // Second pass over the snapshot entries: build a Segment object for
    // every segment name.
    VArray *segments = VA_new(num_segs);
    for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
        CharBuf *entry = (CharBuf*)VA_Fetch(files, i);

        // Create a Segment for each segmeta.
        if (Seg_valid_seg_name(entry)) {
            int64_t seg_num = IxFileNames_extract_gen(entry);
            Segment *segment = Seg_new(seg_num);

            // Bail if reading the file fails (probably because it's been
            // deleted and a new snapshot file has been written so we need to
            // retry).
            if (Seg_Read_File(segment, folder)) {
                VA_Push(segments, (Obj*)segment);
            }
            else {
                // The message references `entry`, which lives inside
                // `files`, so it is built before anything is released.
                CharBuf *mess = MAKE_MESS("Failed to read %o", entry);
                DECREF(segment);
                DECREF(segments);
                DECREF(files);
                Err_throw_mess(ERR, mess);
            }
        }
    }

    // Sort the segments by age.
    VA_Sort(segments, NULL, NULL);

    // Open individual SegReaders.
    // Each open runs under Err_trap so that a failure can be handled here
    // (release what was built so far) before it is rethrown.
    struct try_open_segreader_context seg_context;
    seg_context.schema   = PolyReader_Get_Schema(self);
    seg_context.folder   = folder;
    seg_context.snapshot = PolyReader_Get_Snapshot(self);
    seg_context.segments = segments;
    seg_context.result   = NULL;
    args->seg_readers = VA_new(num_segs);
    Err *error = NULL;
    for (uint32_t seg_tick = 0; seg_tick < num_segs; seg_tick++) {
        seg_context.seg_tick = seg_tick;
        error = Err_trap(S_try_open_segreader, &seg_context);
        if (error) {
            // Stop at the first failure; cleanup happens after the loop.
            break;
        }
        // Hand the new reader over to the result array and clear the slot
        // for the next iteration.
        VA_Push(args->seg_readers, (Obj*)seg_context.result);
        seg_context.result = NULL;
    }

    DECREF(segments);
    DECREF(files);
    if (error) {
        // Discard any readers already opened and propagate the failure.
        DECREF(args->seg_readers);
        args->seg_readers = NULL;
        RETHROW(error);
    }
}
Beispiel #12
0
// Decode the JSON escape sequences in the byte range [top, end) and return
// the result as a new CharBuf.
//
// `top` points just past the opening quote of a JSON string and `end` at its
// closing quote.  Returns NULL with the global error set on an illegal
// escape, a malformed \u escape, a \u escape in the surrogate range
// (0xD800-0xDFFF), or when the decoded bytes are not valid UTF-8.
static CharBuf*
S_unescape_text(char *const top, char *const end) {
    // The unescaped string will never be longer than the escaped string
    // because only a \u escape can theoretically be too long and
    // StrHelp_encode_utf8_char guards against sequences over 4 bytes.
    // Therefore we can allocate once and not worry about reallocating.
    size_t cap = end - top + 1;
    char *target_buf = (char*)MALLOCATE(cap);
    size_t target_size = 0;
    for (char *text = top; text < end; text++) {
        if (*text != '\\') {
            target_buf[target_size++] = *text;
        }
        else {
            // Process escape.
            text++;
            switch (*text) {
                case '"':
                    target_buf[target_size++] = '"';
                    break;
                case '\\':
                    target_buf[target_size++] = '\\';
                    break;
                case '/':
                    target_buf[target_size++] = '/';
                    break;
                case 'b':
                    target_buf[target_size++] = '\b';
                    break;
                case 'f':
                    target_buf[target_size++] = '\f';
                    break;
                case 'n':
                    target_buf[target_size++] = '\n';
                    break;
                case 'r':
                    target_buf[target_size++] = '\r';
                    break;
                case 't':
                    target_buf[target_size++] = '\t';
                    break;
                case 'u': {
                        // The four hex digits must lie entirely inside
                        // [top, end).
                        if (end - text < 5) {
                            FREEMEM(target_buf);
                            SET_ERROR(CB_newf("Invalid \\u escape"),
                                      text - 1, end);
                            return NULL;
                        }

                        // Decode exactly four hex digits by hand.  strtol()
                        // is unsuitable here: with base 16 it also accepts
                        // leading whitespace, a sign, and a "0x" prefix, so
                        // input such as "\u 123" or "\u0x1F" would be
                        // treated as a valid escape.
                        const char *hex = text + 1;
                        long code_point = 0;
                        int  valid      = 1;
                        text += 4;
                        for (int i = 0; i < 4; i++) {
                            const char c = hex[i];
                            int digit;
                            if (c >= '0' && c <= '9') {
                                digit = c - '0';
                            }
                            else if (c >= 'a' && c <= 'f') {
                                digit = c - 'a' + 10;
                            }
                            else if (c >= 'A' && c <= 'F') {
                                digit = c - 'A' + 10;
                            }
                            else {
                                valid = 0;
                                break;
                            }
                            code_point = (code_point << 4) | digit;
                        }
                        if (!valid) {
                            FREEMEM(target_buf);
                            SET_ERROR(CB_newf("Invalid \\u escape"), text - 5, end);
                            return NULL;
                        }
                        if (code_point >= 0xD800 && code_point <= 0xDFFF) {
                            FREEMEM(target_buf);
                            SET_ERROR(CB_newf("Surrogate pairs not supported"),
                                      text - 5, end);
                            return NULL;
                        }
                        target_size += StrHelp_encode_utf8_char((uint32_t)code_point,
                                                                target_buf + target_size);
                    }
                    break;
                default:
                    FREEMEM(target_buf);
                    SET_ERROR(CB_newf("Illegal escape"), text - 1, end);
                    return NULL;
            }
        }
    }

    // NULL-terminate, sanity check, then return the unescaped string.
    target_buf[target_size] = '\0';
    if (!StrHelp_utf8_valid(target_buf, target_size)) {
        FREEMEM(target_buf);
        CharBuf *mess = MAKE_MESS("Bad UTF-8 in JSON");
        Err_set_error(Err_new(mess));
        return NULL;
    }
    return CB_new_steal_from_trusted_str(target_buf, target_size, cap);
}
Beispiel #13
0
// Append the JSON encoding of `dump` to `json`, indenting nested content
// according to `depth`.
//
// NULL encodes as "null"; CFISH_TRUE / CFISH_FALSE as "true" / "false";
// CharBufs as JSON strings; IntNums and FloatNums as numbers; VArrays as
// arrays; Hashes as objects with their keys sorted.  Returns false with the
// global error set when `depth` exceeds MAX_DEPTH or a hash key is not a
// CharBuf; returns true otherwise.
//
// NOTE(review): an object of any other class appends nothing and still
// returns true -- confirm whether that is intended.
static bool_t
S_to_json(Obj *dump, CharBuf *json, int32_t depth) {
    // Guard against infinite recursion in self-referencing data structures.
    if (depth > MAX_DEPTH) {
        CharBuf *mess = MAKE_MESS("Exceeded max depth of %i32", MAX_DEPTH);
        Err_set_error(Err_new(mess));
        return false;
    }

    if (!dump) {
        CB_Cat_Trusted_Str(json, "null", 4);
    }
    else if (dump == (Obj*)CFISH_TRUE) {
        CB_Cat_Trusted_Str(json, "true", 4);
    }
    else if (dump == (Obj*)CFISH_FALSE) {
        CB_Cat_Trusted_Str(json, "false", 5);
    }
    else if (Obj_Is_A(dump, CHARBUF)) {
        S_append_json_string(dump, json);
    }
    else if (Obj_Is_A(dump, INTNUM)) {
        CB_catf(json, "%i64", Obj_To_I64(dump));
    }
    else if (Obj_Is_A(dump, FLOATNUM)) {
        CB_catf(json, "%f64", Obj_To_F64(dump));
    }
    else if (Obj_Is_A(dump, VARRAY)) {
        VArray *array = (VArray*)dump;
        size_t size = VA_Get_Size(array);
        if (size == 0) {
            // Put empty array on single line.
            CB_Cat_Trusted_Str(json, "[]", 2);
            return true;
        }
        else if (size == 1) {
            Obj *elem = VA_Fetch(array, 0);
            // A NULL element is a scalar ("null").  Test for it before
            // calling Obj_Is_A so that no method is invoked on NULL.
            if (!elem || !(Obj_Is_A(elem, HASH) || Obj_Is_A(elem, VARRAY))) {
                // Put array containing single scalar element on one line.
                CB_Cat_Trusted_Str(json, "[", 1);
                if (!S_to_json(elem, json, depth + 1)) {
                    return false;
                }
                CB_Cat_Trusted_Str(json, "]", 1);
                return true;
            }
        }
        // Fall back to spreading elements across multiple lines.
        CB_Cat_Trusted_Str(json, "[", 1);
        for (size_t i = 0; i < size; i++) {
            CB_Cat_Trusted_Str(json, "\n", 1);
            S_cat_whitespace(json, depth + 1);
            if (!S_to_json(VA_Fetch(array, i), json, depth + 1)) {
                return false;
            }
            if (i + 1 < size) {
                CB_Cat_Trusted_Str(json, ",", 1);
            }
        }
        CB_Cat_Trusted_Str(json, "\n", 1);
        S_cat_whitespace(json, depth);
        CB_Cat_Trusted_Str(json, "]", 1);
    }
    else if (Obj_Is_A(dump, HASH)) {
        Hash *hash = (Hash*)dump;
        size_t size = Hash_Get_Size(hash);

        // Put empty hash on single line.
        if (size == 0) {
            CB_Cat_Trusted_Str(json, "{}", 2);
            return true;
        }

        // Validate that all keys are strings, then sort.
        VArray *keys = Hash_Keys(hash);
        for (size_t i = 0; i < size; i++) {
            Obj *key = VA_Fetch(keys, i);
            if (!key || !Obj_Is_A(key, CHARBUF)) {
                DECREF(keys);
                CharBuf *key_class = key ? Obj_Get_Class_Name(key) : NULL;
                CharBuf *mess = MAKE_MESS("Illegal key type: %o", key_class);
                Err_set_error(Err_new(mess));
                return false;
            }
        }
        VA_Sort(keys, NULL, NULL);

        // Spread pairs across multiple lines.
        CB_Cat_Trusted_Str(json, "{", 1);
        for (size_t i = 0; i < size; i++) {
            Obj *key = VA_Fetch(keys, i);
            CB_Cat_Trusted_Str(json, "\n", 1);
            S_cat_whitespace(json, depth + 1);
            S_append_json_string(key, json);
            CB_Cat_Trusted_Str(json, ": ", 2);
            if (!S_to_json(Hash_Fetch(hash, key), json, depth + 1)) {
                DECREF(keys);
                return false;
            }
            if (i + 1 < size) {
                CB_Cat_Trusted_Str(json, ",", 1);
            }
        }
        CB_Cat_Trusted_Str(json, "\n", 1);
        S_cat_whitespace(json, depth);
        CB_Cat_Trusted_Str(json, "}", 1);

        DECREF(keys);
    }

    return true;
}
Beispiel #14
0
/* Initialize a SortCache over the "sort-N.ord", "sort-N.ix" and "sort-N.dat"
 * files of field number `field_num` within `segment`.
 *
 * Throws when the field has no type or is not sortable, when any of the
 * three files cannot be opened, or when the .ord file is too short for the
 * segment's document count.  Returns `self` on success.
 */
SortCache*
SortCache_init(SortCache *self, Schema *schema, Folder *folder,
               Segment *segment, i32_t field_num)
{
    CharBuf *field    = Seg_Field_Name(segment, field_num);
    CharBuf *seg_name = Seg_Get_Name(segment);
    CharBuf *ord_file;
    CharBuf *ix_file;
    CharBuf *dat_file;
    i64_t ord_len, ix_len, dat_len;

    /* Derive.  The field is validated before any file name is allocated so
     * that the THROW below has nothing to leak. */
    self->doc_max = Seg_Get_Count(segment);
    self->type    = Schema_Fetch_Type(schema, field);
    if (!self->type || !FType_Sortable(self->type)) {
        THROW("'%o' isn't a sortable field", field);
    }

    /* Open instreams. */
    ord_file = CB_newf("%o/sort-%i32.ord", seg_name, field_num);
    ix_file  = CB_newf("%o/sort-%i32.ix",  seg_name, field_num);
    dat_file = CB_newf("%o/sort-%i32.dat", seg_name, field_num);
    self->ord_in  = Folder_Open_In(folder, ord_file);
    self->ix_in   = Folder_Open_In(folder, ix_file);
    self->dat_in  = Folder_Open_In(folder, dat_file);
    if (!self->ix_in || !self->dat_in || !self->ord_in) {
        CharBuf *mess = MAKE_MESS("Can't open either %o, %o or %o", ord_file, 
            ix_file, dat_file);
        DECREF(ord_file);
        DECREF(ix_file);
        DECREF(dat_file);
        Err_throw_mess(mess);
    }

    /* The file names are not needed past this point; releasing them now
     * keeps the length-validation THROW below from leaking them. */
    DECREF(ord_file);
    DECREF(ix_file);
    DECREF(dat_file);

    ord_len = InStream_Length(self->ord_in);
    ix_len  = InStream_Length(self->ix_in);
    dat_len = InStream_Length(self->dat_in);

    /* Calculate the number of unique values and derive the ord bit width. */
    self->num_uniq = (i32_t)(ix_len / 8) - 1; 
    self->width    = S_calc_width(self->num_uniq);

    /* Validate file lengths. */
    {
        double bytes_per_doc = self->width / 8.0;
        double max_ords      = ord_len / bytes_per_doc;
        if (max_ords < self->doc_max + 1) {
            THROW("Conflict between ord count max %f64 and doc_max %i32", 
                max_ords, self->doc_max);
        }
    }

    /* Mmap ords, offsets and character data. */
    self->ords      = InStream_Buf(self->ord_in, (size_t)ord_len);
    self->offsets   = (i64_t*)InStream_Buf(self->ix_in, (size_t)ix_len);
    self->char_data = InStream_Buf(self->dat_in, dat_len);
    {
        char *offs            = (char*)self->offsets;
        self->offsets_limit   = (i64_t*)(offs + ix_len);
        self->char_data_limit = self->char_data + dat_len;
    }

    return self;
}
Beispiel #15
0
// Merge every non-".json" file in the writer's folder into a single
// "cf.dat" file, record each file's offset and length in "cfmeta.json", and
// then delete the files that were merged.
//
// Throws if "cf.dat" or any input file cannot be opened, if the metadata
// cannot be written or renamed into place, or if a merged file cannot be
// deleted.
//
// NOTE(review): most error paths rethrow without releasing the local
// containers and streams created here -- confirm whether callers treat these
// failures as fatal.
static void
S_do_consolidate(CompoundFileWriter *self, CompoundFileWriterIVARS *ivars) {
    UNUSED_VAR(self);
    Folder    *folder       = ivars->folder;
    Hash      *metadata     = Hash_new(0);
    Hash      *sub_files    = Hash_new(0);
    // The folder is listed before "cf.dat" is opened for writing.
    Vector    *files        = Folder_List(folder, NULL);
    Vector    *merged       = Vec_new(Vec_Get_Size(files));
    String    *cf_file      = (String*)SSTR_WRAP_UTF8("cf.dat", 6);
    OutStream *outstream    = Folder_Open_Out(folder, (String*)cf_file);
    bool       rename_success;

    if (!outstream) { RETHROW(INCREF(Err_get_error())); }

    // Start metadata.
    Hash_Store_Utf8(metadata, "files", 5, INCREF(sub_files));
    Hash_Store_Utf8(metadata, "format", 6,
                    (Obj*)Str_newf("%i32", CFWriter_current_file_format));

    Vec_Sort(files);
    for (uint32_t i = 0, max = Vec_Get_Size(files); i < max; i++) {
        String *infilename = (String*)Vec_Fetch(files, i);

        if (!Str_Ends_With_Utf8(infilename, ".json", 5)) {
            InStream *instream = Folder_Open_In(folder, infilename);
            int64_t   offset, len;

            if (!instream) { RETHROW(INCREF(Err_get_error())); }

            // Allocate the per-file record only once the input is known to
            // be open, so a failed open does not leak it.
            Hash *file_data = Hash_new(2);

            // Absorb the file.
            offset = OutStream_Tell(outstream);
            OutStream_Absorb(outstream, instream);
            len = OutStream_Tell(outstream) - offset;

            // Record offset and length.
            Hash_Store_Utf8(file_data, "offset", 6,
                            (Obj*)Str_newf("%i64", offset));
            Hash_Store_Utf8(file_data, "length", 6,
                            (Obj*)Str_newf("%i64", len));
            Hash_Store(sub_files, infilename, (Obj*)file_data);
            Vec_Push(merged, INCREF(infilename));

            // Add filler NULL bytes so that every sub-file begins on a file
            // position multiple of 8.
            OutStream_Align(outstream, 8);

            InStream_Close(instream);
            DECREF(instream);
        }
    }

    // Write metadata to a temp file, then rename it into place.  A failed
    // write must stop us here: continuing would delete the merged files
    // below without usable metadata describing where they went.
    String *cfmeta_temp = (String*)SSTR_WRAP_UTF8("cfmeta.json.temp", 16);
    String *cfmeta_file = (String*)SSTR_WRAP_UTF8("cfmeta.json", 11);
    if (!Json_spew_json((Obj*)metadata, (Folder*)ivars->folder, cfmeta_temp)) {
        RETHROW(INCREF(Err_get_error()));
    }
    rename_success = Folder_Rename(ivars->folder, cfmeta_temp, cfmeta_file);
    if (!rename_success) { RETHROW(INCREF(Err_get_error())); }

    // Clean up.
    OutStream_Close(outstream);
    DECREF(outstream);
    DECREF(files);
    DECREF(metadata);
    DECREF(sub_files);

    // Delete the originals now that their content lives in cf.dat.
    for (uint32_t i = 0, max = Vec_Get_Size(merged); i < max; i++) {
        String *merged_file = (String*)Vec_Fetch(merged, i);
        if (!Folder_Delete(folder, merged_file)) {
            String *mess = MAKE_MESS("Can't delete '%o'", merged_file);
            DECREF(merged);
            Err_throw_mess(ERR, mess);
        }
    }
    DECREF(merged);
}