LexIndex* LexIndex_init(LexIndex *self, Schema *schema, Folder *folder, Segment *segment, const CharBuf *field) { i32_t field_num = Seg_Field_Num(segment, field); CharBuf *seg_name = Seg_Get_Name(segment); CharBuf *ixix_file = CB_newf("%o/lexicon-%i32.ixix", seg_name, field_num); CharBuf *ix_file = CB_newf("%o/lexicon-%i32.ix", seg_name, field_num); Architecture *arch = Schema_Get_Architecture(schema); /* Init. */ self->term = ViewCB_new_from_trusted_utf8(NULL, 0); self->tinfo = TInfo_new(0,0,0,0); self->tick = 0; /* Derive */ self->field_type = Schema_Fetch_Type(schema, field); if (!self->field_type) { CharBuf *mess = MAKE_MESS("Unknown field: '%o'", field); DECREF(ix_file); DECREF(ixix_file); DECREF(self); Err_throw_mess(mess); } INCREF(self->field_type); self->ixix_in = Folder_Open_In(folder, ixix_file); self->ix_in = Folder_Open_In(folder, ix_file); if (!self->ixix_in || !self->ix_in) { CharBuf *mess = MAKE_MESS("Can't open either %o or %o", ix_file, ixix_file); DECREF(ix_file); DECREF(ixix_file); DECREF(self); Err_throw_mess(mess); } self->index_interval = Arch_Index_Interval(arch); self->skip_interval = Arch_Skip_Interval(arch); self->size = (i32_t)(InStream_Length(self->ixix_in) / sizeof(i64_t)); self->offsets = (i64_t*)InStream_Buf(self->ixix_in, (size_t)InStream_Length(self->ixix_in)); self->data = InStream_Buf(self->ix_in, InStream_Length(self->ix_in)); self->limit = self->data + InStream_Length(self->ix_in); DECREF(ixix_file); DECREF(ix_file); return self; }
// Encode `dump` as a JSON document terminated by a newline.
//
// Unless the file-level `tolerant` flag is set, the top-level object must be
// a Hash or a VArray; anything else (including NULL) sets the global error
// and yields NULL.  NULL is also returned, with a stack frame added to the
// pending error, when S_to_json reports an encoding failure.
CharBuf*
Json_to_json(Obj *dump) {
    // Only hashes and arrays are legal at the top level per the JSON spec.
    const int top_level_ok
        = dump != NULL
          && (Obj_Is_A(dump, HASH) || Obj_Is_A(dump, VARRAY));
    if (!top_level_ok && !tolerant) {
        CharBuf *class_name = dump ? Obj_Get_Class_Name(dump) : NULL;
        CharBuf *mess = MAKE_MESS("Illegal top-level object type: %o",
                                  class_name);
        Err_set_error(Err_new(mess));
        return NULL;
    }

    // Encode into a fresh buffer.
    CharBuf *json = CB_new(31);
    if (S_to_json(dump, json, 0)) {
        // Success: finish the document with a trailing newline.
        CB_Cat_Trusted_Str(json, "\n", 1);
        return json;
    }

    // Failure: discard the partial output and annotate the pending error.
    DECREF(json);
    ERR_ADD_FRAME(Err_get_error());
    return NULL;
}
// Delete the lock file at `path` if it belongs to this lock and its owner
// qualifies for deletion.
//
// The file is only considered when `path` starts with "locks" followed by
// one separator character and then this lock's name.  Its JSON contents must
// be a hash whose "host" and "name" entries are strings equal to ours and
// whose "pid" entry is a string.  The file is removed when either
// `delete_mine` is set and the pid is the current process, or `delete_other`
// is set and PID_active() reports the pid as not running.
//
// Returns true only if a file was actually deleted.  Throws if deletion was
// attempted and failed.
bool
LFLock_Maybe_Delete_File_IMP(LockFileLock *self, String *path,
                             bool delete_mine, bool delete_other) {
    LockFileLockIVARS *const ivars = LFLock_IVARS(self);
    Folder *folder = ivars->folder;

    // Only delete locks that start with our lock name.
    if (!Str_Starts_With_Utf8(path, "locks", 5)) {
        return false;
    }
    StringIterator *iter = Str_Top(path);
    StrIter_Advance(iter, 5 + 1);  // Skip "locks" plus the separator.
    const bool ours = StrIter_Starts_With(iter, ivars->name);
    DECREF(iter);
    if (!ours) {
        return false;
    }

    // Nothing to do if the file is already gone.
    if (!Folder_Exists(folder, path)) {
        return false;
    }

    // Attempt to delete dead lock file.
    bool deletable = false;
    Hash *hash = (Hash*)Json_slurp_json(folder, path);
    if (hash != NULL && Obj_Is_A((Obj*)hash, HASH)) {
        String *pid_buf = (String*)Hash_Fetch_Utf8(hash, "pid", 3);
        String *host    = (String*)Hash_Fetch_Utf8(hash, "host", 4);
        String *name    = (String*)Hash_Fetch_Utf8(hash, "name", 4);

        // Match hostname and lock name.
        if (host != NULL
            && Str_Is_A(host, STRING)
            && Str_Equals(host, (Obj*)ivars->host)
            && name != NULL
            && Str_Is_A(name, STRING)
            && Str_Equals(name, (Obj*)ivars->name)
            && pid_buf != NULL
            && Str_Is_A(pid_buf, STRING)
           ) {
            // Verify that pid is either mine or dead.
            int pid = (int)Str_To_I64(pid_buf);
            deletable = (delete_mine && pid == PID_getpid())    // This process.
                        || (delete_other && !PID_active(pid));  // Dead pid.
        }
    }

    if (deletable && !Folder_Delete(folder, path)) {
        String *mess = MAKE_MESS("Can't delete '%o'", path);
        DECREF(hash);
        Err_throw_mess(ERR, mess);
    }
    DECREF(hash);

    return deletable;
}
// Delete the lock file at `path` if it belongs to this lock and its owner
// qualifies for deletion.
//
// The file is only considered when `path` starts with "locks", then one
// more character, then this lock's name.  Its JSON contents must be a hash
// whose "host" and "name" entries equal ours and which has a "pid" entry.
// The file is removed when either `delete_mine` is set and the pid is the
// current process, or `delete_other` is set and PID_active() reports the
// pid as not running.
//
// Returns true only if a file was actually deleted.  Throws if deletion was
// attempted and failed.
bool_t
LFLock_maybe_delete_file(LockFileLock *self, const CharBuf *path,
                         bool_t delete_mine, bool_t delete_other) {
    Folder *folder = self->folder;

    // Scratch wrapper around `path`; it gets trimmed with ZCB_Nip below
    // while `path` itself is passed on untouched.
    ZombieCharBuf *scratch = ZCB_WRAP(path);

    // Only delete locks that start with our lock name.
    CharBuf *lock_dir_name = (CharBuf*)ZCB_WRAP_STR("locks", 5);
    if (!ZCB_Starts_With(scratch, lock_dir_name)) {
        return false;
    }
    ZCB_Nip(scratch, CB_Get_Size(lock_dir_name) + 1);
    if (!ZCB_Starts_With(scratch, self->name)) {
        return false;
    }

    // Nothing to do if the file is already gone.
    if (!Folder_Exists(folder, path)) {
        return false;
    }

    // Attempt to delete dead lock file.
    bool_t deletable = false;
    Hash *hash = (Hash*)Json_slurp_json(folder, path);
    if (hash != NULL && Obj_Is_A((Obj*)hash, HASH)) {
        CharBuf *pid_buf = (CharBuf*)Hash_Fetch_Str(hash, "pid", 3);
        CharBuf *host    = (CharBuf*)Hash_Fetch_Str(hash, "host", 4);
        CharBuf *name    = (CharBuf*)Hash_Fetch_Str(hash, "name", 4);

        // Match hostname and lock name.
        if (host != NULL
            && CB_Equals(host, (Obj*)self->host)
            && name != NULL
            && CB_Equals(name, (Obj*)self->name)
            && pid_buf != NULL
           ) {
            // Verify that pid is either mine or dead.
            int pid = (int)CB_To_I64(pid_buf);
            if ((delete_mine && pid == PID_getpid())     // This process.
                || (delete_other && !PID_active(pid))    // Dead pid.
               ) {
                deletable = true;
            }
        }
    }

    if (deletable && !Folder_Delete(folder, path)) {
        CharBuf *mess = MAKE_MESS("Can't delete '%o'", path);
        DECREF(hash);
        Err_throw_mess(ERR, mess);
    }
    DECREF(hash);

    return deletable;
}
// Parse `size` bytes of JSON starting at `text`.
//
// Allocates a parser with LucyParseJsonAlloc, hands it to S_do_parse_json,
// and frees it again before returning.  Returns whatever S_do_parse_json
// produced; if the parser cannot be allocated, sets the global error and
// returns NULL.
static Obj*
S_parse_json(char *text, size_t size) {
    void *parser = LucyParseJsonAlloc(lucy_Memory_wrapped_malloc);
    if (!parser) {
        CharBuf *mess = MAKE_MESS("Failed to allocate JSON parser");
        Err_set_error(Err_new(mess));
        return NULL;
    }

    Obj *result = S_do_parse_json(parser, text, size);

    // The parser is released on every path once parsing has run.
    LucyParseJsonFree(parser, lucy_Memory_wrapped_free);

    return result;
}
LexIndex* LexIndex_init(LexIndex *self, Schema *schema, Folder *folder, Segment *segment, String *field) { int32_t field_num = Seg_Field_Num(segment, field); String *seg_name = Seg_Get_Name(segment); String *ixix_file = Str_newf("%o/lexicon-%i32.ixix", seg_name, field_num); String *ix_file = Str_newf("%o/lexicon-%i32.ix", seg_name, field_num); Architecture *arch = Schema_Get_Architecture(schema); // Init. Lex_init((Lexicon*)self, field); LexIndexIVARS *const ivars = LexIndex_IVARS(self); ivars->tinfo = TInfo_new(0); ivars->tick = 0; // Derive ivars->field_type = Schema_Fetch_Type(schema, field); if (!ivars->field_type) { String *mess = MAKE_MESS("Unknown field: '%o'", field); DECREF(ix_file); DECREF(ixix_file); DECREF(self); Err_throw_mess(ERR, mess); } ivars->field_type = (FieldType*)INCREF(ivars->field_type); ivars->term_stepper = FType_Make_Term_Stepper(ivars->field_type); ivars->ixix_in = Folder_Open_In(folder, ixix_file); if (!ivars->ixix_in) { Err *error = (Err*)INCREF(Err_get_error()); DECREF(ix_file); DECREF(ixix_file); DECREF(self); RETHROW(error); } ivars->ix_in = Folder_Open_In(folder, ix_file); if (!ivars->ix_in) { Err *error = (Err*)INCREF(Err_get_error()); DECREF(ix_file); DECREF(ixix_file); DECREF(self); RETHROW(error); } ivars->index_interval = Arch_Index_Interval(arch); ivars->skip_interval = Arch_Skip_Interval(arch); ivars->size = (int32_t)(InStream_Length(ivars->ixix_in) / sizeof(int64_t)); ivars->offsets = (const int64_t*)InStream_Buf(ivars->ixix_in, (size_t)InStream_Length(ivars->ixix_in)); DECREF(ixix_file); DECREF(ix_file); return self; }
DefaultDocReader* DefDocReader_init(DefaultDocReader *self, Schema *schema, Folder *folder, Snapshot *snapshot, VArray *segments, i32_t seg_tick) { Hash *metadata; Segment *segment; DocReader_init((DocReader*)self, schema, folder, snapshot, segments, seg_tick); segment = DefDocReader_Get_Segment(self); metadata = (Hash*)Seg_Fetch_Metadata_Str(segment, "documents", 9); if (metadata) { CharBuf *seg_name = Seg_Get_Name(segment); CharBuf *ix_file = CB_newf("%o/documents.ix", seg_name); CharBuf *dat_file = CB_newf("%o/documents.dat", seg_name); Obj *format = Hash_Fetch_Str(metadata, "format", 6); /* Check format. */ if (!format) { THROW("Missing 'format' var"); } else { i64_t format_val = Obj_To_I64(format); if (format_val < DocWriter_current_file_format) { THROW("Obsolete doc storage format %i64; " "Index regeneration is required", format_val); } else if (format_val != DocWriter_current_file_format) { THROW("Unsupported doc storage format: %i64", format_val); } } /* Get streams. */ if (Folder_Exists(folder, ix_file)) { self->ix_in = Folder_Open_In(folder, ix_file); self->dat_in = Folder_Open_In(folder, dat_file); if (!self->ix_in || !self->dat_in) { CharBuf *mess = MAKE_MESS("Can't open either %o or %o", ix_file, dat_file); DECREF(ix_file); DECREF(dat_file); DECREF(self); Err_throw_mess(mess); } } DECREF(ix_file); DECREF(dat_file); } return self; }
static CharBuf* S_parse_string(char **json_ptr, char *const limit) { // Find terminating double quote, determine whether there are any escapes. char *top = *json_ptr + 1; char *end = NULL; bool_t saw_backslash = false; for (char *text = top; text < limit; text++) { if (*text == '"') { end = text; break; } else if (*text == '\\') { saw_backslash = true; if (text + 1 < limit && text[1] == 'u') { text += 5; } else { text += 1; } } } if (!end) { SET_ERROR(CB_newf("Unterminated string"), *json_ptr, limit); return NULL; } // Advance the text buffer to just beyond the closing quote. *json_ptr = end + 1; if (saw_backslash) { return S_unescape_text(top, end); } else { // Optimize common case where there are no escapes. size_t len = end - top; if (!StrHelp_utf8_valid(top, len)) { CharBuf *mess = MAKE_MESS("Bad UTF-8 in JSON"); Err_set_error(Err_new(mess)); return NULL; } return CB_new_from_trusted_utf8(top, len); } }
// Try to obtain the lock by creating its lock file.
//
// Returns true when the lock file was created.  Returns false, with the
// global error set to a LockErr, when the lock file already exists, when the
// "locks" directory can neither be created nor found, when the temporary
// file cannot be written, or when hard-linking it into place fails.
//
// Throws if the temporary file was written but could not be deleted
// afterwards.
bool_t
LFLock_request(LockFileLock *self) {
    // Somebody already holds the lock.
    if (Folder_Exists(self->folder, self->lock_path)) {
        CharBuf *mess = CB_newf("Can't obtain lock: '%o' exists",
                                self->lock_path);
        Err_set_error((Err*)LockErr_new(mess));
        return false;
    }

    // Create the "locks" subdirectory if necessary.
    CharBuf *lock_dir_name = (CharBuf*)ZCB_WRAP_STR("locks", 5);
    if (!Folder_Exists(self->folder, lock_dir_name)
        && !Folder_MkDir(self->folder, lock_dir_name)
       ) {
        // Build the error object right away, before doing anything else
        // with the folder.
        Err *mkdir_err = (Err*)CERTIFY(Err_get_error(), ERR);
        LockErr *err
            = LockErr_new(CB_newf("Can't create 'locks' directory: %o",
                                  Err_Get_Mess(mkdir_err)));
        if (!Folder_Find_Folder(self->folder, lock_dir_name)) {
            // Nope, everything failed, so bail out.
            Err_set_error((Err*)err);
            return false;
        }
        // Our attempt failed because another process succeeded.
        DECREF(err);
    }

    // Prepare to write pid, lock name, and host to the lock file as JSON.
    Hash *file_data = Hash_new(3);
    Hash_Store_Str(file_data, "pid", 3,
                   (Obj*)CB_newf("%i32", (int32_t)PID_getpid()));
    Hash_Store_Str(file_data, "host", 4, INCREF(self->host));
    Hash_Store_Str(file_data, "name", 4, INCREF(self->name));

    // Write to a temporary file, then use the creation of a hard link to
    // ensure atomic but non-destructive creation of the lockfile with its
    // complete contents.
    if (!Json_spew_json((Obj*)file_data, self->folder, self->link_path)) {
        Err *spew_json_err = (Err*)CERTIFY(Err_get_error(), ERR);
        Err_set_error((Err*)LockErr_new(CB_newf("Failed to obtain lock at '%o': %o",
                                                self->lock_path,
                                                Err_Get_Mess(spew_json_err))));
        DECREF(file_data);
        return false;
    }

    bool_t success = Folder_Hard_Link(self->folder, self->link_path,
                                      self->lock_path);
    if (!success) {
        Err *hard_link_err = (Err*)CERTIFY(Err_get_error(), ERR);
        Err_set_error((Err*)LockErr_new(CB_newf("Failed to obtain lock at '%o': %o",
                                                self->lock_path,
                                                Err_Get_Mess(hard_link_err))));
    }

    // The temporary file is removed whether or not the link succeeded.
    bool_t deleted = Folder_Delete(self->folder, self->link_path);
    DECREF(file_data);

    // Verify that our temporary file got zapped.
    if (!deleted) {
        CharBuf *mess = MAKE_MESS("Failed to delete '%o'", self->link_path);
        Err_throw_mess(ERR, mess);
    }

    return success;
}
// Try to obtain the lock by creating its lock file.
//
// Returns true when the lock file was created.  Returns false with the
// global error set when the lock file already exists, when the "locks"
// directory can neither be created nor found, when the temporary file
// cannot be opened or written, or when hard-linking it into place fails.
//
// Throws if the temporary file could not be deleted after the write attempt.
bool
LFLock_Request_IMP(LockFileLock *self) {
    LockFileLockIVARS *const ivars = LFLock_IVARS(self);

    // Somebody already holds the lock.
    if (Folder_Exists(ivars->folder, ivars->lock_path)) {
        String *mess = Str_newf("Can't obtain lock: '%o' exists",
                                ivars->lock_path);
        Err_set_error((Err*)LockErr_new(mess));
        return false;
    }

    // Create the "locks" subdirectory if necessary.
    String *lock_dir_name = (String*)SSTR_WRAP_UTF8("locks", 5);
    if (!Folder_Exists(ivars->folder, lock_dir_name)
        && !Folder_MkDir(ivars->folder, lock_dir_name)
       ) {
        // Build the error object right away, before doing anything else
        // with the folder.
        Err *mkdir_err = (Err*)CERTIFY(Err_get_error(), ERR);
        LockErr *err
            = LockErr_new(Str_newf("Can't create 'locks' directory: %o",
                                   Err_Get_Mess(mkdir_err)));
        if (!Folder_Find_Folder(ivars->folder, lock_dir_name)) {
            // Nope, everything failed, so bail out.
            Err_set_error((Err*)err);
            return false;
        }
        // Our attempt failed because another process succeeded.
        DECREF(err);
    }

    // Prepare to write pid, lock name, and host to the lock file as JSON.
    Hash *file_data = Hash_new(3);
    Hash_Store_Utf8(file_data, "pid", 3,
                    (Obj*)Str_newf("%i32", (int32_t)PID_getpid()));
    Hash_Store_Utf8(file_data, "host", 4, INCREF(ivars->host));
    Hash_Store_Utf8(file_data, "name", 4, INCREF(ivars->name));
    String *json = Json_to_json((Obj*)file_data);
    DECREF(file_data);

    // Write to a temporary file, then use the creation of a hard link to
    // ensure atomic but non-destructive creation of the lockfile with its
    // complete contents.
    OutStream *outstream = Folder_Open_Out(ivars->folder, ivars->link_path);
    if (!outstream) {
        ERR_ADD_FRAME(Err_get_error());
        DECREF(json);
        return false;
    }

    // Run the write under Err_trap so that an error raised while writing
    // comes back as an Err object.
    struct lockfile_context context;
    context.outstream = outstream;
    context.json      = json;
    Err *json_error = Err_trap(S_write_lockfile_json, &context);
    DECREF(outstream);
    DECREF(json);

    bool success = false;
    if (json_error) {
        Err_set_error((Err*)LockErr_new(Str_newf("Failed to obtain lock at '%o': %o",
                                                 ivars->lock_path,
                                                 Err_Get_Mess(json_error))));
        DECREF(json_error);
    }
    else {
        success = Folder_Hard_Link(ivars->folder, ivars->link_path,
                                   ivars->lock_path);
        if (!success) {
            Err *hard_link_err = (Err*)CERTIFY(Err_get_error(), ERR);
            Err_set_error((Err*)LockErr_new(Str_newf("Failed to obtain lock at '%o': %o",
                                                     ivars->lock_path,
                                                     Err_Get_Mess(hard_link_err))));
        }
    }

    // Verify that our temporary file got zapped.
    if (!Folder_Delete(ivars->folder, ivars->link_path)) {
        String *mess = MAKE_MESS("Failed to delete '%o'", ivars->link_path);
        Err_throw_mess(ERR, mess);
    }

    return success;
}
void S_try_open_elements(void *context) { struct try_open_elements_context *args = (struct try_open_elements_context*)context; PolyReader *self = args->self; PolyReaderIVARS *const ivars = PolyReader_IVARS(self); VArray *files = Snapshot_List(ivars->snapshot); Folder *folder = PolyReader_Get_Folder(self); uint32_t num_segs = 0; uint64_t latest_schema_gen = 0; CharBuf *schema_file = NULL; // Find schema file, count segments. for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) { CharBuf *entry = (CharBuf*)VA_Fetch(files, i); if (Seg_valid_seg_name(entry)) { num_segs++; } else if (CB_Starts_With_Str(entry, "schema_", 7) && CB_Ends_With_Str(entry, ".json", 5) ) { uint64_t gen = IxFileNames_extract_gen(entry); if (gen > latest_schema_gen) { latest_schema_gen = gen; if (!schema_file) { schema_file = CB_Clone(entry); } else { CB_Mimic(schema_file, (Obj*)entry); } } } } // Read Schema. if (!schema_file) { DECREF(files); THROW(ERR, "Can't find a schema file."); } else { Hash *dump = (Hash*)Json_slurp_json(folder, schema_file); if (dump) { // read file successfully DECREF(ivars->schema); ivars->schema = (Schema*)CERTIFY( VTable_Load_Obj(SCHEMA, (Obj*)dump), SCHEMA); DECREF(dump); DECREF(schema_file); schema_file = NULL; } else { CharBuf *mess = MAKE_MESS("Failed to parse %o", schema_file); DECREF(schema_file); DECREF(files); Err_throw_mess(ERR, mess); } } VArray *segments = VA_new(num_segs); for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) { CharBuf *entry = (CharBuf*)VA_Fetch(files, i); // Create a Segment for each segmeta. if (Seg_valid_seg_name(entry)) { int64_t seg_num = IxFileNames_extract_gen(entry); Segment *segment = Seg_new(seg_num); // Bail if reading the file fails (probably because it's been // deleted and a new snapshot file has been written so we need to // retry). 
if (Seg_Read_File(segment, folder)) { VA_Push(segments, (Obj*)segment); } else { CharBuf *mess = MAKE_MESS("Failed to read %o", entry); DECREF(segment); DECREF(segments); DECREF(files); Err_throw_mess(ERR, mess); } } } // Sort the segments by age. VA_Sort(segments, NULL, NULL); // Open individual SegReaders. struct try_open_segreader_context seg_context; seg_context.schema = PolyReader_Get_Schema(self); seg_context.folder = folder; seg_context.snapshot = PolyReader_Get_Snapshot(self); seg_context.segments = segments; seg_context.result = NULL; args->seg_readers = VA_new(num_segs); Err *error = NULL; for (uint32_t seg_tick = 0; seg_tick < num_segs; seg_tick++) { seg_context.seg_tick = seg_tick; error = Err_trap(S_try_open_segreader, &seg_context); if (error) { break; } VA_Push(args->seg_readers, (Obj*)seg_context.result); seg_context.result = NULL; } DECREF(segments); DECREF(files); if (error) { DECREF(args->seg_readers); args->seg_readers = NULL; RETHROW(error); } }
// Decode the JSON string content in [top, end), which contains at least one
// backslash escape, into a new CharBuf.
//
// Supported escapes are \" \\ \/ \b \f \n \r \t and \uXXXX, where XXXX must
// be exactly four hexadecimal digits.  On failure an error is set and NULL
// is returned:
//   - malformed or truncated \u escape: "Invalid \u escape";
//   - \u escape in the surrogate range D800-DFFF:
//     "Surrogate pairs not supported";
//   - any other escape character: "Illegal escape";
//   - decoded bytes that are not valid UTF-8: "Bad UTF-8 in JSON".
static CharBuf*
S_unescape_text(char *const top, char *const end) {
    // The unescaped string will never be longer than the escaped string:
    // every two-character escape yields one byte, and a six-character \u
    // escape carries at most 16 bits, which needs at most three bytes of
    // UTF-8.  Therefore we can allocate once and not worry about
    // reallocating.  The extra byte is for the NUL terminator.
    size_t cap = end - top + 1;
    char *target_buf = (char*)MALLOCATE(cap);
    size_t target_size = 0;
    for (char *text = top; text < end; text++) {
        if (*text != '\\') {
            target_buf[target_size++] = *text;
        }
        else {
            // Process escape.
            text++;
            switch (*text) {
                case '"':
                    target_buf[target_size++] = '"';
                    break;
                case '\\':
                    target_buf[target_size++] = '\\';
                    break;
                case '/':
                    target_buf[target_size++] = '/';
                    break;
                case 'b':
                    target_buf[target_size++] = '\b';
                    break;
                case 'f':
                    target_buf[target_size++] = '\f';
                    break;
                case 'n':
                    target_buf[target_size++] = '\n';
                    break;
                case 'r':
                    target_buf[target_size++] = '\r';
                    break;
                case 't':
                    target_buf[target_size++] = '\t';
                    break;
                case 'u': {
                        // Decode the four hex digits by hand.  strtol is
                        // unsuitable here: it skips leading whitespace and
                        // accepts a sign or a "0x" prefix, so sequences
                        // such as "\u +1A" or "\u0x1F" would be treated as
                        // valid escapes.
                        char *const escape_start = text - 1;
                        uint32_t code_point = 0;
                        int digits_ok = (end - text >= 5);
                        for (int i = 1; digits_ok && i <= 4; i++) {
                            const char c = text[i];
                            uint32_t nibble = 0;
                            if (c >= '0' && c <= '9') {
                                nibble = (uint32_t)(c - '0');
                            }
                            else if (c >= 'a' && c <= 'f') {
                                nibble = (uint32_t)(c - 'a') + 10;
                            }
                            else if (c >= 'A' && c <= 'F') {
                                nibble = (uint32_t)(c - 'A') + 10;
                            }
                            else {
                                digits_ok = 0;
                            }
                            code_point = (code_point << 4) | nibble;
                        }
                        if (!digits_ok) {
                            FREEMEM(target_buf);
                            SET_ERROR(CB_newf("Invalid \\u escape"),
                                      escape_start, end);
                            return NULL;
                        }
                        if (code_point >= 0xD800 && code_point <= 0xDFFF) {
                            FREEMEM(target_buf);
                            SET_ERROR(CB_newf("Surrogate pairs not supported"),
                                      escape_start, end);
                            return NULL;
                        }
                        // Step over the digits; the loop's own increment
                        // then moves past the last one.
                        text += 4;
                        target_size
                            += StrHelp_encode_utf8_char(code_point,
                                                        target_buf + target_size);
                    }
                    break;
                default:
                    FREEMEM(target_buf);
                    SET_ERROR(CB_newf("Illegal escape"), text - 1, end);
                    return NULL;
            }
        }
    }

    // NULL-terminate, sanity check, then return the escaped string.
    target_buf[target_size] = '\0';
    if (!StrHelp_utf8_valid(target_buf, target_size)) {
        FREEMEM(target_buf);
        CharBuf *mess = MAKE_MESS("Bad UTF-8 in JSON");
        Err_set_error(Err_new(mess));
        return NULL;
    }
    return CB_new_steal_from_trusted_str(target_buf, target_size, cap);
}
// Recursively append the JSON encoding of `dump` to `json`.
//
// `depth` is the current nesting level; it drives indentation and is checked
// against MAX_DEPTH.  NULL encodes as "null", CFISH_TRUE / CFISH_FALSE as
// "true" / "false", CharBufs as strings, IntNums and FloatNums as numbers,
// VArrays as arrays and Hashes as objects with their keys sorted.  An object
// of any other class appends nothing.
//
// Returns true on success.  Returns false with the global error set when
// MAX_DEPTH is exceeded or when a hash key is not a CharBuf; `json` may
// hold partial output in that case.
static bool_t
S_to_json(Obj *dump, CharBuf *json, int32_t depth) {
    // Guard against infinite recursion in self-referencing data structures.
    if (depth > MAX_DEPTH) {
        CharBuf *mess = MAKE_MESS("Exceeded max depth of %i32", MAX_DEPTH);
        Err_set_error(Err_new(mess));
        return false;
    }

    if (!dump) {
        CB_Cat_Trusted_Str(json, "null", 4);
    }
    else if (dump == (Obj*)CFISH_TRUE) {
        CB_Cat_Trusted_Str(json, "true", 4);
    }
    else if (dump == (Obj*)CFISH_FALSE) {
        CB_Cat_Trusted_Str(json, "false", 5);
    }
    else if (Obj_Is_A(dump, CHARBUF)) {
        S_append_json_string(dump, json);
    }
    else if (Obj_Is_A(dump, INTNUM)) {
        CB_catf(json, "%i64", Obj_To_I64(dump));
    }
    else if (Obj_Is_A(dump, FLOATNUM)) {
        CB_catf(json, "%f64", Obj_To_F64(dump));
    }
    else if (Obj_Is_A(dump, VARRAY)) {
        VArray *array = (VArray*)dump;
        size_t size = VA_Get_Size(array);
        if (size == 0) {
            // Put empty array on single line.
            CB_Cat_Trusted_Str(json, "[]", 2);
            return true;
        }
        else if (size == 1) {
            Obj *elem = VA_Fetch(array, 0);
            // A NULL element is a scalar ("null").  It must be tested for
            // before Obj_Is_A, which needs a live object to dispatch on.
            if (!elem || !(Obj_Is_A(elem, HASH) || Obj_Is_A(elem, VARRAY))) {
                // Put array containing single scalar element on one line.
                CB_Cat_Trusted_Str(json, "[", 1);
                if (!S_to_json(elem, json, depth + 1)) {
                    return false;
                }
                CB_Cat_Trusted_Str(json, "]", 1);
                return true;
            }
        }
        // Fall back to spreading elements across multiple lines.
        CB_Cat_Trusted_Str(json, "[", 1);
        for (size_t i = 0; i < size; i++) {
            CB_Cat_Trusted_Str(json, "\n", 1);
            S_cat_whitespace(json, depth + 1);
            if (!S_to_json(VA_Fetch(array, i), json, depth + 1)) {
                return false;
            }
            if (i + 1 < size) {
                CB_Cat_Trusted_Str(json, ",", 1);
            }
        }
        CB_Cat_Trusted_Str(json, "\n", 1);
        S_cat_whitespace(json, depth);
        CB_Cat_Trusted_Str(json, "]", 1);
    }
    else if (Obj_Is_A(dump, HASH)) {
        Hash *hash = (Hash*)dump;
        size_t size = Hash_Get_Size(hash);

        // Put empty hash on single line.
        if (size == 0) {
            CB_Cat_Trusted_Str(json, "{}", 2);
            return true;
        }

        // Validate that all keys are strings, then sort.
        VArray *keys = Hash_Keys(hash);
        for (size_t i = 0; i < size; i++) {
            Obj *key = VA_Fetch(keys, i);
            if (!key || !Obj_Is_A(key, CHARBUF)) {
                DECREF(keys);
                CharBuf *key_class = key ? Obj_Get_Class_Name(key) : NULL;
                CharBuf *mess = MAKE_MESS("Illegal key type: %o", key_class);
                Err_set_error(Err_new(mess));
                return false;
            }
        }
        VA_Sort(keys, NULL, NULL);

        // Spread pairs across multiple lines.
        CB_Cat_Trusted_Str(json, "{", 1);
        for (size_t i = 0; i < size; i++) {
            Obj *key = VA_Fetch(keys, i);
            CB_Cat_Trusted_Str(json, "\n", 1);
            S_cat_whitespace(json, depth + 1);
            S_append_json_string(key, json);
            CB_Cat_Trusted_Str(json, ": ", 2);
            if (!S_to_json(Hash_Fetch(hash, key), json, depth + 1)) {
                DECREF(keys);
                return false;
            }
            if (i + 1 < size) {
                CB_Cat_Trusted_Str(json, ",", 1);
            }
        }
        CB_Cat_Trusted_Str(json, "\n", 1);
        S_cat_whitespace(json, depth);
        CB_Cat_Trusted_Str(json, "}", 1);

        DECREF(keys);
    }

    return true;
}
SortCache* SortCache_init(SortCache *self, Schema *schema, Folder *folder, Segment *segment, i32_t field_num) { CharBuf *field = Seg_Field_Name(segment, field_num); CharBuf *seg_name = Seg_Get_Name(segment); CharBuf *ord_file = CB_newf("%o/sort-%i32.ord", seg_name, field_num); CharBuf *ix_file = CB_newf("%o/sort-%i32.ix", seg_name, field_num); CharBuf *dat_file = CB_newf("%o/sort-%i32.dat", seg_name, field_num); i64_t ord_len, ix_len, dat_len; /* Derive. */ self->doc_max = Seg_Get_Count(segment); self->type = Schema_Fetch_Type(schema, field); if (!self->type || !FType_Sortable(self->type)) { THROW("'%o' isn't a sortable field", field); } /* Open instreams. */ self->ord_in = Folder_Open_In(folder, ord_file); self->ix_in = Folder_Open_In(folder, ix_file); self->dat_in = Folder_Open_In(folder, dat_file); if (!self->ix_in || !self->dat_in || !self->ord_in) { CharBuf *mess = MAKE_MESS("Can't open either %o, %o or %o", ord_file, ix_file, dat_file); DECREF(ord_file); DECREF(ix_file); DECREF(dat_file); Err_throw_mess(mess); } ord_len = InStream_Length(self->ord_in); ix_len = InStream_Length(self->ix_in); dat_len = InStream_Length(self->dat_in); /* Calculate the number of unique values and derive the ord bit width. */ self->num_uniq = (i32_t)(ix_len / 8) - 1; self->width = S_calc_width(self->num_uniq); /* Validate file lengths. */ { double bytes_per_doc = self->width / 8.0; double max_ords = ord_len / bytes_per_doc; if (max_ords < self->doc_max + 1) { THROW("Conflict between ord count max %f64 and doc_max %i32", max_ords, self->doc_max); } } /* Mmap ords, offsets and character data. */ self->ords = InStream_Buf(self->ord_in, (size_t)ord_len); self->offsets = (i64_t*)InStream_Buf(self->ix_in, (size_t)ix_len); self->char_data = InStream_Buf(self->dat_in, dat_len); { char *offs = (char*)self->offsets; self->offsets_limit = (i64_t*)(offs + ix_len); self->char_data_limit = self->char_data + dat_len; } DECREF(ord_file); DECREF(ix_file); DECREF(dat_file); return self; }
// Merge the folder's files into a single compound file.
//
// Every entry returned by Folder_List whose name does not end in ".json" is
// appended to "cf.dat", with padding so that each sub-file starts on a
// multiple of 8 bytes.  The offset and length of each sub-file are recorded
// under "files" in a metadata hash, which is written to "cfmeta.json.temp"
// and then renamed to "cfmeta.json".  Finally the merged originals are
// deleted.
//
// Rethrows the pending error if "cf.dat" or an input file cannot be opened,
// if the metadata cannot be written, or if the rename fails.  Throws if a
// merged file cannot be deleted.
static void
S_do_consolidate(CompoundFileWriter *self, CompoundFileWriterIVARS *ivars) {
    UNUSED_VAR(self);
    Folder    *folder    = ivars->folder;
    Hash      *metadata  = Hash_new(0);
    Hash      *sub_files = Hash_new(0);
    Vector    *files     = Folder_List(folder, NULL);
    Vector    *merged    = Vec_new(Vec_Get_Size(files));
    String    *cf_file   = (String*)SSTR_WRAP_UTF8("cf.dat", 6);
    OutStream *outstream = Folder_Open_Out(folder, cf_file);
    if (!outstream) { RETHROW(INCREF(Err_get_error())); }

    // Start metadata.
    Hash_Store_Utf8(metadata, "files", 5, INCREF(sub_files));
    Hash_Store_Utf8(metadata, "format", 6,
                    (Obj*)Str_newf("%i32", CFWriter_current_file_format));

    Vec_Sort(files);
    for (uint32_t i = 0, max = Vec_Get_Size(files); i < max; i++) {
        String *infilename = (String*)Vec_Fetch(files, i);

        if (!Str_Ends_With_Utf8(infilename, ".json", 5)) {
            InStream *instream = Folder_Open_In(folder, infilename);
            if (!instream) { RETHROW(INCREF(Err_get_error())); }

            // Allocate the per-file record only once the input is known to
            // be open, so the throw above cannot leak it.
            Hash *file_data = Hash_new(2);

            // Absorb the file.
            int64_t offset = OutStream_Tell(outstream);
            OutStream_Absorb(outstream, instream);
            int64_t len = OutStream_Tell(outstream) - offset;

            // Record offset and length.
            Hash_Store_Utf8(file_data, "offset", 6,
                            (Obj*)Str_newf("%i64", offset));
            Hash_Store_Utf8(file_data, "length", 6,
                            (Obj*)Str_newf("%i64", len));
            Hash_Store(sub_files, infilename, (Obj*)file_data);
            Vec_Push(merged, INCREF(infilename));

            // Add filler NULL bytes so that every sub-file begins on a file
            // position multiple of 8.
            OutStream_Align(outstream, 8);

            InStream_Close(instream);
            DECREF(instream);
        }
    }

    // Write metadata to cfmeta file.  A failed write is reported directly
    // rather than being left to surface as a rename failure.
    String *cfmeta_temp = (String*)SSTR_WRAP_UTF8("cfmeta.json.temp", 16);
    String *cfmeta_file = (String*)SSTR_WRAP_UTF8("cfmeta.json", 11);
    if (!Json_spew_json((Obj*)metadata, (Folder*)ivars->folder, cfmeta_temp)) {
        RETHROW(INCREF(Err_get_error()));
    }
    if (!Folder_Rename(ivars->folder, cfmeta_temp, cfmeta_file)) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Clean up.
    OutStream_Close(outstream);
    DECREF(outstream);
    DECREF(files);
    DECREF(metadata);
    DECREF(sub_files);

    // Remove the originals now that their contents live in the compound
    // file.
    for (uint32_t i = 0, max = Vec_Get_Size(merged); i < max; i++) {
        String *merged_file = (String*)Vec_Fetch(merged, i);
        if (!Folder_Delete(folder, merged_file)) {
            // Build the message before `merged`, which holds the file
            // name, is released.
            String *mess = MAKE_MESS("Can't delete '%o'", merged_file);
            DECREF(merged);
            Err_throw_mess(ERR, mess);
        }
    }
    DECREF(merged);
}