/* Prepare this LexiconWriter to write the lexicon for field `field_num`.
 *
 * Builds the three per-field file names ("lexicon-N.dat", ".ix", ".ixix")
 * under the segment's name, adds each of them to the snapshot, opens an
 * outstream for each one, and resets the per-field counters and "last
 * term" state.  THROWs if any of the three outstreams fails to open.
 */
void
LexWriter_start_field(LexiconWriter *self, i32_t field_num) {
    CharBuf  *const segment_name = Seg_Get_Name(self->segment);
    Folder   *const target       = self->folder;
    Snapshot *const snap         = LexWriter_Get_Snapshot(self);

    /* Derive the file names for this field. */
    CB_setf(self->dat_file, "%o/lexicon-%i32.dat", segment_name, field_num);
    CB_setf(self->ix_file, "%o/lexicon-%i32.ix", segment_name, field_num);
    CB_setf(self->ixix_file, "%o/lexicon-%i32.ixix", segment_name,
            field_num);

    /* Record the files in the snapshot before they are opened. */
    Snapshot_Add_Entry(snap, self->dat_file);
    Snapshot_Add_Entry(snap, self->ix_file);
    Snapshot_Add_Entry(snap, self->ixix_file);

    /* Attempt all three opens first, then report the first failure. */
    self->dat_out  = Folder_Open_Out(target, self->dat_file);
    self->ix_out   = Folder_Open_Out(target, self->ix_file);
    self->ixix_out = Folder_Open_Out(target, self->ixix_file);
    if (self->dat_out == NULL) {
        THROW("Can't open %o", self->dat_file);
    }
    if (self->ix_out == NULL) {
        THROW("Can't open %o", self->ix_file);
    }
    if (self->ixix_out == NULL) {
        THROW("Can't open %o", self->ixix_file);
    }

    /* Start the field with zeroed counts, a reset TermInfo and an empty
     * "last term" text. */
    self->count    = 0;
    self->ix_count = 0;
    TInfo_Reset(self->last_tinfo);
    CB_Set_Size(self->last_text, 0);
}
// Prepare this LexiconWriter to write the lexicon for field `field_num`.
//
// Looks up the field's name and FieldType, builds the three per-field file
// names under the segment's name, and opens an outstream for each of them in
// turn.  If an open fails, the error reported by Err_get_error() is
// rethrown immediately and the remaining files are not opened.  Finally the
// counters are zeroed, a new term stepper is created from the field type,
// and the term info stepper is reset.
void
LexWriter_start_field(LexiconWriter *self, int32_t field_num) {
    Segment   *const seg          = LexWriter_Get_Segment(self);
    Folder    *const target       = LexWriter_Get_Folder(self);
    Schema    *const schema       = LexWriter_Get_Schema(self);
    CharBuf   *const segment_name = Seg_Get_Name(seg);
    CharBuf   *const field_name   = Seg_Field_Name(seg, field_num);
    FieldType *const field_type   = Schema_Fetch_Type(schema, field_name);

    // Derive the file names for this field.
    CB_setf(self->dat_file, "%o/lexicon-%i32.dat", segment_name, field_num);
    CB_setf(self->ix_file, "%o/lexicon-%i32.ix", segment_name, field_num);
    CB_setf(self->ixix_file, "%o/lexicon-%i32.ixix", segment_name,
            field_num);

    // Open the outstreams one at a time, bailing out on the first failure.
    self->dat_out = Folder_Open_Out(target, self->dat_file);
    if (self->dat_out == NULL) {
        RETHROW(INCREF(Err_get_error()));
    }
    self->ix_out = Folder_Open_Out(target, self->ix_file);
    if (self->ix_out == NULL) {
        RETHROW(INCREF(Err_get_error()));
    }
    self->ixix_out = Folder_Open_Out(target, self->ixix_file);
    if (self->ixix_out == NULL) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Fresh per-field state: zero counts, a new term stepper built from the
    // field's type, and a reset term info stepper.
    self->count        = 0;
    self->ix_count     = 0;
    self->term_stepper = FType_Make_Term_Stepper(field_type);
    TermStepper_Reset(self->tinfo_stepper);
}
// Report whether the current directory entry is a directory.
//
// Returns false when there is no current entry or when stat() fails.
//
// NOTE(review): when d_type is available, a DT_LNK entry yields false,
// whereas the stat() fallback follows symlinks -- confirm which behavior
// callers expect for symlinks that point at directories.
bool
FSDH_entry_is_dir(FSDirHandle *self) {
    FSDirHandleIVARS *const ivars = FSDH_IVARS(self);
    struct dirent *sys_dir_entry = (struct dirent*)ivars->sys_dir_entry;
    if (!sys_dir_entry) { return false; }

    // If d_type is available, try to avoid a stat() call.  If it's not, or
    // if the type comes back as unknown, fall back to stat().
#ifdef CHY_HAS_DIRENT_D_TYPE
    if (sys_dir_entry->d_type == DT_DIR) {
        return true;
    }
    else if (sys_dir_entry->d_type != DT_UNKNOWN) {
        return false;
    }
#endif

    // Build "<dir><sep><entry>" in the lazily created scratch buffer.
    struct stat stat_buf;
    if (!ivars->fullpath) {
        ivars->fullpath = CB_new(CB_Get_Size(ivars->dir) + 20);
    }
    CB_setf(ivars->fullpath, "%o%s%o", ivars->dir, CHY_DIR_SEP,
            ivars->entry);
    if (stat((char*)CB_Get_Ptr8(ivars->fullpath), &stat_buf) == -1) {
        return false;
    }

    // Compare the file-type field as a whole.  A plain `st_mode & S_IFDIR`
    // bit test also matches other file types whose type code happens to
    // contain the S_IFDIR bit (e.g. block devices and sockets).
    return (stat_buf.st_mode & S_IFMT) == S_IFDIR;
}
// Round-trip strings that require JSON escaping.
//
// The first loop covers one string per entry of `control_escapes`, each
// consisting of the single code point equal to the entry's index.  The
// second covers the strings in `quote_escapes_source`, whose expected
// encodings are in `quote_escapes_json`.  For every string, the encoded
// JSON (trimmed) must equal the expected escape wrapped in double quotes,
// and decoding that JSON must give back the original string.
static void
test_escapes(TestBatch *batch) {
    CharBuf *string      = CB_new(10);
    CharBuf *json_wanted = CB_new(10);

    for (int i = 0; control_escapes[i] != NULL; i++) {
        CB_Truncate(string, 0);
        CB_Cat_Char(string, i);
        const char *escaped = control_escapes[i];
        CharBuf    *json    = Json_to_json((Obj*)string);
        // Only touch `json` if encoding succeeded, so that a failed encode
        // is reported by the assertions below.
        CharBuf    *decoded = json ? (CharBuf*)Json_from_json(json) : NULL;

        CB_setf(json_wanted, "\"%s\"", escaped);
        if (json) { CB_Trim(json); }
        TEST_TRUE(batch, json != NULL && CB_Equals(json_wanted, (Obj*)json),
                  "encode control escape: %s", escaped);

        TEST_TRUE(batch,
                  decoded != NULL && CB_Equals(string, (Obj*)decoded),
                  "decode control escape: %s", escaped);

        DECREF(json);
        DECREF(decoded);
    }

    for (int i = 0; quote_escapes_source[i] != NULL; i++) {
        const char *source  = quote_escapes_source[i];
        const char *escaped = quote_escapes_json[i];
        // Copy the source text verbatim.  It must not be used as the
        // format pattern itself: test data is not a format string.
        CB_setf(string, "%s", source);
        CharBuf *json    = Json_to_json((Obj*)string);
        CharBuf *decoded = json ? (CharBuf*)Json_from_json(json) : NULL;

        CB_setf(json_wanted, "\"%s\"", escaped);
        if (json) { CB_Trim(json); }
        TEST_TRUE(batch, json != NULL && CB_Equals(json_wanted, (Obj*)json),
                  "encode quote/backslash escapes: %s", source);

        TEST_TRUE(batch,
                  decoded != NULL && CB_Equals(string, (Obj*)decoded),
                  "decode quote/backslash escapes: %s", source);

        DECREF(json);
        DECREF(decoded);
    }

    DECREF(json_wanted);
    DECREF(string);
}
// Check CB_To_I64() on a positive and a negative decimal string.
static void
test_To_I64(TestBatch *batch) {
    CharBuf *numeric = S_get_cb("10");

    // Positive value.
    TEST_TRUE(batch, CB_To_I64(numeric) == 10, "To_I64");

    // Negative value, reusing the same buffer.
    CB_setf(numeric, "-10");
    TEST_TRUE(batch, CB_To_I64(numeric) == -10, "To_I64 negative");

    DECREF(numeric);
}
// Check CB_Find() for the needle "foo" against a series of haystacks built
// in one reusable buffer.
static void
test_Find(TestBatch *batch) {
    CharBuf *haystack = CB_new(10);
    CharBuf *needle   = S_get_cb("foo");

    // Empty haystack.
    TEST_TRUE(batch, CB_Find(haystack, needle) == -1, "Not in empty string");

    // Haystack identical to the needle.
    CB_setf(haystack, "foo");
    TEST_TRUE(batch, CB_Find(haystack, needle) == 0, "Find complete string");

    // Needle preceded by one character.
    CB_setf(haystack, "afoo");
    TEST_TRUE(batch, CB_Find(haystack, needle) == 1, "Find after first");

    // Size cut to 3 after holding "afoo": the match must not be found.
    CB_Set_Size(haystack, 3);
    TEST_TRUE(batch, CB_Find(haystack, needle) == -1, "Don't overrun");

    // Needle surrounded by other characters.
    CB_setf(haystack, "afood");
    TEST_TRUE(batch, CB_Find(haystack, needle) == 1, "Find in middle");

    DECREF(needle);
    DECREF(haystack);
}
// Return the SortFieldWriter stored at `field_num`, creating and storing
// one if the slot is empty.
//
// The first time a writer has to be created, the three temp outstreams
// ("sort_ord_temp", "sort_ix_temp", "sort_dat_temp" under the segment's
// name) are opened as well.  If an open fails, the error reported by
// Err_get_error() is rethrown.
static SortFieldWriter*
S_lazy_init_field_writer(SortWriter *self, int32_t field_num) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    SortFieldWriter *writer
        = (SortFieldWriter*)VA_Fetch(ivars->field_writers, field_num);

    // Fast path: the writer already exists.
    if (writer) {
        return writer;
    }

    // Open temp files, but only if that hasn't happened yet.
    if (ivars->temp_ord_out == NULL) {
        Folder  *target       = ivars->folder;
        CharBuf *segment_name = Seg_Get_Name(ivars->segment);
        CharBuf *temp_path    = CB_newf("%o/sort_ord_temp", segment_name);

        ivars->temp_ord_out = Folder_Open_Out(target, temp_path);
        if (ivars->temp_ord_out == NULL) {
            DECREF(temp_path);
            RETHROW(INCREF(Err_get_error()));
        }

        // The path buffer is reused for the remaining two files.
        CB_setf(temp_path, "%o/sort_ix_temp", segment_name);
        ivars->temp_ix_out = Folder_Open_Out(target, temp_path);
        if (ivars->temp_ix_out == NULL) {
            DECREF(temp_path);
            RETHROW(INCREF(Err_get_error()));
        }

        CB_setf(temp_path, "%o/sort_dat_temp", segment_name);
        ivars->temp_dat_out = Folder_Open_Out(target, temp_path);
        if (ivars->temp_dat_out == NULL) {
            DECREF(temp_path);
            RETHROW(INCREF(Err_get_error()));
        }

        DECREF(temp_path);
    }

    // Build the writer for this field and store it in the field's slot.
    CharBuf *field_name = Seg_Field_Name(ivars->segment, field_num);
    writer = SortFieldWriter_new(ivars->schema, ivars->snapshot,
                                 ivars->segment, ivars->polyreader,
                                 field_name, ivars->mem_pool,
                                 ivars->mem_thresh, ivars->temp_ord_out,
                                 ivars->temp_ix_out, ivars->temp_dat_out);
    VA_Store(ivars->field_writers, field_num, (Obj*)writer);

    return writer;
}
// Check CB_To_F64() on positive and negative input, and verify that the
// conversion honors the string's size rather than reading past it.
static void
test_To_F64(TestBatch *batch) {
    CharBuf *numeric = S_get_cb("1.5");

    // Positive value: absolute error must be below 0.001.
    double delta = 1.5 - CB_To_F64(numeric);
    if (delta < 0) {
        delta = 0 - delta;
    }
    TEST_TRUE(batch, delta < 0.001, "To_F64");

    // Negative value: adding 1.5 should land near zero.
    CB_setf(numeric, "-1.5");
    delta = 1.5 + CB_To_F64(numeric);
    if (delta < 0) {
        delta = 0 - delta;
    }
    TEST_TRUE(batch, delta < 0.001, "To_F64 negative");

    // Convert "1.59", then cut the size to 3 and convert again; the second
    // result must be smaller than the first.
    CB_setf(numeric, "1.59");
    double untruncated = CB_To_F64(numeric);
    CB_Set_Size(numeric, 3);
    double truncated = CB_To_F64(numeric);
    TEST_TRUE(batch, truncated < untruncated,
              "TO_F64 doesn't run past end of string");

    DECREF(numeric);
}
// Build a file list for `path` by handing the folder located at `path` to
// S_add_to_file_list().
//
// Always returns a new VArray; it is left empty when Folder_Find_Folder()
// finds no folder for `path`.  When `path` is non-NULL and non-empty,
// "<path>/" is passed along as the prefix; otherwise the prefix is empty.
VArray*
Folder_list_r(Folder *self, const CharBuf *path) {
    Folder *target  = Folder_Find_Folder(self, path);
    VArray *results = VA_new(0);

    // Nothing to list.
    if (target == NULL) {
        return results;
    }

    CharBuf *dir    = CB_new(20);
    CharBuf *prefix = CB_new(20);
    if (path != NULL && CB_Get_Size(path) != 0) {
        CB_setf(prefix, "%o/", path);
    }
    S_add_to_file_list(target, results, dir, prefix);
    DECREF(prefix);
    DECREF(dir);

    return results;
}
// Add the leftovers of an abandoned merge to `candidates`.
//
// After clearing a stale merge lock, and only if the merge lock is not
// held, the merge data is read.  If it carries a "cutoff" value and the
// corresponding segment directory exists, then that directory,
// "merge.json", and one "<segment>/<entry>" path per directory entry are
// stored in `candidates`.  THROWs if the segment directory exists but
// cannot be opened.
static void
S_zap_dead_merge(FilePurger *self, Hash *candidates) {
    IndexManager *manager    = self->manager;
    Lock         *merge_lock = IxManager_Make_Merge_Lock(manager);

    Lock_Clear_Stale(merge_lock);
    if (!Lock_Is_Locked(merge_lock)) {
        Hash *merge_data = IxManager_Read_Merge_Data(manager);
        Obj  *cutoff = merge_data
                       ? Hash_Fetch_Str(merge_data, "cutoff", 6)
                       : NULL;

        if (cutoff) {
            CharBuf *cutoff_seg = Seg_num_to_name(Obj_To_I64(cutoff));
            if (Folder_Exists(self->folder, cutoff_seg)) {
                ZombieCharBuf *merge_json = ZCB_WRAP_STR("merge.json", 10);
                DirHandle *dh = Folder_Open_Dir(self->folder, cutoff_seg);

                // Check before allocating anything else, and name the
                // directory which actually failed to open.
                if (!dh) {
                    THROW(ERR, "Can't open segment dir '%o'", cutoff_seg);
                }

                // NOTE(review): the entry is fetched once, before
                // iterating; this presumes DH_Next() updates that same
                // CharBuf in place -- confirm.
                CharBuf *entry    = DH_Get_Entry(dh);
                CharBuf *filepath = CB_new(32);

                Hash_Store(candidates, (Obj*)cutoff_seg, INCREF(&EMPTY));
                Hash_Store(candidates, (Obj*)merge_json, INCREF(&EMPTY));
                while (DH_Next(dh)) {
                    // TODO: recursively delete subdirs within seg dir.
                    CB_setf(filepath, "%o/%o", cutoff_seg, entry);
                    Hash_Store(candidates, (Obj*)filepath, INCREF(&EMPTY));
                }
                DECREF(filepath);
                DECREF(dh);
            }
            DECREF(cutoff_seg);
        }

        DECREF(merge_data);
    }

    DECREF(merge_lock);
    return;
}
// Report whether the current directory entry is a symbolic link.
//
// Returns false when there is no current entry or when lstat() fails.
bool
FSDH_entry_is_symlink(FSDirHandle *self) {
    FSDirHandleIVARS *const ivars = FSDH_IVARS(self);
    struct dirent *sys_dir_entry = (struct dirent*)ivars->sys_dir_entry;
    if (!sys_dir_entry) { return false; }

    // If d_type is available, try to avoid an lstat() call.  If it's not,
    // or if the type comes back as unknown, fall back to lstat() -- the
    // same strategy FSDH_entry_is_dir() uses.
#ifdef CHY_HAS_DIRENT_D_TYPE
    if (sys_dir_entry->d_type == DT_LNK) {
        return true;
    }
    else if (sys_dir_entry->d_type != DT_UNKNOWN) {
        return false;
    }
#endif

    // Build "<dir><sep><entry>" in the lazily created scratch buffer.
    struct stat stat_buf;
    if (!ivars->fullpath) {
        ivars->fullpath = CB_new(CB_Get_Size(ivars->dir) + 20);
    }
    CB_setf(ivars->fullpath, "%o%s%o", ivars->dir, CHY_DIR_SEP,
            ivars->entry);

    // lstat() examines the link itself; stat() would follow the link and
    // therefore never report S_IFLNK.
    if (lstat((char*)CB_Get_Ptr8(ivars->fullpath), &stat_buf) == -1) {
        return false;
    }

    // Compare the file-type field as a whole.  A plain `st_mode & S_IFLNK`
    // bit test also matches regular files, whose type code shares a bit
    // with S_IFLNK.
    return (stat_buf.st_mode & S_IFMT) == S_IFLNK;
}
// Finish writing sort data for the segment.
//
// Does nothing if the temp outstreams were never opened.  Otherwise:
// optionally flushes every field writer, closes the temp outstreams,
// flips and finishes each field writer while recording its count, null
// ord (when not -1) and ord width, stores the "sort" metadata in the
// segment, and finally deletes the three temp files.
void
SortWriter_finish(SortWriter *self) {
    SortWriterIVARS *const ivars = SortWriter_IVARS(self);
    VArray *const writers = ivars->field_writers;

    // If we have no data, bail out.
    if (ivars->temp_ord_out == NULL) {
        return;
    }

    // If we've either flushed or added segments, flush everything so that
    // any one field can use the entire margin up to mem_thresh.
    if (ivars->flush_at_finish) {
        const uint32_t num_slots = VA_Get_Size(writers);
        for (uint32_t slot = 1; slot < num_slots; slot++) {
            SortFieldWriter *writer
                = (SortFieldWriter*)VA_Fetch(writers, slot);
            if (writer != NULL) {
                SortFieldWriter_Flush(writer);
            }
        }
    }

    // Close down temp streams.
    OutStream_Close(ivars->temp_ord_out);
    OutStream_Close(ivars->temp_ix_out);
    OutStream_Close(ivars->temp_dat_out);

    // Take each writer out of the array, finish it, and record its stats
    // keyed by field name.  Slot 0 is skipped.
    const uint32_t num_slots = VA_Get_Size(writers);
    for (uint32_t slot = 1; slot < num_slots; slot++) {
        SortFieldWriter *writer
            = (SortFieldWriter*)VA_Delete(writers, slot);
        if (writer != NULL) {
            CharBuf *field_name = Seg_Field_Name(ivars->segment, slot);

            SortFieldWriter_Flip(writer);
            int32_t num_values = SortFieldWriter_Finish(writer);
            Hash_Store(ivars->counts, (Obj*)field_name,
                       (Obj*)CB_newf("%i32", num_values));

            // A null ord of -1 is not recorded.
            int32_t null_ord = SortFieldWriter_Get_Null_Ord(writer);
            if (null_ord != -1) {
                Hash_Store(ivars->null_ords, (Obj*)field_name,
                           (Obj*)CB_newf("%i32", null_ord));
            }

            int32_t width = SortFieldWriter_Get_Ord_Width(writer);
            Hash_Store(ivars->ord_widths, (Obj*)field_name,
                       (Obj*)CB_newf("%i32", width));
        }
        DECREF(writer);
    }
    VA_Clear(writers);

    // Store metadata.
    Seg_Store_Metadata_Str(ivars->segment, "sort", 4,
                           (Obj*)SortWriter_Metadata(self));

    // Clean up the temp files, reusing one path buffer for all three.
    Folder  *target       = ivars->folder;
    CharBuf *segment_name = Seg_Get_Name(ivars->segment);
    CharBuf *temp_path    = CB_newf("%o/sort_ord_temp", segment_name);
    Folder_Delete(target, temp_path);
    CB_setf(temp_path, "%o/sort_ix_temp", segment_name);
    Folder_Delete(target, temp_path);
    CB_setf(temp_path, "%o/sort_dat_temp", segment_name);
    Folder_Delete(target, temp_path);
    DECREF(temp_path);
}