/* Prepare to write lexicon data for one field.
 *
 * Builds the three per-field file names ("lexicon-<field_num>.dat", ".ix"
 * and ".ixix") under the segment's name, registers each with the snapshot,
 * opens an outstream for each, and resets the per-field counters, the last
 * TermInfo and the last term text.
 *
 * Throws if any of the three outstreams could not be opened.
 */
void
LexWriter_start_field(LexiconWriter *self, i32_t field_num)
{
    CharBuf  *const seg_name = Seg_Get_Name(self->segment);
    Folder   *const folder   = self->folder;
    Snapshot *const snapshot = LexWriter_Get_Snapshot(self);

    /* Name all three files first. */
    CB_setf(self->dat_file,  "%o/lexicon-%i32.dat",  seg_name, field_num);
    CB_setf(self->ix_file,   "%o/lexicon-%i32.ix",   seg_name, field_num);
    CB_setf(self->ixix_file, "%o/lexicon-%i32.ixix", seg_name, field_num);

    /* Register the files with the snapshot. */
    Snapshot_Add_Entry(snapshot, self->dat_file);
    Snapshot_Add_Entry(snapshot, self->ix_file);
    Snapshot_Add_Entry(snapshot, self->ixix_file);

    /* Open all three outstreams, then verify each one. */
    self->dat_out  = Folder_Open_Out(folder, self->dat_file);
    self->ix_out   = Folder_Open_Out(folder, self->ix_file);
    self->ixix_out = Folder_Open_Out(folder, self->ixix_file);
    if (self->dat_out == NULL) {
        THROW("Can't open %o", self->dat_file);
    }
    if (self->ix_out == NULL) {
        THROW("Can't open %o", self->ix_file);
    }
    if (self->ixix_out == NULL) {
        THROW("Can't open %o", self->ixix_file);
    }

    /* Start the field with zeroed counts, a blank TermInfo and empty
     * last-term text. */
    self->count    = 0;
    self->ix_count = 0;
    TInfo_Reset(self->last_tinfo);
    CB_Set_Size(self->last_text, 0);
}
// Read a delta-encoded term from `instream` into the stepper's value buffer.
//
// The stream supplies two compressed integers: the number of leading bytes
// to keep from the buffer's current content (`text_overlap`) and the number
// of new bytes that follow (`finish_chars_len`).  The new bytes are read in
// directly after the kept prefix, the combined text is validated as UTF-8
// and NUL-terminated, and the cached string is dropped.
//
// Throws if the two lengths cannot be summed in 32 bits or if the resulting
// text is not valid UTF-8.
void
TextTermStepper_Read_Delta_IMP(TextTermStepper *self, InStream *instream) {
    TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
    const uint32_t text_overlap     = InStream_Read_C32(instream);
    const uint32_t finish_chars_len = InStream_Read_C32(instream);

    // Both lengths come from the stream.  If their sum wrapped around, the
    // buffer would be sized from the wrapped value while `finish_chars_len`
    // bytes were still written at `ptr + text_overlap`.
    if (finish_chars_len > UINT32_MAX - text_overlap) {
        THROW(ERR, "Term text length overflows in '%o' at byte %i64",
              InStream_Get_Filename(instream), InStream_Tell(instream));
    }
    const uint32_t total_text_len = text_overlap + finish_chars_len;

    // Allocate space.
    // NOTE(review): nothing here checks that `text_overlap` is within the
    // buffer's current size; the UTF-8 validation below is the only guard
    // against a bogus overlap -- confirm whether callers guarantee it.
    CharBuf *charbuf = (CharBuf*)ivars->value;
    char    *ptr     = CB_Grow(charbuf, total_text_len);

    // Set the value text.
    InStream_Read_Bytes(instream, ptr + text_overlap, finish_chars_len);
    CB_Set_Size(charbuf, total_text_len);
    if (!StrHelp_utf8_valid(ptr, total_text_len)) {
        THROW(ERR, "Invalid UTF-8 sequence in '%o' at byte %i64",
              InStream_Get_Filename(instream),
              InStream_Tell(instream) - finish_chars_len);
    }

    // Null-terminate.
    ptr[total_text_len] = '\0';

    // Invalidate string.
    DECREF(ivars->string);
    ivars->string = NULL;
}
// Copy `orig` into `buf`, dropping the backslash from the escape sequences
// \: \" and \\.  A backslash followed by anything else is kept verbatim
// along with the following code point; a trailing lone backslash is kept
// too.  `buf` is emptied first and is used only as scratch space; the result
// is returned as a new String.
static String*
S_unescape(QueryParser *self, String *orig, CharBuf *buf) {
    StringIterator *iter = Str_Top(orig);
    UNUSED_VAR(self);

    CB_Set_Size(buf, 0);
    CB_Grow(buf, Str_Get_Size(orig) + 4);

    for (;;) {
        const int32_t current = StrIter_Next(iter);
        if (current == STRITER_DONE) {
            break;
        }

        // Ordinary code points pass straight through.
        if (current != '\\') {
            CB_Cat_Char(buf, current);
            continue;
        }

        // Backslash: look at what it escapes.
        const int32_t escaped = StrIter_Next(iter);
        const bool    is_recognized = escaped == ':'
                                      || escaped == '"'
                                      || escaped == '\\';
        if (is_recognized) {
            // Emit only the escaped character.
            CB_Cat_Char(buf, escaped);
        }
        else {
            // Unknown escape: keep the backslash and whatever follows it.
            CB_Cat_Char(buf, current);
            if (escaped != STRITER_DONE) {
                CB_Cat_Char(buf, escaped);
            }
        }
    }

    DECREF(iter);
    return CB_To_String(buf);
}
// Fetch the text value for ordinal `ord` into `blank`.
//
// Returns NULL when `ord` is the cache's null ordinal or when its slot in
// the index stream holds NULL_SENTINEL.  Otherwise `blank` (which must be a
// CharBuf) is filled with the bytes lying between this ordinal's offset and
// the next non-sentinel offset in the index stream, NUL-terminated, and
// returned.
Obj*
TextSortCache_value(TextSortCache *self, int32_t ord, Obj *blank) {
    // Each index slot is one int64_t.  Compute file positions in 64-bit
    // arithmetic so the multiplication cannot wrap where size_t is 32 bits.
    const int64_t slot_width = (int64_t)sizeof(int64_t);

    if (ord == self->null_ord) {
        return NULL;
    }

    InStream_Seek(self->ix_in, (int64_t)ord * slot_width);
    const int64_t offset = InStream_Read_I64(self->ix_in);
    if (offset == NULL_SENTINEL) {
        return NULL;
    }

    // Skip forward over sentinel slots to find where this value ends.
    // NOTE(review): this loop only stops at a non-sentinel slot, so it
    // presumably relies on the index ending with a real offset -- confirm.
    uint32_t next_ord = ord + 1;
    int64_t  next_offset;
    while (1) {
        InStream_Seek(self->ix_in, (int64_t)next_ord * slot_width);
        next_offset = InStream_Read_I64(self->ix_in);
        if (next_offset != NULL_SENTINEL) {
            break;
        }
        next_ord++;
    }

    // Read character data into CharBuf.
    CERTIFY(blank, CHARBUF);
    int64_t len = next_offset - offset;
    char *ptr = CB_Grow((CharBuf*)blank, (size_t)len);
    InStream_Seek(self->dat_in, offset);
    InStream_Read_Bytes(self->dat_in, ptr, (size_t)len);
    ptr[len] = '\0';
    CB_Set_Size((CharBuf*)blank, (size_t)len);

    return blank;
}
// Populate `self` from `instream`.
//
// The stream holds the total entry count, then the count of entries whose
// keys are CharBufs, then the CharBuf-keyed entries (key length, key bytes,
// frozen value) followed by the remaining entries (frozen key, frozen
// value).  Returns `self`.
//
// NOTE(review): the count of remaining entries is `size - num_charbufs` in
// unsigned arithmetic; this presumes the stream never reports more CharBuf
// keys than total entries -- confirm.
Hash*
Hash_deserialize(Hash *self, InStream *instream) {
    const uint32_t size         = InStream_Read_C32(instream);
    const uint32_t num_charbufs = InStream_Read_C32(instream);
    const uint32_t num_other    = size - num_charbufs;

    // One scratch key is reused for every CharBuf-keyed entry.
    CharBuf *key = NULL;
    if (num_charbufs) {
        key = CB_new(0);
    }

    Hash_init(self, size);

    // Read key-value pairs with CharBuf keys.
    for (uint32_t i = 0; i < num_charbufs; i++) {
        const uint32_t len     = InStream_Read_C32(instream);
        char          *key_buf = CB_Grow(key, len);
        InStream_Read_Bytes(instream, key_buf, len);
        key_buf[len] = '\0';
        CB_Set_Size(key, len);

        Obj *value = THAW(instream);
        Hash_Store(self, (Obj*)key, value);
    }
    DECREF(key);

    // Read remaining key/value pairs.
    for (uint32_t i = 0; i < num_other; i++) {
        Obj *k     = THAW(instream);
        Obj *value = THAW(instream);
        Hash_Store(self, k, value);
        DECREF(k);
    }

    return self;
}
// Read a complete (non-delta) term from `instream` into the stepper's value
// buffer, creating the buffer on first use.  The stream supplies the byte
// length followed by the text, which is validated as UTF-8 and then
// NUL-terminated.  Throws on invalid UTF-8.
void
TextTermStepper_read_key_frame(TextTermStepper *self, InStream *instream) {
    const uint32_t text_len = InStream_Read_C32(instream);

    // Lazily create the value buffer, then make sure it is big enough.
    if (self->value == NULL) {
        self->value = (Obj*)CB_new(text_len);
    }
    CharBuf *const value = (CharBuf*)self->value;
    char    *const text  = CB_Grow(value, text_len);

    // Pull the term text in and record its length.
    InStream_Read_Bytes(instream, text, text_len);
    CB_Set_Size(value, text_len);

    // Reject malformed text, reporting where it started in the file.
    if (!StrHelp_utf8_valid(text, text_len)) {
        THROW(ERR, "Invalid UTF-8 sequence in '%o' at byte %i64",
              InStream_Get_Filename(instream),
              InStream_Tell(instream) - text_len);
    }

    // Null-terminate.
    text[text_len] = '\0';
}
// Replace the contents of `self` with the result of formatting `pattern`
// and the variadic arguments: the buffer is truncated to zero length and
// the formatted text is then appended via CB_VCatF.
void
CB_setf(CharBuf *self, const char *pattern, ...) {
    va_list ap;

    // Discard the old content before appending.
    CB_Set_Size(self, 0);

    va_start(ap, pattern);
    CB_VCatF(self, pattern, ap);
    va_end(ap);
}
// Return the stepper to its empty state: truncate the value buffer to zero
// length and drop the cached string.
void
TextTermStepper_Reset_IMP(TextTermStepper *self) {
    TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
    CharBuf *const value_buf = (CharBuf*)ivars->value;

    CB_Set_Size(value_buf, 0);

    // Invalidate string.
    DECREF(ivars->string);
    ivars->string = NULL;
}
// Return a new String holding `text` with HTML-unsafe content replaced by
// entities, using `buf` as scratch space (its prior content is discarded).
//
//   - '<', '>', '&' and '"' become &lt; &gt; &amp; and &quot;
//   - code points above 127, and ASCII code points that are neither
//     graphical nor whitespace, become numeric references (&#NNN;)
//   - everything else is copied through unchanged
static String*
S_encode_entities(String *text, CharBuf *buf) {
    StringIterator *iter = Str_Top(text);
    size_t space = 0;
    // Longest possible replacement for one code point: "&#1114111;"
    // (U+10FFFF), which is 10 bytes.
    const int MAX_ENTITY_BYTES = 10;

    // Scan first so that we only allocate once.
    int32_t code_point;
    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point > 127
            || (!isgraph(code_point) && !isspace(code_point))
            || code_point == '<'
            || code_point == '>'
            || code_point == '&'
            || code_point == '"'
           ) {
            space += MAX_ENTITY_BYTES;
        }
        else {
            space += 1;
        }
    }

    CB_Grow(buf, space);
    CB_Set_Size(buf, 0);
    DECREF(iter);

    // Second pass: emit the encoded text.  The byte counts passed to
    // CB_Cat_Trusted_Utf8 must match the entity literals exactly.
    iter = Str_Top(text);
    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point > 127
            || (!isgraph(code_point) && !isspace(code_point))
           ) {
            CB_catf(buf, "&#%u32;", code_point);
        }
        else if (code_point == '<') {
            CB_Cat_Trusted_Utf8(buf, "&lt;", 4);
        }
        else if (code_point == '>') {
            CB_Cat_Trusted_Utf8(buf, "&gt;", 4);
        }
        else if (code_point == '&') {
            CB_Cat_Trusted_Utf8(buf, "&amp;", 5);
        }
        else if (code_point == '"') {
            CB_Cat_Trusted_Utf8(buf, "&quot;", 6);
        }
        else {
            CB_Cat_Char(buf, code_point);
        }
    }

    DECREF(iter);
    return CB_To_String(buf);
}
/* Switch the writer into temp mode, directing term data to
 * `temp_outstream`.
 *
 * Throws if a data outstream is already assigned.  Otherwise takes a new
 * reference to `temp_outstream`, resets the counters, the last TermInfo and
 * the last term text, and sets the temp-mode flag.
 */
void
LexWriter_enter_temp_mode(LexiconWriter *self, OutStream *temp_outstream)
{
    /* A data outstream must not already be assigned. */
    if (self->dat_out != NULL) {
        THROW("Can't enter temp mode (filename: %o) ", self->dat_file);
    }

    /* Adopt the temp outstream. */
    self->dat_out = (OutStream*)INCREF(temp_outstream);

    /* Start from a clean slate: counts, TermInfo and last term text. */
    self->count    = 0;
    self->ix_count = 0;
    TInfo_Reset(self->last_tinfo);
    CB_Set_Size(self->last_text, 0);

    /* Remember that we're in temp mode. */
    self->temp_mode = true;
}
// Exercise CB_Find with the needle "foo" against a haystack that is, in
// turn: empty, exactly the needle, the needle after one leading character,
// that same buffer truncated to three bytes (so the needle is cut off), and
// the needle surrounded by other characters.
static void
test_Find(TestBatch *batch) {
    CharBuf *haystack = CB_new(10);
    CharBuf *needle   = S_get_cb("foo");

    TEST_TRUE(batch, CB_Find(haystack, needle) == -1,
              "Not in empty string");

    CB_setf(haystack, "foo");
    TEST_TRUE(batch, CB_Find(haystack, needle) == 0,
              "Find complete string");

    CB_setf(haystack, "afoo");
    TEST_TRUE(batch, CB_Find(haystack, needle) == 1,
              "Find after first");

    // Shrinking the size leaves "foo" partly outside the logical content.
    CB_Set_Size(haystack, 3);
    TEST_TRUE(batch, CB_Find(haystack, needle) == -1,
              "Don't overrun");

    CB_setf(haystack, "afood");
    TEST_TRUE(batch, CB_Find(haystack, needle) == 1,
              "Find in middle");

    DECREF(needle);
    DECREF(haystack);
}
// Exercise CB_To_F64: a positive value, a negative value, and a buffer whose
// size has been shortened so that trailing digits lie past the logical end.
static void
test_To_F64(TestBatch *batch) {
    CharBuf *charbuf = S_get_cb("1.5");
    double   delta;

    // Positive value, compared within a tolerance.
    delta = 1.5 - CB_To_F64(charbuf);
    delta = (delta < 0) ? (0 - delta) : delta;
    TEST_TRUE(batch, delta < 0.001, "To_F64");

    // Negative value: adding 1.5 should land near zero.
    CB_setf(charbuf, "-1.5");
    delta = 1.5 + CB_To_F64(charbuf);
    delta = (delta < 0) ? (0 - delta) : delta;
    TEST_TRUE(batch, delta < 0.001, "To_F64 negative");

    // Parse "1.59", then truncate to three bytes and parse again; the
    // shortened parse must come out smaller.
    CB_setf(charbuf, "1.59");
    const double value_full = CB_To_F64(charbuf);
    CB_Set_Size(charbuf, 3);
    const double value_short = CB_To_F64(charbuf);
    TEST_TRUE(batch, value_short < value_full,
              "TO_F64 doesn't run past end of string");

    DECREF(charbuf);
}
// Build a Hash mapping each term's text to a ByteBuf holding that term's
// still-compressed position data, decoded from the contents of `field_buf`.
//
// The buffer starts with a compressed term count.  Each term record then
// carries an overlap length, a suffix length and the suffix bytes (the text
// is rebuilt by keeping `overlap` bytes of the previous term and appending
// the suffix), followed by a compressed position count and three compressed
// integers per position.
static Hash*
S_extract_tv_cache(ByteBuf *field_buf) {
    Hash       *tv_cache  = Hash_new(0);
    const char *cursor    = BB_Get_Buf(field_buf);
    int32_t     num_terms = NumUtil_decode_c32(&cursor);
    CharBuf    *text_buf  = CB_new(0);

    // Walk the term records.
    for (int32_t remaining = num_terms; remaining > 0; remaining--) {
        const size_t overlap    = NumUtil_decode_c32(&cursor);
        const size_t suffix_len = NumUtil_decode_c32(&cursor);

        // Decompress the term text.
        CB_Set_Size(text_buf, overlap);
        CB_Cat_Trusted_Utf8(text_buf, cursor, suffix_len);
        cursor += suffix_len;

        // Measure the position data without decoding it.
        const char *const posdata_start = cursor;
        int32_t num_positions = NumUtil_decode_c32(&cursor);
        for (; num_positions != 0; num_positions--) {
            // Leave nums compressed to save a little mem.
            NumUtil_skip_cint(&cursor);
            NumUtil_skip_cint(&cursor);
            NumUtil_skip_cint(&cursor);
        }
        const size_t posdata_len = cursor - posdata_start;

        // Store the text => posdata pair in the output hash.
        String  *text    = CB_To_String(text_buf);
        ByteBuf *posdata = BB_new_bytes(posdata_start, posdata_len);
        Hash_Store(tv_cache, (Obj*)text, (Obj*)posdata);
        DECREF(text);
    }

    DECREF(text_buf);
    return tv_cache;
}
/* Indent every line of `charbuf` in place by `amount` spaces.
 *
 * A margin of `amount` spaces is inserted at the very start of the buffer
 * and directly after every newline (including a trailing one).  The buffer
 * is grown to its final size up front and the content is then shifted from
 * the back toward the front, so no scratch copy is needed.
 */
void
StrHelp_add_indent(CharBuf *charbuf, size_t amount)
{
    size_t  num_margins = 1;
    size_t  new_size;
    char   *source;
    char   *dest;

    /* Add a margin for every newline. */
    {
        const char *scan        = charbuf->ptr;
        const char *const limit = CBEND(charbuf);
        for ( ; scan < limit; scan++) {
            if (*scan == '\n') { num_margins++; }
        }
    }

    /* Make space for margins. */
    new_size = CB_Get_Size(charbuf) + (num_margins * amount);
    CB_Grow(charbuf, new_size);
    source = CBEND(charbuf);            /* one past the old content */
    CB_Set_Size(charbuf, new_size);
    dest = CBEND(charbuf);              /* one past the new content */
    *dest = '\0';

    /* Copy backwards.  Both cursors are pre-decremented and compared with
     * `>` so that neither is ever moved to before the start of the
     * buffer. */
    while (source > charbuf->ptr) {
        source--;
        if (*source == '\n') {
            /* The margin belongs after the newline, so in a backward walk
             * it is written first. */
            size_t i;
            for (i = 0; i < amount; i++) {
                *--dest = ' ';
            }
        }
        *--dest = *source;
    }

    /* Whatever room is left at the front is the first line's margin. */
    while (dest > charbuf->ptr) {
        *--dest = ' ';
    }
}
// Advance to the next directory entry using readdir().
//
// Returns false, with the entry name emptied, when readdir() yields nothing
// more.  Entries for which SI_is_updir() is true are skipped by dispatching
// to FSDH_Next() again.  Otherwise the entry name is copied into
// `ivars->entry` and true is returned.
bool
FSDH_next(FSDirHandle *self) {
    FSDirHandleIVARS *const ivars = FSDH_IVARS(self);

    ivars->sys_dir_entry
        = (struct dirent*)readdir((DIR*)ivars->sys_dirhandle);

    // Nothing more to read.
    if (!ivars->sys_dir_entry) {
        CB_Set_Size(ivars->entry, 0);
        return false;
    }

    struct dirent *const dir_entry = (struct dirent*)ivars->sys_dir_entry;
    const char    *const name      = dir_entry->d_name;

    // Use the length supplied by the system when it is available.
#ifdef CHY_HAS_DIRENT_D_NAMLEN
    const size_t name_len = dir_entry->d_namlen;
#else
    const size_t name_len = strlen(name);
#endif

    if (SI_is_updir(name, name_len)) {
        return FSDH_Next(self);
    }

    CB_Mimic_Str(ivars->entry, dir_entry->d_name, name_len);
    return true;
}
// Advance to the next directory entry using FindNextFile().
//
// Returns false immediately if the handle is missing or invalid.  If an
// iteration has been cached (`delayed_iter`), it is consumed instead of
// calling FindNextFile().  When FindNextFile() fails, the entry name is
// emptied, an error is saved unless the failure was ERROR_NO_MORE_FILES,
// and false is returned.  Entries for which SI_is_updir() is true are
// skipped by dispatching to FSDH_Next() again.
bool
FSDH_next(FSDirHandle *self) {
    FSDirHandleIVARS *const ivars = FSDH_IVARS(self);
    HANDLE           dirhandle = (HANDLE)ivars->sys_dirhandle;
    WIN32_FIND_DATA *find_data = (WIN32_FIND_DATA*)ivars->sys_dir_entry;

    // No usable handle, nothing to iterate.
    if (!dirhandle || dirhandle == INVALID_HANDLE_VALUE) {
        return false;
    }

    // Attempt to move forward or absorb cached iter.
    if (ivars->delayed_iter) {
        ivars->delayed_iter = false;
    }
    else if (FindNextFile(dirhandle, find_data) == 0) {
        // Iterator exhausted.  Verify that no errors were encountered.
        CB_Set_Size(ivars->entry, 0);
        if (GetLastError() != ERROR_NO_MORE_FILES) {
            char *win_error = Err_win_error();
            ivars->saved_error
                = Err_new(CB_newf("Error while traversing directory: %s",
                                  win_error));
            FREEMEM(win_error);
        }
        return false;
    }

    // Process the results of the iteration.
    const size_t name_len = strlen(find_data->cFileName);
    if (SI_is_updir(find_data->cFileName, name_len)) {
        return FSDH_Next(self);
    }

    CB_Mimic_Str(ivars->entry, find_data->cFileName, name_len);
    return true;
}
// Append to `list` the relative path of every entry under `dir`, recursing
// into subdirectories that are not symlinks.
//
// `prefix` holds the relative path accumulated so far.  It is extended with
// "<entry>/" for the duration of each recursive call and then truncated
// back to its original length.  Rethrows the current error if the directory
// cannot be opened or closed.
static void
S_add_to_file_list(Folder *self, VArray *list, CharBuf *dir,
                   CharBuf *prefix) {
    const size_t orig_prefix_size = CB_Get_Size(prefix);

    DirHandle *dh = Folder_Open_Dir(self, dir);
    if (!dh) {
        RETHROW(INCREF(Err_get_error()));
    }

    // The handle rewrites this same CharBuf on every DH_Next().
    CharBuf *entry = DH_Get_Entry(dh);

    while (DH_Next(dh)) {
        if (S_is_updir(entry)) {
            continue;
        }

        // Record the entry itself, doubling the list's capacity when full.
        CharBuf *relpath = CB_newf("%o%o", prefix, entry);
        if (VA_Get_Size(list) == VA_Get_Capacity(list)) {
            VA_Grow(list, VA_Get_Size(list) * 2);
        }
        VA_Push(list, (Obj*)relpath);

        // Only real directories are descended into.
        if (!DH_Entry_Is_Dir(dh) || DH_Entry_Is_Symlink(dh)) {
            continue;
        }

        CharBuf *subdir;
        if (CB_Get_Size(dir)) {
            subdir = CB_newf("%o/%o", dir, entry);
        }
        else {
            subdir = CB_Clone(entry);
        }

        CB_catf(prefix, "%o/", entry);
        S_add_to_file_list(self, list, subdir, prefix); // recurse
        CB_Set_Size(prefix, orig_prefix_size);
        DECREF(subdir);
    }

    if (!DH_Close(dh)) {
        RETHROW(INCREF(Err_get_error()));
    }
    DECREF(dh);
}
// Return the stepper to its empty state by truncating the value buffer to
// zero length.
void
TextTermStepper_reset(TextTermStepper *self) {
    CharBuf *const value_buf = (CharBuf*)self->value;
    CB_Set_Size(value_buf, 0);
}