// Set the host-language alias for this method.  May only be set once; a
// second call THROWs.
void
Method_Set_Host_Alias_IMP(Method *self, String *name) {
    if (self->host_alias) {
        THROW(ERR, "Can't Set_Host_Alias more than once");
    }
    // Copy the name, then expose it through a zero-copy "wrapped" String
    // whose buffer is owned by `host_alias_internal` (same two-string
    // scheme as Method_init uses for `name`).
    self->host_alias_internal
        = Str_new_from_trusted_utf8(Str_Get_Ptr8(name), Str_Get_Size(name));
    self->host_alias
        = Str_new_wrap_trusted_utf8(Str_Get_Ptr8(self->host_alias_internal),
                                    Str_Get_Size(self->host_alias_internal));
}
// Wrap `text` in the Highlighter's configured pre_tag/post_tag markup and
// return the result as a new String.
String*
Highlighter_Highlight_IMP(Highlighter *self, String *text) {
    HighlighterIVARS *const ivars = Highlighter_IVARS(self);
    // Reserve exactly enough room for tag + text + tag.
    size_t needed = Str_Get_Size(ivars->pre_tag)
                    + Str_Get_Size(text)
                    + Str_Get_Size(ivars->post_tag);
    CharBuf *scratch = CB_new(needed);
    CB_Cat(scratch, ivars->pre_tag);
    CB_Cat(scratch, text);
    CB_Cat(scratch, ivars->post_tag);
    String *result = CB_Yield_String(scratch);
    DECREF(scratch);
    return result;
}
// Tokenize `text` with the regex engine, returning a freshly created
// Inversion holding the resulting tokens.
Inversion*
RegexTokenizer_Transform_Text_IMP(RegexTokenizer *self, String *text) {
    Inversion *inversion = Inversion_new(NULL);
    const char *utf8 = Str_Get_Ptr8(text);
    size_t len = Str_Get_Size(text);
    RegexTokenizer_Tokenize_Utf8(self, utf8, len, inversion);
    return inversion;
}
// Callback: dump the prepared JSON held in `context` to its outstream,
// then close the stream.
static void
S_write_lockfile_json(void *context) {
    struct lockfile_context *const stuff
        = (struct lockfile_context*)context;
    const char *json_ptr = Str_Get_Ptr8(stuff->json);
    size_t json_size = Str_Get_Size(stuff->json);
    OutStream_Write_Bytes(stuff->outstream, json_ptr, json_size);
    OutStream_Close(stuff->outstream);
}
// Open `name` for reading.  Files recorded in the compound-file metadata
// are served as sub-ranges of the shared virtual instream; anything else
// falls through to the real folder underneath.
InStream*
CFReader_Local_Open_In_IMP(CompoundFileReader *self, String *name) {
    CompoundFileReaderIVARS *const ivars = CFReader_IVARS(self);
    Hash *entry = (Hash*)Hash_Fetch(ivars->records, name);
    if (!entry) {
        // Not part of the compound file -- delegate to the wrapped folder.
        InStream *instream = Folder_Local_Open_In(ivars->real_folder, name);
        if (!instream) {
            ERR_ADD_FRAME(Err_get_error());
        }
        return instream;
    }
    else {
        Obj *len    = Hash_Fetch_Utf8(entry, "length", 6);
        Obj *offset = Hash_Fetch_Utf8(entry, "offset", 6);
        if (!len || !offset) {
            // Metadata record is missing a required key.
            Err_set_error(Err_new(Str_newf("Malformed entry for '%o' in '%o'",
                                           name,
                                           Folder_Get_Path(ivars->real_folder))));
            return NULL;
        }
        else if (Str_Get_Size(ivars->path)) {
            // Qualify the name with this folder's path for the reopened view.
            String *fullpath = Str_newf("%o/%o", ivars->path, name);
            InStream *instream = InStream_Reopen(ivars->instream, fullpath,
                                                 Obj_To_I64(offset),
                                                 Obj_To_I64(len));
            DECREF(fullpath);
            return instream;
        }
        else {
            return InStream_Reopen(ivars->instream, name,
                                   Obj_To_I64(offset), Obj_To_I64(len));
        }
    }
}
// Absorb one field's contents: generate an Inversion either through the
// entry's Analyzer or -- for unanalyzed indexed/highlightable fields -- by
// wrapping the raw value in a single Token, then queue the entry.
void
Inverter_Add_Field_IMP(Inverter *self, InverterEntry *entry) {
    InverterIVARS *const ivars = Inverter_IVARS(self);
    InverterEntryIVARS *const entry_ivars = InvEntry_IVARS(entry);

    // Get an Inversion, going through analyzer if appropriate.
    if (entry_ivars->analyzer) {
        DECREF(entry_ivars->inversion);
        entry_ivars->inversion
            = Analyzer_Transform_Text(entry_ivars->analyzer,
                                      (String*)entry_ivars->value);
        Inversion_Invert(entry_ivars->inversion);
    }
    else if (entry_ivars->indexed || entry_ivars->highlightable) {
        // No analyzer: treat the whole field value as one token.
        String *value = (String*)entry_ivars->value;
        size_t token_len = Str_Get_Size(value);
        Token *seed = Token_new(Str_Get_Ptr8(value), token_len, 0,
                                token_len, 1.0f, 1);
        DECREF(entry_ivars->inversion);
        entry_ivars->inversion = Inversion_new(seed);
        DECREF(seed);
        Inversion_Invert(entry_ivars->inversion); // Nearly a no-op.
    }

    // Prime the iterator.
    VA_Push(ivars->entries, INCREF(entry));
    ivars->sorted = false;
}
// Run `text` through the chain of child analyzers.  With no children, the
// entire text becomes a single Token.
Inversion*
PolyAnalyzer_Transform_Text_IMP(PolyAnalyzer *self, String *text) {
    VArray *const analyzers = PolyAnalyzer_IVARS(self)->analyzers;
    const uint32_t num_analyzers = VA_Get_Size(analyzers);
    Inversion *inversion;

    if (num_analyzers == 0) {
        // Degenerate case: one token spanning the whole text.
        const char *utf8 = Str_Get_Ptr8(text);
        size_t len = Str_Get_Size(text);
        Token *token = Token_new(utf8, len, 0, len, 1.0f, 1);
        inversion = Inversion_new(token);
        DECREF(token);
        return inversion;
    }

    // The first child consumes raw text; the rest transform Inversions.
    Analyzer *head = (Analyzer*)VA_Fetch(analyzers, 0);
    inversion = Analyzer_Transform_Text(head, text);
    for (uint32_t i = 1; i < num_analyzers; i++) {
        Analyzer *child = (Analyzer*)VA_Fetch(analyzers, i);
        Inversion *next = Analyzer_Transform(child, inversion);
        DECREF(inversion);
        inversion = next;
    }
    return inversion;
}
// Serialize `string` to `outstream`: a compressed-integer byte count
// followed by the raw UTF-8 bytes.
void
Freezer_serialize_string(String *string, OutStream *outstream) {
    size_t size = Str_Get_Size(string);
    const char *buf = Str_Get_Ptr8(string);
    // Reject sizes a 32-bit reader can't round-trip.  (The sibling
    // implementation of this function elsewhere in this file performs the
    // same guard; this variant previously wrote the length unchecked.)
    if (size > INT32_MAX) {
        THROW(ERR, "Can't serialize string above 2GB: %u64", (uint64_t)size);
    }
    OutStream_Write_C64(outstream, size);
    OutStream_Write_Bytes(outstream, buf, size);
}
// Copy `orig` into `buf`, resolving backslash escapes for ':', '"' and
// '\'.  Any other backslash sequence passes through verbatim.  Returns a
// new String built from the buffer's contents.
static String*
S_unescape(QueryParser *self, String *orig, CharBuf *buf) {
    StringIterator *iter = Str_Top(orig);
    int32_t code_point;
    UNUSED_VAR(self);

    CB_Set_Size(buf, 0);
    CB_Grow(buf, Str_Get_Size(orig) + 4);

    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point == '\\') {
            int32_t next_code_point = StrIter_Next(iter);
            if (next_code_point == ':'
                || next_code_point == '"'
                || next_code_point == '\\'
               ) {
                // Recognized escape: emit only the escaped character.
                CB_Cat_Char(buf, next_code_point);
            }
            else {
                // Unknown escape: keep the backslash and what follows it.
                CB_Cat_Char(buf, code_point);
                if (next_code_point != STRITER_DONE) {
                    CB_Cat_Char(buf, next_code_point);
                }
            }
        }
        else {
            CB_Cat_Char(buf, code_point);
        }
    }

    DECREF(iter);
    return CB_To_String(buf);
}
// Split `text` on whitespace, returning a freshly created Inversion with
// the resulting tokens.
Inversion*
WhitespaceTokenizer_Transform_Text_IMP(WhitespaceTokenizer *self,
                                       String *text) {
    Inversion *inversion = Inversion_new(NULL);
    char *utf8 = (char*)Str_Get_Ptr8(text);
    WhitespaceTokenizer_Tokenize_Str(self, utf8, Str_Get_Size(text),
                                     inversion);
    return inversion;
}
// Serialize `string` to `outstream` as a compressed-integer byte count
// followed by the raw UTF-8 bytes.  Strings larger than INT32_MAX bytes
// are rejected so the length always round-trips through a 32-bit reader.
void
Freezer_serialize_string(String *string, OutStream *outstream) {
    size_t size = Str_Get_Size(string);
    const char *buf = Str_Get_Ptr8(string);
    if (size > INT32_MAX) {
        THROW(ERR, "Can't serialize string above 2GB: %u64", (uint64_t)size);
    }
    OutStream_Write_CU64(outstream, size);
    OutStream_Write_Bytes(outstream, buf, size);
}
// Initialize a Method: copy `name`, store `callback_func` and `offset`
// as-is, and leave the host alias unset.  Returns `self`.
Method*
Method_init(Method *self, String *name, cfish_method_t callback_func,
            uint32_t offset) {
    /* The `name` member which Method exposes via the `Get_Name` accessor
     * uses a "wrapped" string because that is effectively threadsafe: an
     * INCREF results in a copy and the only reference is owned by an
     * immortal object.
     */
    self->name_internal
        = Str_new_from_trusted_utf8(Str_Get_Ptr8(name), Str_Get_Size(name));
    self->name
        = Str_new_wrap_trusted_utf8(Str_Get_Ptr8(self->name_internal),
                                    Str_Get_Size(self->name_internal));
    self->host_alias    = NULL;
    self->callback_func = callback_func;
    self->offset        = offset;
    self->is_excluded   = false;
    return self;
}
// Install `utf8`/`size` as the Class's name.  A private copy owns the
// bytes; the public `name` is a zero-copy wrapper around that copy.  The
// wrapper is effectively threadsafe: its sole reference belongs to an
// immortal object, and any INCREF spawns a copy instead of bumping a
// shared count.
static void
S_set_name(Class *self, const char *utf8, size_t size) {
    String *owned = Str_new_from_trusted_utf8(utf8, size);
    self->name_internal = owned;
    self->name = Str_new_wrap_trusted_utf8(Str_Get_Ptr8(owned),
                                           Str_Get_Size(owned));
}
// Verify Get_Ptr8 and Get_Size against a known ASCII string.
static void
test_Get_Ptr8(TestBatchRunner *runner) {
    String *banana = S_get_str("Banana");
    TEST_TRUE(runner, strcmp(Str_Get_Ptr8(banana), "Banana") == 0,
              "Get_Ptr8");
    TEST_INT_EQ(runner, Str_Get_Size(banana), 6, "Get_Size");
    DECREF(banana);
}
// Normalize random strings with utf8proc twice and verify that the second
// pass is a no-op (i.e. the normal form is stable).  Currently skipped via
// SKIP + early return; the loop below is intentionally unreachable.
static void
test_utf8proc_normalization(TestBatchRunner *runner) {
    SKIP(runner, 1,
         "utf8proc can't handle control chars or Unicode non-chars");
    return;

    for (int32_t i = 0; i < 100; i++) {
        String *source = TestUtils_random_string(rand() % 40);

        // Normalize once.
        uint8_t *normalized;
        int32_t check = utf8proc_map((const uint8_t*)Str_Get_Ptr8(source),
                                     Str_Get_Size(source),
                                     &normalized,
                                     UTF8PROC_STABLE
                                     | UTF8PROC_COMPOSE
                                     | UTF8PROC_COMPAT
                                     | UTF8PROC_CASEFOLD);
        if (check < 0) {
            // Dump the offending input as JSON for diagnosis.
            lucy_Json_set_tolerant(1);
            String *json = lucy_Json_to_json((Obj*)source);
            if (!json) { json = Str_newf("[failed to encode]"); }
            FAIL(runner, "Failed to normalize: %s", Str_Get_Ptr8(json));
            DECREF(json);
            DECREF(source);
            return;
        }

        // Normalize again.
        size_t normalized_len = strlen((char*)normalized);
        uint8_t *dupe;
        int32_t dupe_check = utf8proc_map(normalized, normalized_len, &dupe,
                                          UTF8PROC_STABLE
                                          | UTF8PROC_COMPOSE
                                          | UTF8PROC_COMPAT
                                          | UTF8PROC_CASEFOLD);
        if (dupe_check < 0) {
            THROW(ERR, "Unexpected normalization error: %i32", dupe_check);
        }

        // The two passes must agree byte-for-byte.
        int comparison = strcmp((char*)normalized, (char*)dupe);
        free(dupe);
        free(normalized);
        DECREF(source);
        if (comparison != 0) {
            FAIL(runner, "Not fully normalized");
            return;
        }
    }
    PASS(runner, "Normalization successful.");
}
// Build a NUL-terminated filesystem path "<folder><DIR_SEP><path>" in
// freshly MALLOCATE'd memory, translating '/' to the platform separator
// when they differ.  Caller takes ownership of the returned buffer.
static char*
S_fullpath_ptr(FSFolder *self, String *path) {
    FSFolderIVARS *const ivars = FSFolder_IVARS(self);
    size_t folder_size = Str_Get_Size(ivars->path);
    size_t path_size   = Str_Get_Size(path);
    size_t full_size   = folder_size + 1 + path_size;  // +1 for separator.
    const char *folder_ptr = Str_Get_Ptr8(ivars->path);
    const char *path_ptr   = Str_Get_Ptr8(path);

    char *buf = (char*)MALLOCATE(full_size + 1);
    memcpy(buf, folder_ptr, folder_size);
    buf[folder_size] = DIR_SEP[0];
    memcpy(buf + folder_size + 1, path_ptr, path_size);
    buf[full_size] = '\0';

    // Convert any remaining forward slashes on platforms where the
    // directory separator differs (e.g. Windows).
    if (DIR_SEP[0] != '/') {
        for (size_t i = 0; i < full_size; ++i) {
            if (buf[i] == '/') { buf[i] = DIR_SEP[0]; }
        }
    }

    return buf;
}
// Load snapshot data from `path` -- or from the latest snapshot file found
// in `folder` when `path` is NULL or empty -- replacing any prior contents
// of `self`.  Returns `self`.
Snapshot*
Snapshot_Read_File_IMP(Snapshot *self, Folder *folder, String *path) {
    SnapshotIVARS *const ivars = Snapshot_IVARS(self);

    // Eliminate all prior data. Pick a snapshot file.
    S_zero_out(self);
    ivars->path = (path != NULL && Str_Get_Size(path) > 0)
                  ? Str_Clone(path)
                  : IxFileNames_latest_snapshot(folder);

    if (ivars->path) {
        Hash *snap_data
            = (Hash*)CERTIFY(Json_slurp_json(folder, ivars->path), HASH);
        Obj *format_obj
            = CERTIFY(Hash_Fetch_Utf8(snap_data, "format", 6), OBJ);
        int32_t format = (int32_t)Json_obj_to_i64(format_obj);
        Obj *subformat_obj = Hash_Fetch_Utf8(snap_data, "subformat", 9);
        int32_t subformat = subformat_obj
                            ? (int32_t)Json_obj_to_i64(subformat_obj)
                            : 0;

        // Verify that we can read the index properly.
        if (format > Snapshot_current_file_format) {
            THROW(ERR, "Snapshot format too recent: %i32, %i32", format,
                  Snapshot_current_file_format);
        }

        // Build up list of entries.
        Vector *list = (Vector*)INCREF(CERTIFY(
                           Hash_Fetch_Utf8(snap_data, "entries", 7),
                           VECTOR));
        // Older formats require their entry lists to be rewritten -- see
        // S_clean_segment_contents.
        if (format == 1 || (format == 2 && subformat < 1)) {
            Vector *cleaned = S_clean_segment_contents(list);
            DECREF(list);
            list = cleaned;
        }
        Hash_Clear(ivars->entries);
        for (uint32_t i = 0, max = Vec_Get_Size(list); i < max; i++) {
            String *entry = (String*)CERTIFY(Vec_Fetch(list, i), STRING);
            Hash_Store(ivars->entries, entry, (Obj*)CFISH_TRUE);
        }

        DECREF(list);
        DECREF(snap_data);
    }

    return self;
}
// Derive the host-language method name by stripping every underscore from
// the stored name (e.g. "Get_Value" becomes "GetValue").
String*
Method_Host_Name_IMP(Method *self) {
    CharBuf *accum = CB_new(Str_Get_Size(self->name));
    StringIterator *iter = StrIter_new(self->name, 0);
    int32_t cp;
    while (STR_OOB != (cp = StrIter_Next(iter))) {
        if (cp == '_') { continue; }
        CB_Cat_Char(accum, cp);
    }
    String *host_name = CB_Yield_String(accum);
    DECREF(accum);
    DECREF(iter);
    return host_name;
}
// Find (or lazily open and cache) a direct subfolder named `name`.
// Returns NULL for invalid names, non-local entries, dot-prefixed names,
// cached non-Folder entries, or paths that aren't directories.  When a
// subfolder contains a compound file, it is wrapped in a
// CompoundFileReader before caching.
Folder*
FSFolder_Local_Find_Folder_IMP(FSFolder *self, String *name) {
    FSFolderIVARS *const ivars = FSFolder_IVARS(self);
    Folder *subfolder = NULL;

    if (!name || !Str_Get_Size(name)) {
        // No entity can be identified by NULL or empty string.
        return NULL;
    }
    else if (!S_is_local_entry(name)) {
        return NULL;
    }
    else if (Str_Starts_With_Utf8(name, ".", 1)) {
        // Don't allow access outside of the main dir.
        return NULL;
    }
    else if (NULL != (subfolder
                      = (Folder*)Hash_Fetch(ivars->entries, (Obj*)name))
            ) {
        // Cache hit -- but only hand back genuine Folders.
        if (Folder_Is_A(subfolder, FOLDER)) { return subfolder; }
        else                                { return NULL; }
    }

    String *fullpath = S_fullpath(self, name);
    if (S_dir_ok(fullpath)) {
        subfolder = (Folder*)FSFolder_new(fullpath);
        if (!subfolder) {
            // Bug fix: THROW before releasing `fullpath`.  The original
            // code DECREF'd the sole reference first and then formatted
            // the freed String into the error message (use-after-free).
            // THROW does not return, so skipping the DECREF is safe.
            THROW(ERR, "Failed to open FSFolder at '%o'", fullpath);
        }
        // Try to open a CompoundFileReader. On failure, just use the
        // existing folder.
        String *cfmeta_file = (String*)SSTR_WRAP_UTF8("cfmeta.json", 11);
        if (Folder_Local_Exists(subfolder, cfmeta_file)) {
            CompoundFileReader *cf_reader = CFReader_open(subfolder);
            if (cf_reader) {
                DECREF(subfolder);
                subfolder = (Folder*)cf_reader;
            }
        }
        Hash_Store(ivars->entries, (Obj*)name, (Obj*)subfolder);
    }
    DECREF(fullpath);

    return subfolder;
}
// Generate a highlighted excerpt for `ivars->field` of `hit_doc`.  Returns
// NULL when the field is missing or not a String; an empty field value
// yields an empty String.
String*
Highlighter_Create_Excerpt_IMP(Highlighter *self, HitDoc *hit_doc) {
    HighlighterIVARS *const ivars = Highlighter_IVARS(self);
    String *field_val = (String*)HitDoc_Extract(hit_doc, ivars->field);
    String *retval;

    if (!field_val || !Obj_Is_A((Obj*)field_val, STRING)) {
        retval = NULL;
    }
    else if (!Str_Get_Size(field_val)) {
        // Empty string yields empty string.
        retval = Str_new_from_trusted_utf8("", 0);
    }
    else {
        // Score the document's spans, pick the hottest window, then mark
        // it up.
        DocVector *doc_vec
            = Searcher_Fetch_Doc_Vec(ivars->searcher,
                                     HitDoc_Get_Doc_ID(hit_doc));
        VArray *maybe_spans
            = Compiler_Highlight_Spans(ivars->compiler, ivars->searcher,
                                       doc_vec, ivars->field);
        VArray *score_spans = maybe_spans ? maybe_spans : VA_new(0);
        VA_Sort(score_spans, NULL, NULL);
        HeatMap *heat_map
            = HeatMap_new(score_spans, (ivars->excerpt_length * 2) / 3);
        int32_t top;  // Set by Raw_Excerpt: excerpt's offset in the field.
        String *raw_excerpt
            = Highlighter_Raw_Excerpt(self, field_val, &top, heat_map);
        String *highlighted
            = Highlighter_Highlight_Excerpt(self, score_spans, raw_excerpt,
                                            top);
        DECREF(raw_excerpt);
        DECREF(heat_map);
        DECREF(score_spans);
        DECREF(doc_vec);
        retval = highlighted;
    }

    DECREF(field_val);
    return retval;
}
// Read one posting from `instream` into a RawPosting allocated from
// `mem_pool`.  Doc ids are delta-encoded against `last_doc_id`; when the
// doc_code's low bit is set the frequency is implicitly 1, otherwise it
// follows as a separate C32.
RawPosting*
MatchPost_Read_Raw_IMP(MatchPosting *self, InStream *instream,
                       int32_t last_doc_id, String *term_text,
                       MemoryPool *mem_pool) {
    const char *const text_buf  = Str_Get_Ptr8(term_text);
    const size_t      text_size = Str_Get_Size(term_text);
    const uint32_t    doc_code  = InStream_Read_C32(instream);
    const uint32_t    delta_doc = doc_code >> 1;
    const int32_t     doc_id    = last_doc_id + delta_doc;
    const uint32_t    freq      = (doc_code & 1)
                                  ? 1
                                  : InStream_Read_C32(instream);
    const size_t base_size = VTable_Get_Obj_Alloc_Size(RAWPOSTING);
    size_t raw_post_bytes  = MAX_RAW_POSTING_LEN(base_size, text_size);
    void *const allocation = MemPool_Grab(mem_pool, raw_post_bytes);
    UNUSED_VAR(self);
    return RawPost_new(allocation, doc_id, freq, text_buf, text_size);
}
// Feed one document's sortable field value to the writer, charging its
// estimated memory footprint to `counter`.  TEXT and BLOB values add their
// byte size (+1, rounded up to a word multiple) on top of the fixed
// per-entry cost.
void
SortFieldWriter_Add_IMP(SortFieldWriter *self, int32_t doc_id, Obj *value) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);

    Counter *counter = ivars->counter;
    Counter_Add(counter, ivars->mem_per_entry);
    if (ivars->prim_id == FType_TEXT) {
        // +1 byte (presumably a terminator -- TODO confirm), word-aligned.
        int64_t size = Str_Get_Size((String*)value) + 1;
        size = SI_increase_to_word_multiple(size);
        Counter_Add(counter, size);
    }
    else if (ivars->prim_id == FType_BLOB) {
        int64_t size = Blob_Get_Size((Blob*)value) + 1;
        size = SI_increase_to_word_multiple(size);
        Counter_Add(counter, size);
    }

    // Clone the value so the caller's copy may be mutated freely.
    SFWriterElem *elem = S_SFWriterElem_create(Obj_Clone(value), doc_id);
    SortFieldWriter_Feed(self, (Obj*)elem);
    ivars->count++;
}
// Write the delta between the stepper's previous term text and `value`
// (which must be a String or CharBuf): a C32 count of shared leading
// bytes followed by the differing suffix.  Updates the cached value.
void
TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream,
                                Obj *value) {
    TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
    CharBuf *charbuf = (CharBuf*)ivars->value;
    const char *last_text = CB_Get_Ptr8(charbuf);
    size_t      last_size = CB_Get_Size(charbuf);
    const char *new_text = NULL;
    size_t      new_size = 0;

    if (Obj_is_a(value, STRING)) {
        String *new_string = (String*)value;
        new_text = Str_Get_Ptr8(new_string);
        new_size = Str_Get_Size(new_string);
    }
    else if (Obj_is_a(value, CHARBUF)) {
        CharBuf *new_charbuf = (CharBuf*)value;
        new_text = CB_Get_Ptr8(new_charbuf);
        new_size = CB_Get_Size(new_charbuf);
    }
    else {
        THROW(ERR, "'value' must be a String or CharBuf");
    }

    // Count how many bytes the strings share at the top.
    const int32_t overlap = StrHelp_overlap(last_text, new_text,
                                            last_size, new_size);
    const char *const diff_start_str = new_text + overlap;
    const size_t diff_len            = new_size - overlap;

    // Write number of common bytes and common bytes.
    OutStream_Write_C32(outstream, overlap);
    OutStream_Write_String(outstream, diff_start_str, diff_len);

    // Update value.
    CB_Mimic_Utf8(charbuf, new_text, new_size);

    // Invalidate string.
    DECREF(ivars->string);
    ivars->string = NULL;
}
// Write the snapshot as JSON to `path` within `folder`, or to an
// auto-generated "snapshot_<gen>.json" name when `path` is NULL/empty.
// THROWs rather than overwrite an existing file.
void
Snapshot_Write_File_IMP(Snapshot *self, Folder *folder, String *path) {
    SnapshotIVARS *const ivars = Snapshot_IVARS(self);
    Hash   *all_data = Hash_new(0);
    Vector *list     = Snapshot_List(self);

    // Update path.
    DECREF(ivars->path);
    if (path != NULL && Str_Get_Size(path) != 0) {
        ivars->path = Str_Clone(path);
    }
    else {
        // Derive the next generation number from the latest snapshot file.
        String *latest = IxFileNames_latest_snapshot(folder);
        uint64_t gen = latest ? IxFileNames_extract_gen(latest) + 1 : 1;
        char base36[StrHelp_MAX_BASE36_BYTES];
        StrHelp_to_base36(gen, &base36);
        // NOTE(review): `&base36` is a char(*)[N]; it shares the array's
        // address so %s formatting works, but the plain array name is the
        // conventional spelling -- confirm intent.
        ivars->path = Str_newf("snapshot_%s.json", &base36);
        DECREF(latest);
    }

    // Don't overwrite.
    if (Folder_Exists(folder, ivars->path)) {
        THROW(ERR, "Snapshot file '%o' already exists", ivars->path);
    }

    // Sort, then store file names.
    Vec_Sort(list);
    Hash_Store_Utf8(all_data, "entries", 7, (Obj*)list);

    // Create a JSON-izable data structure.
    Hash_Store_Utf8(all_data, "format", 6,
                    (Obj*)Str_newf("%i32",
                                   (int32_t)Snapshot_current_file_format));
    Hash_Store_Utf8(all_data, "subformat", 9,
                    (Obj*)Str_newf("%i32",
                                   (int32_t)Snapshot_current_file_subformat));

    // Write out JSON-ized data to the new file.
    Json_spew_json((Obj*)all_data, folder, ivars->path);

    DECREF(all_data);
}
// Read one scoring posting: delta-encoded doc id, frequency, a one-byte
// field boost, and `freq` raw C64 position deltas -- all packed into a
// RawPosting grabbed from `mem_pool`, which is then shrunk to fit.
RawPosting*
ScorePost_Read_Raw_IMP(ScorePosting *self, InStream *instream,
                       int32_t last_doc_id, String *term_text,
                       MemoryPool *mem_pool) {
    const char *const text_buf  = Str_Get_Ptr8(term_text);
    const size_t      text_size = Str_Get_Size(term_text);
    const uint32_t    doc_code  = InStream_Read_C32(instream);
    const uint32_t    delta_doc = doc_code >> 1;
    const int32_t     doc_id    = last_doc_id + delta_doc;
    // Low bit of doc_code set means the frequency is implicitly 1.
    const uint32_t    freq      = (doc_code & 1)
                                  ? 1
                                  : InStream_Read_C32(instream);
    const size_t base_size = Class_Get_Obj_Alloc_Size(RAWPOSTING);
    // Grab a worst-case allocation; trimmed via MemPool_Resize below.
    size_t raw_post_bytes  = MAX_RAW_POSTING_LEN(base_size, text_size, freq);
    void *const allocation = MemPool_Grab(mem_pool, raw_post_bytes);
    RawPosting *const raw_posting
        = RawPost_new(allocation, doc_id, freq, text_buf, text_size);
    RawPostingIVARS *const raw_post_ivars = RawPost_IVARS(raw_posting);
    uint32_t num_prox = freq;
    // Auxiliary data begins right after the term text in the blob.
    char *const start = raw_post_ivars->blob + text_size;
    char *dest        = start;
    UNUSED_VAR(self);

    // Field_boost.
    *((uint8_t*)dest) = InStream_Read_U8(instream);
    dest++;

    // Read positions.
    while (num_prox--) {
        dest += InStream_Read_Raw_C64(instream, dest);
    }

    // Resize raw posting memory allocation.
    raw_post_ivars->aux_len = dest - start;
    raw_post_bytes          = dest - (char*)raw_posting;
    MemPool_Resize(mem_pool, raw_posting, raw_post_bytes);

    return raw_posting;
}
// Create a bare subclass of `parent` named `name`: identical sizes and
// flags, a wholesale copy of the parent's vtable, and an empty
// (NULL-terminated) method array.  THROWs if `parent` is final.
static Class*
S_simple_subclass(Class *parent, String *name) {
    if (parent->flags & CFISH_fFINAL) {
        THROW(ERR, "Can't subclass final class %o", Class_Get_Name(parent));
    }

    Class *subclass
        = (Class*)Memory_wrapped_calloc(parent->class_alloc_size, 1);
    Class_Init_Obj(parent->klass, subclass);
    subclass->parent           = parent;
    subclass->flags            = parent->flags;
    subclass->obj_alloc_size   = parent->obj_alloc_size;
    subclass->class_alloc_size = parent->class_alloc_size;
    // One NULL slot: the subclass declares no novel methods of its own.
    subclass->methods = (Method**)CALLOCATE(1, sizeof(Method*));
    S_set_name(subclass, Str_Get_Ptr8(name), Str_Get_Size(name));
    // Copy the parent's method pointers wholesale.
    memcpy(subclass->vtable, parent->vtable,
           parent->class_alloc_size - offsetof(Class, vtable));

    return subclass;
}
// Expand a LeafQuery into Term/Phrase queries for each relevant field and
// OR the per-field queries together.  Returns NULL when `query` is not a
// LeafQuery or carries no text; returns a NoMatchQuery when analysis
// produces no tokens.
Query*
QParser_Expand_Leaf_IMP(QueryParser *self, Query *query) {
    QueryParserIVARS *const ivars = QParser_IVARS(self);
    LeafQuery *leaf_query = (LeafQuery*)query;
    Schema *schema = ivars->schema;
    bool is_phrase = false;
    bool ambiguous = false;

    // Determine whether we can actually process the input.
    if (!Query_is_a(query, LEAFQUERY)) { return NULL; }
    String *full_text = LeafQuery_Get_Text(leaf_query);
    if (!Str_Get_Size(full_text)) { return NULL; }

    // If quoted, always generate PhraseQuery.
    StringIterator *top  = Str_Top(full_text);
    StringIterator *tail = Str_Tail(full_text);
    StrIter_Skip_Next_Whitespace(top);
    StrIter_Skip_Prev_Whitespace(tail);
    if (StrIter_Starts_With_Utf8(top, "\"", 1)) {
        is_phrase = true;
        StrIter_Advance(top, 1);
        // Also strip the closing quote, unless it is escaped.
        if (StrIter_Ends_With_Utf8(tail, "\"", 1)
            && !StrIter_Ends_With_Utf8(tail, "\\\"", 2)
           ) {
            StrIter_Recede(tail, 1);
        }
    }
    String *source_text = StrIter_substring(top, tail);

    // Either use LeafQuery's field or default to Parser's list.
    Vector *fields;
    if (LeafQuery_Get_Field(leaf_query)) {
        fields = Vec_new(1);
        Vec_Push(fields, INCREF(LeafQuery_Get_Field(leaf_query)));
    }
    else {
        fields = (Vector*)INCREF(ivars->fields);
    }

    CharBuf *unescape_buf = CB_new(Str_Get_Size(source_text));
    Vector *queries = Vec_new(Vec_Get_Size(fields));
    for (uint32_t i = 0, max = Vec_Get_Size(fields); i < max; i++) {
        String *field = (String*)Vec_Fetch(fields, i);
        // The parser-wide analyzer takes precedence over per-field ones.
        Analyzer *analyzer = ivars->analyzer
                             ? ivars->analyzer
                             : Schema_Fetch_Analyzer(schema, field);
        if (!analyzer) {
            // No analysis: the raw source text is the term.
            Vec_Push(queries,
                     (Obj*)QParser_Make_Term_Query(self, field,
                                                   (Obj*)source_text));
        }
        else {
            // Extract token texts.
            String *split_source
                = S_unescape(self, source_text, unescape_buf);
            Vector *maybe_texts = Analyzer_Split(analyzer, split_source);
            uint32_t num_maybe_texts = Vec_Get_Size(maybe_texts);
            Vector *token_texts = Vec_new(num_maybe_texts);

            // Filter out zero-length token texts.
            for (uint32_t j = 0; j < num_maybe_texts; j++) {
                String *token_text = (String*)Vec_Fetch(maybe_texts, j);
                if (Str_Get_Size(token_text)) {
                    Vec_Push(token_texts, INCREF(token_text));
                }
            }

            if (Vec_Get_Size(token_texts) == 0) {
                /* Query might include stop words. Who knows? */
                ambiguous = true;
            }

            // Add either a TermQuery or a PhraseQuery.
            if (is_phrase || Vec_Get_Size(token_texts) > 1) {
                Vec_Push(queries,
                         (Obj*)QParser_Make_Phrase_Query(self, field,
                                                         token_texts));
            }
            else if (Vec_Get_Size(token_texts) == 1) {
                Vec_Push(queries,
                         (Obj*)QParser_Make_Term_Query(
                             self, field, Vec_Fetch(token_texts, 0)));
            }

            DECREF(token_texts);
            DECREF(maybe_texts);
            DECREF(split_source);
        }
    }

    // Collapse the per-field queries into a single Query.
    Query *retval;
    if (Vec_Get_Size(queries) == 0) {
        retval = (Query*)NoMatchQuery_new();
        if (ambiguous) {
            NoMatchQuery_Set_Fails_To_Match((NoMatchQuery*)retval, false);
        }
    }
    else if (Vec_Get_Size(queries) == 1) {
        retval = (Query*)INCREF(Vec_Fetch(queries, 0));
    }
    else {
        retval = QParser_Make_OR_Query(self, queries);
    }

    // Clean up.
    DECREF(unescape_buf);
    DECREF(queries);
    DECREF(fields);
    DECREF(source_text);
    DECREF(tail);
    DECREF(top);

    return retval;
}
// Insert highlight markup around the portions of `raw_excerpt` covered by
// `spans`.  `top` is the excerpt's character offset within the original
// field; span offsets are absolute, so they are rebased against it.
// Adjacent/overlapping spans merge into a single highlighted region, and
// all emitted text passes through S_do_encode.
String*
Highlighter_Highlight_Excerpt_IMP(Highlighter *self, VArray *spans,
                                  String *raw_excerpt, int32_t top) {
    int32_t hl_start = 0;   // Pending highlight region, relative to `top`.
    int32_t hl_end   = 0;
    StringIterator *iter = Str_Top(raw_excerpt);
    StringIterator *temp = Str_Top(raw_excerpt);
    CharBuf *buf = CB_new(Str_Get_Size(raw_excerpt) + 32);
    CharBuf *encode_buf = NULL;  // Scratch shared across S_do_encode calls.
    int32_t raw_excerpt_end = top + Str_Length(raw_excerpt);

    for (uint32_t i = 0, max = VA_Get_Size(spans); i < max; i++) {
        Span *span = (Span*)VA_Fetch(spans, i);
        int32_t offset = Span_Get_Offset(span);
        if (offset < top) {
            // Span starts before the excerpt; skip it.
            continue;
        }
        else if (offset >= raw_excerpt_end) {
            // Spans are sorted, so nothing further can land in range.
            break;
        }
        else {
            int32_t relative_start = offset - top;
            int32_t relative_end   = relative_start + Span_Get_Length(span);

            if (relative_start <= hl_end) {
                // Overlaps the pending region: extend it.
                if (relative_end > hl_end) { hl_end = relative_end; }
            }
            else {
                if (hl_start < hl_end) {
                    // Highlight previous section
                    int32_t highlighted_len = hl_end - hl_start;
                    StrIter_Assign(temp, iter);
                    StrIter_Advance(iter, highlighted_len);
                    String *to_cat  = StrIter_substring(temp, iter);
                    String *encoded = S_do_encode(self, to_cat, &encode_buf);
                    String *hl_frag = Highlighter_Highlight(self, encoded);
                    CB_Cat(buf, hl_frag);
                    DECREF(hl_frag);
                    DECREF(encoded);
                    DECREF(to_cat);
                }

                // Emit the unhighlighted gap before this span.
                int32_t non_highlighted_len = relative_start - hl_end;
                StrIter_Assign(temp, iter);
                StrIter_Advance(iter, non_highlighted_len);
                String *to_cat  = StrIter_substring(temp, iter);
                String *encoded = S_do_encode(self, to_cat, &encode_buf);
                CB_Cat(buf, (String*)encoded);
                DECREF(encoded);
                DECREF(to_cat);
                hl_start = relative_start;
                hl_end   = relative_end;
            }
        }
    }

    if (hl_start < hl_end) {
        // Highlight final section
        int32_t highlighted_len = hl_end - hl_start;
        StrIter_Assign(temp, iter);
        StrIter_Advance(iter, highlighted_len);
        String *to_cat  = StrIter_substring(temp, iter);
        String *encoded = S_do_encode(self, to_cat, &encode_buf);
        String *hl_frag = Highlighter_Highlight(self, encoded);
        CB_Cat(buf, hl_frag);
        DECREF(hl_frag);
        DECREF(encoded);
        DECREF(to_cat);
    }

    // Last text, beyond last highlight span.
    if (StrIter_Has_Next(iter)) {
        String *to_cat  = StrIter_substring(iter, NULL);
        String *encoded = S_do_encode(self, to_cat, &encode_buf);
        CB_Cat(buf, encoded);
        DECREF(encoded);
        DECREF(to_cat);
    }

    String *highlighted = CB_Yield_String(buf);
    DECREF(encode_buf);
    DECREF(buf);
    DECREF(temp);
    DECREF(iter);
    return highlighted;
}
// Store one inverted document: write the count of stored fields, then each
// stored field's serialized name and value to the `.dat` stream, and
// finally record the document's start offset in the fixed-width `.ix`
// stream (8 bytes per doc, hence the `/ 8` id check).
void
DocWriter_Add_Inverted_Doc_IMP(DocWriter *self, Inverter *inverter,
                               int32_t doc_id) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    OutStream *dat_out = S_lazy_init(self);
    OutStream *ix_out  = ivars->ix_out;
    uint32_t num_stored = 0;
    int64_t start    = OutStream_Tell(dat_out);
    int64_t expected = OutStream_Tell(ix_out) / 8;

    // Verify doc id.
    if (doc_id != expected) {
        THROW(ERR, "Expected doc id %i64 but got %i32", expected, doc_id);
    }

    // Write the number of stored fields.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (FType_Stored(type)) { num_stored++; }
    }
    OutStream_Write_C32(dat_out, num_stored);

    // Second pass: serialize each stored field.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        // Only store fields marked as "stored".
        FieldType *type = Inverter_Get_Type(inverter);
        if (FType_Stored(type)) {
            String *field = Inverter_Get_Field_Name(inverter);
            Obj *value    = Inverter_Get_Value(inverter);
            Freezer_serialize_string(field, dat_out);
            // Dispatch on the field's primitive type.
            switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
                case FType_TEXT: {
                    const char *buf = Str_Get_Ptr8((String*)value);
                    size_t size     = Str_Get_Size((String*)value);
                    OutStream_Write_C32(dat_out, size);
                    OutStream_Write_Bytes(dat_out, buf, size);
                    break;
                }
                case FType_BLOB: {
                    char *buf   = BB_Get_Buf((ByteBuf*)value);
                    size_t size = BB_Get_Size((ByteBuf*)value);
                    OutStream_Write_C32(dat_out, size);
                    OutStream_Write_Bytes(dat_out, buf, size);
                    break;
                }
                case FType_INT32: {
                    int32_t val = Int32_Get_Value((Integer32*)value);
                    OutStream_Write_C32(dat_out, val);
                    break;
                }
                case FType_INT64: {
                    int64_t val = Int64_Get_Value((Integer64*)value);
                    OutStream_Write_C64(dat_out, val);
                    break;
                }
                case FType_FLOAT32: {
                    float val = Float32_Get_Value((Float32*)value);
                    OutStream_Write_F32(dat_out, val);
                    break;
                }
                case FType_FLOAT64: {
                    double val = Float64_Get_Value((Float64*)value);
                    OutStream_Write_F64(dat_out, val);
                    break;
                }
                default:
                    THROW(ERR, "Unrecognized type: %o", type);
            }
        }
    }

    // Write file pointer.
    OutStream_Write_I64(ix_out, start);
}
// Bootstrap every Class described by `parcel_spec` in three passes, using
// atomic compare-and-swap so that concurrent bootstrappers of the same
// parcel are safe: whichever thread loses a CAS frees its copy and adopts
// the winner's.
void
Class_bootstrap(const cfish_ParcelSpec *parcel_spec) {
    const ClassSpec *specs = parcel_spec->class_specs;
    const NovelMethSpec *novel_specs = parcel_spec->novel_specs;
    const OverriddenMethSpec *overridden_specs
        = parcel_spec->overridden_specs;
    const InheritedMethSpec *inherited_specs = parcel_spec->inherited_specs;
    uint32_t num_classes = parcel_spec->num_classes;

    /* Pass 1:
     * - Allocate memory.
     * - Initialize global Class pointers.
     */
    for (uint32_t i = 0; i < num_classes; ++i) {
        const ClassSpec *spec = &specs[i];
        Class *parent = NULL;

        if (spec->parent) {
            parent = *spec->parent;
            if (!parent) {
                // Wrong order of class specs or inheritance cycle.
                fprintf(stderr, "Parent class of '%s' not initialized\n",
                        spec->name);
                abort();
            }
        }

        // Novel methods extend the vtable past the parent's slots.
        uint32_t novel_offset = parent
                                ? parent->class_alloc_size
                                : offsetof(Class, vtable);
        uint32_t class_alloc_size
            = novel_offset + spec->num_novel_meths * sizeof(cfish_method_t);
        Class *klass = (Class*)CALLOCATE(class_alloc_size, 1);

        // Needed to calculate size of subclasses.
        klass->class_alloc_size = class_alloc_size;

        // Initialize the global pointer to the Class.
        if (!Atomic_cas_ptr((void**)spec->klass, NULL, klass)) {
            // Another thread beat us to it.
            FREEMEM(klass);
        }
    }

    /* Pass 2:
     * - Initialize IVARS_OFFSET.
     * - Initialize 'klass' ivar and refcount by calling Init_Obj.
     * - Initialize parent, flags, obj_alloc_size, class_alloc_size.
     * - Assign parcel_spec.
     * - Initialize method pointers and offsets.
     */
    uint32_t num_novel      = 0;
    uint32_t num_overridden = 0;
    uint32_t num_inherited  = 0;
    for (uint32_t i = 0; i < num_classes; ++i) {
        const ClassSpec *spec = &specs[i];
        Class *klass  = *spec->klass;
        Class *parent = spec->parent ? *spec->parent : NULL;

        uint32_t ivars_offset = 0;
        if (spec->ivars_offset_ptr != NULL) {
            if (parent) {
                // Ivars start after those of the nearest ancestor that
                // belongs to a different parcel.
                Class *ancestor = parent;
                while (ancestor && ancestor->parcel_spec == parcel_spec) {
                    ancestor = ancestor->parent;
                }
                ivars_offset = ancestor ? ancestor->obj_alloc_size : 0;
                *spec->ivars_offset_ptr = ivars_offset;
            }
            else {
                *spec->ivars_offset_ptr = 0;
            }
        }

        // CLASS->obj_alloc_size is always 0, so Init_Obj doesn't clear any
        // values set in the previous pass or by another thread.
        Class_Init_Obj_IMP(CLASS, klass);

        klass->parent      = parent;
        klass->parcel_spec = parcel_spec;

        // CLASS->obj_alloc_size must stay at 0.
        if (klass != CLASS) {
            klass->obj_alloc_size = ivars_offset + spec->ivars_size;
        }

        if (cfish_Class_bootstrap_hook1 != NULL) {
            cfish_Class_bootstrap_hook1(klass);
        }

        klass->flags = 0;
        if (klass == CLASS
            || klass == METHOD
            || klass == BOOLEAN
            || klass == STRING
           ) {
            klass->flags |= CFISH_fREFCOUNTSPECIAL;
        }
        if (spec->flags & cfish_ClassSpec_FINAL) {
            klass->flags |= CFISH_fFINAL;
        }

        if (parent) {
            // Copy parent vtable.
            uint32_t parent_vt_size = parent->class_alloc_size
                                      - offsetof(Class, vtable);
            memcpy(klass->vtable, parent->vtable, parent_vt_size);
        }

        // Inherited methods: reuse the parent's offsets verbatim.
        for (size_t i = 0; i < spec->num_inherited_meths; ++i) {
            const InheritedMethSpec *mspec
                = &inherited_specs[num_inherited++];
            *mspec->offset = *mspec->parent_offset;
        }

        // Overridden methods: same slot as the parent, new function.
        for (size_t i = 0; i < spec->num_overridden_meths; ++i) {
            const OverriddenMethSpec *mspec
                = &overridden_specs[num_overridden++];
            *mspec->offset = *mspec->parent_offset;
            Class_Override_IMP(klass, mspec->func, *mspec->offset);
        }

        // Novel methods: append new slots after the parent's vtable.
        uint32_t novel_offset = parent
                                ? parent->class_alloc_size
                                : offsetof(Class, vtable);
        for (size_t i = 0; i < spec->num_novel_meths; ++i) {
            const NovelMethSpec *mspec = &novel_specs[num_novel++];
            *mspec->offset = novel_offset;
            novel_offset += sizeof(cfish_method_t);
            Class_Override_IMP(klass, mspec->func, *mspec->offset);
        }
    }

    /* Now it's safe to call methods.
     *
     * Pass 3:
     * - Initialize name and method array.
     * - Register class.
     */
    num_novel      = 0;
    num_overridden = 0;
    num_inherited  = 0;
    for (uint32_t i = 0; i < num_classes; ++i) {
        const ClassSpec *spec = &specs[i];
        Class *klass = *spec->klass;

        // Install the class name (CAS-guarded, loser DECREFs its copy).
        String *name_internal
            = Str_new_from_trusted_utf8(spec->name, strlen(spec->name));
        if (!Atomic_cas_ptr((void**)&klass->name_internal, NULL,
                            name_internal)
           ) {
            DECREF(name_internal);
            name_internal = klass->name_internal;
        }
        String *name
            = Str_new_wrap_trusted_utf8(Str_Get_Ptr8(name_internal),
                                        Str_Get_Size(name_internal));
        if (!Atomic_cas_ptr((void**)&klass->name, NULL, name)) {
            DECREF(name);
            name = klass->name;
        }

        Method **methods
            = (Method**)MALLOCATE((spec->num_novel_meths + 1)
                                  * sizeof(Method*));

        // Only store novel methods for now.
        for (size_t i = 0; i < spec->num_novel_meths; ++i) {
            const NovelMethSpec *mspec = &novel_specs[num_novel++];
            String *name = SSTR_WRAP_C(mspec->name);
            Method *method = Method_new(name, mspec->callback_func,
                                        *mspec->offset);
            methods[i] = method;
        }

        methods[spec->num_novel_meths] = NULL;
        if (!Atomic_cas_ptr((void**)&klass->methods, NULL, methods)) {
            // Another thread beat us to it.
            for (size_t i = 0; i < spec->num_novel_meths; ++i) {
                Method_Destroy(methods[i]);
            }
            FREEMEM(methods);
        }

        Class_add_to_registry(klass);
    }
}