// Request deletion by term: hand `field` and a term to the Indexer's
// deletions writer.
//
// Throws ERR when the schema has no type for `field`, or when that type is
// not indexed.
//
// For full-text fields, `term` must be a CharBuf.  It is run through the
// field's Analyzer and only the first element of the result is forwarded;
// if the Analyzer produces nothing, no deletion is requested.  For every
// other field type, `term` is forwarded as-is.
void
Indexer_delete_by_term(Indexer *self, CharBuf *field, Obj *term) {
    Schema    *schema     = self->schema;
    FieldType *field_type = Schema_Fetch_Type(schema, field);
    Analyzer  *analyzer;
    VArray    *tokens;
    Obj       *first_token;

    // Unknown and non-indexed fields are both rejected.
    if (field_type == NULL || !FType_Indexed(field_type)) {
        THROW(ERR, "%o is not an indexed field", field);
    }

    // Anything that is not full-text skips analysis entirely.
    if (!FType_Is_A(field_type, FULLTEXTTYPE)) {
        DelWriter_Delete_By_Term(self->del_writer, field, term);
        return;
    }

    // Full-text: analyze the term, then zap using the first token only.
    CERTIFY(term, CHARBUF);
    analyzer    = Schema_Fetch_Analyzer(schema, field);
    tokens      = Analyzer_Split(analyzer, (CharBuf*)term);
    first_token = VA_Fetch(tokens, 0);
    if (first_token != NULL) {
        DelWriter_Delete_By_Term(self->del_writer, field, first_token);
    }
    DECREF(tokens);
}
// Request deletion by term: pass `field` and a term on to the deletions
// writer held in this Indexer's ivars.
//
// Throws ERR when `field` has no type in the schema or its type is not
// indexed.
//
// Full-text fields require `term` to be a String; the term is split by the
// field's Analyzer and only element 0 of the resulting Vector is used.  When
// that element is absent, nothing is forwarded.  For other field types the
// caller's `term` is forwarded unchanged.
void
Indexer_Delete_By_Term_IMP(Indexer *self, String *field, Obj *term) {
    IndexerIVARS *const ivars = Indexer_IVARS(self);
    Schema    *schema     = ivars->schema;
    FieldType *field_type = Schema_Fetch_Type(schema, field);

    // Raise an exception for unknown or non-indexed fields.
    if (field_type == NULL || !FType_Indexed(field_type)) {
        THROW(ERR, "%o is not an indexed field", field);
    }

    // No analysis for non-full-text fields: use the term verbatim.
    if (!FType_is_a(field_type, FULLTEXTTYPE)) {
        DelWriter_Delete_By_Term(ivars->del_writer, field, term);
        return;
    }

    // Full-text: analyze first, then delete by the leading token.
    CERTIFY(term, STRING);
    Analyzer *analyzer    = Schema_Fetch_Analyzer(schema, field);
    Vector   *tokens      = Analyzer_Split(analyzer, (String*)term);
    Obj      *first_token = Vec_Fetch(tokens, 0);
    if (first_token != NULL) {
        DelWriter_Delete_By_Term(ivars->del_writer, field, first_token);
    }
    DECREF(tokens);
}
// Initialize an InverterEntry for `field` / `field_num`.
//
// The field name is cloned when non-NULL, and `inversion` starts out NULL.
// When `schema` is NULL nothing else is set up.  Otherwise the field's
// analyzer, similarity and type are fetched from the schema (each with its
// refcount incremented), and a placeholder `value` object is chosen by the
// type's primitive id: NULL for text, an empty ViewBB for blobs, and a
// zero-valued numeric object for the int/float variants.
//
// Throws ERR when:
//   - the schema has no type for `field`;
//   - the primitive id is not one of the recognized cases;
//   - the type is both indexed and numeric.
//
// Returns `self`.
InverterEntry*
InvEntry_init(InverterEntry *self, Schema *schema, String *field,
              int32_t field_num) {
    InverterEntryIVARS *const ivars = InvEntry_IVARS(self);

    ivars->field_num = field_num;
    if (field != NULL) {
        ivars->field = Str_Clone(field);
    }
    else {
        ivars->field = NULL;
    }
    ivars->inversion = NULL;

    // Without a schema there is nothing further to look up.
    if (schema == NULL) {
        return self;
    }

    ivars->analyzer = (Analyzer*)INCREF(Schema_Fetch_Analyzer(schema, field));
    ivars->sim      = (Similarity*)INCREF(Schema_Fetch_Sim(schema, field));
    ivars->type     = (FieldType*)INCREF(Schema_Fetch_Type(schema, field));
    if (ivars->type == NULL) {
        THROW(ERR, "Unknown field: '%o'", field);
    }

    // Pick the placeholder value from the masked primitive id.
    uint8_t primitive = FType_Primitive_ID(ivars->type);
    switch (primitive & FType_PRIMITIVE_ID_MASK) {
        case FType_TEXT:
            ivars->value = NULL;
            break;
        case FType_BLOB:
            ivars->value = (Obj*)ViewBB_new(NULL, 0);
            break;
        case FType_INT32:
            ivars->value = (Obj*)Int32_new(0);
            break;
        case FType_INT64:
            ivars->value = (Obj*)Int64_new(0);
            break;
        case FType_FLOAT32:
            ivars->value = (Obj*)Float32_new(0);
            break;
        case FType_FLOAT64:
            ivars->value = (Obj*)Float64_new(0);
            break;
        default:
            THROW(ERR, "Unrecognized primitive id: %i8", primitive);
    }

    // Indexed numeric fields are rejected outright.
    ivars->indexed = FType_Indexed(ivars->type);
    if (ivars->indexed && FType_Is_A(ivars->type, NUMERICTYPE)) {
        THROW(ERR, "Field '%o' spec'd as indexed, but numerical types cannot "
              "be indexed yet", field);
    }

    // Only full-text types carry a highlightable setting.
    if (FType_Is_A(ivars->type, FULLTEXTTYPE)) {
        FullTextType *fulltext = (FullTextType*)ivars->type;
        ivars->highlightable = FullTextType_Highlightable(fulltext);
    }

    return self;
}
// Initialize a QueryParser.
//
// - `schema` and `analyzer` are stored with their refcounts incremented.
// - `default_boolop` is cloned when supplied; otherwise "OR" is used.  It
//   must equal "OR" (default occur becomes SHOULD) or "AND" (default occur
//   becomes MUST); any other value throws ERR.
// - `fields`, when supplied, is cloned and every element is CERTIFY'd to be
//   a String.  When NULL, the field list is built from every schema field
//   whose type exists and is indexed.
//
// In both cases the resulting field list is sorted exactly once, after it
// has been fully assembled.
//
// Returns `self`.
QueryParser*
QParser_init(QueryParser *self, Schema *schema, Analyzer *analyzer,
             String *default_boolop, Vector *fields) {
    QueryParserIVARS *const ivars = QParser_IVARS(self);

    // Init.
    ivars->heed_colons = false;
    ivars->lexer       = QueryLexer_new();

    // Assign.
    ivars->schema         = (Schema*)INCREF(schema);
    ivars->analyzer       = (Analyzer*)INCREF(analyzer);
    ivars->default_boolop = default_boolop
                            ? Str_Clone(default_boolop)
                            : Str_new_from_trusted_utf8("OR", 2);

    if (fields) {
        // Caller-supplied list: copy it and verify each entry is a String.
        ivars->fields = Vec_Clone(fields);
        for (uint32_t i = 0, max = Vec_Get_Size(fields); i < max; i++) {
            CERTIFY(Vec_Fetch(fields, i), STRING);
        }
    }
    else {
        // No list given: collect every indexed field known to the schema.
        Vector  *all_fields = Schema_All_Fields(schema);
        uint32_t num_fields = Vec_Get_Size(all_fields);
        ivars->fields = Vec_new(num_fields);
        for (uint32_t i = 0; i < num_fields; i++) {
            String    *field = (String*)Vec_Fetch(all_fields, i);
            FieldType *type  = Schema_Fetch_Type(schema, field);
            if (type && FType_Indexed(type)) {
                Vec_Push(ivars->fields, INCREF(field));
            }
        }
        DECREF(all_fields);
    }

    // Single sort covering both branches (the caller-supplied branch used to
    // sort a second time on its own, which was redundant).
    Vec_Sort(ivars->fields);

    // Derive default "occur" from default boolean operator.
    if (Str_Equals_Utf8(ivars->default_boolop, "OR", 2)) {
        ivars->default_occur = SHOULD;
    }
    else if (Str_Equals_Utf8(ivars->default_boolop, "AND", 3)) {
        ivars->default_occur = MUST;
    }
    else {
        THROW(ERR, "Invalid value for default_boolop: %o",
              ivars->default_boolop);
    }

    return self;
}
// Indicate whether it is safe to build a SegLexicon using the given // parameters. Will return false if the field is not indexed or if no terms // are present for this field in this segment. static bool S_has_data(Schema *schema, Folder *folder, Segment *segment, String *field) { FieldType *type = Schema_Fetch_Type(schema, field); if (!type || !FType_Indexed(type)) { // If the field isn't indexed, bail out. return false; } else { // Bail out if there are no terms for this field in this segment. int32_t field_num = Seg_Field_Num(segment, field); String *seg_name = Seg_Get_Name(segment); String *file = Str_newf("%o/lexicon-%i32.dat", seg_name, field_num); bool retval = Folder_Exists(folder, file); DECREF(file); return retval; } }