/* Exercise FType_Compare_Values() through a DummyFieldType using two
 * single-character strings: the result must be negative for ("a", "b"),
 * positive for ("b", "a"), and zero when a value is compared to itself.
 */
static void
test_Compare_Values(TestBatchRunner *runner) {
    FieldType   *ftype   = (FieldType*)DummyFieldType_new();
    StackString *lesser  = SSTR_WRAP_UTF8("a", 1);
    StackString *greater = SSTR_WRAP_UTF8("b", 1);

    TEST_TRUE(runner,
              FType_Compare_Values(ftype, (Obj*)lesser, (Obj*)greater) < 0,
              "a less than b");
    TEST_TRUE(runner,
              FType_Compare_Values(ftype, (Obj*)greater, (Obj*)lesser) > 0,
              "b greater than a");
    TEST_TRUE(runner,
              FType_Compare_Values(ftype, (Obj*)greater, (Obj*)greater) == 0,
              "b equals b");

    /* The field type is the only object this test owns. */
    DECREF(ftype);
}
/* Position the index on the entry matching `target`.
 *
 * A NULL target or an empty index resets `tick` to 0 and returns without
 * reading an entry.  Otherwise the entries are binary-searched by decoding
 * each probed term in place and comparing it with the field type's
 * comparator.  Afterwards `tick` is:
 *   - the index of the matching entry, if one was found;
 *   - 0 if the search ran off the front (target sorts before entry 0);
 *   - otherwise the final upper bound of the search (presumably the last
 *     entry sorting before `target`, assuming entries are stored in
 *     ascending comparator order).
 * The entry at `tick` is then loaded via S_read_entry().
 *
 * Throws if `target` is non-NULL and not a CHARBUF.
 */
void
LexIndex_seek(LexIndex *self, Obj *target) {
    const i32_t  UNSET      = -100;  /* can never be a real entry index */
    FieldType   *comparator = self->field_type;
    i32_t        low        = 0;
    i32_t        high       = self->size - 1;
    i32_t        match      = UNSET;

    /* Nothing to look for, or nothing to look in. */
    if (target == NULL || self->size == 0) {
        self->tick = 0;
        return;
    }

    /* TODO: validate against the class of the first stored term rather
     * than a hard-coded CHARBUF. */
    if (!OBJ_IS_A(target, CHARBUF)) {
        THROW("Target is a %o, and not comparable to a %o",
              Obj_Get_Class_Name(target), CHARBUF.name);
    }

    /* Binary search. */
    while (low <= high) {
        const i32_t  mid    = low + ((high - low) / 2);
        const i64_t  offset = (i64_t)Math_decode_bigend_u64(self->offsets + mid);
        char        *ptr    = self->data + offset;
        /* Decoding the length prefix advances `ptr` to the term text. */
        size_t       len    = Math_decode_c32(&ptr);
        i64_t        cmp;

        ViewCB_Assign_Str(self->term, ptr, len);
        cmp = FType_Compare_Values(comparator, target, (Obj*)self->term);

        if (cmp == 0) {
            match = mid;
            break;
        }
        if (cmp < 0) {
            high = mid - 1;
        }
        else {
            low = mid + 1;
        }
    }

    /* Record the index of the entry we've seeked to, then read entry. */
    if (high == -1) {
        /* Target is less than the first entry. */
        self->tick = 0;
    }
    else if (match == UNSET) {
        /* No exact match: settle on the final upper bound. */
        self->tick = high;
    }
    else {
        self->tick = match;
    }
    S_read_entry(self);
}
/* Binary-search the cache's unique values for `term`.
 *
 * Returns:
 *   - the ordinal of the value that compares equal to `term`, if any;
 *   - -1 if the search's upper bound ends below zero (`term` compares less
 *     than the first value, or the cache holds no unique values);
 *   - otherwise the final upper bound of the search (presumably the last
 *     ordinal whose value sorts before `term`, assuming values are stored
 *     in ascending comparator order).
 *
 * Throws if `term` is non-NULL and not a CHARBUF.  A NULL `term` is passed
 * through to FType_Compare_Values() unchanged.
 */
i32_t
SortCache_find(SortCache *self, Obj *term) {
    const i32_t       UNSET      = -100;  /* can never be a real ordinal */
    FieldType *const  comparator = self->type;
    i32_t             low        = 0;
    i32_t             high       = self->num_uniq - 1;
    i32_t             match      = UNSET;
    ZombieCharBuf     scratch    = ZCB_BLANK;

    if (term != NULL && !OBJ_IS_A(term, CHARBUF)) {
        THROW("term is a %o, and not comparable to a %o",
              Obj_Get_Class_Name(term), CHARBUF.name);
    }

    /* Binary search. */
    while (low <= high) {
        const i32_t  mid = low + ((high - low) / 2);
        ViewCharBuf *candidate
            = SortCache_Value(self, mid, (ViewCharBuf*)&scratch);
        i64_t cmp = FType_Compare_Values(comparator, term, (Obj*)candidate);

        if (cmp == 0) {
            match = mid;
            break;
        }
        if (cmp < 0) {
            high = mid - 1;
        }
        else {
            low = mid + 1;
        }
    }

    if (high < 0) {
        /* Target is "less than" the first cache entry. */
        return -1;
    }

    /* If `match` was never set, fall back to the final upper bound. */
    return match == UNSET ? high : match;
}
// Position the index on the entry matching `target`.
//
// A NULL target or an empty index resets `tick` to 0 and returns without
// reading an entry.  Otherwise the key frames are binary-searched: each
// probe seeks the index stream to the recorded offset, reads a key frame
// through the term stepper, and compares the resulting value with `target`
// using the field type's comparator.  Afterwards `tick` is:
//   - the index of the matching entry, if one was found;
//   - 0 if the search ran off the front (target sorts before entry 0);
//   - otherwise the final upper bound of the search (presumably the last
//     entry sorting before `target`, assuming entries are stored in
//     ascending comparator order).
// The entry at `tick` is then loaded via S_read_entry().
//
// Throws if `target` is non-NULL and not a STRING.
void
LexIndex_Seek_IMP(LexIndex *self, Obj *target) {
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);

    // Nothing to look for, or nothing to look in.
    if (target == NULL || ivars->size == 0) {
        ivars->tick = 0;
        return;
    }

    // TODO: validate against the class of the first stored term rather
    // than a hard-coded STRING.
    if (!Obj_is_a(target, STRING)) {
        THROW(ERR, "Target is a %o, and not comparable to a %o",
              Obj_get_class_name(target), Class_Get_Name(STRING));
    }

    const int32_t  UNSET      = -100;  // can never be a real entry index
    TermStepper   *stepper    = ivars->term_stepper;
    InStream      *instream   = ivars->ix_in;
    FieldType     *comparator = ivars->field_type;
    int32_t        low        = 0;
    int32_t        high       = ivars->size - 1;
    int32_t        match      = UNSET;

    // Binary search.
    while (low <= high) {
        const int32_t mid = low + ((high - low) / 2);
        const int64_t offset
            = (int64_t)NumUtil_decode_bigend_u64(ivars->offsets + mid);

        InStream_Seek(instream, offset);
        TermStepper_Read_Key_Frame(stepper, instream);

        // Compare values.  There is no need for a NULL-check because the
        // term number is always between 0 and ivars->size - 1.
        Obj     *candidate = TermStepper_Get_Value(stepper);
        int32_t  cmp = FType_Compare_Values(comparator, target, candidate);

        if (cmp == 0) {
            match = mid;
            break;
        }
        if (cmp < 0) {
            high = mid - 1;
        }
        else {
            low = mid + 1;
        }
    }

    // Record the index of the entry we've seeked to, then read entry.
    if (high == -1) {
        // Target is less than the first entry.
        ivars->tick = 0;
    }
    else if (match == UNSET) {
        // No exact match: settle on the final upper bound.
        ivars->tick = high;
    }
    else {
        ivars->tick = match;
    }
    S_read_entry(self);
}
static int32_t S_write_files(SortFieldWriter *self, OutStream *ord_out, OutStream *ix_out, OutStream *dat_out) { SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self); int8_t prim_id = ivars->prim_id; int32_t doc_max = (int32_t)Seg_Get_Count(ivars->segment); bool has_nulls = ivars->count == doc_max ? false : true; size_t size = (doc_max + 1) * sizeof(int32_t); int32_t *ords = (int32_t*)MALLOCATE(size); int32_t ord = 0; int64_t dat_start = OutStream_Tell(dat_out); // Assign -1 as a stand-in for the NULL ord. for (int32_t i = 0; i <= doc_max; i++) { ords[i] = -1; } // Grab the first item and record its ord. Add a dummy ord for invalid // doc id 0. SFWriterElem *elem = (SFWriterElem*)SortFieldWriter_Fetch(self); SFWriterElemIVARS *elem_ivars = SFWriterElem_IVARS(elem); if (elem_ivars->doc_id > doc_max) { THROW(ERR, "doc_id %i32 greater than doc_max %i32", elem_ivars->doc_id, doc_max); } ords[elem_ivars->doc_id] = ord; ords[0] = 0; // Build array of ords, write non-NULL sorted values. Obj *last_val = INCREF(elem_ivars->value); S_write_val(elem_ivars->value, prim_id, ix_out, dat_out, dat_start); DECREF(elem); while (NULL != (elem = (SFWriterElem*)SortFieldWriter_Fetch(self))) { elem_ivars = SFWriterElem_IVARS(elem); if (elem_ivars->value != last_val) { int32_t comparison = FType_Compare_Values(ivars->type, elem_ivars->value, last_val); if (comparison != 0) { ord++; S_write_val(elem_ivars->value, prim_id, ix_out, dat_out, dat_start); } DECREF(last_val); last_val = INCREF(elem_ivars->value); } if (elem_ivars->doc_id > doc_max) { THROW(ERR, "doc_id %i32 greater than doc_max %i32", elem_ivars->doc_id, doc_max); } ords[elem_ivars->doc_id] = ord; DECREF(elem); } DECREF(last_val); // If there are NULL values, write one now and record the NULL ord. if (has_nulls) { S_write_val(NULL, prim_id, ix_out, dat_out, dat_start); ord++; ivars->null_ord = ord; } int32_t null_ord = ivars->null_ord; // Write one extra file pointer so that we can always derive length. 
if (ivars->var_width) { OutStream_Write_I64(ix_out, OutStream_Tell(dat_out) - dat_start); } // Calculate cardinality and ord width. int32_t cardinality = ord + 1; ivars->ord_width = S_calc_width(cardinality); int32_t ord_width = ivars->ord_width; // Write ords. const double BITS_PER_BYTE = 8.0; double bytes_per_doc = ord_width / BITS_PER_BYTE; double byte_count = ceil((doc_max + 1) * bytes_per_doc); char *compressed_ords = (char*)CALLOCATE((size_t)byte_count, sizeof(char)); for (int32_t i = 0; i <= doc_max; i++) { int32_t real_ord = ords[i] == -1 ? null_ord : ords[i]; S_write_ord(compressed_ords, ord_width, i, real_ord); } OutStream_Write_Bytes(ord_out, compressed_ords, (size_t)byte_count); FREEMEM(compressed_ords); FREEMEM(ords); return cardinality; }