Example #1
0
/* Exercise FType_Compare_Values on a DummyFieldType with two one-character
 * strings, checking the less-than, greater-than and equal outcomes. */
static void
test_Compare_Values(TestBatchRunner *runner) {
    FieldType   *dummy_type = (FieldType*)DummyFieldType_new();
    StackString *str_a      = SSTR_WRAP_UTF8("a", 1);
    StackString *str_b      = SSTR_WRAP_UTF8("b", 1);
    Obj         *obj_a      = (Obj*)str_a;
    Obj         *obj_b      = (Obj*)str_b;

    /* "a" against "b" must yield a negative result. */
    TEST_TRUE(runner,
              FType_Compare_Values(dummy_type, obj_a, obj_b) < 0,
              "a less than b");

    /* Swapping the operands must yield a positive result. */
    TEST_TRUE(runner,
              FType_Compare_Values(dummy_type, obj_b, obj_a) > 0,
              "b greater than a");

    /* A value compared against itself must yield zero. */
    TEST_TRUE(runner,
              FType_Compare_Values(dummy_type, obj_b, obj_b) == 0,
              "b equals b");

    /* Release the field type created at the top of the test. */
    DECREF(dummy_type);
}
Example #2
0
/* Binary-search the index entries for `target` and record the resulting
 * position in self->tick, then load that entry via S_read_entry().
 *
 * If `target` is NULL or the index has no entries, the tick is reset to 0
 * and no entry is read.  A non-NULL target that is not a CHARBUF triggers a
 * THROW.
 *
 * Resulting tick:
 *   - 0 when the search ends with the upper bound at -1 (target sorts
 *     before the first entry),
 *   - the final upper bound when no exact match was found,
 *   - the index of the matching entry otherwise.
 */
void
LexIndex_seek(LexIndex *self, Obj *target)
{
    FieldType *const field_type = self->field_type;
    i32_t            bottom     = 0;
    i32_t            top        = self->size - 1;
    i32_t            match      = -100; /* sentinel: no exact match yet */

    /* Nothing to look for, or nothing to look in. */
    if (target == NULL || self->size == 0) {
        self->tick = 0;
        return;
    }

    if ( !OBJ_IS_A(target, CHARBUF)) {
        THROW("Target is a %o, and not comparable to a %o",
            Obj_Get_Class_Name(target), CHARBUF.name);
    }
    /* TODO: validate the target against the class of the first stored term
     * instead of hard-coding CHARBUF. */

    /* Halve the search window until it is empty or an exact match is
     * found. */
    while (match == -100 && bottom <= top) {
        const i32_t probe = bottom + ((top - bottom) / 2);
        const i64_t entry_offset
            = (i64_t)Math_decode_bigend_u64(self->offsets + probe);
        char  *cursor = self->data + entry_offset;
        /* Decoding the length advances `cursor` past it. */
        size_t term_len = Math_decode_c32(&cursor);
        i64_t  order;

        ViewCB_Assign_Str(self->term, cursor, term_len);
        order = FType_Compare_Values(field_type, target, (Obj*)self->term);

        if (order == 0) {
            match = probe;
        }
        else if (order < 0) {
            top = probe - 1;
        }
        else {
            bottom = probe + 1;
        }
    }

    /* Record the index of the entry we've seeked to, then read entry. */
    if (top == -1) {
        /* Target is less than the first entry. */
        self->tick = 0;
    }
    else if (match == -100) {
        /* The sentinel survived, so there was no exact match. */
        self->tick = top;
    }
    else {
        self->tick = match;
    }
    S_read_entry(self);
}
Example #3
0
/* Binary-search the cache's unique values for `term`.
 *
 * A non-NULL `term` that is not a CHARBUF triggers a THROW.  A NULL `term`
 * is passed straight through to FType_Compare_Values().
 *
 * Returns:
 *   - -1 when the search ends with the upper bound below zero (the term
 *     sorts before the first cache entry),
 *   - the final upper bound when no exact match was found,
 *   - the index of the matching value otherwise.
 */
i32_t
SortCache_find(SortCache *self, Obj *term)
{
    FieldType *const field_type = self->type;
    ZombieCharBuf    scratch    = ZCB_BLANK;
    i32_t            bottom     = 0;
    i32_t            top        = self->num_uniq - 1;
    i32_t            match      = -100; /* sentinel: no exact match yet */

    if (term != NULL) {
        if ( !OBJ_IS_A(term, CHARBUF)) {
            THROW("term is a %o, and not comparable to a %o",
                Obj_Get_Class_Name(term), CHARBUF.name);
        }
    }

    /* Halve the search window until it is empty or an exact match is
     * found. */
    while (match == -100 && bottom <= top) {
        const i32_t  probe = bottom + ((top - bottom) / 2);
        ViewCharBuf *candidate
            = SortCache_Value(self, probe, (ViewCharBuf*)&scratch);
        i64_t order = FType_Compare_Values(field_type, term, (Obj*)candidate);

        if (order == 0) {
            match = probe;
        }
        else if (order < 0) {
            top = probe - 1;
        }
        else {
            bottom = probe + 1;
        }
    }

    /* Target is "less than" the first cache entry. */
    if (top < 0) {
        return -1;
    }

    /* If the sentinel survived there was no exact match; fall back to the
     * final upper bound. */
    return match == -100 ? top : match;
}
Example #4
0
// Binary-search the index entries for `target` and record the resulting
// position in ivars->tick, then load that entry via S_read_entry().
//
// If `target` is NULL or the index has no entries, the tick is reset to 0
// and no entry is read.  A non-NULL target that is not a STRING triggers a
// THROW.
//
// Resulting tick:
//   - 0 when the search ends with the upper bound at -1 (target sorts
//     before the first entry),
//   - the final upper bound when no exact match was found,
//   - the index of the matching entry otherwise.
void
LexIndex_Seek_IMP(LexIndex *self, Obj *target) {
    LexIndexIVARS *const ivars = LexIndex_IVARS(self);
    TermStepper *stepper    = ivars->term_stepper;
    InStream    *index_in   = ivars->ix_in;
    FieldType   *field_type = ivars->field_type;
    int32_t      bottom     = 0;
    int32_t      top        = ivars->size - 1;
    int32_t      match      = -100; // sentinel: no exact match yet

    // Nothing to look for, or nothing to look in.
    if (target == NULL || ivars->size == 0) {
        ivars->tick = 0;
        return;
    }

    if (!Obj_is_a(target, STRING)) {
        THROW(ERR, "Target is a %o, and not comparable to a %o",
              Obj_get_class_name(target), Class_Get_Name(STRING));
    }
    // TODO: validate the target against the class of the first stored term
    // instead of hard-coding STRING.

    // Halve the search window until it is empty or an exact match is found.
    while (match == -100 && bottom <= top) {
        const int32_t probe = bottom + ((top - bottom) / 2);
        const int64_t key_frame_pos
            = (int64_t)NumUtil_decode_bigend_u64(ivars->offsets + probe);

        // Load the key frame stored at the probed position.
        InStream_Seek(index_in, key_frame_pos);
        TermStepper_Read_Key_Frame(stepper, index_in);

        // Compare values.  There is no need for a NULL-check because the
        // term number is always between 0 and ivars->size - 1.
        Obj *candidate = TermStepper_Get_Value(stepper);
        int32_t order = FType_Compare_Values(field_type, target, candidate);

        if (order == 0) {
            match = probe;
        }
        else if (order < 0) {
            top = probe - 1;
        }
        else {
            bottom = probe + 1;
        }
    }

    // Record the index of the entry we've seeked to, then read entry.
    if (top == -1) {
        // Target is less than the first entry.
        ivars->tick = 0;
    }
    else if (match == -100) {
        // The sentinel survived, so there was no exact match.
        ivars->tick = top;
    }
    else {
        ivars->tick = match;
    }
    S_read_entry(self);
}
// Drain the elements produced by SortFieldWriter_Fetch(), write each
// distinct value once via S_write_val(), and write a bit-packed array of
// per-document ordinals to `ord_out`.
//
// Documents that never appear among the fetched elements keep the stand-in
// ord -1 and are mapped to a dedicated NULL ord when ivars->count differs
// from the segment's doc count.  As side effects, ivars->null_ord (only when
// there are NULLs) and ivars->ord_width are updated.
//
// Throws if a fetched element carries a doc_id greater than the segment's
// doc count.  Everything this function owns at that point (the `ords`
// buffer, the fetched element and the retained previous value) is released
// before throwing so the error path does not leak.
//
// NOTE(review): the first SortFieldWriter_Fetch() result is used without a
// NULL check -- presumably the caller guarantees at least one element;
// confirm against the call site.
//
// Returns the cardinality, i.e. the highest assigned ord plus one.
static int32_t
S_write_files(SortFieldWriter *self, OutStream *ord_out, OutStream *ix_out,
              OutStream *dat_out) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    int8_t    prim_id   = ivars->prim_id;
    int32_t   doc_max   = (int32_t)Seg_Get_Count(ivars->segment);
    bool      has_nulls = ivars->count != doc_max;
    size_t    size      = (doc_max + 1) * sizeof(int32_t);
    int32_t  *ords      = (int32_t*)MALLOCATE(size);
    int32_t   ord       = 0;
    int64_t   dat_start = OutStream_Tell(dat_out);

    // Assign -1 as a stand-in for the NULL ord.
    for (int32_t i = 0; i <= doc_max; i++) {
        ords[i] = -1;
    }

    // Grab the first item and record its ord.  Add a dummy ord for invalid
    // doc id 0.
    SFWriterElem *elem = (SFWriterElem*)SortFieldWriter_Fetch(self);
    SFWriterElemIVARS *elem_ivars = SFWriterElem_IVARS(elem);
    if (elem_ivars->doc_id > doc_max) {
        // Copy the id out first: the element is released before throwing.
        int32_t bad_doc_id = elem_ivars->doc_id;
        DECREF(elem);
        FREEMEM(ords);
        THROW(ERR, "doc_id %i32 greater than doc_max %i32",
              bad_doc_id, doc_max);
    }
    ords[elem_ivars->doc_id] = ord;
    ords[0] = 0;

    // Build array of ords, write non-NULL sorted values.
    Obj *last_val = INCREF(elem_ivars->value);
    S_write_val(elem_ivars->value, prim_id, ix_out, dat_out, dat_start);
    DECREF(elem);
    while (NULL != (elem = (SFWriterElem*)SortFieldWriter_Fetch(self))) {
        elem_ivars = SFWriterElem_IVARS(elem);

        // Pointer identity short-circuits the comparison; only a value that
        // compares unequal to its predecessor starts a new ord.
        if (elem_ivars->value != last_val) {
            int32_t comparison
                = FType_Compare_Values(ivars->type, elem_ivars->value,
                                       last_val);
            if (comparison != 0) {
                ord++;
                S_write_val(elem_ivars->value, prim_id, ix_out, dat_out,
                            dat_start);
            }
            DECREF(last_val);
            last_val = INCREF(elem_ivars->value);
        }
        if (elem_ivars->doc_id > doc_max) {
            // Copy the id out first, then drop every reference and buffer
            // held here before throwing.
            int32_t bad_doc_id = elem_ivars->doc_id;
            DECREF(last_val);
            DECREF(elem);
            FREEMEM(ords);
            THROW(ERR, "doc_id %i32 greater than doc_max %i32",
                  bad_doc_id, doc_max);
        }
        ords[elem_ivars->doc_id] = ord;
        DECREF(elem);
    }
    DECREF(last_val);

    // If there are NULL values, write one now and record the NULL ord.
    if (has_nulls) {
        S_write_val(NULL, prim_id, ix_out, dat_out, dat_start);
        ord++;
        ivars->null_ord = ord;
    }
    int32_t null_ord = ivars->null_ord;

    // Write one extra file pointer so that we can always derive length.
    if (ivars->var_width) {
        OutStream_Write_I64(ix_out, OutStream_Tell(dat_out) - dat_start);
    }

    // Calculate cardinality and ord width.
    int32_t cardinality = ord + 1;
    ivars->ord_width     = S_calc_width(cardinality);
    int32_t ord_width   = ivars->ord_width;

    // Write ords.  The ord width is divided by 8 bits per byte, so the
    // packed buffer size is rounded up to a whole number of bytes.
    const double BITS_PER_BYTE = 8.0;
    double bytes_per_doc = ord_width / BITS_PER_BYTE;
    double byte_count = ceil((doc_max + 1) * bytes_per_doc);
    char *compressed_ords
        = (char*)CALLOCATE((size_t)byte_count, sizeof(char));
    for (int32_t i = 0; i <= doc_max; i++) {
        // Slots still holding the -1 stand-in receive the NULL ord.
        int32_t real_ord = ords[i] == -1 ? null_ord : ords[i];
        S_write_ord(compressed_ords, ord_width, i, real_ord);
    }
    OutStream_Write_Bytes(ord_out, compressed_ords, (size_t)byte_count);
    FREEMEM(compressed_ords);

    FREEMEM(ords);
    return cardinality;
}