Ejemplo n.º 1
0
// Serialize `blob` to `outstream` as a length prefix followed by the blob's
// raw bytes.
//
// The byte count is emitted with OutStream_Write_CU64 and the content with
// OutStream_Write_Bytes.  A blob whose size exceeds INT32_MAX is reported
// through THROW(ERR, ...) before either write call is reached.
void
Freezer_serialize_blob(Blob *blob, OutStream *outstream) {
    const size_t num_bytes = Blob_Get_Size(blob);

    // Refuse oversized payloads up front.
    if (num_bytes > INT32_MAX) {
        THROW(ERR, "Can't serialize blob above 2GB: %u64",
              (uint64_t)num_bytes);
    }

    // Length prefix first, then the payload itself.
    OutStream_Write_CU64(outstream, num_bytes);
    const char *bytes = Blob_Get_Buf(blob);
    OutStream_Write_Bytes(outstream, bytes, num_bytes);
}
Ejemplo n.º 2
0
// Feed one item to the sorter and track how many blob bytes have been
// accepted so far.
//
// The parent class's Feed implementation is invoked first.  `item` is then
// checked with CERTIFY against BLOB, its size is added to
// `ivars->mem_consumed`, and BlobSortEx_Flush() is called once the running
// total reaches `ivars->mem_thresh`.
void
BlobSortEx_Feed_IMP(BlobSortEx *self, Obj *item) {
    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);

    // Delegate to the superclass before doing any local bookkeeping.
    BlobSortEx_Feed_t parent_feed
        = SUPER_METHOD_PTR(BLOBSORTEX, LUCY_BlobSortEx_Feed);
    parent_feed(self, item);

    // Account for the payload bytes of the item just fed.
    Blob *fed = (Blob*)CERTIFY(item, BLOB);
    ivars->mem_consumed += Blob_Get_Size(fed);

    // Still under the threshold: nothing more to do.
    if (ivars->mem_consumed < ivars->mem_thresh) {
        return;
    }
    BlobSortEx_Flush(self);
}
// Record `value` for document `doc_id` and update the memory counter.
//
// The counter is always charged `ivars->mem_per_entry`.  For TEXT and BLOB
// fields it is additionally charged the value's byte size plus one, passed
// through SI_increase_to_word_multiple().  A clone of `value` is wrapped in
// an SFWriterElem together with `doc_id`, fed to the writer, and the entry
// count is incremented.
void
SortFieldWriter_Add_IMP(SortFieldWriter *self, int32_t doc_id, Obj *value) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    Counter *mem_counter = ivars->counter;

    // Fixed per-entry cost.
    Counter_Add(mem_counter, ivars->mem_per_entry);

    // Variable cost for types that carry a byte payload.
    if (ivars->prim_id == FType_TEXT) {
        int64_t raw    = Str_Get_Size((String*)value) + 1;
        int64_t padded = SI_increase_to_word_multiple(raw);
        Counter_Add(mem_counter, padded);
    }
    else if (ivars->prim_id == FType_BLOB) {
        int64_t raw    = Blob_Get_Size((Blob*)value) + 1;
        int64_t padded = SI_increase_to_word_multiple(raw);
        Counter_Add(mem_counter, padded);
    }

    // Hand a private copy of the value to the sort pool.
    SFWriterElem *entry = S_SFWriterElem_create(Obj_Clone(value), doc_id);
    SortFieldWriter_Feed(self, (Obj*)entry);
    ivars->count += 1;
}
Ejemplo n.º 4
0
// Reload the in-memory buffer from `ivars->external`.
//
// THROWs if the buffer still holds unread items.  Otherwise the buffer
// cursors are reset and blobs are pulled from `ivars->external`, starting at
// `ivars->external_tick`, until either the memory threshold is reached (the
// consumption meter is then reset to zero) or the external vector has no
// more elements.  Each pulled blob is INCREF'd into the buffer, which is
// grown on demand.
//
// Returns the number of items now in the buffer (`ivars->buf_max`).
uint32_t
BlobSortEx_Refill_IMP(BlobSortEx *self) {
    BlobSortExIVARS *const ivars = BlobSortEx_IVARS(self);

    // A refill is only legal once the buffer has been fully drained.
    if (ivars->buf_max - ivars->buf_tick > 0) {
        THROW(ERR, "Refill called but buffer contains %u32 items",
              ivars->buf_max - ivars->buf_tick);
    }
    ivars->buf_max  = 0;
    ivars->buf_tick = 0;

    for (;;) {
        // Memory budget spent: reset the meter and stop.
        if (ivars->mem_consumed >= ivars->mem_thresh) {
            ivars->mem_consumed = 0;
            break;
        }

        // Nothing left to pull from the external source.
        if (ivars->external_tick >= Vec_Get_Size(ivars->external)) {
            break;
        }

        Blob *next = (Blob*)Vec_Fetch(ivars->external, ivars->external_tick);
        ivars->external_tick++;

        // Only payload bytes are counted; the Blob object's own footprint
        // is deliberately left out of the tally.
        ivars->mem_consumed += Blob_Get_Size(next);

        // Make room for one more slot when the buffer is full.
        if (ivars->buf_max == ivars->buf_cap) {
            BlobSortEx_Grow_Buffer(self,
                                 Memory_oversize(ivars->buf_max + 1,
                                                 sizeof(Obj*)));
        }
        ivars->buffer[ivars->buf_max] = INCREF(next);
        ivars->buf_max++;
    }

    return ivars->buf_max;
}
Ejemplo n.º 5
0
// Decode the contents of `tv_buf` into a TermVector for `field` and
// `term_text`.
//
// A non-empty buffer starts with a CU32 count, followed by that many
// (position, start, end) triples, each value decoded as a CI32.  An empty
// buffer produces a TermVector backed by zero-length arrays.  If the decode
// cursor does not land exactly on the end of the buffer, an ERR is thrown.
//
// Returns a new TermVector, or NULL on the bad-encoding path should THROW
// return.
static TermVector*
S_extract_tv_from_tv_buf(String *field, String *term_text, Blob *tv_buf) {
    const char       *cursor = Blob_Get_Buf(tv_buf);
    const char *const limit  = cursor + Blob_Get_Size(tv_buf);
    uint32_t  count          = 0;
    int32_t  *pos_arr        = NULL;
    int32_t  *start_arr      = NULL;
    int32_t  *end_arr        = NULL;

    // Read the triple count and allocate the three parallel arrays.
    if (cursor != limit) {
        count     = NumUtil_decode_cu32(&cursor);
        pos_arr   = (int32_t*)MALLOCATE(count * sizeof(int32_t));
        start_arr = (int32_t*)MALLOCATE(count * sizeof(int32_t));
        end_arr   = (int32_t*)MALLOCATE(count * sizeof(int32_t));
    }

    // Unpack each (position, start, end) triple in stream order.
    for (uint32_t n = 0; n < count; n++) {
        pos_arr[n]   = NumUtil_decode_ci32(&cursor);
        start_arr[n] = NumUtil_decode_ci32(&cursor);
        end_arr[n]   = NumUtil_decode_ci32(&cursor);
    }

    // The decoder must have consumed the buffer exactly.
    // NOTE(review): if THROW does not return, the three arrays allocated
    // above are not released on this path -- confirm whether that matters.
    if (cursor != limit) {
        THROW(ERR, "Bad encoding of posdata");
        return NULL;
    }

    // Wrap the raw arrays; the TermVector keeps what it needs, so the local
    // references are dropped once it has been built.
    I32Array *pos_map   = I32Arr_new_steal(pos_arr, count);
    I32Array *start_map = I32Arr_new_steal(start_arr, count);
    I32Array *end_map   = I32Arr_new_steal(end_arr, count);
    TermVector *tv = TV_new(field, term_text, pos_map, start_map, end_map);
    DECREF(pos_map);
    DECREF(start_map);
    DECREF(end_map);

    return tv;
}
Ejemplo n.º 6
0
// Serialize `blob` to `outstream` as a C32 length prefix followed by the
// blob's raw bytes.
//
// The size comes back from Blob_Get_Size() as a size_t, while the prefix is
// written with OutStream_Write_C32 (assumed to take a 32-bit value, as the
// name suggests).  Without a range check an oversized blob would get a
// truncated length prefix followed by the full payload, corrupting the
// stream.  Blobs larger than INT32_MAX bytes are therefore rejected with an
// ERR before anything is written.
void
Freezer_serialize_blob(Blob *blob, OutStream *outstream) {
    size_t size = Blob_Get_Size(blob);

    // Guard against a length that cannot be represented in the prefix.
    if (size > INT32_MAX) {
        THROW(ERR, "Can't serialize blob above 2GB: %u64", (uint64_t)size);
    }

    // The cast is value-preserving thanks to the check above.
    OutStream_Write_C32(outstream, (uint32_t)size);
    OutStream_Write_Bytes(outstream, Blob_Get_Buf(blob), size);
}
Ejemplo n.º 7
0
// Write the stored fields of one inverted document.
//
// The document id must equal the current position of the index stream
// divided by 8; otherwise an ERR is thrown.  The record written to the data
// stream consists of a CU32 count of stored fields followed, for each stored
// field, by its serialized name and a type-specific encoding of its value:
//
//   TEXT / BLOB  -- CU32 byte length, then the raw bytes (over 2GB is an ERR)
//   INT32        -- CI32
//   INT64        -- CI64
//   FLOAT32      -- F32
//   FLOAT64      -- F64
//
// Finally the data-stream offset at which this record began is appended to
// the index stream as an I64.
void
DocWriter_Add_Inverted_Doc_IMP(DocWriter *self, Inverter *inverter,
                               int32_t doc_id) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    OutStream *dat_out      = S_lazy_init(self);
    OutStream *ix_out       = ivars->ix_out;
    uint32_t   stored_count = 0;
    int64_t    record_start = OutStream_Tell(dat_out);
    int64_t    expected_id  = OutStream_Tell(ix_out) / 8;

    // Doc ids must arrive in step with the index stream.
    if (doc_id != expected_id) {
        THROW(ERR, "Expected doc id %i64 but got %i32", expected_id, doc_id);
    }

    // First pass: count the stored fields so the total can lead the record.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *ftype = Inverter_Get_Type(inverter);
        if (FType_Stored(ftype)) {
            stored_count += 1;
        }
    }
    OutStream_Write_CU32(dat_out, stored_count);

    // Second pass: emit name and value for every stored field.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *ftype = Inverter_Get_Type(inverter);
        if (!FType_Stored(ftype)) {
            // Fields not marked as "stored" are skipped.
            continue;
        }

        String *field_name = Inverter_Get_Field_Name(inverter);
        Obj    *field_val  = Inverter_Get_Value(inverter);
        Freezer_serialize_string(field_name, dat_out);

        switch (FType_Primitive_ID(ftype) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                const char *bytes = Str_Get_Ptr8((String*)field_val);
                size_t      len   = Str_Get_Size((String*)field_val);
                if (len > INT32_MAX) {
                    THROW(ERR, "Field %o over 2GB: %u64", field_name,
                          (uint64_t)len);
                }
                OutStream_Write_CU32(dat_out, (uint32_t)len);
                OutStream_Write_Bytes(dat_out, bytes, len);
                break;
            }
            case FType_BLOB: {
                const char *bytes = Blob_Get_Buf((Blob*)field_val);
                size_t      len   = Blob_Get_Size((Blob*)field_val);
                if (len > INT32_MAX) {
                    THROW(ERR, "Field %o over 2GB: %u64", field_name,
                          (uint64_t)len);
                }
                OutStream_Write_CU32(dat_out, (uint32_t)len);
                OutStream_Write_Bytes(dat_out, bytes, len);
                break;
            }
            case FType_INT32: {
                int32_t num = (int32_t)Int_Get_Value((Integer*)field_val);
                OutStream_Write_CI32(dat_out, num);
                break;
            }
            case FType_INT64: {
                int64_t num = Int_Get_Value((Integer*)field_val);
                OutStream_Write_CI64(dat_out, num);
                break;
            }
            case FType_FLOAT32: {
                float num = (float)Float_Get_Value((Float*)field_val);
                OutStream_Write_F32(dat_out, num);
                break;
            }
            case FType_FLOAT64: {
                double num = Float_Get_Value((Float*)field_val);
                OutStream_Write_F64(dat_out, num);
                break;
            }
            default:
                THROW(ERR, "Unrecognized type: %o", ftype);
        }
    }

    // Record where this document's data began.
    OutStream_Write_I64(ix_out, record_start);
}
// Write a single sort-field value, selected by primitive type id.
//
// TEXT / BLOB: the current offset of `dat_out` relative to `dat_start` is
// written to `ix_out` as an I64; when `val` is non-NULL its raw bytes are
// then appended to `dat_out`.
//
// INT32 / INT64 / FLOAT32 / FLOAT64: the value is written to `dat_out` in
// the matching fixed-width form; a NULL `val` is written as zero.
//
// Any other primitive id raises an ERR.
static void
S_write_val(Obj *val, int8_t prim_id, OutStream *ix_out, OutStream *dat_out,
            int64_t dat_start) {
    // Missing value: emit an offset with no payload, or a zero placeholder.
    if (!val) {
        switch (prim_id & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT:
            case FType_BLOB: {
                int64_t offset = OutStream_Tell(dat_out) - dat_start;
                OutStream_Write_I64(ix_out, offset);
                break;
            }
            case FType_INT32:
                OutStream_Write_I32(dat_out, 0);
                break;
            case FType_INT64:
                OutStream_Write_I64(dat_out, 0);
                break;
            case FType_FLOAT64:
                OutStream_Write_F64(dat_out, 0.0);
                break;
            case FType_FLOAT32:
                OutStream_Write_F32(dat_out, 0.0f);
                break;
            default:
                THROW(ERR, "Unrecognized primitive id: %i32", (int32_t)prim_id);
        }
        return;
    }

    // Real value: encode it according to its primitive type.
    switch (prim_id & FType_PRIMITIVE_ID_MASK) {
        case FType_TEXT: {
            String *text   = (String*)val;
            int64_t offset = OutStream_Tell(dat_out) - dat_start;
            OutStream_Write_I64(ix_out, offset);
            OutStream_Write_Bytes(dat_out, Str_Get_Ptr8(text),
                                  Str_Get_Size(text));
            break;
        }
        case FType_BLOB: {
            Blob   *bytes  = (Blob*)val;
            int64_t offset = OutStream_Tell(dat_out) - dat_start;
            OutStream_Write_I64(ix_out, offset);
            OutStream_Write_Bytes(dat_out, Blob_Get_Buf(bytes),
                                  Blob_Get_Size(bytes));
            break;
        }
        case FType_INT32: {
            int32_t num = (int32_t)Int_Get_Value((Integer*)val);
            OutStream_Write_I32(dat_out, num);
            break;
        }
        case FType_INT64: {
            int64_t num = Int_Get_Value((Integer*)val);
            OutStream_Write_I64(dat_out, num);
            break;
        }
        case FType_FLOAT32: {
            float num = (float)Float_Get_Value((Float*)val);
            OutStream_Write_F32(dat_out, num);
            break;
        }
        case FType_FLOAT64: {
            double num = Float_Get_Value((Float*)val);
            OutStream_Write_F64(dat_out, num);
            break;
        }
        default:
            THROW(ERR, "Unrecognized primitive id: %i32", (int32_t)prim_id);
    }
}