示例#1
0
void
Freezer_serialize_blob(Blob *blob, OutStream *outstream) {
    size_t size = Blob_Get_Size(blob);
    if (size > INT32_MAX) {
        THROW(ERR, "Can't serialize blob above 2GB: %u64", (uint64_t)size);
    }
    OutStream_Write_CU64(outstream, size);
    OutStream_Write_Bytes(outstream, Blob_Get_Buf(blob), size);
}
示例#2
0
文件: DocVector.c 项目: unlobe/lucy
static TermVector*
S_extract_tv_from_tv_buf(String *field, String *term_text, Blob *tv_buf) {
    TermVector *retval      = NULL;
    const char *posdata     = Blob_Get_Buf(tv_buf);
    const char *posdata_end = posdata + Blob_Get_Size(tv_buf);
    int32_t    *positions   = NULL;
    int32_t    *starts      = NULL;
    int32_t    *ends        = NULL;
    uint32_t    num_pos     = 0;

    if (posdata != posdata_end) {
        num_pos   = NumUtil_decode_cu32(&posdata);
        positions = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
        starts    = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
        ends      = (int32_t*)MALLOCATE(num_pos * sizeof(int32_t));
    }

    // Expand CI32s.
    for (uint32_t i = 0; i < num_pos; i++) {
        positions[i] = NumUtil_decode_ci32(&posdata);
        starts[i]    = NumUtil_decode_ci32(&posdata);
        ends[i]      = NumUtil_decode_ci32(&posdata);
    }

    if (posdata != posdata_end) {
        THROW(ERR, "Bad encoding of posdata");
    }
    else {
        I32Array *posits_map = I32Arr_new_steal(positions, num_pos);
        I32Array *starts_map = I32Arr_new_steal(starts, num_pos);
        I32Array *ends_map   = I32Arr_new_steal(ends, num_pos);
        retval = TV_new(field, term_text, posits_map, starts_map, ends_map);
        DECREF(posits_map);
        DECREF(starts_map);
        DECREF(ends_map);
    }

    return retval;
}
示例#3
0
文件: DocVector.c 项目: unlobe/lucy
static Hash*
S_extract_tv_cache(Blob *field_buf) {
    Hash       *tv_cache  = Hash_new(0);
    const char *tv_string = Blob_Get_Buf(field_buf);
    int32_t     num_terms = NumUtil_decode_ci32(&tv_string);
    ByteBuf    *text_buf  = BB_new(0);

    // Read the number of highlightable terms in the field.
    for (int32_t i = 0; i < num_terms; i++) {
        size_t   overlap = NumUtil_decode_cu32(&tv_string);
        size_t   len     = NumUtil_decode_cu32(&tv_string);

        // Decompress the term text.
        BB_Set_Size(text_buf, overlap);
        BB_Cat_Bytes(text_buf, tv_string, len);
        tv_string += len;

        // Get positions & offsets string.
        const char *bookmark_ptr  = tv_string;
        int32_t     num_positions = NumUtil_decode_ci32(&tv_string);
        while (num_positions--) {
            // Leave nums compressed to save a little mem.
            NumUtil_skip_cint(&tv_string);
            NumUtil_skip_cint(&tv_string);
            NumUtil_skip_cint(&tv_string);
        }
        len = tv_string - bookmark_ptr;

        // Store the $text => $posdata pair in the output hash.
        String *text = BB_Trusted_Utf8_To_String(text_buf);
        Hash_Store(tv_cache, text, (Obj*)Blob_new(bookmark_ptr, len));
        DECREF(text);
    }
    DECREF(text_buf);

    return tv_cache;
}
示例#4
0
文件: Freezer.c 项目: rectang/lucy
void
Freezer_serialize_blob(Blob *blob, OutStream *outstream) {
    size_t size = Blob_Get_Size(blob);
    OutStream_Write_C32(outstream, size);
    OutStream_Write_Bytes(outstream, Blob_Get_Buf(blob), size);
}
示例#5
0
文件: DocWriter.c 项目: apache/lucy
void
DocWriter_Add_Inverted_Doc_IMP(DocWriter *self, Inverter *inverter,
                               int32_t doc_id) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    OutStream *dat_out    = S_lazy_init(self);
    OutStream *ix_out     = ivars->ix_out;
    uint32_t   num_stored = 0;
    int64_t    start      = OutStream_Tell(dat_out);
    int64_t    expected   = OutStream_Tell(ix_out) / 8;

    // Verify doc id.
    if (doc_id != expected) {
        THROW(ERR, "Expected doc id %i64 but got %i32", expected, doc_id);
    }

    // Write the number of stored fields.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (FType_Stored(type)) { num_stored++; }
    }
    OutStream_Write_CU32(dat_out, num_stored);

    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        // Only store fields marked as "stored".
        FieldType *type = Inverter_Get_Type(inverter);
        if (FType_Stored(type)) {
            String *field = Inverter_Get_Field_Name(inverter);
            Obj *value = Inverter_Get_Value(inverter);
            Freezer_serialize_string(field, dat_out);
            switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
                case FType_TEXT: {
                    const char *buf  = Str_Get_Ptr8((String*)value);
                    size_t      size = Str_Get_Size((String*)value);
                    if (size > INT32_MAX) {
                        THROW(ERR, "Field %o over 2GB: %u64", field,
                              (uint64_t)size);
                    }
                    OutStream_Write_CU32(dat_out, (uint32_t)size);
                    OutStream_Write_Bytes(dat_out, buf, size);
                    break;
                }
                case FType_BLOB: {
                    const char *buf  = Blob_Get_Buf((Blob*)value);
                    size_t      size = Blob_Get_Size((Blob*)value);
                    if (size > INT32_MAX) {
                        THROW(ERR, "Field %o over 2GB: %u64", field,
                              (uint64_t)size);
                    }
                    OutStream_Write_CU32(dat_out, (uint32_t)size);
                    OutStream_Write_Bytes(dat_out, buf, size);
                    break;
                }
                case FType_INT32: {
                    int32_t val = (int32_t)Int_Get_Value((Integer*)value);
                    OutStream_Write_CI32(dat_out, val);
                    break;
                }
                case FType_INT64: {
                    int64_t val = Int_Get_Value((Integer*)value);
                    OutStream_Write_CI64(dat_out, val);
                    break;
                }
                case FType_FLOAT32: {
                    float val = (float)Float_Get_Value((Float*)value);
                    OutStream_Write_F32(dat_out, val);
                    break;
                }
                case FType_FLOAT64: {
                    double val = Float_Get_Value((Float*)value);
                    OutStream_Write_F64(dat_out, val);
                    break;
                }
                default:
                    THROW(ERR, "Unrecognized type: %o", type);
            }
        }
    }

    // Write file pointer.
    OutStream_Write_I64(ix_out, start);
}
static void
S_write_val(Obj *val, int8_t prim_id, OutStream *ix_out, OutStream *dat_out,
            int64_t dat_start) {
    if (val) {
        switch (prim_id & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                    String *string = (String*)val;
                    int64_t dat_pos = OutStream_Tell(dat_out) - dat_start;
                    OutStream_Write_I64(ix_out, dat_pos);
                    OutStream_Write_Bytes(dat_out, Str_Get_Ptr8(string),
                                          Str_Get_Size(string));
                    break;
                }
            case FType_BLOB: {
                    Blob *blob = (Blob*)val;
                    int64_t dat_pos = OutStream_Tell(dat_out) - dat_start;
                    OutStream_Write_I64(ix_out, dat_pos);
                    OutStream_Write_Bytes(dat_out, Blob_Get_Buf(blob),
                                          Blob_Get_Size(blob));
                    break;
                }
            case FType_INT32: {
                    int32_t i32 = (int32_t)Int_Get_Value((Integer*)val);
                    OutStream_Write_I32(dat_out, i32);
                    break;
                }
            case FType_INT64: {
                    int64_t i64 = Int_Get_Value((Integer*)val);
                    OutStream_Write_I64(dat_out, i64);
                    break;
                }
            case FType_FLOAT32: {
                    float f32 = (float)Float_Get_Value((Float*)val);
                    OutStream_Write_F32(dat_out, f32);
                    break;
                }
            case FType_FLOAT64: {
                    double f64 = Float_Get_Value((Float*)val);
                    OutStream_Write_F64(dat_out, f64);
                    break;
                }
            default:
                THROW(ERR, "Unrecognized primitive id: %i32", (int32_t)prim_id);
        }
    }
    else {
        switch (prim_id & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT:
            case FType_BLOB: {
                    int64_t dat_pos = OutStream_Tell(dat_out) - dat_start;
                    OutStream_Write_I64(ix_out, dat_pos);
                }
                break;
            case FType_INT32:
                OutStream_Write_I32(dat_out, 0);
                break;
            case FType_INT64:
                OutStream_Write_I64(dat_out, 0);
                break;
            case FType_FLOAT64:
                OutStream_Write_F64(dat_out, 0.0);
                break;
            case FType_FLOAT32:
                OutStream_Write_F32(dat_out, 0.0f);
                break;
            default:
                THROW(ERR, "Unrecognized primitive id: %i32", (int32_t)prim_id);
        }
    }
}