Ejemplo n.º 1
0
/* Open the highlight index ("highlight.ix") and data ("highlight.dat")
 * output streams for this writer's segment the first time they are needed.
 *
 * Returns self->dat_out.  If either stream cannot be opened, the error
 * reported by Err_get_error() is rethrown.
 */
static OutStream*
S_lazy_init(HighlightWriter *self)
{
    CharBuf *seg_name;
    CharBuf *path;

    // Streams already exist: nothing to set up.
    if (self->dat_out) {
        return self->dat_out;
    }

    seg_name = Seg_Get_Name(self->segment);

    // Index stream first.
    path = CB_newf("%o/highlight.ix", seg_name);
    self->ix_out = Folder_Open_Out(self->folder, path);
    DECREF(path);
    if (!self->ix_out) { RETHROW(INCREF(Err_get_error())); }

    // Then the data stream.
    path = CB_newf("%o/highlight.dat", seg_name);
    self->dat_out = Folder_Open_Out(self->folder, path);
    DECREF(path);
    if (!self->dat_out) { RETHROW(INCREF(Err_get_error())); }

    // Doc id 0 is invalid, so reserve its index slot with a zero pointer.
    OutStream_Write_I64(self->ix_out, 0);

    return self->dat_out;
}
Ejemplo n.º 2
0
/* Open the highlight index ("highlight.ix") and data ("highlight.dat")
 * output streams for this writer's segment the first time they are needed.
 *
 * Returns the data stream.  If either stream cannot be opened, the error
 * reported by Err_get_error() is rethrown.
 */
static OutStream*
S_lazy_init(HighlightWriter *self) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);

    // Streams already exist: nothing to set up.
    if (ivars->dat_out) {
        return ivars->dat_out;
    }

    Folder *const dest_folder = ivars->folder;
    String *const seg_dir     = Seg_Get_Name(ivars->segment);

    // Index stream first.
    String *ix_path = Str_newf("%o/highlight.ix", seg_dir);
    ivars->ix_out = Folder_Open_Out(dest_folder, ix_path);
    DECREF(ix_path);
    if (!ivars->ix_out) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Then the data stream.
    String *dat_path = Str_newf("%o/highlight.dat", seg_dir);
    ivars->dat_out = Folder_Open_Out(dest_folder, dat_path);
    DECREF(dat_path);
    if (!ivars->dat_out) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Doc id 0 is invalid, so reserve its index slot with a zero pointer.
    OutStream_Write_I64(ivars->ix_out, 0);

    return ivars->dat_out;
}
Ejemplo n.º 3
0
/* Finalize the document files for this segment.
 *
 * Does nothing if the output streams were never opened.  Otherwise appends
 * the current end of the data stream to the index stream, closes both
 * streams and stores this writer's metadata on the segment under the key
 * "documents".
 */
void
DocWriter_finish(DocWriter *self) {
    // No streams were ever opened, so there is nothing to finalize.
    if (!self->dat_out) {
        return;
    }

    // A trailing file pointer lets the length of the last record be derived
    // from the difference between consecutive index entries.
    OutStream_Write_I64(self->ix_out, OutStream_Tell(self->dat_out));

    // Shut the streams, data first, then record the metadata.
    OutStream_Close(self->dat_out);
    OutStream_Close(self->ix_out);
    Seg_Store_Metadata_Str(self->segment, "documents", 9,
                           (Obj*)DocWriter_Metadata(self));
}
Ejemplo n.º 4
0
/* Append an entry for the most recent term to the lexicon index.
 *
 * Writes the current position of ix_out to ixix_out, then writes key frames
 * for the term stepper and the term-info stepper to ix_out, followed by the
 * current position of dat_out.  Increments self->ix_count.
 */
static void
S_add_last_term_to_ix(LexiconWriter *self)
{
    // The index-of-the-index records where this entry begins in ix_out.
    const int64_t ix_pos = OutStream_Tell(self->ix_out);
    OutStream_Write_I64(self->ixix_out, ix_pos);

    // Key frame for the term itself.
    TermStepper_Write_Key_Frame(self->term_stepper, self->ix_out,
                                TermStepper_Get_Value(self->term_stepper));

    // Key frame for the accompanying term info.
    TermStepper_Write_Key_Frame(self->tinfo_stepper, self->ix_out,
                                TermStepper_Get_Value(self->tinfo_stepper));

    // Pointer into the main data stream.
    const int64_t dat_pos = OutStream_Tell(self->dat_out);
    OutStream_Write_C64(self->ix_out, dat_pos);

    // One more term has been added to the index.
    self->ix_count++;
}
Ejemplo n.º 5
0
/* Finalize the highlight files for this segment.
 *
 * Does nothing if the output streams were never opened.  Otherwise appends
 * the current end of the data stream to the index stream, closes both
 * streams and stores this writer's metadata on the segment under the key
 * "highlight".
 */
void
HLWriter_Finish_IMP(HighlightWriter *self) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);

    // No streams were ever opened, so there is nothing to finalize.
    if (!ivars->dat_out) {
        return;
    }

    // A trailing file pointer lets the length of the last record be derived
    // from the difference between consecutive index entries.
    OutStream_Write_I64(ivars->ix_out, OutStream_Tell(ivars->dat_out));

    // Shut the streams, data first, then record the metadata.
    OutStream_Close(ivars->dat_out);
    OutStream_Close(ivars->ix_out);
    Seg_Store_Metadata_Utf8(ivars->segment, "highlight", 9,
                            (Obj*)HLWriter_Metadata(self));
}
Ejemplo n.º 6
0
/* Write the highlight record for one inverted document.
 *
 * The doc id must equal the number of 8-byte entries already present in the
 * index stream; otherwise an error is thrown.  The record consists of the
 * count of highlightable full-text fields followed, for each such field, by
 * its serialized name and its serialized term-vector buffer.
 */
void
HLWriter_Add_Inverted_Doc_IMP(HighlightWriter *self, Inverter *inverter,
                              int32_t doc_id) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);
    OutStream *const data_stream  = S_lazy_init(self);
    OutStream *const index_stream = ivars->ix_out;
    const int64_t record_start = OutStream_Tell(data_stream);
    uint32_t highlightable_count = 0;
    const int32_t next_doc_id
        = (int32_t)(OutStream_Tell(index_stream) / 8);

    // Documents have to arrive in id order with no gaps.
    if (next_doc_id != doc_id) {
        THROW(ERR, "Expected doc id %i32 but got %i32", next_doc_id, doc_id);
    }

    // Index entry: where this document's record starts in the data stream.
    OutStream_Write_I64(index_stream, record_start);

    // First pass: tally the highlightable fields and write the tally.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (!FType_Is_A(type, FULLTEXTTYPE)) { continue; }
        if (!FullTextType_Highlightable((FullTextType*)type)) { continue; }
        highlightable_count++;
    }
    OutStream_Write_C32(data_stream, highlightable_count);

    // Second pass: emit field name plus term-vector buffer for each one.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (!FType_Is_A(type, FULLTEXTTYPE)) { continue; }
        if (!FullTextType_Highlightable((FullTextType*)type)) { continue; }

        String    *field_name = Inverter_Get_Field_Name(inverter);
        Inversion *inversion  = Inverter_Get_Inversion(inverter);
        ByteBuf   *vectors    = HLWriter_TV_Buf(self, inversion);
        Freezer_serialize_string(field_name, data_stream);
        Freezer_serialize_bytebuf(vectors, data_stream);
        DECREF(vectors);
    }
}
Ejemplo n.º 7
0
/* Open the document index ("documents.ix") and data ("documents.dat")
 * output streams for this writer's segment the first time they are needed.
 *
 * Returns self->dat_out.  If either stream cannot be opened, the error
 * reported by Err_get_error() is rethrown.
 */
static OutStream*
S_lazy_init(DocWriter *self) {
    // Streams already exist: nothing to set up.
    if (self->dat_out) {
        return self->dat_out;
    }

    Folder  *const dest_folder = self->folder;
    CharBuf *const seg_dir     = Seg_Get_Name(self->segment);

    // Index stream first.
    CharBuf *ix_path = CB_newf("%o/documents.ix", seg_dir);
    self->ix_out = Folder_Open_Out(dest_folder, ix_path);
    DECREF(ix_path);
    if (!self->ix_out) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Then the data stream.
    CharBuf *dat_path = CB_newf("%o/documents.dat", seg_dir);
    self->dat_out = Folder_Open_Out(dest_folder, dat_path);
    DECREF(dat_path);
    if (!self->dat_out) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Doc id 0 is not a real document; reserve its index slot.
    OutStream_Write_I64(self->ix_out, 0);

    return self->dat_out;
}
Ejemplo n.º 8
0
/* Copy the highlight records of an existing segment into this writer.
 *
 * Returns immediately if the source segment has no documents.  Documents
 * are visited from 1 to the segment's doc max; when doc_map is non-NULL,
 * documents that it maps to 0 are skipped.  For each copied document the
 * current data-stream position is written to the index stream, followed by
 * the raw record bytes in the data stream.
 */
void
HLWriter_Add_Segment_IMP(HighlightWriter *self, SegReader *reader,
                         I32Array *doc_map) {
    HighlightWriterIVARS *const ivars = HLWriter_IVARS(self);
    const int32_t last_doc = SegReader_Doc_Max(reader);

    // Nothing to copy from an empty segment.
    if (last_doc == 0) {
        return;
    }

    DefaultHighlightReader *const source
        = (DefaultHighlightReader*)CERTIFY(
              SegReader_Obtain(reader, Class_Get_Name(HIGHLIGHTREADER)),
              DEFAULTHIGHLIGHTREADER);
    OutStream *const data_stream  = S_lazy_init(self);
    OutStream *const index_stream = ivars->ix_out;
    ByteBuf   *const record       = BB_new(0);

    for (int32_t doc = 1; doc <= last_doc; doc++) {
        // A missing map keeps everything; otherwise 0 marks a deleted doc.
        if (!doc_map || I32Arr_Get(doc_map, doc)) {
            // Index entry points at the start of the record being written.
            OutStream_Write_I64(index_stream, OutStream_Tell(data_stream));

            // Transfer the record verbatim.
            DefHLReader_Read_Record(source, doc, record);
            OutStream_Write_Bytes(data_stream, BB_Get_Buf(record),
                                  BB_Get_Size(record));

            // Empty the scratch buffer for the next record.
            BB_Set_Size(record, 0);
        }
    }

    DECREF(record);
}
Ejemplo n.º 9
0
/* Open the document index ("documents.ix") and data ("documents.dat")
 * output streams for this writer's segment the first time they are needed.
 *
 * Returns the data stream.  If either stream cannot be opened, the error
 * reported by Err_get_error() is rethrown.
 */
static OutStream*
S_lazy_init(DocWriter *self) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);

    // Streams already exist: nothing to set up.
    if (ivars->dat_out) {
        return ivars->dat_out;
    }

    Folder *const dest_folder = ivars->folder;
    String *const seg_dir     = Seg_Get_Name(ivars->segment);

    // Index stream first.
    String *ix_path = Str_newf("%o/documents.ix", seg_dir);
    ivars->ix_out = Folder_Open_Out(dest_folder, ix_path);
    DECREF(ix_path);
    if (!ivars->ix_out) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Then the data stream.
    String *dat_path = Str_newf("%o/documents.dat", seg_dir);
    ivars->dat_out = Folder_Open_Out(dest_folder, dat_path);
    DECREF(dat_path);
    if (!ivars->dat_out) {
        RETHROW(INCREF(Err_get_error()));
    }

    // Doc id 0 is not a real document; reserve its index slot.
    OutStream_Write_I64(ivars->ix_out, 0);

    return ivars->dat_out;
}
Ejemplo n.º 10
0
/* Copy the stored-document records of an existing segment into this writer.
 *
 * Returns immediately if the source segment has no documents.  Documents
 * are visited from 1 to the segment's doc max.  When doc_map is non-NULL,
 * documents that it maps to 0 are skipped; a NULL doc_map copies every
 * document.  For each copied document the raw record bytes are appended to
 * the data stream and the record's starting position is appended to the
 * index stream.
 */
void
DocWriter_Add_Segment_IMP(DocWriter *self, SegReader *reader,
                          I32Array *doc_map) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    const int32_t doc_max = SegReader_Doc_Max(reader);

    if (doc_max == 0) {
        // Bail if the supplied segment is empty.
        return;
    }
    else {
        OutStream *const dat_out = S_lazy_init(self);
        OutStream *const ix_out  = ivars->ix_out;
        DefaultDocReader *const doc_reader
            = (DefaultDocReader*)CERTIFY(
                  SegReader_Obtain(reader, VTable_Get_Name(DOCREADER)),
                  DEFAULTDOCREADER);
        // Allocate the scratch buffer only after CERTIFY has succeeded, so
        // that a failed certification cannot leak it.
        ByteBuf *const buffer = BB_new(0);

        for (int32_t i = 1; i <= doc_max; i++) {
            // Skip deleted docs.  Guard against a NULL map before
            // consulting it.
            if (doc_map && !I32Arr_Get(doc_map, i)) {
                continue;
            }

            int64_t start = OutStream_Tell(dat_out);

            // Copy record over.
            DefDocReader_Read_Record(doc_reader, buffer, i);
            char  *buf  = BB_Get_Buf(buffer);
            size_t size = BB_Get_Size(buffer);
            OutStream_Write_Bytes(dat_out, buf, size);

            // Write file pointer.
            OutStream_Write_I64(ix_out, start);
        }

        DECREF(buffer);
    }
}
Ejemplo n.º 11
0
/* Write the stored-fields record for one inverted document.
 *
 * The doc id must equal the number of 8-byte entries already present in the
 * index stream; otherwise an error is thrown.  The record consists of the
 * count of stored fields followed, for each stored field, by its serialized
 * name and serialized value.  The record's starting position is appended to
 * the index stream once the record has been written.
 */
void
DocWriter_add_inverted_doc(DocWriter *self, Inverter *inverter,
                           int32_t doc_id) {
    OutStream *const data_stream  = S_lazy_init(self);
    OutStream *const index_stream = self->ix_out;
    uint32_t stored_count = 0;
    const int64_t record_start = OutStream_Tell(data_stream);
    const int64_t next_doc_id  = OutStream_Tell(index_stream) / 8;

    // Documents have to arrive in id order with no gaps.
    if (next_doc_id != doc_id) {
        THROW(ERR, "Expected doc id %i64 but got %i32", next_doc_id, doc_id);
    }

    // First pass: tally the stored fields and write the tally.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (FType_Stored(type)) {
            stored_count++;
        }
    }
    OutStream_Write_C32(data_stream, stored_count);

    // Second pass: serialize name and value of every stored field.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (!FType_Stored(type)) {
            continue;
        }

        CharBuf *field_name  = Inverter_Get_Field_Name(inverter);
        Obj     *field_value = Inverter_Get_Value(inverter);
        CB_Serialize(field_name, data_stream);
        Obj_Serialize(field_value, data_stream);
    }

    // Index entry: where this document's record begins.
    OutStream_Write_I64(index_stream, record_start);
}
Ejemplo n.º 12
0
/* Write the stored-fields record for one inverted document.
 *
 * The doc id must equal the number of 8-byte entries already present in the
 * index stream; otherwise an error is thrown.  The record consists of the
 * count of stored fields followed, for each stored field, by its serialized
 * name and a value encoded according to the field's primitive type.  An
 * unrecognized primitive type causes an error to be thrown.  The record's
 * starting position is appended to the index stream once the record has
 * been written.
 */
void
DocWriter_Add_Inverted_Doc_IMP(DocWriter *self, Inverter *inverter,
                               int32_t doc_id) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    OutStream *const data_stream  = S_lazy_init(self);
    OutStream *const index_stream = ivars->ix_out;
    uint32_t stored_count = 0;
    const int64_t record_start = OutStream_Tell(data_stream);
    const int64_t next_doc_id  = OutStream_Tell(index_stream) / 8;

    // Documents have to arrive in id order with no gaps.
    if (next_doc_id != doc_id) {
        THROW(ERR, "Expected doc id %i64 but got %i32", next_doc_id, doc_id);
    }

    // First pass: tally the stored fields and write the tally.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (FType_Stored(type)) {
            stored_count++;
        }
    }
    OutStream_Write_C32(data_stream, stored_count);

    // Second pass: write name and typed value of every stored field.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (!FType_Stored(type)) {
            continue;
        }

        String *field_name  = Inverter_Get_Field_Name(inverter);
        Obj    *field_value = Inverter_Get_Value(inverter);
        Freezer_serialize_string(field_name, data_stream);

        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT: {
                // Length-prefixed UTF-8 bytes.
                String     *text  = (String*)field_value;
                const char *bytes = Str_Get_Ptr8(text);
                size_t      len   = Str_Get_Size(text);
                OutStream_Write_C32(data_stream, len);
                OutStream_Write_Bytes(data_stream, bytes, len);
                break;
            }
            case FType_BLOB: {
                // Length-prefixed raw bytes.
                ByteBuf *blob  = (ByteBuf*)field_value;
                char    *bytes = BB_Get_Buf(blob);
                size_t   len   = BB_Get_Size(blob);
                OutStream_Write_C32(data_stream, len);
                OutStream_Write_Bytes(data_stream, bytes, len);
                break;
            }
            case FType_INT32: {
                int32_t number = Int32_Get_Value((Integer32*)field_value);
                OutStream_Write_C32(data_stream, number);
                break;
            }
            case FType_INT64: {
                int64_t number = Int64_Get_Value((Integer64*)field_value);
                OutStream_Write_C64(data_stream, number);
                break;
            }
            case FType_FLOAT32: {
                float number = Float32_Get_Value((Float32*)field_value);
                OutStream_Write_F32(data_stream, number);
                break;
            }
            case FType_FLOAT64: {
                double number = Float64_Get_Value((Float64*)field_value);
                OutStream_Write_F64(data_stream, number);
                break;
            }
            default:
                THROW(ERR, "Unrecognized type: %o", type);
        }
    }

    // Index entry: where this document's record begins.
    OutStream_Write_I64(index_stream, record_start);
}
Ejemplo n.º 13
0
/* Drain the sorted elements of this SortFieldWriter and write the sort
 * cache files for the field.
 *
 * Each distinct value, in the order fetched, is written once via
 * S_write_val() to `dat_out` (and `ix_out` for the types that use it), and
 * every document is assigned the ordinal of its value.  The ordinals are
 * then packed at `ord_width` bits per document and written to `ord_out`.
 *
 * Returns the cardinality, i.e. the highest ordinal assigned plus one.
 * Throws if a fetched element's doc id exceeds the segment's document count.
 *
 * NOTE(review): the first fetched element is dereferenced without a NULL
 * check, so this appears to assume at least one element is available --
 * confirm against callers.
 */
static int32_t
S_write_files(SortFieldWriter *self, OutStream *ord_out, OutStream *ix_out,
              OutStream *dat_out) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    int8_t    prim_id   = ivars->prim_id;
    int32_t   doc_max   = (int32_t)Seg_Get_Count(ivars->segment);
    // Any mismatch between the writer's count and doc_max is treated as
    // "some documents have no value".
    bool      has_nulls = ivars->count == doc_max ? false : true;
    // One slot per doc id from 0 through doc_max inclusive.
    size_t    size      = (doc_max + 1) * sizeof(int32_t);
    int32_t  *ords      = (int32_t*)MALLOCATE(size);
    int32_t   ord       = 0;
    // Positions written to ix_out are relative to this starting offset.
    int64_t   dat_start = OutStream_Tell(dat_out);

    // Assign -1 as a stand-in for the NULL ord.
    for (int32_t i = 0; i <= doc_max; i++) {
        ords[i] = -1;
    }

    // Grab the first item and record its ord.  Add a dummy ord for invalid
    // doc id 0.
    SFWriterElem *elem = (SFWriterElem*)SortFieldWriter_Fetch(self);
    SFWriterElemIVARS *elem_ivars = SFWriterElem_IVARS(elem);
    if (elem_ivars->doc_id > doc_max) {
        THROW(ERR, "doc_id %i32 greater than doc_max %i32",
              elem_ivars->doc_id, doc_max);
    }
    ords[elem_ivars->doc_id] = ord;
    ords[0] = 0;

    // Build array of ords, write non-NULL sorted values.
    // last_val holds its own reference so it stays usable after the element
    // it came from has been released.
    Obj *last_val = INCREF(elem_ivars->value);
    S_write_val(elem_ivars->value, prim_id, ix_out, dat_out, dat_start);
    DECREF(elem);
    while (NULL != (elem = (SFWriterElem*)SortFieldWriter_Fetch(self))) {
        elem_ivars = SFWriterElem_IVARS(elem);
        // Identical pointers need no comparison; otherwise a new ordinal is
        // started only when the values compare as different.
        if (elem_ivars->value != last_val) {
            int32_t comparison
                = FType_Compare_Values(ivars->type, elem_ivars->value,
                                       last_val);
            if (comparison != 0) {
                ord++;
                S_write_val(elem_ivars->value, prim_id, ix_out, dat_out,
                            dat_start);
            }
            DECREF(last_val);
            last_val = INCREF(elem_ivars->value);
        }
        if (elem_ivars->doc_id > doc_max) {
            THROW(ERR, "doc_id %i32 greater than doc_max %i32",
                  elem_ivars->doc_id, doc_max);
        }
        ords[elem_ivars->doc_id] = ord;
        DECREF(elem);
    }
    DECREF(last_val);

    // If there are NULL values, write one now and record the NULL ord.
    if (has_nulls) {
        S_write_val(NULL, prim_id, ix_out, dat_out, dat_start);
        ord++;
        ivars->null_ord = ord;
    }
    // When has_nulls is false this is whatever ivars->null_ord already held.
    int32_t null_ord = ivars->null_ord;

    // Write one extra file pointer so that we can always derive length.
    if (ivars->var_width) {
        OutStream_Write_I64(ix_out, OutStream_Tell(dat_out) - dat_start);
    }

    // Calculate cardinality and ord width.
    int32_t cardinality = ord + 1;
    ivars->ord_width     = S_calc_width(cardinality);
    int32_t ord_width   = ivars->ord_width;

    // Write ords.
    // ord_width is divided by 8 as a count of bits, so bytes_per_doc may be
    // fractional; the total is rounded up to whole bytes.
    const double BITS_PER_BYTE = 8.0;
    double bytes_per_doc = ord_width / BITS_PER_BYTE;
    double byte_count = ceil((doc_max + 1) * bytes_per_doc);
    char *compressed_ords
        = (char*)CALLOCATE((size_t)byte_count, sizeof(char));
    for (int32_t i = 0; i <= doc_max; i++) {
        // Docs still marked -1 never received a value; give them null_ord.
        int32_t real_ord = ords[i] == -1 ? null_ord : ords[i];
        S_write_ord(compressed_ords, ord_width, i, real_ord);
    }
    OutStream_Write_Bytes(ord_out, compressed_ords, (size_t)byte_count);
    FREEMEM(compressed_ords);

    FREEMEM(ords);
    return cardinality;
}
Ejemplo n.º 14
0
/* Write a single sort value, or a placeholder when `val` is NULL.
 *
 * For text and blob types the current data-stream position, relative to
 * `dat_start`, is written to `ix_out`; a non-NULL value's bytes are then
 * appended to `dat_out`, while a NULL value appends nothing.  For numeric
 * types only `dat_out` is written: the value itself, or zero when `val` is
 * NULL.  Throws on an unrecognized primitive id.
 */
static void
S_write_val(Obj *val, int8_t prim_id, OutStream *ix_out, OutStream *dat_out,
            int64_t dat_start) {
    if (!val) {
        // NULL value: emit the placeholder for this primitive type.
        switch (prim_id & FType_PRIMITIVE_ID_MASK) {
            case FType_TEXT:
            case FType_BLOB:
                OutStream_Write_I64(ix_out,
                                    OutStream_Tell(dat_out) - dat_start);
                break;
            case FType_INT32:
                OutStream_Write_I32(dat_out, 0);
                break;
            case FType_INT64:
                OutStream_Write_I64(dat_out, 0);
                break;
            case FType_FLOAT32:
                OutStream_Write_F32(dat_out, 0.0f);
                break;
            case FType_FLOAT64:
                OutStream_Write_F64(dat_out, 0.0);
                break;
            default:
                THROW(ERR, "Unrecognized primitive id: %i32", (int32_t)prim_id);
        }
        return;
    }

    // Real value: variable-width types get an index entry plus raw bytes,
    // fixed-width numeric types go straight to the data stream.
    switch (prim_id & FType_PRIMITIVE_ID_MASK) {
        case FType_TEXT: {
            String *text = (String*)val;
            OutStream_Write_I64(ix_out, OutStream_Tell(dat_out) - dat_start);
            OutStream_Write_Bytes(dat_out, Str_Get_Ptr8(text),
                                  Str_Get_Size(text));
            break;
        }
        case FType_BLOB: {
            Blob *bytes = (Blob*)val;
            OutStream_Write_I64(ix_out, OutStream_Tell(dat_out) - dat_start);
            OutStream_Write_Bytes(dat_out, Blob_Get_Buf(bytes),
                                  Blob_Get_Size(bytes));
            break;
        }
        case FType_INT32:
            OutStream_Write_I32(dat_out,
                                (int32_t)Int_Get_Value((Integer*)val));
            break;
        case FType_INT64:
            OutStream_Write_I64(dat_out, Int_Get_Value((Integer*)val));
            break;
        case FType_FLOAT32:
            OutStream_Write_F32(dat_out,
                                (float)Float_Get_Value((Float*)val));
            break;
        case FType_FLOAT64:
            OutStream_Write_F64(dat_out, Float_Get_Value((Float*)val));
            break;
        default:
            THROW(ERR, "Unrecognized primitive id: %i32", (int32_t)prim_id);
    }
}