Example #1
0
Inversion*
PolyAnalyzer_transform_text(PolyAnalyzer *self, CharBuf *text) {
    VArray *const   analyzers     = self->analyzers;
    const uint32_t  num_analyzers = VA_Get_Size(analyzers);
    Inversion      *retval;

    if (num_analyzers == 0) {
        size_t  token_len = CB_Get_Size(text);
        char   *buf       = (char*)CB_Get_Ptr8(text);
        Token  *seed      = Token_new(buf, token_len, 0, token_len, 1.0f, 1);
        retval = Inversion_new(seed);
        DECREF(seed);
    }
    else {
        Analyzer *first_analyzer = (Analyzer*)VA_Fetch(analyzers, 0);
        retval = Analyzer_Transform_Text(first_analyzer, text);
        for (uint32_t i = 1; i < num_analyzers; i++) {
            Analyzer *analyzer = (Analyzer*)VA_Fetch(analyzers, i);
            Inversion *new_inversion = Analyzer_Transform(analyzer, retval);
            DECREF(retval);
            retval = new_inversion;
        }
    }

    return retval;
}
Example #2
0
void
Folder_consolidate(Folder *self, const CharBuf *path) {
    Folder *folder = Folder_Find_Folder(self, path);
    Folder *enclosing_folder = Folder_Enclosing_Folder(self, path);
    if (!folder) {
        THROW(ERR, "Can't consolidate %o", path);
    }
    else if (Folder_Is_A(folder, COMPOUNDFILEREADER)) {
        THROW(ERR, "Can't consolidate %o twice", path);
    }
    else {
        CompoundFileWriter *cf_writer = CFWriter_new(folder);
        CFWriter_Consolidate(cf_writer);
        DECREF(cf_writer);
        if (CB_Get_Size(path)) {
            ZombieCharBuf *name = IxFileNames_local_part(path, ZCB_BLANK());
            CompoundFileReader *cf_reader = CFReader_open(folder);
            if (!cf_reader) {
                RETHROW(INCREF(Err_get_error()));
            }
            Hash_Store(enclosing_folder->entries, (Obj*)name,
                       (Obj*)cf_reader);
        }
    }
}
Example #3
0
InStream*
CFReader_local_open_in(CompoundFileReader *self, const CharBuf *name) {
    Hash *entry = (Hash*)Hash_Fetch(self->records, (Obj*)name);

    if (!entry) {
        InStream *instream = Folder_Local_Open_In(self->real_folder, name);
        if (!instream) {
            ERR_ADD_FRAME(Err_get_error());
        }
        return instream;
    }
    else {
        Obj *len    = Hash_Fetch_Str(entry, "length", 6);
        Obj *offset = Hash_Fetch_Str(entry, "offset", 6);
        if (!len || !offset) {
            Err_set_error(Err_new(CB_newf("Malformed entry for '%o' in '%o'",
                                          name, Folder_Get_Path(self->real_folder))));
            return NULL;
        }
        else if (CB_Get_Size(self->path)) {
            CharBuf *fullpath = CB_newf("%o/%o", self->path, name);
            InStream *instream = InStream_Reopen(self->instream, fullpath,
                                                 Obj_To_I64(offset), Obj_To_I64(len));
            DECREF(fullpath);
            return instream;
        }
        else {
            return InStream_Reopen(self->instream, name, Obj_To_I64(offset),
                                   Obj_To_I64(len));
        }
    }
}
Example #4
0
Inversion*
StandardTokenizer_transform_text(StandardTokenizer *self, CharBuf *text) {
    Inversion *new_inversion = Inversion_new(NULL);
    StandardTokenizer_Tokenize_Str(self, (char*)CB_Get_Ptr8(text),
                                   CB_Get_Size(text), new_inversion);
    return new_inversion;
}
Example #5
0
bool
FSDH_entry_is_dir(FSDirHandle *self) {
    FSDirHandleIVARS *const ivars = FSDH_IVARS(self);
    struct dirent *sys_dir_entry = (struct dirent*)ivars->sys_dir_entry;
    if (!sys_dir_entry) { return false; }

    // If d_type is available, try to avoid a stat() call.  If it's not, or if
    // the type comes back as unknown, fall back to stat().
    #ifdef CHY_HAS_DIRENT_D_TYPE
    if (sys_dir_entry->d_type == DT_DIR) {
        return true;
    }
    else if (sys_dir_entry->d_type != DT_UNKNOWN) {
        return false;
    }
    #endif

    struct stat stat_buf;
    if (!ivars->fullpath) {
        ivars->fullpath = CB_new(CB_Get_Size(ivars->dir) + 20);
    }
    CB_setf(ivars->fullpath, "%o%s%o", ivars->dir, CHY_DIR_SEP,
            ivars->entry);
    if (stat((char*)CB_Get_Ptr8(ivars->fullpath), &stat_buf) != -1) {
        if (stat_buf.st_mode & S_IFDIR) { return true; }
    }
    return false;
}
Example #6
0
RawPosting*
RichPost_read_raw(RichPosting *self, InStream *instream, int32_t last_doc_id, 
                  CharBuf *term_text, MemoryPool *mem_pool)
{
    char *const    text_buf       = (char*)CB_Get_Ptr8(term_text);
    const size_t   text_size      = CB_Get_Size(term_text);
    const uint32_t doc_code       = InStream_Read_C32(instream);
    const uint32_t delta_doc      = doc_code >> 1;
    const int32_t  doc_id         = last_doc_id + delta_doc;
    const uint32_t freq           = (doc_code & 1) 
                                  ? 1 
                                  : InStream_Read_C32(instream);
    size_t raw_post_bytes         = MAX_RAW_POSTING_LEN(text_size, freq);
    void *const allocation        = MemPool_Grab(mem_pool, raw_post_bytes);
    RawPosting *const raw_posting = RawPost_new(allocation, doc_id, freq,
        text_buf, text_size);
    uint32_t num_prox = freq;
    char *const start = raw_posting->blob + text_size;
    char *      dest  = start;
    UNUSED_VAR(self);

    // Read positions and per-position boosts. 
    while (num_prox--) {
        dest += InStream_Read_Raw_C64(instream, dest);
        *((uint8_t*)dest) = InStream_Read_U8(instream);
        dest++;
    }

    // Resize raw posting memory allocation. 
    raw_posting->aux_len = dest - start;
    raw_post_bytes       = dest - (char*)raw_posting;
    MemPool_Resize(mem_pool, raw_posting, raw_post_bytes);

    return raw_posting;
}
Example #7
0
Obj*
Json_from_json(CharBuf *json) {
    Obj *dump = S_parse_json((char*)CB_Get_Ptr8(json), CB_Get_Size(json));
    if (!dump) {
        ERR_ADD_FRAME(Err_get_error());
    }
    return dump;
}
Example #8
0
RAMFolder*
RAMFolder_init(RAMFolder *self, const CharBuf *path)
{
    Folder_init((Folder*)self, path);
    self->elems = Hash_new(16);
    if (CB_Get_Size(self->path) != 0) S_read_fsfolder(self);
    return self;
}
Example #9
0
static CharBuf*
S_fullpath(RAMFolder *self, const CharBuf *path)
{
    if (CB_Get_Size(self->path)) {
        return CB_newf("%o/%o", self->path, path);
    }
    else {
        return CB_Clone(path);
    }
}
Example #10
0
bool_t
LFLock_maybe_delete_file(LockFileLock *self, const CharBuf *path,
                         bool_t delete_mine, bool_t delete_other) {
    Folder *folder  = self->folder;
    bool_t  success = false;
    ZombieCharBuf *scratch = ZCB_WRAP(path);

    // Only delete locks that start with our lock name.
    CharBuf *lock_dir_name = (CharBuf*)ZCB_WRAP_STR("locks", 5);
    if (!ZCB_Starts_With(scratch, lock_dir_name)) {
        return false;
    }
    ZCB_Nip(scratch, CB_Get_Size(lock_dir_name) + 1);
    if (!ZCB_Starts_With(scratch, self->name)) {
        return false;
    }

    // Attempt to delete dead lock file.
    if (Folder_Exists(folder, path)) {
        Hash *hash = (Hash*)Json_slurp_json(folder, path);
        if (hash != NULL && Obj_Is_A((Obj*)hash, HASH)) {
            CharBuf *pid_buf = (CharBuf*)Hash_Fetch_Str(hash, "pid", 3);
            CharBuf *host    = (CharBuf*)Hash_Fetch_Str(hash, "host", 4);
            CharBuf *name
                = (CharBuf*)Hash_Fetch_Str(hash, "name", 4);

            // Match hostname and lock name.
            if (host != NULL
                && CB_Equals(host, (Obj*)self->host)
                && name != NULL
                && CB_Equals(name, (Obj*)self->name)
                && pid_buf != NULL
               ) {
                // Verify that pid is either mine or dead.
                int pid = (int)CB_To_I64(pid_buf);
                if ((delete_mine && pid == PID_getpid())  // This process.
                    || (delete_other && !PID_active(pid)) // Dead pid.
                   ) {
                    if (Folder_Delete(folder, path)) {
                        success = true;
                    }
                    else {
                        CharBuf *mess
                            = MAKE_MESS("Can't delete '%o'", path);
                        DECREF(hash);
                        Err_throw_mess(ERR, mess);
                    }
                }
            }
        }
        DECREF(hash);
    }

    return success;
}
Example #11
0
void
TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream,
                                Obj *value) {
    TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
    CharBuf    *charbuf   = (CharBuf*)ivars->value;
    const char *last_text = CB_Get_Ptr8(charbuf);
    size_t      last_size = CB_Get_Size(charbuf);
    const char *new_text  = NULL;
    size_t      new_size  = 0;

    if (Obj_is_a(value, STRING)) {
        String *new_string = (String*)value;
        new_text = Str_Get_Ptr8(new_string);
        new_size = Str_Get_Size(new_string);
    }
    else if (Obj_is_a(value, CHARBUF)) {
        CharBuf *new_charbuf = (CharBuf*)value;
        new_text = CB_Get_Ptr8(new_charbuf);
        new_size = CB_Get_Size(new_charbuf);
    }
    else {
        THROW(ERR, "'value' must be a String or CharBuf");
    }

    // Count how many bytes the strings share at the top.
    const int32_t overlap = StrHelp_overlap(last_text, new_text,
                                            last_size, new_size);
    const char *const diff_start_str = new_text + overlap;
    const size_t diff_len            = new_size - overlap;

    // Write number of common bytes and common bytes.
    OutStream_Write_C32(outstream, overlap);
    OutStream_Write_String(outstream, diff_start_str, diff_len);

    // Update value.
    CB_Mimic_Utf8(charbuf, new_text, new_size);

    // Invalidate string.
    DECREF(ivars->string);
    ivars->string = NULL;
}
Example #12
0
bool_t
Folder_delete_tree(Folder *self, const CharBuf *path) {
    Folder *enclosing_folder = Folder_Enclosing_Folder(self, path);

    // Don't allow Folder to delete itself.
    if (!path || !CB_Get_Size(path)) {
        return false;
    }

    if (enclosing_folder) {
        ZombieCharBuf *local = IxFileNames_local_part(path, ZCB_BLANK());
        if (Folder_Local_Is_Directory(enclosing_folder, (CharBuf*)local)) {
            Folder *inner_folder
                = Folder_Local_Find_Folder(enclosing_folder, (CharBuf*)local);
            DirHandle *dh = Folder_Local_Open_Dir(inner_folder);
            if (dh) {
                VArray *files = VA_new(20);
                VArray *dirs  = VA_new(20);
                CharBuf *entry = DH_Get_Entry(dh);
                while (DH_Next(dh)) {
                    VA_Push(files, (Obj*)CB_Clone(entry));
                    if (DH_Entry_Is_Dir(dh) && !DH_Entry_Is_Symlink(dh)) {
                        VA_Push(dirs, (Obj*)CB_Clone(entry));
                    }
                }
                for (uint32_t i = 0, max = VA_Get_Size(dirs); i < max; i++) {
                    CharBuf *name = (CharBuf*)VA_Fetch(files, i);
                    bool_t success = Folder_Delete_Tree(inner_folder, name);
                    if (!success && Folder_Local_Exists(inner_folder, name)) {
                        break;
                    }
                }
                for (uint32_t i = 0, max = VA_Get_Size(files); i < max; i++) {
                    CharBuf *name = (CharBuf*)VA_Fetch(files, i);
                    bool_t success = Folder_Local_Delete(inner_folder, name);
                    if (!success && Folder_Local_Exists(inner_folder, name)) {
                        break;
                    }
                }
                DECREF(dirs);
                DECREF(files);
                DECREF(dh);
            }
        }
        return Folder_Local_Delete(enclosing_folder, (CharBuf*)local);
    }
    else {
        // Return failure if the entry wasn't there in the first place.
        return false;
    }
}
Example #13
0
void
TextTermStepper_Write_Key_Frame_IMP(TextTermStepper *self,
                                    OutStream *outstream, Obj *value) {
    TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
    CharBuf *charbuf = (CharBuf*)ivars->value;
    CB_Mimic(charbuf, value);
    const char *buf  = CB_Get_Ptr8(charbuf);
    size_t      size = CB_Get_Size(charbuf);
    OutStream_Write_C32(outstream, size);
    OutStream_Write_Bytes(outstream, buf, size);
    // Invalidate string.
    DECREF(ivars->string);
    ivars->string = NULL;
}
void 
LexWriter_add_term(LexiconWriter* self, CharBuf* term_text, TermInfo* tinfo) 
{
    char   *text_ptr   = (char*)CB_Get_Ptr8(term_text);
    char   *last_ptr   = (char*)CB_Get_Ptr8(self->last_text);
    size_t  text_size  = CB_Get_Size(term_text);
    size_t  last_size  = CB_Get_Size(self->last_text);

    /* Write a subset of entries to lexicon.ix. */
    if (    (self->count % self->index_interval == 0)
         && !self->temp_mode
    ) {
        S_add_last_term_to_ix(self, last_ptr, last_size);
    }

    /* Write the record; track number of terms. */
    LexStepper_Write_Record(self->stepper, self->dat_out, text_ptr, text_size, 
        last_ptr, last_size, tinfo, self->last_tinfo);
    self->count++;

    /* Remember for delta encoding. */
    CB_Copy(self->last_text, term_text);
    TInfo_copy(self->last_tinfo, tinfo);
}
Example #15
0
File: Folder.c Project: theory/lucy
static void
S_add_to_file_list(Folder *self, VArray *list, CharBuf *dir, CharBuf *prefix) {
    size_t     orig_prefix_size = CB_Get_Size(prefix);
    DirHandle *dh = Folder_Open_Dir(self, dir);
    CharBuf   *entry;

    if (!dh) {
        RETHROW(INCREF(Err_get_error()));
    }

    entry = DH_Get_Entry(dh);
    while (DH_Next(dh)) { // Updates entry
        if (!S_is_updir(entry)) {
            CharBuf *relpath = CB_newf("%o%o", prefix, entry);
            if (VA_Get_Size(list) == VA_Get_Capacity(list)) {
                VA_Grow(list, VA_Get_Size(list) * 2);
            }
            VA_Push(list, (Obj*)relpath);

            if (DH_Entry_Is_Dir(dh) && !DH_Entry_Is_Symlink(dh)) {
                CharBuf *subdir = CB_Get_Size(dir)
                                  ? CB_newf("%o/%o", dir, entry)
                                  : CB_Clone(entry);
                CB_catf(prefix, "%o/", entry);
                S_add_to_file_list(self, list, subdir, prefix); // recurse
                CB_Set_Size(prefix, orig_prefix_size);
                DECREF(subdir);
            }
        }
    }

    if (!DH_Close(dh)) {
        RETHROW(INCREF(Err_get_error()));
    }
    DECREF(dh);
}
Example #16
0
void
TextTermStepper_write_delta(TextTermStepper *self, OutStream *outstream,
                            Obj *value)
{
    CharBuf *new_value  = (CharBuf*)CERTIFY(value, CHARBUF);
    CharBuf *last_value = (CharBuf*)self->value;
    char    *new_text  = (char*)CB_Get_Ptr8(new_value);
    size_t   new_size  = CB_Get_Size(new_value);
    char    *last_text = (char*)CB_Get_Ptr8(last_value);
    size_t   last_size = CB_Get_Size(last_value);

    // Count how many bytes the strings share at the top.  
    const int32_t overlap = StrHelp_overlap(last_text, new_text,
        last_size, new_size);
    const char *const diff_start_str = new_text + overlap;
    const size_t diff_len            = new_size - overlap;

    // Write number of common bytes and common bytes. 
    OutStream_Write_C32(outstream, overlap);
    OutStream_Write_String(outstream, diff_start_str, diff_len);

    // Update value. 
    CB_Mimic((CharBuf*)self->value, value);
}
Example #17
0
File: Folder.c Project: theory/lucy
VArray*
Folder_list_r(Folder *self, const CharBuf *path) {
    Folder *local_folder = Folder_Find_Folder(self, path);
    VArray *list =  VA_new(0);
    if (local_folder) {
        CharBuf *dir    = CB_new(20);
        CharBuf *prefix = CB_new(20);
        if (path && CB_Get_Size(path)) {
            CB_setf(prefix, "%o/", path);
        }
        S_add_to_file_list(local_folder, list, dir, prefix);
        DECREF(prefix);
        DECREF(dir);
    }
    return list;
}
Example #18
0
RawPosting*
MatchPost_read_raw(MatchPosting *self, InStream *instream, i32_t last_doc_id,
                   CharBuf *term_text, MemoryPool *mem_pool)
{
    const size_t text_size        = CB_Get_Size(term_text);
    const u32_t  doc_code         = InStream_Read_C32(instream);
    const u32_t  delta_doc        = doc_code >> 1;
    const i32_t  doc_id           = last_doc_id + delta_doc;
    const u32_t  freq             = (doc_code & 1) 
                                       ? 1 
                                       : InStream_Read_C32(instream);
    size_t raw_post_bytes         = MAX_RAW_POSTING_LEN(text_size);
    void *const allocation        = MemPool_Grab(mem_pool, raw_post_bytes);
    UNUSED_VAR(self);

    return RawPost_new(allocation, doc_id, freq, term_text->ptr, text_size);
}
Example #19
0
File: Folder.c Project: theory/lucy
Folder*
Folder_find_folder(Folder *self, const CharBuf *path) {
    if (!path || !CB_Get_Size(path)) {
        return self;
    }
    else {
        ZombieCharBuf *scratch = ZCB_WRAP(path);
        Folder *enclosing_folder = S_enclosing_folder(self, scratch);
        if (!enclosing_folder) {
            return NULL;
        }
        else {
            return Folder_Local_Find_Folder(enclosing_folder,
                                            (CharBuf*)scratch);
        }
    }
}
Example #20
0
FSDirHandle*
FSDH_do_open(FSDirHandle *self, const CharBuf *dir) {
    size_t  dir_path_size = CB_Get_Size(dir);
    char   *dir_path_ptr  = (char*)CB_Get_Ptr8(dir);
    char    search_string[MAX_PATH + 1];
    char   *path_ptr = search_string;

    DH_init((DirHandle*)self, dir);
    FSDirHandleIVARS *const ivars = FSDH_IVARS(self);
    ivars->sys_dir_entry    = MALLOCATE(sizeof(WIN32_FIND_DATA));
    ivars->sys_dirhandle    = INVALID_HANDLE_VALUE;
    ivars->saved_error      = NULL;

    if (dir_path_size >= MAX_PATH - 2) {
        // Deal with Windows ceiling on file path lengths.
        Err_set_error(Err_new(CB_newf("Directory path is too long: %o",
                                      dir)));
        CFISH_DECREF(self);
        return NULL;
    }

    // Append trailing wildcard so Windows lists dir contents rather than just
    // the dir name itself.
    memcpy(path_ptr, dir_path_ptr, dir_path_size);
    memcpy(path_ptr + dir_path_size, "\\*\0", 3);

    ivars->sys_dirhandle
        = FindFirstFile(search_string, (WIN32_FIND_DATA*)ivars->sys_dir_entry);
    if (INVALID_HANDLE_VALUE == ivars->sys_dirhandle) {
        // Directory inaccessible or doesn't exist.
        Err_set_error(Err_new(CB_newf("Failed to open dir '%o'", dir)));
        CFISH_DECREF(self);
        return NULL;
    }
    else {
        // Compensate for the fact that FindFirstFile has already returned the
        // first entry but DirHandle's API requires that you call Next() to
        // start the iterator.
        ivars->delayed_iter = true;
    }

    return self;
}
Example #21
0
bool_t
Json_spew_json(Obj *dump, Folder *folder, const CharBuf *path) {
    CharBuf *json = Json_to_json(dump);
    if (!json) {
        ERR_ADD_FRAME(Err_get_error());
        return false;
    }
    OutStream *outstream = Folder_Open_Out(folder, path);
    if (!outstream) {
        ERR_ADD_FRAME(Err_get_error());
        DECREF(json);
        return false;
    }
    size_t size = CB_Get_Size(json);
    OutStream_Write_Bytes(outstream, CB_Get_Ptr8(json), size);
    OutStream_Close(outstream);
    DECREF(outstream);
    DECREF(json);
    return true;
}
Example #22
0
File: Folder.c Project: theory/lucy
bool
Folder_mkdir(Folder *self, const CharBuf *path) {
    Folder *enclosing_folder = Folder_Enclosing_Folder(self, path);
    bool result = false;

    if (!CB_Get_Size(path)) {
        Err_set_error(Err_new(CB_newf("Invalid path: '%o'", path)));
    }
    else if (!enclosing_folder) {
        Err_set_error(Err_new(CB_newf("Can't recursively create dir %o",
                                      path)));
    }
    else {
        ZombieCharBuf *name = IxFileNames_local_part(path, ZCB_BLANK());
        result = Folder_Local_MkDir(enclosing_folder, (CharBuf*)name);
        if (!result) {
            ERR_ADD_FRAME(Err_get_error());
        }
    }

    return result;
}
Example #23
0
bool
FSDH_entry_is_symlink(FSDirHandle *self) {
    FSDirHandleIVARS *const ivars = FSDH_IVARS(self);
    struct dirent *sys_dir_entry = (struct dirent*)ivars->sys_dir_entry;
    if (!sys_dir_entry) { return false; }

    #ifdef CHY_HAS_DIRENT_D_TYPE
    return sys_dir_entry->d_type == DT_LNK ? true : false;
    #else
    {
        struct stat stat_buf;
        if (!ivars->fullpath) {
            ivars->fullpath = CB_new(CB_Get_Size(ivars->dir) + 20);
        }
        CB_setf(ivars->fullpath, "%o%s%o", ivars->dir, CHY_DIR_SEP,
                ivars->entry);
        if (stat((char*)CB_Get_Ptr8(ivars->fullpath), &stat_buf) != -1) {
            if (stat_buf.st_mode & S_IFLNK) { return true; }
        }
        return false;
    }
    #endif // CHY_HAS_DIRENT_D_TYPE
}
Example #24
0
void
StrHelp_add_indent(CharBuf *charbuf, size_t amount)
{
    u32_t num_margins = 1;
    size_t new_size; 
    char *limit   = CBEND(charbuf);
    char *source  = charbuf->ptr;
    char *dest;

    /* Add a margin for every newline. */
    for ( ; source < limit; source++) {
        if (*source == '\n')
            num_margins++;
    }

    /* Make space for margins. */
    new_size = CB_Get_Size(charbuf) + (num_margins * amount);
    CB_Grow(charbuf, new_size);
    source = CBEND(charbuf);
    CB_Set_Size(charbuf, new_size);
    dest = CBEND(charbuf);
    *dest-- = '\0';
    source--;

    while (source >= charbuf->ptr) {
        if (*source == '\n') {
            int i = amount;
            while (i--) {
                *dest-- = ' ';
            }
        }
        *dest-- = *source--;
    }
    while (dest >= charbuf->ptr) {
        *dest-- =  ' ';
    }
}
Example #25
0
void
DocWriter_add_inverted_doc(DocWriter *self, Inverter *inverter,
                           int32_t doc_id) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    OutStream *dat_out    = S_lazy_init(self);
    OutStream *ix_out     = ivars->ix_out;
    uint32_t   num_stored = 0;
    int64_t    start      = OutStream_Tell(dat_out);
    int64_t    expected   = OutStream_Tell(ix_out) / 8;

    // Verify doc id.
    if (doc_id != expected) {
        THROW(ERR, "Expected doc id %i64 but got %i32", expected, doc_id);
    }

    // Write the number of stored fields.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (FType_Stored(type)) { num_stored++; }
    }
    OutStream_Write_C32(dat_out, num_stored);

    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        // Only store fields marked as "stored".
        FieldType *type = Inverter_Get_Type(inverter);
        if (FType_Stored(type)) {
            CharBuf *field = Inverter_Get_Field_Name(inverter);
            Obj *value = Inverter_Get_Value(inverter);
            Freezer_serialize_charbuf(field, dat_out);
            switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
                case FType_TEXT: {
                    uint8_t *buf  = CB_Get_Ptr8((CharBuf*)value);
                    size_t   size = CB_Get_Size((CharBuf*)value);
                    OutStream_Write_C32(dat_out, size);
                    OutStream_Write_Bytes(dat_out, buf, size);
                    break;
                }
                case FType_BLOB: {
                    char   *buf  = BB_Get_Buf((ByteBuf*)value);
                    size_t  size = BB_Get_Size((ByteBuf*)value);
                    OutStream_Write_C32(dat_out, size);
                    OutStream_Write_Bytes(dat_out, buf, size);
                    break;
                }
                case FType_INT32: {
                    int32_t val = Int32_Get_Value((Integer32*)value);
                    OutStream_Write_C32(dat_out, val);
                    break;
                }
                case FType_INT64: {
                    int64_t val = Int64_Get_Value((Integer64*)value);
                    OutStream_Write_C64(dat_out, val);
                    break;
                }
                case FType_FLOAT32: {
                    float val = Float32_Get_Value((Float32*)value);
                    OutStream_Write_F32(dat_out, val);
                    break;
                }
                case FType_FLOAT64: {
                    double val = Float64_Get_Value((Float64*)value);
                    OutStream_Write_F64(dat_out, val);
                    break;
                }
                default:
                    THROW(ERR, "Unrecognized type: %o", type);
            }
        }
    }

    // Write file pointer.
    OutStream_Write_I64(ix_out, start);
}
Example #26
0
FSFileHandle*
FSFH_do_open(FSFileHandle *self, const CharBuf *path, uint32_t flags) {
    FH_do_open((FileHandle*)self, path, flags);
    if (!path || !CB_Get_Size(path)) {
        Err_set_error(Err_new(CB_newf("Missing required param 'path'")));
        CFISH_DECREF(self);
        return NULL;
    }

    // Attempt to open file.
    if (flags & FH_WRITE_ONLY) {
        self->fd = open((char*)CB_Get_Ptr8(path), SI_posix_flags(flags), 0666);
        if (self->fd == -1) {
            self->fd = 0;
            Err_set_error(Err_new(CB_newf("Attempt to open '%o' failed: %s",
                                          path, strerror(errno))));
            CFISH_DECREF(self);
            return NULL;
        }
        if (flags & FH_EXCLUSIVE) {
            self->len = 0;
        }
        else {
            // Derive length.
            self->len = lseek64(self->fd, I64_C(0), SEEK_END);
            if (self->len == -1) {
                Err_set_error(Err_new(CB_newf("lseek64 on %o failed: %s",
                                              self->path, strerror(errno))));
                CFISH_DECREF(self);
                return NULL;
            }
            else {
                int64_t check_val = lseek64(self->fd, I64_C(0), SEEK_SET);
                if (check_val == -1) {
                    Err_set_error(Err_new(CB_newf("lseek64 on %o failed: %s",
                                                  self->path, strerror(errno))));
                    CFISH_DECREF(self);
                    return NULL;
                }
            }
        }
    }
    else if (flags & FH_READ_ONLY) {
        if (SI_init_read_only(self)) {
            // On 64-bit systems, map the whole file up-front.
            if (IS_64_BIT && self->len) {
                self->buf = (char*)SI_map(self, 0, self->len);
                if (!self->buf) {
                    // An error occurred during SI_map, which has set
                    // Err_error for us already.
                    CFISH_DECREF(self);
                    return NULL;
                }
            }
        }
        else {
            CFISH_DECREF(self);
            return NULL;
        }
    }
    else {
        Err_set_error(Err_new(CB_newf("Must specify FH_READ_ONLY or FH_WRITE_ONLY to open '%o'",
                                      path)));
        CFISH_DECREF(self);
        return NULL;
    }

    return self;
}
Example #27
0
static void
S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
                           OutStream *skip_stream) {
    PostingPoolIVARS *const ivars = PostPool_IVARS(self);
    TermInfo      *const tinfo            = TInfo_new(0);
    TermInfo      *const skip_tinfo       = TInfo_new(0);
    TermInfoIVARS *const tinfo_ivars      = TInfo_IVARS(tinfo);
    TermInfoIVARS *const skip_tinfo_ivars = TInfo_IVARS(skip_tinfo);
    LexiconWriter *const lex_writer       = ivars->lex_writer;
    SkipStepper   *const skip_stepper     = ivars->skip_stepper;
    SkipStepperIVARS *const skip_stepper_ivars
        = SkipStepper_IVARS(skip_stepper);
    int32_t        last_skip_doc          = 0;
    int64_t        last_skip_filepos      = 0;
    const int32_t  skip_interval
        = Arch_Skip_Interval(Schema_Get_Architecture(ivars->schema));

    // Prime heldover variables.
    RawPosting *posting
        = (RawPosting*)CERTIFY(PostPool_Fetch(self), RAWPOSTING);
    RawPostingIVARS *post_ivars = RawPost_IVARS(posting);
    CharBuf *last_term_text
        = CB_new_from_trusted_utf8(post_ivars->blob, post_ivars->content_len);
    const char *last_text_buf  = CB_Get_Ptr8(last_term_text);
    uint32_t    last_text_size = CB_Get_Size(last_term_text);
    SkipStepper_Set_ID_And_Filepos(skip_stepper, 0, 0);

    // Initialize sentinel to be used on the last iter, using an empty string
    // in order to make LexiconWriter Do The Right Thing.
    size_t sentinel_size = Class_Get_Obj_Alloc_Size(RAWPOSTING)
                           + 20;  // blob length + cushion
    char empty_string[] = "";
    RawPosting *sentinel = RawPost_new(alloca(sentinel_size), 0, 1,
                                       empty_string, 0);

    while (1) {
        bool same_text_as_last = true;

        if (posting == NULL) {
            // On the last iter, use an empty string to make LexiconWriter
            // DTRT.
            posting = sentinel;
            post_ivars = RawPost_IVARS(posting);
            same_text_as_last = false;
        }
        else {
            // Compare once.
            if (post_ivars->content_len != last_text_size
                || memcmp(&post_ivars->blob, last_text_buf, last_text_size) != 0
               ) {
                same_text_as_last = false;
            }
        }

        // If the term text changes, process the last term.
        if (!same_text_as_last) {
            // Hand off to LexiconWriter.
            LexWriter_Add_Term(lex_writer, (Obj*)last_term_text, tinfo);

            // Start each term afresh.
            TInfo_Reset(tinfo);
            PostWriter_Start_Term(post_writer, tinfo);

            // Init skip data in preparation for the next term.
            skip_stepper_ivars->doc_id  = 0;
            skip_stepper_ivars->filepos = tinfo_ivars->post_filepos;
            last_skip_doc         = 0;
            last_skip_filepos     = tinfo_ivars->post_filepos;

            // Remember the term_text so we can write string diffs.
            CB_Mimic_Utf8(last_term_text, post_ivars->blob,
                          post_ivars->content_len);
            last_text_buf  = CB_Get_Ptr8(last_term_text);
            last_text_size = CB_Get_Size(last_term_text);
        }

        // Bail on last iter before writing invalid posting data.
        if (posting == sentinel) { break; }

        // Write posting data.
        PostWriter_Write_Posting(post_writer, posting);

        // Doc freq lags by one iter.
        tinfo_ivars->doc_freq++;

        //  Write skip data.
        if (skip_stream != NULL
            && same_text_as_last
            && tinfo_ivars->doc_freq % skip_interval == 0
            && tinfo_ivars->doc_freq != 0
           ) {
            // If first skip group, save skip stream pos for term info.
            if (tinfo_ivars->doc_freq == skip_interval) {
                tinfo_ivars->skip_filepos = OutStream_Tell(skip_stream);
            }
            // Write deltas.
            last_skip_doc               = skip_stepper_ivars->doc_id;
            last_skip_filepos           = skip_stepper_ivars->filepos;
            skip_stepper_ivars->doc_id  = post_ivars->doc_id;
            PostWriter_Update_Skip_Info(post_writer, skip_tinfo);
            skip_stepper_ivars->filepos = skip_tinfo_ivars->post_filepos;
            SkipStepper_Write_Record(skip_stepper, skip_stream,
                                     last_skip_doc, last_skip_filepos);
        }

        // Retrieve the next posting from the sort pool.
        // DECREF(posting);  // No!!  DON'T destroy!!!

        posting = (RawPosting*)PostPool_Fetch(self);
        post_ivars = RawPost_IVARS(posting);
    }

    // Clean up.
    DECREF(last_term_text);
    DECREF(skip_tinfo);
    DECREF(tinfo);
}