コード例 #1
0
void
LexWriter_start_field(LexiconWriter *self, i32_t field_num)
{
    CharBuf  *const seg_name = Seg_Get_Name(self->segment);
    Folder   *const folder   = self->folder;
    Snapshot *const snapshot = LexWriter_Get_Snapshot(self);

    /* Open outstreams. */
    CB_setf(self->dat_file,  "%o/lexicon-%i32.dat",  seg_name, field_num);
    CB_setf(self->ix_file,   "%o/lexicon-%i32.ix",   seg_name, field_num);
    CB_setf(self->ixix_file, "%o/lexicon-%i32.ixix", seg_name, field_num);
    Snapshot_Add_Entry(snapshot, self->dat_file);
    Snapshot_Add_Entry(snapshot, self->ix_file);
    Snapshot_Add_Entry(snapshot, self->ixix_file);
    self->dat_out  = Folder_Open_Out(folder, self->dat_file);
    self->ix_out   = Folder_Open_Out(folder, self->ix_file);
    self->ixix_out = Folder_Open_Out(folder, self->ixix_file);
    if (!self->dat_out)  { THROW("Can't open %o", self->dat_file); }
    if (!self->ix_out)   { THROW("Can't open %o", self->ix_file); }
    if (!self->ixix_out) { THROW("Can't open %o", self->ixix_file); }

    /* Initialize count and ix_count, TermInfo and last term text. */
    self->count    = 0;
    self->ix_count = 0;
    TInfo_Reset(self->last_tinfo);
    CB_Set_Size(self->last_text, 0);
}
コード例 #2
0
ファイル: TextType.c プロジェクト: kidaa/lucy
void
TextTermStepper_Read_Delta_IMP(TextTermStepper *self, InStream *instream) {
    TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
    const uint32_t text_overlap     = InStream_Read_C32(instream);
    const uint32_t finish_chars_len = InStream_Read_C32(instream);
    const uint32_t total_text_len   = text_overlap + finish_chars_len;

    // Allocate space.
    CharBuf *charbuf = (CharBuf*)ivars->value;
    char    *ptr     = CB_Grow(charbuf, total_text_len);

    // Set the value text.
    InStream_Read_Bytes(instream, ptr + text_overlap, finish_chars_len);
    CB_Set_Size(charbuf, total_text_len);
    if (!StrHelp_utf8_valid(ptr, total_text_len)) {
        THROW(ERR, "Invalid UTF-8 sequence in '%o' at byte %i64",
              InStream_Get_Filename(instream),
              InStream_Tell(instream) - finish_chars_len);
    }

    // Null-terminate.
    ptr[total_text_len] = '\0';

    // Invalidate string.
    DECREF(ivars->string);
    ivars->string = NULL;
}
コード例 #3
0
ファイル: QueryParser.c プロジェクト: kidaa/lucy
static String*
S_unescape(QueryParser *self, String *orig, CharBuf *buf) {
    StringIterator *iter = Str_Top(orig);
    int32_t code_point;
    UNUSED_VAR(self);

    CB_Set_Size(buf, 0);
    CB_Grow(buf, Str_Get_Size(orig) + 4);

    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point == '\\') {
            int32_t next_code_point = StrIter_Next(iter);
            if (next_code_point == ':'
                || next_code_point == '"'
                || next_code_point == '\\'
               ) {
                CB_Cat_Char(buf, next_code_point);
            }
            else {
                CB_Cat_Char(buf, code_point);
                if (next_code_point != STRITER_DONE) {
                    CB_Cat_Char(buf, next_code_point);
                }
            }
        }
        else {
            CB_Cat_Char(buf, code_point);
        }
    }

    DECREF(iter);
    return CB_To_String(buf);
}
コード例 #4
0
ファイル: TextSortCache.c プロジェクト: gitpan/KinoSearch
Obj*
TextSortCache_value(TextSortCache *self, int32_t ord, Obj *blank)
{
    if (ord == self->null_ord) {
        return NULL;
    }
    InStream_Seek(self->ix_in, ord * sizeof(int64_t));
    int64_t offset = InStream_Read_I64(self->ix_in);
    if (offset == NULL_SENTINEL) { 
        return NULL; 
    }
    else {
        uint32_t next_ord = ord + 1;
        int64_t next_offset;
        while (1) {
            InStream_Seek(self->ix_in, next_ord * sizeof(int64_t));
            next_offset = InStream_Read_I64(self->ix_in);
            if (next_offset != NULL_SENTINEL) { break; }
            next_ord++;
        }

        // Read character data into CharBuf. 
        CERTIFY(blank, CHARBUF);
        int64_t len = next_offset - offset;
        char *ptr = CB_Grow((CharBuf*)blank, (size_t)len);
        InStream_Seek(self->dat_in, offset);
        InStream_Read_Bytes(self->dat_in, ptr, (size_t)len);
        ptr[len] = '\0';
        CB_Set_Size((CharBuf*)blank, (size_t)len);
    }
    return blank;
}
コード例 #5
0
ファイル: Hash.c プロジェクト: pavansondur/lucy
Hash*
Hash_deserialize(Hash *self, InStream *instream) {
    uint32_t size         = InStream_Read_C32(instream);
    uint32_t num_charbufs = InStream_Read_C32(instream);
    uint32_t num_other    = size - num_charbufs;
    CharBuf *key          = num_charbufs ? CB_new(0) : NULL;

    Hash_init(self, size);

    // Read key-value pairs with CharBuf keys.
    while (num_charbufs--) {
        uint32_t len = InStream_Read_C32(instream);
        char *key_buf = CB_Grow(key, len);
        InStream_Read_Bytes(instream, key_buf, len);
        key_buf[len] = '\0';
        CB_Set_Size(key, len);
        Hash_Store(self, (Obj*)key, THAW(instream));
    }
    DECREF(key);

    // Read remaining key/value pairs.
    while (num_other--) {
        Obj *k = THAW(instream);
        Hash_Store(self, k, THAW(instream));
        DECREF(k);
    }

    return self;
}
コード例 #6
0
ファイル: TextType.c プロジェクト: gitpan/KinoSearch
void
TextTermStepper_read_key_frame(TextTermStepper *self, InStream *instream)
{ 
    const uint32_t text_len = InStream_Read_C32(instream);
    CharBuf *value;
    char *ptr;

    // Allocate space. 
    if (self->value == NULL) {
        self->value = (Obj*)CB_new(text_len);
    }
    value = (CharBuf*)self->value;
    ptr   = CB_Grow(value, text_len);

    // Set the value text. 
    InStream_Read_Bytes(instream, ptr, text_len);
    CB_Set_Size(value, text_len);
    if (!StrHelp_utf8_valid(ptr, text_len)) {
        THROW(ERR, "Invalid UTF-8 sequence in '%o' at byte %i64",
            InStream_Get_Filename(instream), 
            InStream_Tell(instream) - text_len);
    }

    // Null-terminate. 
    ptr[text_len] = '\0';
}
コード例 #7
0
ファイル: CharBuf.c プロジェクト: pavansondur/lucy
void
CB_setf(CharBuf *self, const char *pattern, ...) {
    va_list args;
    CB_Set_Size(self, 0);
    va_start(args, pattern);
    CB_VCatF(self, pattern, args);
    va_end(args);
}
コード例 #8
0
ファイル: TextType.c プロジェクト: kidaa/lucy
void
TextTermStepper_Reset_IMP(TextTermStepper *self) {
    TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
    CB_Set_Size((CharBuf*)ivars->value, 0);
    // Invalidate string.
    DECREF(ivars->string);
    ivars->string = NULL;
}
コード例 #9
0
ファイル: Highlighter.c プロジェクト: hernan604/lucy
static String*
S_encode_entities(String *text, CharBuf *buf) {
    StringIterator *iter = Str_Top(text);
    size_t space = 0;
    const int MAX_ENTITY_BYTES = 9; // &#dddddd;

    // Scan first so that we only allocate once.
    int32_t code_point;
    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point > 127
                || (!isgraph(code_point) && !isspace(code_point))
                || code_point == '<'
                || code_point == '>'
                || code_point == '&'
                || code_point == '"'
           ) {
            space += MAX_ENTITY_BYTES;
        }
        else {
            space += 1;
        }
    }

    CB_Grow(buf, space);
    CB_Set_Size(buf, 0);
    DECREF(iter);
    iter = Str_Top(text);
    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point > 127
                || (!isgraph(code_point) && !isspace(code_point))
           ) {
            CB_catf(buf, "&#%u32;", code_point);
        }
        else if (code_point == '<') {
            CB_Cat_Trusted_Utf8(buf, "&lt;", 4);
        }
        else if (code_point == '>') {
            CB_Cat_Trusted_Utf8(buf, "&gt;", 4);
        }
        else if (code_point == '&') {
            CB_Cat_Trusted_Utf8(buf, "&amp;", 5);
        }
        else if (code_point == '"') {
            CB_Cat_Trusted_Utf8(buf, "&quot;", 6);
        }
        else {
            CB_Cat_Char(buf, code_point);
        }
    }

    DECREF(iter);
    return CB_To_String(buf);
}
コード例 #10
0
void
LexWriter_enter_temp_mode(LexiconWriter *self, OutStream *temp_outstream)
{
    /* Assign outstream. */
    if (self->dat_out != NULL)
        THROW("Can't enter temp mode (filename: %o) ", self->dat_file);
    self->dat_out = (OutStream*)INCREF(temp_outstream);

    /* Initialize count and ix_count, TermInfo and last term text. */
    self->count    = 0;
    self->ix_count = 0;
    TInfo_Reset(self->last_tinfo);
    CB_Set_Size(self->last_text, 0);

    /* Remember that we're in temp mode. */
    self->temp_mode = true;
}
コード例 #11
0
ファイル: TestCharBuf.c プロジェクト: pavansondur/lucy
static void
test_Find(TestBatch *batch) {
    CharBuf *string = CB_new(10);
    CharBuf *substring = S_get_cb("foo");

    TEST_TRUE(batch, CB_Find(string, substring) == -1, "Not in empty string");
    CB_setf(string, "foo");
    TEST_TRUE(batch, CB_Find(string, substring) == 0, "Find complete string");
    CB_setf(string, "afoo");
    TEST_TRUE(batch, CB_Find(string, substring) == 1, "Find after first");
    CB_Set_Size(string, 3);
    TEST_TRUE(batch, CB_Find(string, substring) == -1, "Don't overrun");
    CB_setf(string, "afood");
    TEST_TRUE(batch, CB_Find(string, substring) == 1, "Find in middle");

    DECREF(substring);
    DECREF(string);
}
コード例 #12
0
ファイル: TestCharBuf.c プロジェクト: pavansondur/lucy
static void
test_To_F64(TestBatch *batch) {
    CharBuf *charbuf = S_get_cb("1.5");
    double difference = 1.5 - CB_To_F64(charbuf);
    if (difference < 0) { difference = 0 - difference; }
    TEST_TRUE(batch, difference < 0.001, "To_F64");

    CB_setf(charbuf, "-1.5");
    difference = 1.5 + CB_To_F64(charbuf);
    if (difference < 0) { difference = 0 - difference; }
    TEST_TRUE(batch, difference < 0.001, "To_F64 negative");

    CB_setf(charbuf, "1.59");
    double value_full = CB_To_F64(charbuf);
    CB_Set_Size(charbuf, 3);
    double value_short = CB_To_F64(charbuf);
    TEST_TRUE(batch, value_short < value_full,
              "TO_F64 doesn't run past end of string");

    DECREF(charbuf);
}
コード例 #13
0
ファイル: DocVector.c プロジェクト: github188/SimpleCode
static Hash*
S_extract_tv_cache(ByteBuf *field_buf) {
    Hash       *tv_cache  = Hash_new(0);
    const char *tv_string = BB_Get_Buf(field_buf);
    int32_t     num_terms = NumUtil_decode_c32(&tv_string);
    CharBuf    *text_buf  = CB_new(0);

    // Read the number of highlightable terms in the field.
    for (int32_t i = 0; i < num_terms; i++) {
        size_t   overlap = NumUtil_decode_c32(&tv_string);
        size_t   len     = NumUtil_decode_c32(&tv_string);

        // Decompress the term text.
        CB_Set_Size(text_buf, overlap);
        CB_Cat_Trusted_Utf8(text_buf, tv_string, len);
        tv_string += len;

        // Get positions & offsets string.
        const char *bookmark_ptr  = tv_string;
        int32_t     num_positions = NumUtil_decode_c32(&tv_string);
        while (num_positions--) {
            // Leave nums compressed to save a little mem.
            NumUtil_skip_cint(&tv_string);
            NumUtil_skip_cint(&tv_string);
            NumUtil_skip_cint(&tv_string);
        }
        len = tv_string - bookmark_ptr;

        // Store the $text => $posdata pair in the output hash.
        String *text = CB_To_String(text_buf);
        Hash_Store(tv_cache, (Obj*)text,
                   (Obj*)BB_new_bytes(bookmark_ptr, len));
        DECREF(text);
    }
    DECREF(text_buf);

    return tv_cache;
}
コード例 #14
0
void
StrHelp_add_indent(CharBuf *charbuf, size_t amount)
{
    u32_t num_margins = 1;
    size_t new_size; 
    char *limit   = CBEND(charbuf);
    char *source  = charbuf->ptr;
    char *dest;

    /* Add a margin for every newline. */
    for ( ; source < limit; source++) {
        if (*source == '\n')
            num_margins++;
    }

    /* Make space for margins. */
    new_size = CB_Get_Size(charbuf) + (num_margins * amount);
    CB_Grow(charbuf, new_size);
    source = CBEND(charbuf);
    CB_Set_Size(charbuf, new_size);
    dest = CBEND(charbuf);
    *dest-- = '\0';
    source--;

    while (source >= charbuf->ptr) {
        if (*source == '\n') {
            int i = amount;
            while (i--) {
                *dest-- = ' ';
            }
        }
        *dest-- = *source--;
    }
    while (dest >= charbuf->ptr) {
        *dest-- =  ' ';
    }
}
コード例 #15
0
ファイル: FSDirHandle.c プロジェクト: theory/lucy
bool
FSDH_next(FSDirHandle *self) {
    FSDirHandleIVARS *const ivars = FSDH_IVARS(self);
    ivars->sys_dir_entry = (struct dirent*)readdir((DIR*)ivars->sys_dirhandle);
    if (!ivars->sys_dir_entry) {
        CB_Set_Size(ivars->entry, 0);
        return false;
    }
    else {
        struct dirent *sys_dir_entry = (struct dirent*)ivars->sys_dir_entry;
        #ifdef CHY_HAS_DIRENT_D_NAMLEN
        size_t len = sys_dir_entry->d_namlen;
        #else
        size_t len = strlen(sys_dir_entry->d_name);
        #endif
        if (SI_is_updir(sys_dir_entry->d_name, len)) {
            return FSDH_Next(self);
        }
        else {
            CB_Mimic_Str(ivars->entry, sys_dir_entry->d_name, len);
            return true;
        }
    }
}
コード例 #16
0
ファイル: FSDirHandle.c プロジェクト: theory/lucy
bool
FSDH_next(FSDirHandle *self) {
    FSDirHandleIVARS *const ivars = FSDH_IVARS(self);
    HANDLE           dirhandle = (HANDLE)ivars->sys_dirhandle;
    WIN32_FIND_DATA *find_data = (WIN32_FIND_DATA*)ivars->sys_dir_entry;

    // Attempt to move forward or absorb cached iter.
    if (!dirhandle || dirhandle == INVALID_HANDLE_VALUE) {
        return false;
    }
    else if (ivars->delayed_iter) {
        ivars->delayed_iter = false;
    }
    else if ((FindNextFile(dirhandle, find_data) == 0)) {
        // Iterator exhausted.  Verify that no errors were encountered.
        CB_Set_Size(ivars->entry, 0);
        if (GetLastError() != ERROR_NO_MORE_FILES) {
            char *win_error = Err_win_error();
            ivars->saved_error
                = Err_new(CB_newf("Error while traversing directory: %s",
                                  win_error));
            FREEMEM(win_error);
        }
        return false;
    }

    // Process the results of the iteration.
    size_t len = strlen(find_data->cFileName);
    if (SI_is_updir(find_data->cFileName, len)) {
        return FSDH_Next(self);
    }
    else {
        CB_Mimic_Str(ivars->entry, find_data->cFileName, len);
        return true;
    }
}
コード例 #17
0
ファイル: Folder.c プロジェクト: theory/lucy
static void
S_add_to_file_list(Folder *self, VArray *list, CharBuf *dir, CharBuf *prefix) {
    size_t     orig_prefix_size = CB_Get_Size(prefix);
    DirHandle *dh = Folder_Open_Dir(self, dir);
    CharBuf   *entry;

    if (!dh) {
        RETHROW(INCREF(Err_get_error()));
    }

    entry = DH_Get_Entry(dh);
    while (DH_Next(dh)) { // Updates entry
        if (!S_is_updir(entry)) {
            CharBuf *relpath = CB_newf("%o%o", prefix, entry);
            if (VA_Get_Size(list) == VA_Get_Capacity(list)) {
                VA_Grow(list, VA_Get_Size(list) * 2);
            }
            VA_Push(list, (Obj*)relpath);

            if (DH_Entry_Is_Dir(dh) && !DH_Entry_Is_Symlink(dh)) {
                CharBuf *subdir = CB_Get_Size(dir)
                                  ? CB_newf("%o/%o", dir, entry)
                                  : CB_Clone(entry);
                CB_catf(prefix, "%o/", entry);
                S_add_to_file_list(self, list, subdir, prefix); // recurse
                CB_Set_Size(prefix, orig_prefix_size);
                DECREF(subdir);
            }
        }
    }

    if (!DH_Close(dh)) {
        RETHROW(INCREF(Err_get_error()));
    }
    DECREF(dh);
}
コード例 #18
0
ファイル: TextType.c プロジェクト: gitpan/KinoSearch
void
TextTermStepper_reset(TextTermStepper *self)
{
    CB_Set_Size((CharBuf*)self->value, 0);
}