Esempio n. 1
0
void
Method_Set_Host_Alias_IMP(Method *self, String *name) {
    if (self->host_alias) {
        THROW(ERR, "Can't Set_Host_Alias more than once");
    }
    self->host_alias_internal
        = Str_new_from_trusted_utf8(Str_Get_Ptr8(name), Str_Get_Size(name));
    self->host_alias
        = Str_new_wrap_trusted_utf8(Str_Get_Ptr8(self->host_alias_internal),
                                    Str_Get_Size(self->host_alias_internal));
}
Esempio n. 2
0
String*
Highlighter_Highlight_IMP(Highlighter *self, String *text) {
    HighlighterIVARS *const ivars = Highlighter_IVARS(self);
    size_t size = Str_Get_Size(text)
                  + Str_Get_Size(ivars->pre_tag)
                  + Str_Get_Size(ivars->post_tag);
    CharBuf *buf = CB_new(size);
    CB_Cat(buf, ivars->pre_tag);
    CB_Cat(buf, text);
    CB_Cat(buf, ivars->post_tag);
    String *retval = CB_Yield_String(buf);
    DECREF(buf);
    return retval;
}
Esempio n. 3
0
Inversion*
RegexTokenizer_Transform_Text_IMP(RegexTokenizer *self, String *text) {
    Inversion *new_inversion = Inversion_new(NULL);
    RegexTokenizer_Tokenize_Utf8(self, Str_Get_Ptr8(text),
                                 Str_Get_Size(text), new_inversion);
    return new_inversion;
}
Esempio n. 4
0
static void
S_write_lockfile_json(void *context) {
    struct lockfile_context *stuff = (struct lockfile_context*)context;
    size_t size = Str_Get_Size(stuff->json);
    OutStream_Write_Bytes(stuff->outstream, Str_Get_Ptr8(stuff->json), size);
    OutStream_Close(stuff->outstream);
}
Esempio n. 5
0
InStream*
CFReader_Local_Open_In_IMP(CompoundFileReader *self, String *name) {
    CompoundFileReaderIVARS *const ivars = CFReader_IVARS(self);
    Hash *entry = (Hash*)Hash_Fetch(ivars->records, name);

    if (!entry) {
        InStream *instream = Folder_Local_Open_In(ivars->real_folder, name);
        if (!instream) {
            ERR_ADD_FRAME(Err_get_error());
        }
        return instream;
    }
    else {
        Obj *len    = Hash_Fetch_Utf8(entry, "length", 6);
        Obj *offset = Hash_Fetch_Utf8(entry, "offset", 6);
        if (!len || !offset) {
            Err_set_error(Err_new(Str_newf("Malformed entry for '%o' in '%o'",
                                           name, Folder_Get_Path(ivars->real_folder))));
            return NULL;
        }
        else if (Str_Get_Size(ivars->path)) {
            String *fullpath = Str_newf("%o/%o", ivars->path, name);
            InStream *instream = InStream_Reopen(ivars->instream, fullpath,
                                                 Obj_To_I64(offset), Obj_To_I64(len));
            DECREF(fullpath);
            return instream;
        }
        else {
            return InStream_Reopen(ivars->instream, name, Obj_To_I64(offset),
                                   Obj_To_I64(len));
        }
    }
}
Esempio n. 6
0
void
Inverter_Add_Field_IMP(Inverter *self, InverterEntry *entry) {
    InverterIVARS *const ivars = Inverter_IVARS(self);
    InverterEntryIVARS *const entry_ivars = InvEntry_IVARS(entry);

    // Get an Inversion, going through analyzer if appropriate.
    if (entry_ivars->analyzer) {
        DECREF(entry_ivars->inversion);
        entry_ivars->inversion
            = Analyzer_Transform_Text(entry_ivars->analyzer,
                                      (String*)entry_ivars->value);
        Inversion_Invert(entry_ivars->inversion);
    }
    else if (entry_ivars->indexed || entry_ivars->highlightable) {
        String *value = (String*)entry_ivars->value;
        size_t token_len = Str_Get_Size(value);
        Token *seed = Token_new(Str_Get_Ptr8(value),
                                token_len, 0, token_len, 1.0f, 1);
        DECREF(entry_ivars->inversion);
        entry_ivars->inversion = Inversion_new(seed);
        DECREF(seed);
        Inversion_Invert(entry_ivars->inversion); // Nearly a no-op.
    }

    // Prime the iterator.
    VA_Push(ivars->entries, INCREF(entry));
    ivars->sorted = false;
}
Esempio n. 7
0
Inversion*
PolyAnalyzer_Transform_Text_IMP(PolyAnalyzer *self, String *text) {
    VArray *const   analyzers     = PolyAnalyzer_IVARS(self)->analyzers;
    const uint32_t  num_analyzers = VA_Get_Size(analyzers);
    Inversion      *retval;

    if (num_analyzers == 0) {
        size_t      token_len = Str_Get_Size(text);
        const char *buf       = Str_Get_Ptr8(text);
        Token *seed = Token_new(buf, token_len, 0, token_len, 1.0f, 1);
        retval = Inversion_new(seed);
        DECREF(seed);
    }
    else {
        Analyzer *first_analyzer = (Analyzer*)VA_Fetch(analyzers, 0);
        retval = Analyzer_Transform_Text(first_analyzer, text);
        for (uint32_t i = 1; i < num_analyzers; i++) {
            Analyzer *analyzer = (Analyzer*)VA_Fetch(analyzers, i);
            Inversion *new_inversion = Analyzer_Transform(analyzer, retval);
            DECREF(retval);
            retval = new_inversion;
        }
    }

    return retval;
}
Esempio n. 8
0
void
Freezer_serialize_string(String *string, OutStream *outstream) {
    size_t      size = Str_Get_Size(string);
    const char *buf  = Str_Get_Ptr8(string);
    OutStream_Write_C64(outstream, size);
    OutStream_Write_Bytes(outstream, buf, size);
}
Esempio n. 9
0
static String*
S_unescape(QueryParser *self, String *orig, CharBuf *buf) {
    StringIterator *iter = Str_Top(orig);
    int32_t code_point;
    UNUSED_VAR(self);

    CB_Set_Size(buf, 0);
    CB_Grow(buf, Str_Get_Size(orig) + 4);

    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point == '\\') {
            int32_t next_code_point = StrIter_Next(iter);
            if (next_code_point == ':'
                || next_code_point == '"'
                || next_code_point == '\\'
               ) {
                CB_Cat_Char(buf, next_code_point);
            }
            else {
                CB_Cat_Char(buf, code_point);
                if (next_code_point != STRITER_DONE) {
                    CB_Cat_Char(buf, next_code_point);
                }
            }
        }
        else {
            CB_Cat_Char(buf, code_point);
        }
    }

    DECREF(iter);
    return CB_To_String(buf);
}
Inversion*
WhitespaceTokenizer_Transform_Text_IMP(WhitespaceTokenizer *self,
                                       String *text) {
    Inversion *new_inversion = Inversion_new(NULL);
    WhitespaceTokenizer_Tokenize_Str(self, (char*)Str_Get_Ptr8(text),
                                     Str_Get_Size(text), new_inversion);
    return new_inversion;
}
Esempio n. 11
0
void
Freezer_serialize_string(String *string, OutStream *outstream) {
    size_t      size = Str_Get_Size(string);
    const char *buf  = Str_Get_Ptr8(string);
    if (size > INT32_MAX) {
        THROW(ERR, "Can't serialize string above 2GB: %u64", (uint64_t)size);
    }
    OutStream_Write_CU64(outstream, size);
    OutStream_Write_Bytes(outstream, buf, size);
}
Esempio n. 12
0
Method*
Method_init(Method *self, String *name, cfish_method_t callback_func,
            uint32_t offset) {
    /* The `name` member which Method exposes via the `Get_Name` accessor uses
     * a "wrapped" string because that is effectively threadsafe: an INCREF
     * results in a copy and the only reference is owned by an immortal
     * object. */
    self->name_internal
        = Str_new_from_trusted_utf8(Str_Get_Ptr8(name), Str_Get_Size(name));
    self->name
        = Str_new_wrap_trusted_utf8(Str_Get_Ptr8(self->name_internal),
                                    Str_Get_Size(self->name_internal));

    self->host_alias    = NULL;
    self->callback_func = callback_func;
    self->offset        = offset;
    self->is_excluded   = false;

    return self;
}
Esempio n. 13
0
static void
S_set_name(Class *self, const char *utf8, size_t size) {
    /*
     * We use a "wrapped" String for `name` because it's effectively
     * threadsafe: the sole reference is owned by an immortal object and any
     * INCREF spawns a copy.
     */
    self->name_internal = Str_new_from_trusted_utf8(utf8, size);
    self->name = Str_new_wrap_trusted_utf8(Str_Get_Ptr8(self->name_internal),
                                           Str_Get_Size(self->name_internal));
}
Esempio n. 14
0
static void
test_Get_Ptr8(TestBatchRunner *runner) {
    String *string = S_get_str("Banana");

    const char *ptr8 = Str_Get_Ptr8(string);
    TEST_TRUE(runner, strcmp(ptr8, "Banana") == 0, "Get_Ptr8");

    size_t size = Str_Get_Size(string);
    TEST_INT_EQ(runner, size, 6, "Get_Size");

    DECREF(string);
}
Esempio n. 15
0
static void
test_utf8proc_normalization(TestBatchRunner *runner) {
    SKIP(runner, 1,
         "utf8proc can't handle control chars or Unicode non-chars");
    return;

    for (int32_t i = 0; i < 100; i++) {
        String *source = TestUtils_random_string(rand() % 40);

        // Normalize once.
        uint8_t *normalized;
        int32_t check = utf8proc_map((const uint8_t*)Str_Get_Ptr8(source),
                                     Str_Get_Size(source),
                                     &normalized,
                                     UTF8PROC_STABLE  |
                                     UTF8PROC_COMPOSE |
                                     UTF8PROC_COMPAT  |
                                     UTF8PROC_CASEFOLD);
        if (check < 0) {
            lucy_Json_set_tolerant(1);
            String *json = lucy_Json_to_json((Obj*)source);
            if (!json) {
                json = Str_newf("[failed to encode]");
            }
            FAIL(runner, "Failed to normalize: %s", Str_Get_Ptr8(json));
            DECREF(json);
            DECREF(source);
            return;
        }

        // Normalize again.
        size_t normalized_len = strlen((char*)normalized);
        uint8_t *dupe;
        int32_t dupe_check = utf8proc_map(normalized, normalized_len, &dupe,
                                          UTF8PROC_STABLE  |
                                          UTF8PROC_COMPOSE |
                                          UTF8PROC_COMPAT  |
                                          UTF8PROC_CASEFOLD);
        if (dupe_check < 0) {
            THROW(ERR, "Unexpected normalization error: %i32", dupe_check);
        }
        int comparison = strcmp((char*)normalized, (char*)dupe);
        free(dupe);
        free(normalized);
        DECREF(source);
        if (comparison != 0) {
            FAIL(runner, "Not fully normalized");
            return;
        }
    }
    PASS(runner, "Normalization successful.");
}
Esempio n. 16
0
static char*
S_fullpath_ptr(FSFolder *self, String *path) {
    FSFolderIVARS *const ivars = FSFolder_IVARS(self);
    size_t folder_size = Str_Get_Size(ivars->path);
    size_t path_size   = Str_Get_Size(path);
    size_t full_size   = folder_size + 1 + path_size;
    const char *folder_ptr = Str_Get_Ptr8(ivars->path);
    const char *path_ptr   = Str_Get_Ptr8(path);

    char *buf = (char*)MALLOCATE(full_size + 1);
    memcpy(buf, folder_ptr, folder_size);
    buf[folder_size] = DIR_SEP[0];
    memcpy(buf + folder_size + 1, path_ptr, path_size);
    buf[full_size] = '\0';

    if (DIR_SEP[0] != '/') {
        for (size_t i = 0; i < full_size; ++i) {
            if (buf[i] == '/') { buf[i] = DIR_SEP[0]; }
        }
    }

    return buf;
}
Esempio n. 17
0
Snapshot*
Snapshot_Read_File_IMP(Snapshot *self, Folder *folder, String *path) {
    SnapshotIVARS *const ivars = Snapshot_IVARS(self);

    // Eliminate all prior data. Pick a snapshot file.
    S_zero_out(self);
    ivars->path = (path != NULL && Str_Get_Size(path) > 0)
                  ? Str_Clone(path)
                  : IxFileNames_latest_snapshot(folder);

    if (ivars->path) {
        Hash *snap_data
            = (Hash*)CERTIFY(Json_slurp_json(folder, ivars->path), HASH);
        Obj *format_obj
            = CERTIFY(Hash_Fetch_Utf8(snap_data, "format", 6), OBJ);
        int32_t format = (int32_t)Json_obj_to_i64(format_obj);
        Obj *subformat_obj = Hash_Fetch_Utf8(snap_data, "subformat", 9);
        int32_t subformat = subformat_obj
                            ? (int32_t)Json_obj_to_i64(subformat_obj)
                            : 0;

        // Verify that we can read the index properly.
        if (format > Snapshot_current_file_format) {
            THROW(ERR, "Snapshot format too recent: %i32, %i32",
                  format, Snapshot_current_file_format);
        }

        // Build up list of entries.
        Vector *list = (Vector*)INCREF(CERTIFY(
                           Hash_Fetch_Utf8(snap_data, "entries", 7),
                           VECTOR));
        if (format == 1 || (format == 2 && subformat < 1)) {
            Vector *cleaned = S_clean_segment_contents(list);
            DECREF(list);
            list = cleaned;
        }
        Hash_Clear(ivars->entries);
        for (uint32_t i = 0, max = Vec_Get_Size(list); i < max; i++) {
            String *entry
                = (String*)CERTIFY(Vec_Fetch(list, i), STRING);
            Hash_Store(ivars->entries, entry, (Obj*)CFISH_TRUE);
        }

        DECREF(list);
        DECREF(snap_data);
    }

    return self;
}
Esempio n. 18
0
String*
Method_Host_Name_IMP(Method *self) {
    StringIterator *iter = StrIter_new(self->name, 0);
    CharBuf *charbuf = CB_new(Str_Get_Size(self->name));
    int32_t code_point;
    while (STR_OOB != (code_point = StrIter_Next(iter))) {
        if (code_point != '_') {
            CB_Cat_Char(charbuf, code_point);
        }
    }
    String *host_name = CB_Yield_String(charbuf);
    DECREF(charbuf);
    DECREF(iter);
    return host_name;
}
Esempio n. 19
0
Folder*
FSFolder_Local_Find_Folder_IMP(FSFolder *self, String *name) {
    FSFolderIVARS *const ivars = FSFolder_IVARS(self);

    Folder *subfolder = NULL;
    if (!name || !Str_Get_Size(name)) {
        // No entity can be identified by NULL or empty string.
        return NULL;
    }
    else if (!S_is_local_entry(name)) {
        return NULL;
    }
    else if (Str_Starts_With_Utf8(name, ".", 1)) {
        // Don't allow access outside of the main dir.
        return NULL;
    }
    else if (NULL != (subfolder = (Folder*)Hash_Fetch(ivars->entries, (Obj*)name))) {
        if (Folder_Is_A(subfolder, FOLDER)) {
            return subfolder;
        }
        else {
            return NULL;
        }
    }

    String *fullpath = S_fullpath(self, name);
    if (S_dir_ok(fullpath)) {
        subfolder = (Folder*)FSFolder_new(fullpath);
        if (!subfolder) {
            DECREF(fullpath);
            THROW(ERR, "Failed to open FSFolder at '%o'", fullpath);
        }
        // Try to open a CompoundFileReader. On failure, just use the
        // existing folder.
        String *cfmeta_file = (String*)SSTR_WRAP_UTF8("cfmeta.json", 11);
        if (Folder_Local_Exists(subfolder, cfmeta_file)) {
            CompoundFileReader *cf_reader = CFReader_open(subfolder);
            if (cf_reader) {
                DECREF(subfolder);
                subfolder = (Folder*)cf_reader;
            }
        }
        Hash_Store(ivars->entries, (Obj*)name, (Obj*)subfolder);
    }
    DECREF(fullpath);

    return subfolder;
}
Esempio n. 20
0
String*
Highlighter_Create_Excerpt_IMP(Highlighter *self, HitDoc *hit_doc) {
    HighlighterIVARS *const ivars = Highlighter_IVARS(self);
    String *field_val = (String*)HitDoc_Extract(hit_doc, ivars->field);
    String *retval;

    if (!field_val || !Obj_Is_A((Obj*)field_val, STRING)) {
        retval = NULL;
    }
    else if (!Str_Get_Size(field_val)) {
        // Empty string yields empty string.
        retval = Str_new_from_trusted_utf8("", 0);
    }
    else {
        DocVector *doc_vec
            = Searcher_Fetch_Doc_Vec(ivars->searcher,
                                     HitDoc_Get_Doc_ID(hit_doc));
        VArray *maybe_spans
            = Compiler_Highlight_Spans(ivars->compiler, ivars->searcher,
                                       doc_vec, ivars->field);
        VArray *score_spans = maybe_spans ? maybe_spans : VA_new(0);
        VA_Sort(score_spans, NULL, NULL);
        HeatMap *heat_map
            = HeatMap_new(score_spans, (ivars->excerpt_length * 2) / 3);

        int32_t top;
        String *raw_excerpt
            = Highlighter_Raw_Excerpt(self, field_val, &top, heat_map);
        String *highlighted
            = Highlighter_Highlight_Excerpt(self, score_spans, raw_excerpt,
                                            top);

        DECREF(raw_excerpt);
        DECREF(heat_map);
        DECREF(score_spans);
        DECREF(doc_vec);

        retval = highlighted;
    }

    DECREF(field_val);
    return retval;
}
Esempio n. 21
0
RawPosting*
MatchPost_Read_Raw_IMP(MatchPosting *self, InStream *instream,
                       int32_t last_doc_id, String *term_text,
                       MemoryPool *mem_pool) {
    const char *const text_buf  = Str_Get_Ptr8(term_text);
    const size_t      text_size = Str_Get_Size(term_text);
    const uint32_t    doc_code  = InStream_Read_C32(instream);
    const uint32_t    delta_doc = doc_code >> 1;
    const int32_t     doc_id    = last_doc_id + delta_doc;
    const uint32_t    freq      = (doc_code & 1)
                                  ? 1
                                  : InStream_Read_C32(instream);
    const size_t base_size = VTable_Get_Obj_Alloc_Size(RAWPOSTING);
    size_t raw_post_bytes  = MAX_RAW_POSTING_LEN(base_size, text_size);
    void *const allocation = MemPool_Grab(mem_pool, raw_post_bytes);
    UNUSED_VAR(self);

    return RawPost_new(allocation, doc_id, freq, text_buf, text_size);
}
void
SortFieldWriter_Add_IMP(SortFieldWriter *self, int32_t doc_id, Obj *value) {
    SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self);
    Counter *counter   = ivars->counter;
    Counter_Add(counter, ivars->mem_per_entry);
    if (ivars->prim_id == FType_TEXT) {
        int64_t size = Str_Get_Size((String*)value) + 1;
        size = SI_increase_to_word_multiple(size);
        Counter_Add(counter, size);
    }
    else if (ivars->prim_id == FType_BLOB) {
        int64_t size = Blob_Get_Size((Blob*)value) + 1;
        size = SI_increase_to_word_multiple(size);
        Counter_Add(counter, size);
    }
    SFWriterElem *elem = S_SFWriterElem_create(Obj_Clone(value), doc_id);
    SortFieldWriter_Feed(self, (Obj*)elem);
    ivars->count++;
}
Esempio n. 23
0
void
TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream,
                                Obj *value) {
    TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
    CharBuf    *charbuf   = (CharBuf*)ivars->value;
    const char *last_text = CB_Get_Ptr8(charbuf);
    size_t      last_size = CB_Get_Size(charbuf);
    const char *new_text  = NULL;
    size_t      new_size  = 0;

    if (Obj_is_a(value, STRING)) {
        String *new_string = (String*)value;
        new_text = Str_Get_Ptr8(new_string);
        new_size = Str_Get_Size(new_string);
    }
    else if (Obj_is_a(value, CHARBUF)) {
        CharBuf *new_charbuf = (CharBuf*)value;
        new_text = CB_Get_Ptr8(new_charbuf);
        new_size = CB_Get_Size(new_charbuf);
    }
    else {
        THROW(ERR, "'value' must be a String or CharBuf");
    }

    // Count how many bytes the strings share at the top.
    const int32_t overlap = StrHelp_overlap(last_text, new_text,
                                            last_size, new_size);
    const char *const diff_start_str = new_text + overlap;
    const size_t diff_len            = new_size - overlap;

    // Write number of common bytes and common bytes.
    OutStream_Write_C32(outstream, overlap);
    OutStream_Write_String(outstream, diff_start_str, diff_len);

    // Update value.
    CB_Mimic_Utf8(charbuf, new_text, new_size);

    // Invalidate string.
    DECREF(ivars->string);
    ivars->string = NULL;
}
Esempio n. 24
0
void
Snapshot_Write_File_IMP(Snapshot *self, Folder *folder, String *path) {
    SnapshotIVARS *const ivars = Snapshot_IVARS(self);
    Hash   *all_data = Hash_new(0);
    Vector *list     = Snapshot_List(self);

    // Update path.
    DECREF(ivars->path);
    if (path != NULL && Str_Get_Size(path) != 0) {
        ivars->path = Str_Clone(path);
    }
    else {
        String *latest = IxFileNames_latest_snapshot(folder);
        uint64_t gen = latest ? IxFileNames_extract_gen(latest) + 1 : 1;
        char base36[StrHelp_MAX_BASE36_BYTES];
        StrHelp_to_base36(gen, &base36);
        ivars->path = Str_newf("snapshot_%s.json", &base36);
        DECREF(latest);
    }

    // Don't overwrite.
    if (Folder_Exists(folder, ivars->path)) {
        THROW(ERR, "Snapshot file '%o' already exists", ivars->path);
    }

    // Sort, then store file names.
    Vec_Sort(list);
    Hash_Store_Utf8(all_data, "entries", 7, (Obj*)list);

    // Create a JSON-izable data structure.
    Hash_Store_Utf8(all_data, "format", 6,
                    (Obj*)Str_newf("%i32", (int32_t)Snapshot_current_file_format));
    Hash_Store_Utf8(all_data, "subformat", 9,
                    (Obj*)Str_newf("%i32", (int32_t)Snapshot_current_file_subformat));

    // Write out JSON-ized data to the new file.
    Json_spew_json((Obj*)all_data, folder, ivars->path);

    DECREF(all_data);
}
Esempio n. 25
0
RawPosting*
ScorePost_Read_Raw_IMP(ScorePosting *self, InStream *instream,
                       int32_t last_doc_id, String *term_text,
                       MemoryPool *mem_pool) {
    const char *const text_buf  = Str_Get_Ptr8(term_text);
    const size_t      text_size = Str_Get_Size(term_text);
    const uint32_t    doc_code  = InStream_Read_C32(instream);
    const uint32_t    delta_doc = doc_code >> 1;
    const int32_t     doc_id    = last_doc_id + delta_doc;
    const uint32_t    freq      = (doc_code & 1)
                                  ? 1
                                  : InStream_Read_C32(instream);
    const size_t base_size = Class_Get_Obj_Alloc_Size(RAWPOSTING);
    size_t raw_post_bytes  = MAX_RAW_POSTING_LEN(base_size, text_size, freq);
    void *const allocation = MemPool_Grab(mem_pool, raw_post_bytes);
    RawPosting *const raw_posting
        = RawPost_new(allocation, doc_id, freq, text_buf, text_size);
    RawPostingIVARS *const raw_post_ivars = RawPost_IVARS(raw_posting);
    uint32_t num_prox = freq;
    char *const start = raw_post_ivars->blob + text_size;
    char *dest        = start;
    UNUSED_VAR(self);

    // Field_boost.
    *((uint8_t*)dest) = InStream_Read_U8(instream);
    dest++;

    // Read positions.
    while (num_prox--) {
        dest += InStream_Read_Raw_C64(instream, dest);
    }

    // Resize raw posting memory allocation.
    raw_post_ivars->aux_len = dest - start;
    raw_post_bytes       = dest - (char*)raw_posting;
    MemPool_Resize(mem_pool, raw_posting, raw_post_bytes);

    return raw_posting;
}
Esempio n. 26
0
static Class*
S_simple_subclass(Class *parent, String *name) {
    if (parent->flags & CFISH_fFINAL) {
        THROW(ERR, "Can't subclass final class %o", Class_Get_Name(parent));
    }

    Class *subclass
        = (Class*)Memory_wrapped_calloc(parent->class_alloc_size, 1);
    Class_Init_Obj(parent->klass, subclass);

    subclass->parent           = parent;
    subclass->flags            = parent->flags;
    subclass->obj_alloc_size   = parent->obj_alloc_size;
    subclass->class_alloc_size = parent->class_alloc_size;
    subclass->methods          = (Method**)CALLOCATE(1, sizeof(Method*));

    S_set_name(subclass, Str_Get_Ptr8(name), Str_Get_Size(name));

    memcpy(subclass->vtable, parent->vtable,
           parent->class_alloc_size - offsetof(Class, vtable));

    return subclass;
}
Esempio n. 27
0
Query*
QParser_Expand_Leaf_IMP(QueryParser *self, Query *query) {
    QueryParserIVARS *const ivars = QParser_IVARS(self);
    LeafQuery *leaf_query = (LeafQuery*)query;
    Schema    *schema     = ivars->schema;
    bool       is_phrase  = false;
    bool       ambiguous  = false;

    // Determine whether we can actually process the input.
    if (!Query_is_a(query, LEAFQUERY)) { return NULL; }
    String *full_text = LeafQuery_Get_Text(leaf_query);
    if (!Str_Get_Size(full_text)) { return NULL; }

    // If quoted, always generate PhraseQuery.
    StringIterator *top  = Str_Top(full_text);
    StringIterator *tail = Str_Tail(full_text);
    StrIter_Skip_Next_Whitespace(top);
    StrIter_Skip_Prev_Whitespace(tail);
    if (StrIter_Starts_With_Utf8(top, "\"", 1)) {
        is_phrase = true;
        StrIter_Advance(top, 1);
        if (StrIter_Ends_With_Utf8(tail, "\"", 1)
            && !StrIter_Ends_With_Utf8(tail, "\\\"", 2)
        ) {
            StrIter_Recede(tail, 1);
        }
    }
    String *source_text = StrIter_substring(top, tail);

    // Either use LeafQuery's field or default to Parser's list.
    Vector *fields;
    if (LeafQuery_Get_Field(leaf_query)) {
        fields = Vec_new(1);
        Vec_Push(fields, INCREF(LeafQuery_Get_Field(leaf_query)));
    }
    else {
        fields = (Vector*)INCREF(ivars->fields);
    }

    CharBuf *unescape_buf = CB_new(Str_Get_Size(source_text));
    Vector  *queries      = Vec_new(Vec_Get_Size(fields));
    for (uint32_t i = 0, max = Vec_Get_Size(fields); i < max; i++) {
        String   *field    = (String*)Vec_Fetch(fields, i);
        Analyzer *analyzer = ivars->analyzer
                             ? ivars->analyzer
                             : Schema_Fetch_Analyzer(schema, field);

        if (!analyzer) {
            Vec_Push(queries,
                    (Obj*)QParser_Make_Term_Query(self, field,
                                                  (Obj*)source_text));
        }
        else {
            // Extract token texts.
            String *split_source = S_unescape(self, source_text, unescape_buf);
            Vector *maybe_texts = Analyzer_Split(analyzer, split_source);
            uint32_t num_maybe_texts = Vec_Get_Size(maybe_texts);
            Vector *token_texts = Vec_new(num_maybe_texts);

            // Filter out zero-length token texts.
            for (uint32_t j = 0; j < num_maybe_texts; j++) {
                String *token_text = (String*)Vec_Fetch(maybe_texts, j);
                if (Str_Get_Size(token_text)) {
                    Vec_Push(token_texts, INCREF(token_text));
                }
            }

            if (Vec_Get_Size(token_texts) == 0) {
                /* Query might include stop words.  Who knows? */
                ambiguous = true;
            }

            // Add either a TermQuery or a PhraseQuery.
            if (is_phrase || Vec_Get_Size(token_texts) > 1) {
                Vec_Push(queries, (Obj*)
                        QParser_Make_Phrase_Query(self, field, token_texts));
            }
            else if (Vec_Get_Size(token_texts) == 1) {
                Vec_Push(queries,
                        (Obj*)QParser_Make_Term_Query(self, field, Vec_Fetch(token_texts, 0)));
            }

            DECREF(token_texts);
            DECREF(maybe_texts);
            DECREF(split_source);
        }
    }

    Query *retval;
    if (Vec_Get_Size(queries) == 0) {
        retval = (Query*)NoMatchQuery_new();
        if (ambiguous) {
            NoMatchQuery_Set_Fails_To_Match((NoMatchQuery*)retval, false);
        }
    }
    else if (Vec_Get_Size(queries) == 1) {
        retval = (Query*)INCREF(Vec_Fetch(queries, 0));
    }
    else {
        retval = QParser_Make_OR_Query(self, queries);
    }

    // Clean up.
    DECREF(unescape_buf);
    DECREF(queries);
    DECREF(fields);
    DECREF(source_text);
    DECREF(tail);
    DECREF(top);

    return retval;
}
Esempio n. 28
0
String*
Highlighter_Highlight_Excerpt_IMP(Highlighter *self, VArray *spans,
                                  String *raw_excerpt, int32_t top) {
    int32_t         hl_start        = 0;
    int32_t         hl_end          = 0;
    StringIterator *iter            = Str_Top(raw_excerpt);
    StringIterator *temp            = Str_Top(raw_excerpt);
    CharBuf        *buf             = CB_new(Str_Get_Size(raw_excerpt) + 32);
    CharBuf        *encode_buf      = NULL;
    int32_t         raw_excerpt_end = top + Str_Length(raw_excerpt);

    for (uint32_t i = 0, max = VA_Get_Size(spans); i < max; i++) {
        Span *span = (Span*)VA_Fetch(spans, i);
        int32_t offset = Span_Get_Offset(span);
        if (offset < top) {
            continue;
        }
        else if (offset >= raw_excerpt_end) {
            break;
        }
        else {
            int32_t relative_start = offset - top;
            int32_t relative_end   = relative_start + Span_Get_Length(span);

            if (relative_start <= hl_end) {
                if (relative_end > hl_end) {
                    hl_end = relative_end;
                }
            }
            else {
                if (hl_start < hl_end) {
                    // Highlight previous section
                    int32_t highlighted_len = hl_end - hl_start;
                    StrIter_Assign(temp, iter);
                    StrIter_Advance(iter, highlighted_len);
                    String *to_cat = StrIter_substring(temp, iter);
                    String *encoded = S_do_encode(self, to_cat, &encode_buf);
                    String *hl_frag = Highlighter_Highlight(self, encoded);
                    CB_Cat(buf, hl_frag);
                    DECREF(hl_frag);
                    DECREF(encoded);
                    DECREF(to_cat);
                }

                int32_t non_highlighted_len = relative_start - hl_end;
                StrIter_Assign(temp, iter);
                StrIter_Advance(iter, non_highlighted_len);
                String *to_cat = StrIter_substring(temp, iter);
                String *encoded = S_do_encode(self, to_cat, &encode_buf);
                CB_Cat(buf, (String*)encoded);
                DECREF(encoded);
                DECREF(to_cat);

                hl_start = relative_start;
                hl_end   = relative_end;
            }
        }
    }

    if (hl_start < hl_end) {
        // Highlight final section
        int32_t highlighted_len = hl_end - hl_start;
        StrIter_Assign(temp, iter);
        StrIter_Advance(iter, highlighted_len);
        String *to_cat = StrIter_substring(temp, iter);
        String *encoded = S_do_encode(self, to_cat, &encode_buf);
        String *hl_frag = Highlighter_Highlight(self, encoded);
        CB_Cat(buf, hl_frag);
        DECREF(hl_frag);
        DECREF(encoded);
        DECREF(to_cat);
    }

    // Last text, beyond last highlight span.
    if (StrIter_Has_Next(iter)) {
        String *to_cat = StrIter_substring(iter, NULL);
        String *encoded = S_do_encode(self, to_cat, &encode_buf);
        CB_Cat(buf, encoded);
        DECREF(encoded);
        DECREF(to_cat);
    }

    String *highlighted = CB_Yield_String(buf);
    DECREF(encode_buf);
    DECREF(buf);
    DECREF(temp);
    DECREF(iter);
    return highlighted;
}
Esempio n. 29
0
void
DocWriter_Add_Inverted_Doc_IMP(DocWriter *self, Inverter *inverter,
                               int32_t doc_id) {
    DocWriterIVARS *const ivars = DocWriter_IVARS(self);
    OutStream *dat_out    = S_lazy_init(self);
    OutStream *ix_out     = ivars->ix_out;
    uint32_t   num_stored = 0;
    int64_t    start      = OutStream_Tell(dat_out);
    int64_t    expected   = OutStream_Tell(ix_out) / 8;

    // Verify doc id.
    if (doc_id != expected) {
        THROW(ERR, "Expected doc id %i64 but got %i32", expected, doc_id);
    }

    // Write the number of stored fields.
    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        FieldType *type = Inverter_Get_Type(inverter);
        if (FType_Stored(type)) { num_stored++; }
    }
    OutStream_Write_C32(dat_out, num_stored);

    Inverter_Iterate(inverter);
    while (Inverter_Next(inverter)) {
        // Only store fields marked as "stored".
        FieldType *type = Inverter_Get_Type(inverter);
        if (FType_Stored(type)) {
            String *field = Inverter_Get_Field_Name(inverter);
            Obj *value = Inverter_Get_Value(inverter);
            Freezer_serialize_string(field, dat_out);
            switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
                case FType_TEXT: {
                    const char *buf  = Str_Get_Ptr8((String*)value);
                    size_t      size = Str_Get_Size((String*)value);
                    OutStream_Write_C32(dat_out, size);
                    OutStream_Write_Bytes(dat_out, buf, size);
                    break;
                }
                case FType_BLOB: {
                    char   *buf  = BB_Get_Buf((ByteBuf*)value);
                    size_t  size = BB_Get_Size((ByteBuf*)value);
                    OutStream_Write_C32(dat_out, size);
                    OutStream_Write_Bytes(dat_out, buf, size);
                    break;
                }
                case FType_INT32: {
                    int32_t val = Int32_Get_Value((Integer32*)value);
                    OutStream_Write_C32(dat_out, val);
                    break;
                }
                case FType_INT64: {
                    int64_t val = Int64_Get_Value((Integer64*)value);
                    OutStream_Write_C64(dat_out, val);
                    break;
                }
                case FType_FLOAT32: {
                    float val = Float32_Get_Value((Float32*)value);
                    OutStream_Write_F32(dat_out, val);
                    break;
                }
                case FType_FLOAT64: {
                    double val = Float64_Get_Value((Float64*)value);
                    OutStream_Write_F64(dat_out, val);
                    break;
                }
                default:
                    THROW(ERR, "Unrecognized type: %o", type);
            }
        }
    }

    // Write file pointer.
    OutStream_Write_I64(ix_out, start);
}
Esempio n. 30
0
void
Class_bootstrap(const cfish_ParcelSpec *parcel_spec) {
    const ClassSpec          *specs            = parcel_spec->class_specs;
    const NovelMethSpec      *novel_specs      = parcel_spec->novel_specs;
    const OverriddenMethSpec *overridden_specs = parcel_spec->overridden_specs;
    const InheritedMethSpec  *inherited_specs  = parcel_spec->inherited_specs;
    uint32_t num_classes = parcel_spec->num_classes;

    /* Pass 1:
     * - Allocate memory.
     * - Initialize global Class pointers.
     */
    for (uint32_t i = 0; i < num_classes; ++i) {
        const ClassSpec *spec = &specs[i];
        Class *parent = NULL;

        if (spec->parent) {
            parent = *spec->parent;
            if (!parent) {
                // Wrong order of class specs or inheritance cycle.
                fprintf(stderr, "Parent class of '%s' not initialized\n",
                        spec->name);
                abort();
            }
        }

        uint32_t novel_offset = parent
                                ? parent->class_alloc_size
                                : offsetof(Class, vtable);
        uint32_t class_alloc_size = novel_offset
                                    + spec->num_novel_meths
                                      * sizeof(cfish_method_t);

        Class *klass = (Class*)CALLOCATE(class_alloc_size, 1);

        // Needed to calculate size of subclasses.
        klass->class_alloc_size = class_alloc_size;

        // Initialize the global pointer to the Class.
        if (!Atomic_cas_ptr((void**)spec->klass, NULL, klass)) {
            // Another thread beat us to it.
            FREEMEM(klass);
        }
    }

    /* Pass 2:
     * - Initialize IVARS_OFFSET.
     * - Initialize 'klass' ivar and refcount by calling Init_Obj.
     * - Initialize parent, flags, obj_alloc_size, class_alloc_size.
     * - Assign parcel_spec.
     * - Initialize method pointers and offsets.
     */
    uint32_t num_novel      = 0;
    uint32_t num_overridden = 0;
    uint32_t num_inherited  = 0;
    for (uint32_t i = 0; i < num_classes; ++i) {
        const ClassSpec *spec = &specs[i];
        Class *klass  = *spec->klass;
        Class *parent = spec->parent ? *spec->parent : NULL;

        uint32_t ivars_offset = 0;
        if (spec->ivars_offset_ptr != NULL) {
            if (parent) {
                Class *ancestor = parent;
                while (ancestor && ancestor->parcel_spec == parcel_spec) {
                    ancestor = ancestor->parent;
                }
                ivars_offset = ancestor ? ancestor->obj_alloc_size : 0;
                *spec->ivars_offset_ptr = ivars_offset;
            }
            else {
                *spec->ivars_offset_ptr = 0;
            }
        }

        // CLASS->obj_alloc_size is always 0, so Init_Obj doesn't clear any
        // values set in the previous pass or by another thread.
        Class_Init_Obj_IMP(CLASS, klass);

        klass->parent      = parent;
        klass->parcel_spec = parcel_spec;

        // CLASS->obj_alloc_size must stay at 0.
        if (klass != CLASS) {
            klass->obj_alloc_size = ivars_offset + spec->ivars_size;
        }
        if (cfish_Class_bootstrap_hook1 != NULL) {
            cfish_Class_bootstrap_hook1(klass);
        }

        klass->flags = 0;
        if (klass == CLASS
            || klass == METHOD
            || klass == BOOLEAN
            || klass == STRING
           ) {
            klass->flags |= CFISH_fREFCOUNTSPECIAL;
        }
        if (spec->flags & cfish_ClassSpec_FINAL) {
            klass->flags |= CFISH_fFINAL;
        }

        if (parent) {
            // Copy parent vtable.
            uint32_t parent_vt_size = parent->class_alloc_size
                                      - offsetof(Class, vtable);
            memcpy(klass->vtable, parent->vtable, parent_vt_size);
        }

        for (size_t i = 0; i < spec->num_inherited_meths; ++i) {
            const InheritedMethSpec *mspec = &inherited_specs[num_inherited++];
            *mspec->offset = *mspec->parent_offset;
        }

        for (size_t i = 0; i < spec->num_overridden_meths; ++i) {
            const OverriddenMethSpec *mspec
                = &overridden_specs[num_overridden++];
            *mspec->offset = *mspec->parent_offset;
            Class_Override_IMP(klass, mspec->func, *mspec->offset);
        }

        uint32_t novel_offset = parent
                                ? parent->class_alloc_size
                                : offsetof(Class, vtable);

        for (size_t i = 0; i < spec->num_novel_meths; ++i) {
            const NovelMethSpec *mspec = &novel_specs[num_novel++];
            *mspec->offset = novel_offset;
            novel_offset += sizeof(cfish_method_t);
            Class_Override_IMP(klass, mspec->func, *mspec->offset);
        }
    }

    /* Now it's safe to call methods.
     *
     * Pass 3:
     * - Inititalize name and method array.
     * - Register class.
     */
    num_novel      = 0;
    num_overridden = 0;
    num_inherited  = 0;
    for (uint32_t i = 0; i < num_classes; ++i) {
        const ClassSpec *spec = &specs[i];
        Class *klass = *spec->klass;

        String *name_internal
            = Str_new_from_trusted_utf8(spec->name, strlen(spec->name));
        if (!Atomic_cas_ptr((void**)&klass->name_internal, NULL,
                            name_internal)
           ) {
            DECREF(name_internal);
            name_internal = klass->name_internal;
        }

        String *name = Str_new_wrap_trusted_utf8(Str_Get_Ptr8(name_internal),
                                                 Str_Get_Size(name_internal));
        if (!Atomic_cas_ptr((void**)&klass->name, NULL, name)) {
            DECREF(name);
            name = klass->name;
        }

        Method **methods = (Method**)MALLOCATE((spec->num_novel_meths + 1)
                                               * sizeof(Method*));

        // Only store novel methods for now.
        for (size_t i = 0; i < spec->num_novel_meths; ++i) {
            const NovelMethSpec *mspec = &novel_specs[num_novel++];
            String *name = SSTR_WRAP_C(mspec->name);
            Method *method = Method_new(name, mspec->callback_func,
                                        *mspec->offset);
            methods[i] = method;
        }

        methods[spec->num_novel_meths] = NULL;

        if (!Atomic_cas_ptr((void**)&klass->methods, NULL, methods)) {
            // Another thread beat us to it.
            for (size_t i = 0; i < spec->num_novel_meths; ++i) {
                Method_Destroy(methods[i]);
            }
            FREEMEM(methods);
        }

        Class_add_to_registry(klass);
    }
}