// Exercise CB_Mimic (from a String and from a CharBuf), CB_Mimic_Utf8 and
// CB_Clone.  Every case starts from a CharBuf holding "bar" (or a fresh
// clone) and checks that the result compares equal to the String "foo".
static void
test_Mimic_and_Clone(TestBatchRunner *runner) {
    String  *expected     = S_get_str("foo");
    CharBuf *expected_buf = S_get_cb("foo");
    CharBuf *actual       = NULL;

    // Mimic a String source.
    actual = S_get_cb("bar");
    CB_Mimic(actual, (Obj*)expected);
    TEST_TRUE(runner, S_cb_equals(actual, expected), "Mimic String");
    DECREF(actual);

    // Mimic a CharBuf source.
    actual = S_get_cb("bar");
    CB_Mimic(actual, (Obj*)expected_buf);
    TEST_TRUE(runner, S_cb_equals(actual, expected), "Mimic CharBuf");
    DECREF(actual);

    // Mimic raw UTF-8 bytes.
    actual = S_get_cb("bar");
    CB_Mimic_Utf8(actual, "foo", 3);
    TEST_TRUE(runner, S_cb_equals(actual, expected), "Mimic_Utf8");
    DECREF(actual);

    // Clone an existing CharBuf.
    actual = CB_Clone(expected_buf);
    TEST_TRUE(runner, S_cb_equals(actual, expected), "Clone");
    DECREF(actual);

    DECREF(expected);
    DECREF(expected_buf);
}
// Write `value` to `outstream` as a delta against the text currently held in
// this stepper's CharBuf, then make `value`'s text the new held text.
//
// `value` must be a String or a CharBuf; anything else triggers
// THROW(ERR, ...).  The record written is the overlap count (as a C32)
// followed by the bytes of the new text that come after the overlap.
void
TextTermStepper_Write_Delta_IMP(TextTermStepper *self, OutStream *outstream,
                                Obj *value) {
    TextTermStepperIVARS *const ivars = TextTermStepper_IVARS(self);
    CharBuf    *held      = (CharBuf*)ivars->value;
    const char *prev_text = CB_Get_Ptr8(held);
    size_t      prev_size = CB_Get_Size(held);
    const char *next_text = NULL;
    size_t      next_size = 0;

    // Pull a byte pointer and length out of whichever type we were given.
    if (Obj_is_a(value, STRING)) {
        String *as_string = (String*)value;
        next_text = Str_Get_Ptr8(as_string);
        next_size = Str_Get_Size(as_string);
    }
    else if (Obj_is_a(value, CHARBUF)) {
        CharBuf *as_charbuf = (CharBuf*)value;
        next_text = CB_Get_Ptr8(as_charbuf);
        next_size = CB_Get_Size(as_charbuf);
    }
    else {
        THROW(ERR, "'value' must be a String or CharBuf");
    }

    // Count how many leading bytes the previous and new text share.
    const int32_t overlap
        = StrHelp_overlap(prev_text, next_text, prev_size, next_size);

    // Write the shared-prefix length, then only the bytes that differ.
    OutStream_Write_C32(outstream, overlap);
    OutStream_Write_String(outstream, next_text + overlap,
                           next_size - overlap);

    // The held CharBuf now mirrors the text just written.
    CB_Mimic_Utf8(held, next_text, next_size);

    // Drop the `string` member and clear the pointer.
    DECREF(ivars->string);
    ivars->string = NULL;
}
// Fetch postings from the pool one at a time and write them out.
//
// For each posting:
//   - if its term text differs from the previous posting's, the finished
//     term is handed to the LexiconWriter and a new term is started on
//     `post_writer`;
//   - the posting itself is written via `post_writer`;
//   - when `skip_stream` is non-NULL, a skip record is written every
//     `skip_interval` postings within a term.
//
// When the pool is exhausted (PostPool_Fetch returns NULL), a stack-allocated
// sentinel posting with empty text stands in for one final iteration so that
// the last real term gets flushed to the LexiconWriter.
//
// The first fetch is wrapped in CERTIFY, so the pool is expected to yield at
// least one RawPosting.
static void
S_write_terms_and_postings(PostingPool *self, PostingWriter *post_writer,
                           OutStream *skip_stream) {
    PostingPoolIVARS *const ivars            = PostPool_IVARS(self);
    TermInfo         *const tinfo            = TInfo_new(0);
    TermInfo         *const skip_tinfo       = TInfo_new(0);
    TermInfoIVARS    *const tinfo_ivars      = TInfo_IVARS(tinfo);
    TermInfoIVARS    *const skip_tinfo_ivars = TInfo_IVARS(skip_tinfo);
    LexiconWriter    *const lex_writer       = ivars->lex_writer;
    SkipStepper      *const skip_stepper     = ivars->skip_stepper;
    SkipStepperIVARS *const skip_stepper_ivars
        = SkipStepper_IVARS(skip_stepper);
    int32_t       last_skip_doc     = 0;
    int64_t       last_skip_filepos = 0;
    const int32_t skip_interval
        = Arch_Skip_Interval(Schema_Get_Architecture(ivars->schema));

    // Prime heldover variables.
    RawPosting *posting
        = (RawPosting*)CERTIFY(PostPool_Fetch(self), RAWPOSTING);
    RawPostingIVARS *post_ivars = RawPost_IVARS(posting);
    CharBuf *last_term_text
        = CB_new_from_trusted_utf8(post_ivars->blob, post_ivars->content_len);
    const char *last_text_buf  = CB_Get_Ptr8(last_term_text);
    uint32_t    last_text_size = CB_Get_Size(last_term_text);
    SkipStepper_Set_ID_And_Filepos(skip_stepper, 0, 0);

    // Initialize sentinel to be used on the last iter, using an empty string
    // in order to make LexiconWriter Do The Right Thing.
    size_t sentinel_size
        = Class_Get_Obj_Alloc_Size(RAWPOSTING) + 20; // blob length + cushion
    char empty_string[] = "";
    RawPosting *sentinel
        = RawPost_new(alloca(sentinel_size), 0, 1, empty_string, 0);

    while (1) {
        // A NULL posting means the pool is exhausted: substitute the
        // sentinel so the final term still gets flushed below.
        const bool exhausted = (posting == NULL);
        if (exhausted) {
            posting = sentinel;
        }

        // Derive ivars only once `posting` is known to be non-NULL.
        post_ivars = RawPost_IVARS(posting);

        // Compare once.  The sentinel never counts as "same text".
        const bool same_text_as_last
            = !exhausted
              && post_ivars->content_len == last_text_size
              && memcmp(post_ivars->blob, last_text_buf,
                        last_text_size) == 0;

        // If the term text changes, process the last term.
        if (!same_text_as_last) {
            // Hand off to LexiconWriter.
            LexWriter_Add_Term(lex_writer, (Obj*)last_term_text, tinfo);

            // Start each term afresh.
            TInfo_Reset(tinfo);
            PostWriter_Start_Term(post_writer, tinfo);

            // Init skip data in preparation for the next term.
            skip_stepper_ivars->doc_id  = 0;
            skip_stepper_ivars->filepos = tinfo_ivars->post_filepos;
            last_skip_doc               = 0;
            last_skip_filepos           = tinfo_ivars->post_filepos;

            // Remember the term_text so we can write string diffs.
            CB_Mimic_Utf8(last_term_text, post_ivars->blob,
                          post_ivars->content_len);
            last_text_buf  = CB_Get_Ptr8(last_term_text);
            last_text_size = CB_Get_Size(last_term_text);
        }

        // Bail on last iter before writing invalid posting data.
        if (posting == sentinel) {
            break;
        }

        // Write posting data.
        PostWriter_Write_Posting(post_writer, posting);

        // Doc freq lags by one iter.
        tinfo_ivars->doc_freq++;

        // Write skip data.
        if (skip_stream != NULL
            && same_text_as_last
            && tinfo_ivars->doc_freq % skip_interval == 0
            && tinfo_ivars->doc_freq != 0
           ) {
            // If first skip group, save skip stream pos for term info.
            if (tinfo_ivars->doc_freq == skip_interval) {
                tinfo_ivars->skip_filepos = OutStream_Tell(skip_stream);
            }

            // Write deltas: stash the previous skip point, advance the
            // stepper to the current doc and file position, then emit the
            // record relative to the stashed values.
            last_skip_doc     = skip_stepper_ivars->doc_id;
            last_skip_filepos = skip_stepper_ivars->filepos;
            skip_stepper_ivars->doc_id = post_ivars->doc_id;
            PostWriter_Update_Skip_Info(post_writer, skip_tinfo);
            skip_stepper_ivars->filepos = skip_tinfo_ivars->post_filepos;
            SkipStepper_Write_Record(skip_stepper, skip_stream,
                                     last_skip_doc, last_skip_filepos);
        }

        // Retrieve the next posting from the sort pool.  The fetched
        // posting must NOT be DECREF'd here.  The result may be NULL;
        // that case is handled at the top of the loop.
        posting = (RawPosting*)PostPool_Fetch(self);
    }

    // Clean up.
    DECREF(last_term_text);
    DECREF(skip_tinfo);
    DECREF(tinfo);
}