/**
 * Build a highlighted excerpt for one hit.
 *
 * The value of the highlighter's configured field is extracted from
 * `hit_doc` and handled as follows:
 *   - missing, or not a STRING object: NULL is returned;
 *   - zero-sized string: a new empty String is returned;
 *   - otherwise: highlight spans are requested from the compiler, sorted,
 *     turned into a HeatMap, and used to pick a raw excerpt which is then
 *     passed through Highlighter_Highlight_Excerpt.
 *
 * @param self     The highlighter.
 * @param hit_doc  The hit whose field value is excerpted.
 * @return The highlighted excerpt, an empty String, or NULL (see above).
 *         Presumably the caller takes ownership of a non-NULL result --
 *         TODO confirm against the class declaration.
 */
String*
Highlighter_Create_Excerpt_IMP(Highlighter *self, HitDoc *hit_doc) {
    HighlighterIVARS *const ivars = Highlighter_IVARS(self);
    String *field_val = (String*)HitDoc_Extract(hit_doc, ivars->field);
    String *excerpt = NULL;

    if (field_val != NULL && Obj_Is_A((Obj*)field_val, STRING)) {
        if (Str_Get_Size(field_val) == 0) {
            // Empty string yields empty string.
            excerpt = Str_new_from_trusted_utf8("", 0);
        }
        else {
            DocVector *doc_vec
                = Searcher_Fetch_Doc_Vec(ivars->searcher,
                                         HitDoc_Get_Doc_ID(hit_doc));

            // The compiler may hand back NULL; substitute an empty array so
            // the steps below always operate on a real object.
            VArray *spans
                = Compiler_Highlight_Spans(ivars->compiler, ivars->searcher,
                                           doc_vec, ivars->field);
            if (spans == NULL) {
                spans = VA_new(0);
            }
            VA_Sort(spans, NULL, NULL);

            // Second argument is two thirds of the configured excerpt length.
            HeatMap *heat_map
                = HeatMap_new(spans, (ivars->excerpt_length * 2) / 3);

            // `top` is filled in by Highlighter_Raw_Excerpt.
            int32_t top;
            String *raw
                = Highlighter_Raw_Excerpt(self, field_val, &top, heat_map);
            excerpt = Highlighter_Highlight_Excerpt(self, spans, raw, top);

            DECREF(raw);
            DECREF(heat_map);
            DECREF(spans);
            DECREF(doc_vec);
        }
    }

    DECREF(field_val);
    return excerpt;
}
/**
 * Exercise Highlighter_Raw_Excerpt using a highlighter created with
 * field "content" and an excerpt length of 6.
 *
 * Each case builds a one-span HeatMap over a fixed field value, requests a
 * raw excerpt, and checks the excerpt text (and, for most cases, the `top`
 * offset written back through the out-parameter).
 *
 * @param runner    Test batch runner that records the results.
 * @param searcher  Searcher handed to Highlighter_new.
 * @param query     Query handed to Highlighter_new.
 */
static void
test_Raw_Excerpt(TestBatchRunner *runner, Searcher *searcher, Obj *query) {
    String *content = (String*)SSTR_WRAP_UTF8("content", 7);
    Highlighter *highlighter = Highlighter_new(searcher, query, content, 6);
    int32_t top;
    String *raw_excerpt;

    // The literal must really be 18 bytes long to match the size argument,
    // and the cases below expect "Urk." to start at byte offset 6, hence
    // two spaces after every sentence.
    String *field_val = (String *)SSTR_WRAP_UTF8("Ook.  Urk.  Ick.  ", 18);
    Vector *spans = Vec_new(1);
    Vec_Push(spans, (Obj*)Span_new(0, 18, 1.0f));
    HeatMap *heat_map = HeatMap_new(spans, 133);
    DECREF(spans);
    raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top,
                                          heat_map);
    // NOTE(review): "%s" needs a NUL-terminated buffer -- confirm that
    // Str_Get_Ptr8 guarantees one for this String.
    TEST_TRUE(runner,
              Str_Equals_Utf8(raw_excerpt, "Ook.", 4),
              "Raw_Excerpt at top %s", Str_Get_Ptr8(raw_excerpt));
    TEST_TRUE(runner,
              top == 0,
              "top is 0");
    DECREF(raw_excerpt);
    DECREF(heat_map);

    // Same field value, span starting at the second sentence.
    spans = Vec_new(1);
    Vec_Push(spans, (Obj*)Span_new(6, 12, 1.0f));
    heat_map = HeatMap_new(spans, 133);
    DECREF(spans);
    raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top,
                                          heat_map);
    TEST_TRUE(runner,
              Str_Equals_Utf8(raw_excerpt, "Urk.", 4),
              "Raw_Excerpt in middle, with 2 bounds");
    TEST_TRUE(runner,
              top == 6,
              "top in the middle modified by Raw_Excerpt");
    DECREF(raw_excerpt);
    DECREF(heat_map);

    // Span near the end of the text: excerpt starts with an ellipsis.
    field_val = (String *)SSTR_WRAP_UTF8("Ook urk ick i.", 14);
    spans = Vec_new(1);
    Vec_Push(spans, (Obj*)Span_new(12, 1, 1.0f));
    heat_map = HeatMap_new(spans, 133);
    DECREF(spans);
    raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top,
                                          heat_map);
    TEST_TRUE(runner,
              Str_Equals_Utf8(raw_excerpt, ELLIPSIS " i.", 6),
              "Ellipsis at top");
    TEST_TRUE(runner,
              top == 10,
              "top correct when leading ellipsis inserted");
    DECREF(heat_map);
    DECREF(raw_excerpt);

    // The line break inside this literal is written as an escape: a raw
    // newline is not allowed in a C string literal. The content is still
    // 17 bytes, with "Iz" starting at byte offset 6.
    field_val = (String *)SSTR_WRAP_UTF8("Urk. \nIz no good.", 17);
    spans = Vec_new(1);
    Vec_Push(spans, (Obj*)Span_new(6, 2, 1.0f));
    heat_map = HeatMap_new(spans, 133);
    DECREF(spans);
    raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top,
                                          heat_map);
    TEST_TRUE(runner,
              Str_Equals_Utf8(raw_excerpt, "Iz no" ELLIPSIS, 8),
              "Ellipsis at end");
    TEST_TRUE(runner,
              top == 6,
              "top trimmed");
    DECREF(heat_map);
    DECREF(raw_excerpt);

    // Words longer than excerpt len

    field_val = (String *)SSTR_WRAP_UTF8("abc/def/ghi/jkl/mno", 19);

    spans = Vec_new(1);
    Vec_Push(spans, (Obj*)Span_new(0, 3, 1.0f));
    heat_map = HeatMap_new(spans, 133);
    DECREF(spans);
    raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top,
                                          heat_map);
    TEST_TRUE(runner,
              Str_Equals_Utf8(raw_excerpt, "abc/d" ELLIPSIS, 8),
              "Long word at top");
    DECREF(heat_map);
    DECREF(raw_excerpt);

    spans = Vec_new(1);
    Vec_Push(spans, (Obj*)Span_new(8, 3, 1.0f));
    heat_map = HeatMap_new(spans, 133);
    DECREF(spans);
    raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top,
                                          heat_map);
    TEST_TRUE(runner,
              Str_Equals_Utf8(raw_excerpt, ELLIPSIS " f/g" ELLIPSIS, 10),
              "Long word in middle");
    DECREF(heat_map);
    DECREF(raw_excerpt);

    DECREF(highlighter);
}