Пример #1
0
/* Build a highlighted excerpt for this Highlighter's field of `hit_doc`.
 *
 * Returns NULL when the extracted field value is missing or is not a
 * STRING, a new empty string when the field value has zero size, and
 * otherwise the result of Highlighter_Highlight_Excerpt() applied to the
 * raw excerpt.  The extracted field value is released on every path.
 */
String*
Highlighter_Create_Excerpt_IMP(Highlighter *self, HitDoc *hit_doc) {
    HighlighterIVARS *const ivars = Highlighter_IVARS(self);
    String *field_val = (String*)HitDoc_Extract(hit_doc, ivars->field);
    String *excerpt   = NULL;

    if (field_val && Obj_Is_A((Obj*)field_val, STRING)) {
        if (Str_Get_Size(field_val) == 0) {
            // Nothing to excerpt: an empty field maps to an empty string.
            excerpt = Str_new_from_trusted_utf8("", 0);
        }
        else {
            DocVector *doc_vec
                = Searcher_Fetch_Doc_Vec(ivars->searcher,
                                         HitDoc_Get_Doc_ID(hit_doc));

            // Fall back to an empty array when the compiler reports no
            // spans, so the steps below always have an array to work on.
            VArray *spans
                = Compiler_Highlight_Spans(ivars->compiler, ivars->searcher,
                                           doc_vec, ivars->field);
            if (spans == NULL) {
                spans = VA_new(0);
            }
            VA_Sort(spans, NULL, NULL);

            // The heat map is built with two thirds of the excerpt length.
            HeatMap *heat_map
                = HeatMap_new(spans, (ivars->excerpt_length * 2) / 3);

            // `top` is handed to Raw_Excerpt by address and then passed on
            // to Highlight_Excerpt.
            int32_t top;
            String *raw
                = Highlighter_Raw_Excerpt(self, field_val, &top, heat_map);
            excerpt = Highlighter_Highlight_Excerpt(self, spans, raw, top);

            DECREF(raw);
            DECREF(heat_map);
            DECREF(spans);
            DECREF(doc_vec);
        }
    }

    DECREF(field_val);
    return excerpt;
}
Пример #2
0
/* Exercise Highlighter_Raw_Excerpt() against a series of field values and
 * single-span heat maps, checking the returned excerpt text and, for most
 * cases, the `top` offset written through the out-parameter.
 *
 * The Highlighter is created with a final argument of 6 (presumably the
 * excerpt length -- see the "Words longer than excerpt len" cases below).
 * Every object created here is released before returning; the strings made
 * with SSTR_WRAP_UTF8 are never DECREF'd by this function.
 */
static void
test_Raw_Excerpt(TestBatchRunner *runner, Searcher *searcher, Obj *query) {
    String *content = (String*)SSTR_WRAP_UTF8("content", 7);
    Highlighter *highlighter = Highlighter_new(searcher, query, content, 6);
    int32_t top;
    String *raw_excerpt;

    // Span covering the whole field: excerpt starts at the very top.
    String *field_val = (String *)SSTR_WRAP_UTF8("Ook.  Urk.  Ick.  ", 18);
    Vector *spans = Vec_new(1);
    Vec_Push(spans, (Obj*)Span_new(0, 18, 1.0f));
    HeatMap *heat_map = HeatMap_new(spans, 133);
    DECREF(spans);
    raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top,
                                          heat_map);
    // Bound the diagnostic print by the string's size: string data in this
    // test is always passed around with an explicit length, so the raw
    // pointer is not assumed to be NUL-terminated.
    TEST_TRUE(runner,
              Str_Equals_Utf8(raw_excerpt, "Ook.", 4),
              "Raw_Excerpt at top %.*s", (int)Str_Get_Size(raw_excerpt),
              Str_Get_Ptr8(raw_excerpt));
    TEST_TRUE(runner,
              top == 0,
              "top is 0");
    DECREF(raw_excerpt);
    DECREF(heat_map);

    // Span in the middle of the same field: both excerpt edges are bounded.
    spans = Vec_new(1);
    Vec_Push(spans, (Obj*)Span_new(6, 12, 1.0f));
    heat_map = HeatMap_new(spans, 133);
    DECREF(spans);
    raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top,
                                          heat_map);
    TEST_TRUE(runner,
              Str_Equals_Utf8(raw_excerpt, "Urk.", 4),
              "Raw_Excerpt in middle, with 2 bounds");
    TEST_TRUE(runner,
              top == 6,
              "top in the middle modified by Raw_Excerpt");
    DECREF(raw_excerpt);
    DECREF(heat_map);

    // Span near the end of the field: a leading ellipsis is expected.
    // The expected text is 6 bytes: ELLIPSIS followed by " i.".
    field_val = (String *)SSTR_WRAP_UTF8("Ook urk ick i.", 14);
    spans     = Vec_new(1);
    Vec_Push(spans, (Obj*)Span_new(12, 1, 1.0f));
    heat_map = HeatMap_new(spans, 133);
    DECREF(spans);
    raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top,
                                          heat_map);
    TEST_TRUE(runner,
              Str_Equals_Utf8(raw_excerpt, ELLIPSIS " i.", 6),
              "Ellipsis at top");
    TEST_TRUE(runner,
              top == 10,
              "top correct when leading ellipsis inserted");
    DECREF(heat_map);
    DECREF(raw_excerpt);

    // Span at the start of the second sentence: a trailing ellipsis is
    // expected.
    field_val = (String *)SSTR_WRAP_UTF8("Urk.  Iz no good.", 17);
    spans     = Vec_new(1);
    Vec_Push(spans, (Obj*)Span_new(6, 2, 1.0f));
    heat_map = HeatMap_new(spans, 133);
    DECREF(spans);
    raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top,
                                          heat_map);
    TEST_TRUE(runner,
              Str_Equals_Utf8(raw_excerpt, "Iz no" ELLIPSIS, 8),
              "Ellipsis at end");
    TEST_TRUE(runner,
              top == 6,
              "top trimmed");
    DECREF(heat_map);
    DECREF(raw_excerpt);

    // Words longer than excerpt len

    field_val = (String *)SSTR_WRAP_UTF8("abc/def/ghi/jkl/mno", 19);

    // Span at the start of the long word (only the text is checked here).
    spans = Vec_new(1);
    Vec_Push(spans, (Obj*)Span_new(0, 3, 1.0f));
    heat_map = HeatMap_new(spans, 133);
    DECREF(spans);
    raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top,
                                          heat_map);
    TEST_TRUE(runner,
              Str_Equals_Utf8(raw_excerpt, "abc/d" ELLIPSIS, 8),
              "Long word at top");
    DECREF(heat_map);
    DECREF(raw_excerpt);

    // Span inside the long word: ellipses are expected on both sides.
    spans = Vec_new(1);
    Vec_Push(spans, (Obj*)Span_new(8, 3, 1.0f));
    heat_map = HeatMap_new(spans, 133);
    DECREF(spans);
    raw_excerpt = Highlighter_Raw_Excerpt(highlighter, field_val, &top,
                                          heat_map);
    TEST_TRUE(runner,
              Str_Equals_Utf8(raw_excerpt, ELLIPSIS " f/g" ELLIPSIS, 10),
              "Long word in middle");
    DECREF(heat_map);
    DECREF(raw_excerpt);

    DECREF(highlighter);
}