C++ (Cpp) Str_Top Examples

Example #1

0

Show file

File: Highlighter.c Project: hernan604/lucy

/**
 * Encode `text` as HTML-safe ASCII, using `buf` as working space.
 *
 * Code points above 127, and ASCII characters that are neither graphic nor
 * whitespace, are written as decimal numeric entities.  '<', '>', '&' and
 * '"' are written as named entities.  Everything else is copied verbatim.
 *
 * @param text The string to encode.
 * @param buf  CharBuf to build the result in; its size is reset to 0 first.
 * @return the result of CB_To_String(buf).
 */
static String*
S_encode_entities(String *text, CharBuf *buf) {
    StringIterator *iter = Str_Top(text);
    size_t space = 0;
    // The longest numeric entity is "&#1114111;" (U+10FFFF): 2 bytes of
    // prefix, 7 decimal digits and the trailing semicolon.
    const int MAX_ENTITY_BYTES = 10;

    // Scan first so that we only allocate once.
    int32_t code_point;
    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        // The `> 127` test comes first so that isgraph()/isspace() only ever
        // see ASCII values.
        if (code_point > 127
                || (!isgraph(code_point) && !isspace(code_point))
                || code_point == '<'
                || code_point == '>'
                || code_point == '&'
                || code_point == '"'
           ) {
            space += MAX_ENTITY_BYTES;
        }
        else {
            space += 1;
        }
    }

    CB_Grow(buf, space);
    CB_Set_Size(buf, 0);

    // Second pass: start over from the top and emit the encoded text.
    DECREF(iter);
    iter = Str_Top(text);
    while (STRITER_DONE != (code_point = StrIter_Next(iter))) {
        if (code_point > 127
                || (!isgraph(code_point) && !isspace(code_point))
           ) {
            CB_catf(buf, "&#%u32;", code_point);
        }
        else if (code_point == '<') {
            CB_Cat_Trusted_Utf8(buf, "&lt;", 4);
        }
        else if (code_point == '>') {
            CB_Cat_Trusted_Utf8(buf, "&gt;", 4);
        }
        else if (code_point == '&') {
            CB_Cat_Trusted_Utf8(buf, "&amp;", 5);
        }
        else if (code_point == '"') {
            CB_Cat_Trusted_Utf8(buf, "&quot;", 6);
        }
        else {
            CB_Cat_Char(buf, code_point);
        }
    }

    DECREF(iter);
    return CB_To_String(buf);
}

Example #2

0

Show file

File: QueryParser.c Project: kidaa/lucy

/*
 * Copy `orig` into `buf`, dropping the backslash from the two-character
 * sequences backslash-colon, backslash-quote and backslash-backslash.  Any
 * other backslash is kept, along with the code point that follows it (if
 * there is one).  `buf` is emptied before use; `self` is not consulted.
 *
 * Returns the result of CB_To_String(buf).
 */
static String*
S_unescape(QueryParser *self, String *orig, CharBuf *buf) {
    StringIterator *cursor = Str_Top(orig);
    UNUSED_VAR(self);

    CB_Set_Size(buf, 0);
    CB_Grow(buf, Str_Get_Size(orig) + 4);

    for (int32_t current = StrIter_Next(cursor);
         current != STRITER_DONE;
         current = StrIter_Next(cursor)
        ) {
        // Ordinary code points pass straight through.
        if (current != '\\') {
            CB_Cat_Char(buf, current);
            continue;
        }

        // Backslash: look at what it is escaping.
        int32_t escaped = StrIter_Next(cursor);
        switch (escaped) {
            case ':':
            case '"':
            case '\\':
                // Recognized escape; keep only the escaped code point.
                CB_Cat_Char(buf, escaped);
                break;
            default:
                // Not an escape we handle; keep the backslash as-is.
                CB_Cat_Char(buf, current);
                if (escaped != STRITER_DONE) {
                    CB_Cat_Char(buf, escaped);
                }
                break;
        }
    }

    DECREF(cursor);
    return CB_To_String(buf);
}

Example #3

0

Show file

File: Lock.c Project: apache/lucy

/**
 * Initialize a Lock.
 *
 * @param self     The object to initialize; DECREF'd before any exception
 *                 is thrown.
 * @param folder   Folder stored (with an added reference) on the lock.
 * @param name     Lock name; may contain only alphanumerics, '.', '-' and
 *                 '_'.  A clone is stored on the lock.
 * @param timeout  Stored as-is.
 * @param interval Must be greater than zero.
 * @return self.
 *
 * Throws ERR if `interval` is not positive or `name` contains a disallowed
 * character.
 */
Lock*
Lock_init(Lock *self, Folder *folder, String *name, int32_t timeout,
          int32_t interval) {
    LockIVARS *const ivars = Lock_IVARS(self);

    // Validate.
    if (interval <= 0) {
        DECREF(self);
        THROW(ERR, "Invalid value for 'interval': %i32", interval);
    }
    StringIterator *iter = Str_Top(name);
    int32_t code_point;
    while (STR_OOB != (code_point = StrIter_Next(iter))) {
        // <ctype.h> classifiers are only defined for values representable
        // as unsigned char, so wider code points must never reach isalnum().
        if ((code_point >= 0 && code_point <= 0xFF && isalnum(code_point))
            || code_point == '.'
            || code_point == '-'
            || code_point == '_'
           ) {
            continue;
        }
        // Release the iterator before throwing so it is not leaked.
        DECREF(iter);
        DECREF(self);
        THROW(ERR, "Lock name contains disallowed characters: '%o'", name);
    }
    DECREF(iter);

    // Assign.
    ivars->folder       = (Folder*)INCREF(folder);
    ivars->timeout      = timeout;
    ivars->name         = Str_Clone(name);
    ivars->interval     = interval;

    return self;
}

Example #4

0

Show file

File: TestString.c Project: nkurz/lucy-clownfish

/*
 * Check StrIter_Skip_Whitespace and StrIter_Skip_Whitespace_Back against the
 * string and count supplied by S_smiley_with_whitespace: the first call from
 * each end must report that count, and an immediate second call must
 * report 0.
 */
static void
test_iterator_whitespace(TestBatchRunner *runner) {
    int expected_skips;
    String *padded = S_smiley_with_whitespace(&expected_skips);

    // Forward, starting from the top of the string.
    StringIterator *forward = Str_Top(padded);
    TEST_INT_EQ(runner, StrIter_Skip_Whitespace(forward), expected_skips,
                "Skip_Whitespace");
    TEST_INT_EQ(runner, StrIter_Skip_Whitespace(forward), 0,
                "Skip_Whitespace without whitespace");
    DECREF(forward);

    // Backward, starting from the tail of the string.
    StringIterator *backward = Str_Tail(padded);
    TEST_INT_EQ(runner, StrIter_Skip_Whitespace_Back(backward),
                expected_skips, "Skip_Whitespace_Back");
    TEST_INT_EQ(runner, StrIter_Skip_Whitespace_Back(backward), 0,
                "Skip_Whitespace_Back without whitespace");
    DECREF(backward);

    DECREF(padded);
}

Example #5

0

Show file

File: Lock.c Project: carriercomm/lucy

/*
 * Delete the lock file at `path` if it belongs to this lock and is safe to
 * remove.
 *
 * The file is only considered when `path` starts with "locks" and, after
 * skipping that prefix plus one more code point (presumably the path
 * separator), continues with this lock's name.  Its JSON content must be a
 * hash whose "host" and "name" entries equal ours and whose "pid" entry is a
 * String.  Deletion happens when the pid is this process and `delete_mine`
 * is set, or when the pid is not active and `delete_other` is set.
 *
 * Returns true if the file was deleted, false otherwise.  Throws ERR if the
 * deletion was attempted but failed.
 */
bool
LFLock_Maybe_Delete_File_IMP(LockFileLock *self, String *path,
                             bool delete_mine, bool delete_other) {
    LockFileLockIVARS *const ivars = LFLock_IVARS(self);
    Folder *const folder = ivars->folder;

    // Only delete locks that start with our lock name.
    if (!Str_Starts_With_Utf8(path, "locks", 5)) {
        return false;
    }
    StringIterator *cursor = Str_Top(path);
    StrIter_Advance(cursor, 5 + 1);
    const bool is_our_lock = StrIter_Starts_With(cursor, ivars->name);
    DECREF(cursor);
    if (!is_our_lock) {
        return false;
    }

    // Nothing to do if the lock file is already gone.
    if (!Folder_Exists(folder, path)) {
        return false;
    }

    // Attempt to delete dead lock file.
    bool deleted = false;
    Hash *hash = (Hash*)Json_slurp_json(folder, path);
    if (hash != NULL && Obj_Is_A((Obj*)hash, HASH)) {
        String *pid_buf = (String*)Hash_Fetch_Utf8(hash, "pid", 3);
        String *host    = (String*)Hash_Fetch_Utf8(hash, "host", 4);
        String *name    = (String*)Hash_Fetch_Utf8(hash, "name", 4);

        // Match hostname and lock name, and require a usable pid.  Each
        // check only runs if the ones before it passed.
        const bool host_ok = host != NULL
                             && Str_Is_A(host, STRING)
                             && Str_Equals(host, (Obj*)ivars->host);
        const bool name_ok = host_ok
                             && name != NULL
                             && Str_Is_A(name, STRING)
                             && Str_Equals(name, (Obj*)ivars->name);
        const bool pid_ok  = name_ok
                             && pid_buf != NULL
                             && Str_Is_A(pid_buf, STRING);

        if (pid_ok) {
            // Verify that pid is either mine or dead.
            const int pid = (int)Str_To_I64(pid_buf);
            if ((delete_mine && pid == PID_getpid())  // This process.
                || (delete_other && !PID_active(pid)) // Dead pid.
               ) {
                if (!Folder_Delete(folder, path)) {
                    String *mess = MAKE_MESS("Can't delete '%o'", path);
                    DECREF(hash);
                    Err_throw_mess(ERR, mess);
                }
                else {
                    deleted = true;
                }
            }
        }
    }
    DECREF(hash);

    return deleted;
}

Example #6

0

Show file

File: Segment.c Project: apache/lucy

/**
 * Test whether `name` looks like a segment name: the prefix "seg_" followed
 * only by alphanumeric characters.
 *
 * @param name The candidate name.
 * @return true if `name` starts with "seg_" and every code point after the
 * prefix is alphanumeric, false otherwise.
 */
bool
Seg_valid_seg_name(String *name) {
    if (!Str_Starts_With_Utf8(name, "seg_", 4)) {
        return false;
    }

    StringIterator *iter = Str_Top(name);
    StrIter_Advance(iter, 4);

    bool valid = true;
    int32_t code_point;
    while (STR_OOB != (code_point = StrIter_Next(iter))) {
        // isalnum() is only defined for values representable as unsigned
        // char; reject anything wider without calling it.
        if (code_point < 0
            || code_point > 0xFF
            || !isalnum(code_point)
           ) {
            valid = false;
            break;
        }
    }

    DECREF(iter);
    return valid;
}

Example #7

0

Show file

File: IndexFileNames.c Project: rectang/lucy

/**
 * Extract the "gen" number from an index file name.
 *
 * Everything after the first underscore in `name` is parsed as a base-36
 * number.
 *
 * @param name The file name to examine.
 * @return the parsed number, or 0 if `name` contains no underscore.
 */
uint64_t
IxFileNames_extract_gen(String *name) {
    StringIterator *iter = Str_Top(name);

    // Advance past first underscore.  Bail if we run out of string.
    while (1) {
        int32_t code_point = StrIter_Next(iter);
        if (code_point == STR_OOB) {
            // Release the iterator on the early exit so it is not leaked.
            DECREF(iter);
            return 0;
        }
        else if (code_point == '_') { break; }
    }

    // Crop from just past the underscore through the end of the string.
    String *num_string = StrIter_crop(iter, NULL);
    uint64_t retval = (uint64_t)Str_BaseX_To_I64(num_string, 36);

    DECREF(num_string);
    DECREF(iter);
    return retval;
}

Example #8

0

Show file

File: QueryParser.c Project: kidaa/lucy

/*
 * Expand a LeafQuery into concrete per-field queries.
 *
 * Returns NULL if `query` is not a LeafQuery or its text is empty.
 * Otherwise the text is trimmed with the whitespace-skipping iterator calls
 * and, if it starts with a double quote, treated as a phrase (the opening
 * quote, and a closing quote that is not preceded by a backslash, are
 * excluded).  For each target field a TermQuery or PhraseQuery is built.
 *
 * The return value is:
 *   - a NoMatchQuery when no per-field query was produced,
 *   - the single per-field query when there is exactly one,
 *   - otherwise the result of QParser_Make_OR_Query over all of them.
 */
Query*
QParser_Expand_Leaf_IMP(QueryParser *self, Query *query) {
    QueryParserIVARS *const ivars = QParser_IVARS(self);
    LeafQuery *leaf_query = (LeafQuery*)query;
    Schema    *schema     = ivars->schema;
    bool       is_phrase  = false;
    bool       ambiguous  = false;

    // Determine whether we can actually process the input.
    if (!Query_is_a(query, LEAFQUERY)) { return NULL; }
    String *full_text = LeafQuery_Get_Text(leaf_query);
    if (!Str_Get_Size(full_text)) { return NULL; }

    // If quoted, always generate PhraseQuery.
    StringIterator *top  = Str_Top(full_text);
    StringIterator *tail = Str_Tail(full_text);
    StrIter_Skip_Next_Whitespace(top);
    StrIter_Skip_Prev_Whitespace(tail);
    if (StrIter_Starts_With_Utf8(top, "\"", 1)) {
        is_phrase = true;
        StrIter_Advance(top, 1);
        // Drop the closing quote too, unless it is backslash-escaped.
        if (StrIter_Ends_With_Utf8(tail, "\"", 1)
            && !StrIter_Ends_With_Utf8(tail, "\\\"", 2)
        ) {
            StrIter_Recede(tail, 1);
        }
    }
    String *source_text = StrIter_substring(top, tail);

    // Either use LeafQuery's field or default to Parser's list.
    Vector *fields;
    if (LeafQuery_Get_Field(leaf_query)) {
        fields = Vec_new(1);
        Vec_Push(fields, INCREF(LeafQuery_Get_Field(leaf_query)));
    }
    else {
        fields = (Vector*)INCREF(ivars->fields);
    }

    // One scratch buffer is shared by every S_unescape call below.
    CharBuf *unescape_buf = CB_new(Str_Get_Size(source_text));
    Vector  *queries      = Vec_new(Vec_Get_Size(fields));
    for (uint32_t i = 0, max = Vec_Get_Size(fields); i < max; i++) {
        String   *field    = (String*)Vec_Fetch(fields, i);
        // The parser-level analyzer, when set, takes precedence over the
        // analyzer the schema has for this field.
        Analyzer *analyzer = ivars->analyzer
                             ? ivars->analyzer
                             : Schema_Fetch_Analyzer(schema, field);

        if (!analyzer) {
            // No analyzer: build a TermQuery from the source text as-is
            // (S_unescape is not applied on this path).
            Vec_Push(queries,
                    (Obj*)QParser_Make_Term_Query(self, field,
                                                  (Obj*)source_text));
        }
        else {
            // Extract token texts.
            String *split_source = S_unescape(self, source_text, unescape_buf);
            Vector *maybe_texts = Analyzer_Split(analyzer, split_source);
            uint32_t num_maybe_texts = Vec_Get_Size(maybe_texts);
            Vector *token_texts = Vec_new(num_maybe_texts);

            // Filter out zero-length token texts.
            for (uint32_t j = 0; j < num_maybe_texts; j++) {
                String *token_text = (String*)Vec_Fetch(maybe_texts, j);
                if (Str_Get_Size(token_text)) {
                    Vec_Push(token_texts, INCREF(token_text));
                }
            }

            if (Vec_Get_Size(token_texts) == 0) {
                /* Query might include stop words.  Who knows? */
                ambiguous = true;
            }

            // Add either a TermQuery or a PhraseQuery.
            // Note: a quoted input yields a PhraseQuery even when it
            // produced zero or one token.
            if (is_phrase || Vec_Get_Size(token_texts) > 1) {
                Vec_Push(queries, (Obj*)
                        QParser_Make_Phrase_Query(self, field, token_texts));
            }
            else if (Vec_Get_Size(token_texts) == 1) {
                Vec_Push(queries,
                        (Obj*)QParser_Make_Term_Query(self, field, Vec_Fetch(token_texts, 0)));
            }

            DECREF(token_texts);
            DECREF(maybe_texts);
            DECREF(split_source);
        }
    }

    // Collapse the per-field queries into a single return value.
    Query *retval;
    if (Vec_Get_Size(queries) == 0) {
        retval = (Query*)NoMatchQuery_new();
        if (ambiguous) {
            NoMatchQuery_Set_Fails_To_Match((NoMatchQuery*)retval, false);
        }
    }
    else if (Vec_Get_Size(queries) == 1) {
        // Take our own reference; `queries` is released below.
        retval = (Query*)INCREF(Vec_Fetch(queries, 0));
    }
    else {
        retval = QParser_Make_OR_Query(self, queries);
    }

    // Clean up.
    DECREF(unescape_buf);
    DECREF(queries);
    DECREF(fields);
    DECREF(source_text);
    DECREF(tail);
    DECREF(top);

    return retval;
}

Example #9

0

Show file

File: QueryLexer.c Project: rectang/lucy

/*
 * Split `query_string` into a Vector of ParserElem tokens.
 *
 * A NULL `query_string` yields an empty Vector.  Runs of whitespace are
 * skipped.  When `heed_colons` is set, S_consume_field is tried before each
 * token and its result, if any, is pushed first.  Parentheses, a leading
 * '+' or '-', quoted strings and the keywords OR / AND / NOT get dedicated
 * token types; everything else is consumed as text.
 */
Vector*
QueryLexer_Tokenize_IMP(QueryLexer *self, String *query_string) {
    QueryLexerIVARS *const ivars = QueryLexer_IVARS(self);

    Vector *tokens = Vec_new(0);
    if (query_string == NULL) { return tokens; }

    StringIterator *cursor = Str_Top(query_string);

    while (StrIter_Has_Next(cursor)) {
        // Fast-forward past whitespace.
        if (StrIter_Skip_Whitespace(cursor)) {
            continue;
        }

        if (ivars->heed_colons) {
            ParserElem *field_elem = S_consume_field(cursor);
            if (field_elem != NULL) {
                Vec_Push(tokens, (Obj*)field_elem);
            }
        }

        ParserElem *token = NULL;
        const int32_t code_point = StrIter_Next(cursor);
        switch (code_point) {
            case '(':
                token = ParserElem_new(TOKEN_OPEN_PAREN, NULL);
                break;
            case ')':
                token = ParserElem_new(TOKEN_CLOSE_PAREN, NULL);
                break;
            case '+':
            case '-': {
                // A sign is an operator only when something other than
                // whitespace follows it directly; otherwise it is plain
                // text.
                const int is_plus = (code_point == '+');
                if (StrIter_Has_Next(cursor)
                    && !StrIter_Skip_Whitespace(cursor)
                   ) {
                    token = ParserElem_new(is_plus ? TOKEN_PLUS : TOKEN_MINUS,
                                           NULL);
                }
                else {
                    token = ParserElem_new(TOKEN_STRING,
                                           (Obj*)Str_newf(is_plus ? "+" : "-"));
                }
                break;
            }
            case '"':
                // Step back so the helper sees the opening quote.
                StrIter_Recede(cursor, 1);
                token = S_consume_quoted_string(cursor);
                break;
            case 'O':
            case 'A':
            case 'N':
                // Possible keyword: step back and try to match it, falling
                // back to ordinary text when the keyword does not match.
                StrIter_Recede(cursor, 1);
                if (code_point == 'O') {
                    token = S_consume_keyword(cursor, "OR", 2, TOKEN_OR);
                }
                else if (code_point == 'A') {
                    token = S_consume_keyword(cursor, "AND", 3, TOKEN_AND);
                }
                else {
                    token = S_consume_keyword(cursor, "NOT", 3, TOKEN_NOT);
                }
                if (token == NULL) {
                    token = S_consume_text(cursor);
                }
                break;
            default:
                StrIter_Recede(cursor, 1);
                token = S_consume_text(cursor);
                break;
        }
        Vec_Push(tokens, (Obj*)token);
    }

    DECREF(cursor);
    return tokens;
}

Example #10

0

Show file

File: TestString.c Project: nkurz/lucy-clownfish

/*
 * Exercise StrIter_crop, StrIter_Starts_With and StrIter_Ends_With on a
 * string built from ASCII letters interleaved with `smiley`: cropping the
 * whole string, cropping between moved iterators, matching and
 * non-matching prefixes/suffixes, and cropping with a NULL top or tail.
 */
static void
test_iterator_substring(TestBatchRunner *runner) {
    String *source = Str_newf("a%sb%sc%sd", smiley, smiley, smiley);

    StringIterator *head = Str_Top(source);
    StringIterator *foot = Str_Tail(source);

    // Untouched top and tail iterators span the entire string.
    String *whole = StrIter_crop(head, foot);
    TEST_TRUE(runner, Str_Equals(whole, (Obj*)source),
              "StrIter_crop whole string");
    DECREF(whole);

    // Move both iterators two code points inward.
    StrIter_Advance(head, 2);
    StrIter_Recede(foot, 2);

    String *middle          = StrIter_crop(head, foot);
    String *expected_middle = Str_newf("b%sc", smiley);
    TEST_TRUE(runner, Str_Equals(middle, (Obj*)expected_middle),
              "StrIter_crop");
    TEST_TRUE(runner, StrIter_Starts_With(head, expected_middle),
              "Starts_With returns true");
    TEST_TRUE(runner, StrIter_Ends_With(foot, expected_middle),
              "Ends_With returns true");
    DECREF(expected_middle);
    DECREF(middle);

    // Strings that must not match at the current positions.
    String *mismatch = Str_newf("b%sx", smiley);
    TEST_FALSE(runner, StrIter_Starts_With(head, mismatch),
               "Starts_With returns false");
    TEST_FALSE(runner, StrIter_Ends_With(head, mismatch),
               "Ends_With returns false");

    String *oversized = Str_newf("b%sxxxxxxxxxxxx%sc", smiley, smiley);
    TEST_FALSE(runner, StrIter_Starts_With(head, oversized),
               "Starts_With long string returns false");
    TEST_FALSE(runner, StrIter_Ends_With(foot, oversized),
               "Ends_With long string returns false");

    DECREF(mismatch);
    DECREF(oversized);

    // A NULL tail crops through to the end of the string.
    String *suffix          = StrIter_crop(foot, NULL);
    String *expected_suffix = Str_newf("%sd", smiley);
    TEST_TRUE(runner, Str_Equals(suffix, (Obj*)expected_suffix),
              "StrIter_crop with NULL tail");
    DECREF(expected_suffix);
    DECREF(suffix);

    // A NULL top crops from the beginning of the string.
    String *prefix          = StrIter_crop(NULL, head);
    String *expected_prefix = Str_newf("a%s", smiley);
    TEST_TRUE(runner, Str_Equals(prefix, (Obj*)expected_prefix),
              "StrIter_crop with NULL top");
    DECREF(expected_prefix);
    DECREF(prefix);

    DECREF(head);
    DECREF(foot);
    DECREF(source);
}

Example #11

0

Show file

File: TestString.c Project: nkurz/lucy-clownfish

/*
 * Exercise the core StringIterator API on a string built from code points
 * at the boundaries of each UTF-8 sequence length: equality and comparison,
 * Clone/Assign, forward and backward stepping, and Advance/Recede including
 * moves past either end of the string.
 */
static void
test_iterator(TestBatchRunner *runner) {
    static const int32_t code_points[] = {
        0x41,
        0x7F,
        0x80,
        0x7FF,
        0x800,
        0xFFFF,
        0x10000,
        0x10FFFF
    };
    static const size_t num_code_points
        = sizeof(code_points) / sizeof(code_points[0]);

    CharBuf *buf = CB_new(0);
    for (size_t i = 0; i < num_code_points; ++i) {
        CB_Cat_Char(buf, code_points[i]);
    }
    String *string = CB_To_String(buf);

    // Equals.
    {
        StringIterator *iter = Str_Top(string);

        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)iter),
                  "StringIterator equal to self");
        TEST_FALSE(runner, StrIter_Equals(iter, (Obj*)CFISH_TRUE),
                   "StringIterator not equal non-iterators");

        DECREF(iter);
    }

    // Compare_To, Clone and Assign.
    {
        StringIterator *top  = Str_Top(string);
        StringIterator *tail = Str_Tail(string);

        TEST_INT_EQ(runner, StrIter_Compare_To(top, (Obj*)tail), -1,
                    "Compare_To top < tail");
        TEST_INT_EQ(runner, StrIter_Compare_To(tail, (Obj*)top), 1,
                    "Compare_To tail > top");
        TEST_INT_EQ(runner, StrIter_Compare_To(top, (Obj*)top), 0,
                    "Compare_To top == top");

        StringIterator *clone = StrIter_Clone(top);
        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)top), "Clone");

        StrIter_Assign(clone, tail);
        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)tail), "Assign");

        String *other = Str_newf("Other string");
        StringIterator *other_iter = Str_Top(other);
        TEST_FALSE(runner, StrIter_Equals(other_iter, (Obj*)tail),
                   "Equals returns false for different strings");
        StrIter_Assign(clone, other_iter);
        TEST_TRUE(runner, StrIter_Equals(clone, (Obj*)other_iter),
                  "Assign iterator with different string");

        DECREF(other);
        DECREF(other_iter);
        DECREF(clone);
        DECREF(top);
        DECREF(tail);
    }

    // Has_Next and Next, walking forward over every code point.
    {
        StringIterator *iter = Str_Top(string);

        for (size_t i = 0; i < num_code_points; ++i) {
            // The index is cast because "%d" expects an int, not a size_t.
            TEST_TRUE(runner, StrIter_Has_Next(iter), "Has_Next %d", (int)i);
            int32_t code_point = StrIter_Next(iter);
            TEST_INT_EQ(runner, code_point, code_points[i], "Next %d",
                        (int)i);
        }

        TEST_TRUE(runner, !StrIter_Has_Next(iter),
                  "Has_Next at end of string");
        TEST_INT_EQ(runner, StrIter_Next(iter), STR_OOB,
                    "Next at end of string");

        StringIterator *tail = Str_Tail(string);
        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)tail), "Equals tail");

        DECREF(tail);
        DECREF(iter);
    }

    // Has_Prev and Prev, walking backward over every code point.
    {
        StringIterator *iter = Str_Tail(string);

        for (size_t i = num_code_points; i--;) {
            // The index is cast because "%d" expects an int, not a size_t.
            TEST_TRUE(runner, StrIter_Has_Prev(iter), "Has_Prev %d", (int)i);
            int32_t code_point = StrIter_Prev(iter);
            TEST_INT_EQ(runner, code_point, code_points[i], "Prev %d",
                        (int)i);
        }

        TEST_TRUE(runner, !StrIter_Has_Prev(iter),
                  "Has_Prev at end of string");
        TEST_INT_EQ(runner, StrIter_Prev(iter), STR_OOB,
                    "Prev at start of string");

        StringIterator *top = Str_Top(string);
        TEST_TRUE(runner, StrIter_Equals(iter, (Obj*)top), "Equals top");

        DECREF(top);
        DECREF(iter);
    }

    // Advance and Recede, including requests larger than the string.
    {
        StringIterator *iter = Str_Top(string);

        StrIter_Next(iter);
        TEST_INT_EQ(runner, StrIter_Advance(iter, 2), 2,
                    "Advance returns number of code points");
        TEST_INT_EQ(runner, StrIter_Next(iter), code_points[3],
                    "Advance works");
        TEST_INT_EQ(runner,
                    StrIter_Advance(iter, 1000000), num_code_points - 4,
                    "Advance past end of string");

        StrIter_Prev(iter);
        TEST_INT_EQ(runner, StrIter_Recede(iter, 2), 2,
                    "Recede returns number of code points");
        TEST_INT_EQ(runner, StrIter_Prev(iter), code_points[num_code_points-4],
                    "Recede works");
        TEST_INT_EQ(runner, StrIter_Recede(iter, 1000000), num_code_points - 4,
                    "Recede past start of string");

        DECREF(iter);
    }

    DECREF(string);
    DECREF(buf);
}

Example #12

0

Show file

File: Highlighter.c Project: hernan604/lucy

/*
 * Build the highlighted, entity-encoded form of `raw_excerpt`.
 *
 * `spans` are visited in order.  Spans starting before `top` are skipped
 * and iteration stops at the first span starting at or beyond the end of
 * the excerpt.  Spans that touch or overlap the pending highlight range are
 * merged into it.  Each merged range is encoded with S_do_encode and passed
 * through Highlighter_Highlight; the text between ranges is encoded only.
 *
 * Returns the string yielded by the output CharBuf.
 */
String*
Highlighter_Highlight_Excerpt_IMP(Highlighter *self, VArray *spans,
                                  String *raw_excerpt, int32_t top) {
    int32_t         hl_start   = 0;
    int32_t         hl_end     = 0;
    StringIterator *cursor     = Str_Top(raw_excerpt);
    StringIterator *mark       = Str_Top(raw_excerpt);
    CharBuf        *out        = CB_new(Str_Get_Size(raw_excerpt) + 32);
    CharBuf        *encode_buf = NULL;
    const int32_t   excerpt_end = top + Str_Length(raw_excerpt);

    const uint32_t num_spans = VA_Get_Size(spans);
    for (uint32_t tick = 0; tick < num_spans; tick++) {
        Span *span = (Span*)VA_Fetch(spans, tick);
        const int32_t offset = Span_Get_Offset(span);

        // Ignore spans before the excerpt; stop once we are past it.
        if (offset < top) { continue; }
        if (offset >= excerpt_end) { break; }

        const int32_t span_start = offset - top;
        const int32_t span_end   = span_start + Span_Get_Length(span);

        // Span touches or overlaps the pending range: just extend it.
        if (span_start <= hl_end) {
            if (hl_end < span_end) {
                hl_end = span_end;
            }
            continue;
        }

        // Flush the pending highlighted range, if there is one.
        if (hl_end > hl_start) {
            StrIter_Assign(mark, cursor);
            StrIter_Advance(cursor, hl_end - hl_start);
            String *piece   = StrIter_substring(mark, cursor);
            String *encoded = S_do_encode(self, piece, &encode_buf);
            String *marked  = Highlighter_Highlight(self, encoded);
            CB_Cat(out, marked);
            DECREF(marked);
            DECREF(encoded);
            DECREF(piece);
        }

        // Emit the plain text between the pending range and this span.
        StrIter_Assign(mark, cursor);
        StrIter_Advance(cursor, span_start - hl_end);
        String *gap         = StrIter_substring(mark, cursor);
        String *encoded_gap = S_do_encode(self, gap, &encode_buf);
        CB_Cat(out, encoded_gap);
        DECREF(encoded_gap);
        DECREF(gap);

        hl_start = span_start;
        hl_end   = span_end;
    }

    // Highlight final section.
    if (hl_end > hl_start) {
        StrIter_Assign(mark, cursor);
        StrIter_Advance(cursor, hl_end - hl_start);
        String *piece   = StrIter_substring(mark, cursor);
        String *encoded = S_do_encode(self, piece, &encode_buf);
        String *marked  = Highlighter_Highlight(self, encoded);
        CB_Cat(out, marked);
        DECREF(marked);
        DECREF(encoded);
        DECREF(piece);
    }

    // Last text, beyond last highlight span.
    if (StrIter_Has_Next(cursor)) {
        String *rest         = StrIter_substring(cursor, NULL);
        String *encoded_rest = S_do_encode(self, rest, &encode_buf);
        CB_Cat(out, encoded_rest);
        DECREF(encoded_rest);
        DECREF(rest);
    }

    String *highlighted = CB_Yield_String(out);
    DECREF(encode_buf);
    DECREF(out);
    DECREF(mark);
    DECREF(cursor);
    return highlighted;
}

Example #13

0

Show file

File: Highlighter.c Project: hernan604/lucy

/*
 * Extract an unencoded excerpt from `field_val` around the location
 * reported by S_hottest(heat_map).
 *
 * The excerpt starts at most `slop` code points before that location and is
 * at most `excerpt_length` code points long; S_find_starting_boundary and
 * S_find_ending_boundary are used to adjust both ends.  An ellipsis is added
 * at whichever end did not land on a boundary.
 *
 * `*start_ptr` receives the starting offset of the returned excerpt within
 * `field_val`, reduced by 2 when a leading ellipsis and space were
 * prepended.  Returns the excerpt, or an empty string if the excerpt length
 * came out as zero.
 */
String*
Highlighter_Raw_Excerpt_IMP(Highlighter *self, String *field_val,
                            int32_t *start_ptr, HeatMap *heat_map) {
    HighlighterIVARS *const ivars = Highlighter_IVARS(self);

    // Find start of excerpt.

    StringIterator *top = Str_Top(field_val);

    int32_t  best_location = S_hottest(heat_map);
    int32_t  start;
    uint32_t max_skip;

    if ((uint32_t)best_location <= ivars->slop) {
        // If the beginning of the string falls within the window centered
        // around the hottest point in the field, start the fragment at the
        // beginning.
        start    = 0;
        max_skip = best_location;
    }
    else {
        start    = best_location - ivars->slop;
        max_skip = ivars->slop;
        StrIter_Advance(top, start);
    }

    // Adjust `top` via the boundary helper and account for however many
    // code points it reports having skipped.
    uint32_t num_skipped;
    bool found_starting_edge
        = S_find_starting_boundary(top, max_skip, &num_skipped);
    start += num_skipped;

    // Find end of excerpt.

    StringIterator *tail = StrIter_Clone(top);

    uint32_t max_len = ivars->excerpt_length;
    if (!found_starting_edge) {
        // Leave space for starting ellipsis and space character.
        // NOTE(review): max_len is unsigned, so this wraps if
        // excerpt_length < 2 -- confirm it is validated elsewhere.
        max_len -= 2;
    }

    bool found_ending_edge = true;
    uint32_t excerpt_len = StrIter_Advance(tail, max_len);

    // Skip up to slop code points but keep at least max_len - slop.
    // NOTE(review): `max_len - ivars->slop` presumably assumes
    // slop <= max_len; verify against where these ivars are set.
    if (excerpt_len > max_len - ivars->slop) {
        max_skip = excerpt_len - (max_len - ivars->slop);
        found_ending_edge
            = S_find_ending_boundary(tail, max_skip, &num_skipped);
        // Clamp at zero rather than letting the unsigned length wrap.
        if (num_skipped >= excerpt_len) {
            excerpt_len = 0;
        }
        else {
            excerpt_len -= num_skipped;
        }
    }

    // Extract excerpt.

    String *raw_excerpt;

    if (!excerpt_len) {
        raw_excerpt = Str_new_from_trusted_utf8("", 0);
    }
    else {
        String  *substring = StrIter_substring(top, tail);
        CharBuf *buf       = CB_new(Str_Get_Size(substring) + 8);

        // If not starting on a sentence boundary, prepend an ellipsis.
        if (!found_starting_edge) {
            CB_Cat_Char(buf, ELLIPSIS_CODE_POINT);
            CB_Cat_Char(buf, ' ');
            // The two prepended code points shift the reported start.
            start -= 2;
        }

        CB_Cat(buf, substring);

        // If not ending on a sentence boundary, append an ellipsis.
        if (!found_ending_edge) {
            CB_Cat_Char(buf, ELLIPSIS_CODE_POINT);
        }

        raw_excerpt = CB_Yield_String(buf);

        DECREF(buf);
        DECREF(substring);
    }

    *start_ptr = start;

    DECREF(top);
    DECREF(tail);
    return raw_excerpt;
}