Ejemplo n.º 1
0
void SSearchTest::sharpSTest()
{
    UErrorCode status = U_ZERO_ERROR;
    UCollator *coll = NULL;
    UnicodeString lp  = "fuss";
    UnicodeString sp = "fu\\u00DF";
    UnicodeString targets[]  = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball",
                                "ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF",
                                "fuss", "ffuss", "fufuss", "fusfuss", "1fuss", "12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu\\u00DF", "1234fu\\u00DF"};
    int32_t start = -1, end = -1;

    coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status);
    TEST_ASSERT_SUCCESS(status);

    UnicodeString lpUnescaped = lp.unescape();
    UnicodeString spUnescaped = sp.unescape();

    LocalUStringSearchPointer ussLong(usearch_openFromCollator(lpUnescaped.getBuffer(), lpUnescaped.length(),
                                                           lpUnescaped.getBuffer(), lpUnescaped.length(),   // actual test data will be set later
                                                           coll,
                                                           NULL,     // the break iterator
                                                           &status));

    LocalUStringSearchPointer ussShort(usearch_openFromCollator(spUnescaped.getBuffer(), spUnescaped.length(),
                                                           spUnescaped.getBuffer(), spUnescaped.length(),   // actual test data will be set later
                                                           coll,
                                                           NULL,     // the break iterator
                                                           &status));
    TEST_ASSERT_SUCCESS(status);

    for (uint32_t t = 0; t < (sizeof(targets)/sizeof(targets[0])); t += 1) {
        UBool bFound;
        UnicodeString target = targets[t].unescape();

        start = end = -1;
        usearch_setText(ussLong.getAlias(), target.getBuffer(), target.length(), &status);
        bFound = usearch_search(ussLong.getAlias(), 0, &start, &end, &status);
        TEST_ASSERT_SUCCESS(status);
        if (bFound) {
            logln("Test %d: found long pattern at [%d, %d].", t, start, end);
        } else {
            dataerrln("Test %d: did not find long pattern.", t);
        }

        usearch_setText(ussShort.getAlias(), target.getBuffer(), target.length(), &status);
        bFound = usearch_search(ussShort.getAlias(), 0, &start, &end, &status);
        TEST_ASSERT_SUCCESS(status);
        if (bFound) {
            logln("Test %d: found long pattern at [%d, %d].", t, start, end);
        } else {
            dataerrln("Test %d: did not find long pattern.", t);
        }
    }

    ucol_close(coll);
}
Ejemplo n.º 2
0
void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        m_text_ = text;
        usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
    }
}
Ejemplo n.º 3
0
void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
{
    if (U_SUCCESS(status)) {
        text.getText(m_text_);
        usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
    }
}
Ejemplo n.º 4
0
Archivo: fixed.c Proyecto: julp/ugrep
static engine_return_t engine_fixed_whole_line_match(error_t **error, void *data, const UString *subject)
{
    FETCH_DATA(data, p, fixed_pattern_t);

    if (ustring_empty(p->pattern)) {
        return ustring_empty(subject) ? ENGINE_WHOLE_LINE_MATCH : ENGINE_NO_MATCH;
    } else if (NULL != p->usearch) {
        int32_t ret;
        UErrorCode status;

        status = U_ZERO_ERROR;
        usearch_setText(p->usearch, subject->ptr, subject->len, &status);
        if (U_FAILURE(status)) {
            icu_error_set(error, FATAL, status, "usearch_setText");
            return ENGINE_FAILURE;
        }
        ret = usearch_first(p->usearch, &status);
        if (U_FAILURE(status)) {
            icu_error_set(error, FATAL, status, "usearch_first");
            return ENGINE_FAILURE;
        }
        usearch_unbindText(p->usearch);

        return (ret != USEARCH_DONE && ((size_t) usearch_getMatchedLength(p->usearch)) == subject->len ? ENGINE_WHOLE_LINE_MATCH : ENGINE_NO_MATCH);
    } else {
        if (IS_CASE_INSENSITIVE(p->flags)) {
            return (0 == u_strcasecmp(p->pattern->ptr, subject->ptr, 0) ? ENGINE_WHOLE_LINE_MATCH : ENGINE_NO_MATCH);
        } else {
            return (0 == u_strcmp(p->pattern->ptr, subject->ptr) ? ENGINE_WHOLE_LINE_MATCH : ENGINE_NO_MATCH);
        }
    }
}
Ejemplo n.º 5
0
inline SearchBuffer::~SearchBuffer()
{
    // Leave the static object pointing to valid strings (pattern=targer,
    // text=buffer). Otheriwse, usearch_reset() will results in 'use-after-free'
    // error.
    UErrorCode status = U_ZERO_ERROR;
    usearch_setPattern(blink::searcher(), &newlineCharacter, 1, &status);
    usearch_setText(blink::searcher(), &newlineCharacter, 1, &status);
    ASSERT(status == U_ZERO_ERROR);

    unlockSearcher();
}
Ejemplo n.º 6
0
inline size_t SearchBuffer::search(size_t& start)
{
    size_t size = m_buffer.size();
    if (m_atBreak) {
        if (!size)
            return 0;
    } else {
        if (size != m_buffer.capacity())
            return 0;
    }

    UStringSearch* searcher = blink::searcher();

    UErrorCode status = U_ZERO_ERROR;
    usearch_setText(searcher, m_buffer.data(), size, &status);
    ASSERT(status == U_ZERO_ERROR);

    usearch_setOffset(searcher, m_prefixLength, &status);
    ASSERT(status == U_ZERO_ERROR);

    int matchStart = usearch_next(searcher, &status);
    ASSERT(status == U_ZERO_ERROR);

nextMatch:
    if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
        ASSERT(matchStart == USEARCH_DONE);
        return 0;
    }

    // Matches that start in the overlap area are only tentative.
    // The same match may appear later, matching more characters,
    // possibly including a combining character that's not yet in the buffer.
    if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
        size_t overlap = m_overlap;
        if (m_options & AtWordStarts) {
            // Ensure that there is sufficient context before matchStart the next time around for
            // determining if it is at a word boundary.
            int wordBoundaryContextStart = matchStart;
            U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart);
            wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart);
            overlap = std::min(size - 1, std::max(overlap, size - wordBoundaryContextStart));
        }
        memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar));
        m_prefixLength -= std::min(m_prefixLength, size - overlap);
        m_buffer.shrink(overlap);
        return 0;
    }

    size_t matchedLength = usearch_getMatchedLength(searcher);
    ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size);

    // If this match is "bad", move on to the next match.
    if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) {
        matchStart = usearch_next(searcher, &status);
        ASSERT(status == U_ZERO_ERROR);
        goto nextMatch;
    }

    size_t newSize = size - (matchStart + 1);
    memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
    m_prefixLength -= std::min<size_t>(m_prefixLength, matchStart + 1);
    m_buffer.shrink(newSize);

    start = size - matchStart;
    return matchedLength;
}
Ejemplo n.º 7
0
Archivo: fixed.c Proyecto: julp/ugrep
static engine_return_t engine_fixed_match(error_t **error, void *data, const UString *subject)
{
    int32_t ret;
    UErrorCode status;
    FETCH_DATA(data, p, fixed_pattern_t);

    status = U_ZERO_ERROR;
    if (ustring_empty(p->pattern)) {
        if (IS_WORD_BOUNDED(p->flags)) {
            if (ustring_empty(subject)) {
                return ENGINE_MATCH_FOUND;
            } else {
                int32_t l, u, lastState, state;

                ubrk_setText(p->ubrk, subject->ptr, subject->len, &status);
                if (U_FAILURE(status)) {
                    icu_error_set(error, FATAL, status, "ubrk_setText");
                    return ENGINE_FAILURE;
                }
                if (UBRK_DONE != (l = ubrk_first(p->ubrk))) {
                    lastState = ubrk_getRuleStatus(p->ubrk);
                    while (UBRK_DONE != (u = ubrk_next(p->ubrk))) {
                        state = ubrk_getRuleStatus(p->ubrk);
                        if (UBRK_WORD_NONE == lastState && lastState == state) {
                            return ENGINE_MATCH_FOUND;
                        }
                        lastState = state;
                        l = u;
                    }
                }
                return ENGINE_NO_MATCH;
            }
        } else {
            return ENGINE_MATCH_FOUND;
        }
    } else if (NULL != p->usearch) {
        if (subject->len > 0) {
            usearch_setText(p->usearch, subject->ptr, subject->len, &status);
            if (U_FAILURE(status)) {
                icu_error_set(error, FATAL, status, "usearch_setText");
                return ENGINE_FAILURE;
            }
            ret = usearch_first(p->usearch, &status);
            if (U_FAILURE(status)) {
                icu_error_set(error, FATAL, status, "usearch_first");
                return ENGINE_FAILURE;
            }
            usearch_unbindText(p->usearch);

            return (ret != USEARCH_DONE ? ENGINE_MATCH_FOUND : ENGINE_NO_MATCH);
        } else {
            return ENGINE_NO_MATCH;
        }
    } else {
        UChar *m;
        int32_t pos;

        pos = 0;
        ret = ENGINE_NO_MATCH;
        if (NULL != p->ubrk) {
            ubrk_setText(p->ubrk, subject->ptr, subject->len, &status);
            if (U_FAILURE(status)) {
                icu_error_set(error, FATAL, status, "ubrk_setText");
                return ENGINE_FAILURE;
            }
        }
        while (NULL != (m = u_strFindFirst(subject->ptr + pos, subject->len - pos, p->pattern->ptr, p->pattern->len))) {
            pos = m - subject->ptr;
            if (NULL == p->ubrk || (ubrk_isBoundary(p->ubrk, pos) && ubrk_isBoundary(p->ubrk, pos + p->pattern->len))) {
                ret = ENGINE_MATCH_FOUND;
            }
            pos += p->pattern->len;
        }
        ubrk_unbindText(p->ubrk);

        return ret;
    }
}
Ejemplo n.º 8
0
Archivo: fixed.c Proyecto: julp/ugrep
static UBool engine_fixed_split(error_t **error, void *data, const UString *subject, DArray *array, interval_list_t *intervals)
{
    UErrorCode status;
    int32_t l, lastU;
    dlist_element_t *el;
    FETCH_DATA(data, p, fixed_pattern_t);

    lastU = l = 0;
    status = U_ZERO_ERROR;
    if (NULL != p->usearch) {
        usearch_setText(p->usearch, subject->ptr, subject->len, &status);
        if (U_FAILURE(status)) {
            icu_error_set(error, FATAL, status, "usearch_setText");
            return FALSE;
        }
        /* <X> */
        if (NULL == intervals) {
            int32_t u;

            while (U_SUCCESS(status) && USEARCH_DONE != (u = usearch_next(p->usearch, &status))) {
                add_match(array, subject, l, u);
                l = u += usearch_getMatchedLength(p->usearch);
            }
            add_match(array, subject, l, subject->len);
        } else {
            /* </X> */
            for (el = intervals->head; NULL != el; el = el->next) {
                FETCH_DATA(el->data, i, interval_t);

                if (i->lower_limit > 0) {
                    if (!usearch_fwd_n(p->usearch, subject, NULL, i->lower_limit - lastU, &l, &status)) {
                        break;
                    }
                }
                if (!usearch_fwd_n(p->usearch, subject, array, i->upper_limit - i->lower_limit, &l, &status)) {
                    break;
                }
                lastU = i->upper_limit;
            }
            /* <X> */
        }
        /* </X> */
        usearch_unbindText(p->usearch);
        if (U_FAILURE(status)) {
            icu_error_set(error, FATAL, status, "usearch_next");
            return FALSE;
        }
    } else {
        if (NULL != p->ubrk) {
            ubrk_setText(p->ubrk, subject->ptr, subject->len, &status);
            if (U_FAILURE(status)) {
                icu_error_set(error, FATAL, status, "ubrk_setText");
                return FALSE;
            }
        }
        /* <X> */
        if (NULL == intervals) {
            UChar *m;
            int32_t u;

            u = 0;
            while (NULL != (m = u_strFindFirst(subject->ptr + u, subject->len - u, p->pattern->ptr, p->pattern->len))) {
                u = m - subject->ptr;
                if (NULL == p->ubrk || (ubrk_isBoundary(p->ubrk, u) && ubrk_isBoundary(p->ubrk, u + p->pattern->len))) {
                    add_match(array, subject, l, u);
                }
                l = u = u + p->pattern->len;
            }
            add_match(array, subject, l, subject->len);
        } else {
            /* </X> */
            for (el = intervals->head; NULL != el; el = el->next) {
                FETCH_DATA(el->data, i, interval_t);

                if (i->lower_limit > 0) {
                    if (!binary_fwd_n(p->ubrk, p->pattern, subject, NULL, i->lower_limit - lastU, &l)) {
                        break;
                    }
                }
                if (!binary_fwd_n(p->ubrk, p->pattern, subject, array, i->upper_limit - i->lower_limit, &l)) {
                    break;
                }
                lastU = i->upper_limit;
            }
            /* <X> */
        }
        /* </X> */
        ubrk_unbindText(p->ubrk);
    }

    return TRUE;
}
Ejemplo n.º 9
0
Archivo: fixed.c Proyecto: julp/ugrep
static engine_return_t engine_fixed_match_all(error_t **error, void *data, const UString *subject, interval_list_t *intervals)
{
    int32_t matches;
    UErrorCode status;
    FETCH_DATA(data, p, fixed_pattern_t);

    matches = 0;
    status = U_ZERO_ERROR;
    if (ustring_empty(p->pattern)) {
        if (IS_WORD_BOUNDED(p->flags)) {
            if (ustring_empty(subject)) {
                return ENGINE_MATCH_FOUND;
            } else {
                int32_t l, u, lastState, state;

                ubrk_setText(p->ubrk, subject->ptr, subject->len, &status);
                if (U_FAILURE(status)) {
                    icu_error_set(error, FATAL, status, "ubrk_setText");
                    return ENGINE_FAILURE;
                }
                if (UBRK_DONE != (l = ubrk_first(p->ubrk))) {
                    lastState = ubrk_getRuleStatus(p->ubrk);
                    while (UBRK_DONE != (u = ubrk_next(p->ubrk))) {
                        state = ubrk_getRuleStatus(p->ubrk);
                        if (UBRK_WORD_NONE == lastState && lastState == state) {
                            return ENGINE_MATCH_FOUND;
                        }
                        lastState = state;
                        l = u;
                    }
                }
                return ENGINE_NO_MATCH;
            }
        } else {
            return ENGINE_MATCH_FOUND;
        }
    } else if (NULL != p->usearch) {
        int32_t l, u;

        if (subject->len > 0) {
            usearch_setText(p->usearch, subject->ptr, subject->len, &status);
            if (U_FAILURE(status)) {
                icu_error_set(error, FATAL, status, "usearch_setText");
                return ENGINE_FAILURE;
            }
            for (l = usearch_first(p->usearch, &status); U_SUCCESS(status) && USEARCH_DONE != l; l = usearch_next(p->usearch, &status)) {
                matches++;
                u = l + usearch_getMatchedLength(p->usearch);
                if (interval_list_add(intervals, subject->len, l, u)) {
                    return ENGINE_WHOLE_LINE_MATCH;
                }
            }
            if (U_FAILURE(status)) {
                icu_error_set(error, FATAL, status, "usearch_[first|next]");
                return ENGINE_FAILURE;
            }
            usearch_unbindText(p->usearch);

            return (matches ? ENGINE_MATCH_FOUND : ENGINE_NO_MATCH);
        } else {
            return ENGINE_NO_MATCH;
        }
    } else {
        UChar *m;
        int32_t pos;

        pos = 0;
        if (NULL != p->ubrk) {
            ubrk_setText(p->ubrk, subject->ptr, subject->len, &status);
            if (U_FAILURE(status)) {
                icu_error_set(error, FATAL, status, "ubrk_setText");
                return ENGINE_FAILURE;
            }
        }
        while (NULL != (m = u_strFindFirst(subject->ptr + pos, subject->len - pos, p->pattern->ptr, p->pattern->len))) {
            pos = m - subject->ptr;
            if (NULL == p->ubrk || (ubrk_isBoundary(p->ubrk, pos) && ubrk_isBoundary(p->ubrk, pos + p->pattern->len))) {
                matches++;
                if (interval_list_add(intervals, subject->len, pos, pos + p->pattern->len)) {
                    return ENGINE_WHOLE_LINE_MATCH;
                }
            }
            pos += p->pattern->len;
        }
        ubrk_unbindText(p->ubrk);

        return (matches ? ENGINE_MATCH_FOUND : ENGINE_NO_MATCH);
    }
}
Ejemplo n.º 10
0
void TextSearcherICU::setText(const UChar* text, size_t length) {
  UErrorCode status = U_ZERO_ERROR;
  usearch_setText(m_searcher, text, length, &status);
  DCHECK_EQ(status, U_ZERO_ERROR);
  m_textLength = length;
}