예제 #1
0
파일: fixed.c 프로젝트: julp/ugrep
static UBool usearch_fwd_n(
    UStringSearch *usearch,
    const UString *subject,
    DArray *array, /* NULL to skip n matches */
    int32_t n,
    int32_t *l,
    UErrorCode *status
) {
    int32_t u;

    while (n > 0 && U_SUCCESS(*status) && USEARCH_DONE != (u = usearch_next(usearch, status))) {
        --n;
        if (NULL != array) {
            add_match(array, subject, *l, u);
        }
        *l = u += usearch_getMatchedLength(usearch);
    }
    if (0 == n) {
        return TRUE;
    } else {
        if (NULL != array) {
            add_match(array, subject, *l, subject->len);
        }
        return FALSE;
    }
}
예제 #2
0
/*
 Return value is a "Win32 BOOL" (1 = true, 0 = false)
 */
extern "C" int32_t EndsWith(
    SortHandle* pSortHandle, const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t options)
{
    int32_t result = FALSE;
    UErrorCode err = U_ZERO_ERROR;
    const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);

    if (U_SUCCESS(err))
    {
        UStringSearch* pSearch = usearch_openFromCollator(lpTarget, cwTargetLength, lpSource, cwSourceLength, pColl, nullptr, &err);
        int32_t idx = USEARCH_DONE;

        if (U_SUCCESS(err))
        {
            idx = usearch_last(pSearch, &err);

            if (idx != USEARCH_DONE)
            {
                if ((idx + usearch_getMatchedLength(pSearch)) == cwSourceLength)
                {
                    result = TRUE;
                }

                // TODO (dotnet/corefx#3467): We should do something similar to what
                // StartsWith does where we can ignore
                // some collation elements at the end of the string if they are zero.
            }

            usearch_close(pSearch);
        }
    }

    return result;
}
예제 #3
0
파일: fixed.c 프로젝트: julp/ugrep
static engine_return_t engine_fixed_whole_line_match(error_t **error, void *data, const UString *subject)
{
    FETCH_DATA(data, p, fixed_pattern_t);

    if (ustring_empty(p->pattern)) {
        return ustring_empty(subject) ? ENGINE_WHOLE_LINE_MATCH : ENGINE_NO_MATCH;
    } else if (NULL != p->usearch) {
        int32_t ret;
        UErrorCode status;

        status = U_ZERO_ERROR;
        usearch_setText(p->usearch, subject->ptr, subject->len, &status);
        if (U_FAILURE(status)) {
            icu_error_set(error, FATAL, status, "usearch_setText");
            return ENGINE_FAILURE;
        }
        ret = usearch_first(p->usearch, &status);
        if (U_FAILURE(status)) {
            icu_error_set(error, FATAL, status, "usearch_first");
            return ENGINE_FAILURE;
        }
        usearch_unbindText(p->usearch);

        return (ret != USEARCH_DONE && ((size_t) usearch_getMatchedLength(p->usearch)) == subject->len ? ENGINE_WHOLE_LINE_MATCH : ENGINE_NO_MATCH);
    } else {
        if (IS_CASE_INSENSITIVE(p->flags)) {
            return (0 == u_strcasecmp(p->pattern->ptr, subject->ptr, 0) ? ENGINE_WHOLE_LINE_MATCH : ENGINE_NO_MATCH);
        } else {
            return (0 == u_strcmp(p->pattern->ptr, subject->ptr) ? ENGINE_WHOLE_LINE_MATCH : ENGINE_NO_MATCH);
        }
    }
}
예제 #4
0
bool TextSearcherICU::nextMatchResult(MatchResult& result) {
  UErrorCode status = U_ZERO_ERROR;
  const int matchStart = usearch_next(m_searcher, &status);
  DCHECK_EQ(status, U_ZERO_ERROR);

  // TODO(iceman): It is possible to use |usearch_getText| function
  // to retrieve text length and not store it explicitly.
  if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < m_textLength)) {
    DCHECK_EQ(matchStart, USEARCH_DONE);
    result.start = 0;
    result.length = 0;
    return false;
  }

  result.start = static_cast<size_t>(matchStart);
  result.length = usearch_getMatchedLength(m_searcher);
  return true;
}
예제 #5
0
UBool findPattern()
{
	UErrorCode status = U_ZERO_ERROR;
	int32_t offset = usearch_next(search, &status);
	if (offset == USEARCH_DONE) {
		fprintf(stdout, "Pattern not found in source\n");
	}
	while (offset != USEARCH_DONE) {
		fprintf(stdout, "Pattern found at offset %d size %d\n", offset,
				usearch_getMatchedLength(search));
		offset = usearch_next(search, &status);
	}
	if (U_FAILURE(status)) {
		fprintf(stderr, "Error in searching for pattern %d\n", status);
		return FALSE;
	}
	fprintf(stdout, "End of search\n");
	return TRUE;
}
예제 #6
0
파일: icu.c 프로젝트: Kielek/calibre
// Collator.find {{{
static PyObject *
icu_Collator_find(icu_Collator *self, PyObject *args, PyObject *kwargs) {
    PyObject *a_, *b_;
    int32_t asz, bsz;
    UChar *a, *b;
    wchar_t *aw, *bw;
    UErrorCode status = U_ZERO_ERROR;
    UStringSearch *search = NULL;
    int32_t pos = -1, length = -1;
  
    if (!PyArg_ParseTuple(args, "UU", &a_, &b_)) return NULL;
    asz = (int32_t)PyUnicode_GetSize(a_); bsz = (int32_t)PyUnicode_GetSize(b_);
    
    a = (UChar*)calloc(asz*4 + 2, sizeof(UChar));
    b = (UChar*)calloc(bsz*4 + 2, sizeof(UChar));
    aw = (wchar_t*)calloc(asz*4 + 2, sizeof(wchar_t));
    bw = (wchar_t*)calloc(bsz*4 + 2, sizeof(wchar_t));

    if (a == NULL || b == NULL || aw == NULL || bw == NULL) return PyErr_NoMemory();

    PyUnicode_AsWideChar((PyUnicodeObject*)a_, aw, asz*4+1);
    PyUnicode_AsWideChar((PyUnicodeObject*)b_, bw, bsz*4+1);
    u_strFromWCS(a, asz*4 + 1, NULL, aw, -1, &status);
    u_strFromWCS(b, bsz*4 + 1, NULL, bw, -1, &status);

    if (U_SUCCESS(status)) {
        search = usearch_openFromCollator(a, -1, b, -1, self->collator, NULL, &status);
        if (U_SUCCESS(status)) {
            pos = usearch_first(search, &status);
            if (pos != USEARCH_DONE) 
                length = usearch_getMatchedLength(search);
            else
                pos = -1;
        }
        if (search != NULL) usearch_close(search);
    }

    free(a); free(b); free(aw); free(bw);

    return Py_BuildValue("ii", pos, length);
} // }}}
예제 #7
0
파일: icu.c 프로젝트: IvoNet/calibre
// Collator.find {{{
static PyObject *
icu_Collator_find(icu_Collator *self, PyObject *args, PyObject *kwargs) {
#if PY_VERSION_HEX >= 0x03030000 
#error Not implemented for python >= 3.3
#endif
    PyObject *a_ = NULL, *b_ = NULL;
    UChar *a = NULL, *b = NULL;
    int32_t asz = 0, bsz = 0, pos = -1, length = -1;
    UErrorCode status = U_ZERO_ERROR;
    UStringSearch *search = NULL;
  
    if (!PyArg_ParseTuple(args, "OO", &a_, &b_)) return NULL;

    a = python_to_icu(a_, &asz, 1);
    if (a == NULL) goto end;
    b = python_to_icu(b_, &bsz, 1);
    if (b == NULL) goto end;

    search = usearch_openFromCollator(a, asz, b, bsz, self->collator, NULL, &status);
    if (U_SUCCESS(status)) {
        pos = usearch_first(search, &status);
        if (pos != USEARCH_DONE) {
            length = usearch_getMatchedLength(search);
#ifdef Py_UNICODE_WIDE
            // We have to return number of unicode characters since the string
            // could contain surrogate pairs which are represented as a single
            // character in python wide builds
            length = u_countChar32(b + pos, length);
            pos = u_countChar32(b, pos);
#endif
        } else pos = -1;
    }
end:
    if (search != NULL) usearch_close(search);
    if (a != NULL) free(a);
    if (b != NULL) free(b);

    return (PyErr_Occurred()) ? NULL : Py_BuildValue("ii", pos, length);
} // }}}
Boolean
CFStringFindWithOptionsAndLocale (CFStringRef str,
                                  CFStringRef stringToFind,
                                  CFRange rangeToSearch,
                                  CFStringCompareFlags searchOptions,
                                  CFLocaleRef locale, CFRange *result)
{
  UniChar *pattern;
  UniChar *text;
  CFIndex patternLength;
  CFIndex textLength;
  CFIndex start;
  CFIndex end;
  CFAllocatorRef alloc;
  UCollator *ucol;
  UStringSearch *usrch;
  UErrorCode err = U_ZERO_ERROR;
  
  if (rangeToSearch.length == 0)
    return false;
  
  alloc = CFAllocatorGetDefault ();
  textLength = CFStringGetLength (stringToFind);
  if (textLength == 0)
    return false;
  
  patternLength = rangeToSearch.length;
  pattern = CFAllocatorAllocate (alloc, patternLength * sizeof(UniChar), 0);
  CFStringGetCharacters (str, rangeToSearch, pattern);
  
  text = CFAllocatorAllocate (alloc, textLength * sizeof(UniChar), 0);
  CFStringGetCharacters (stringToFind, CFRangeMake(0, textLength), text);
  
  ucol = CFStringICUCollatorOpen (searchOptions, locale);
  usrch = usearch_openFromCollator (text, textLength, pattern, patternLength,
                                    ucol, NULL, &err);
  if (U_FAILURE(err))
    return false;
  
  /* FIXME: need to handle kCFCompareAnchored */
  if (searchOptions & kCFCompareBackwards)
    {
      start = usearch_last (usrch, &err);
    }
  else
    {
      start = usearch_first (usrch, &err);
    }
  if (start == USEARCH_DONE)
    {
      CFAllocatorDeallocate (alloc, pattern);
      CFAllocatorDeallocate (alloc, text);
      return false;
    }
  end = usearch_getMatchedLength (usrch);
  usearch_close (usrch);
  CFStringICUCollatorClose (ucol);
  
  if (result)
    *result = CFRangeMake (start + rangeToSearch.location, end);
  
  CFAllocatorDeallocate (alloc, pattern);
  CFAllocatorDeallocate (alloc, text);
  return true;
}
예제 #9
0
inline size_t SearchBuffer::search(size_t& start)
{
    size_t size = m_buffer.size();
    if (m_atBreak) {
        if (!size)
            return 0;
    } else {
        if (size != m_buffer.capacity())
            return 0;
    }

    UStringSearch* searcher = blink::searcher();

    UErrorCode status = U_ZERO_ERROR;
    usearch_setText(searcher, m_buffer.data(), size, &status);
    ASSERT(status == U_ZERO_ERROR);

    usearch_setOffset(searcher, m_prefixLength, &status);
    ASSERT(status == U_ZERO_ERROR);

    int matchStart = usearch_next(searcher, &status);
    ASSERT(status == U_ZERO_ERROR);

nextMatch:
    if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
        ASSERT(matchStart == USEARCH_DONE);
        return 0;
    }

    // Matches that start in the overlap area are only tentative.
    // The same match may appear later, matching more characters,
    // possibly including a combining character that's not yet in the buffer.
    if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
        size_t overlap = m_overlap;
        if (m_options & AtWordStarts) {
            // Ensure that there is sufficient context before matchStart the next time around for
            // determining if it is at a word boundary.
            int wordBoundaryContextStart = matchStart;
            U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart);
            wordBoundaryContextStart = startOfLastWordBoundaryContext(m_buffer.data(), wordBoundaryContextStart);
            overlap = std::min(size - 1, std::max(overlap, size - wordBoundaryContextStart));
        }
        memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar));
        m_prefixLength -= std::min(m_prefixLength, size - overlap);
        m_buffer.shrink(overlap);
        return 0;
    }

    size_t matchedLength = usearch_getMatchedLength(searcher);
    ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size);

    // If this match is "bad", move on to the next match.
    if (isBadMatch(m_buffer.data() + matchStart, matchedLength) || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))) {
        matchStart = usearch_next(searcher, &status);
        ASSERT(status == U_ZERO_ERROR);
        goto nextMatch;
    }

    size_t newSize = size - (matchStart + 1);
    memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
    m_prefixLength -= std::min<size_t>(m_prefixLength, matchStart + 1);
    m_buffer.shrink(newSize);

    start = size - matchStart;
    return matchedLength;
}
예제 #10
0
파일: fixed.c 프로젝트: julp/ugrep
static UBool engine_fixed_split(error_t **error, void *data, const UString *subject, DArray *array, interval_list_t *intervals)
{
    UErrorCode status;
    int32_t l, lastU;
    dlist_element_t *el;
    FETCH_DATA(data, p, fixed_pattern_t);

    lastU = l = 0;
    status = U_ZERO_ERROR;
    if (NULL != p->usearch) {
        usearch_setText(p->usearch, subject->ptr, subject->len, &status);
        if (U_FAILURE(status)) {
            icu_error_set(error, FATAL, status, "usearch_setText");
            return FALSE;
        }
        /* <X> */
        if (NULL == intervals) {
            int32_t u;

            while (U_SUCCESS(status) && USEARCH_DONE != (u = usearch_next(p->usearch, &status))) {
                add_match(array, subject, l, u);
                l = u += usearch_getMatchedLength(p->usearch);
            }
            add_match(array, subject, l, subject->len);
        } else {
            /* </X> */
            for (el = intervals->head; NULL != el; el = el->next) {
                FETCH_DATA(el->data, i, interval_t);

                if (i->lower_limit > 0) {
                    if (!usearch_fwd_n(p->usearch, subject, NULL, i->lower_limit - lastU, &l, &status)) {
                        break;
                    }
                }
                if (!usearch_fwd_n(p->usearch, subject, array, i->upper_limit - i->lower_limit, &l, &status)) {
                    break;
                }
                lastU = i->upper_limit;
            }
            /* <X> */
        }
        /* </X> */
        usearch_unbindText(p->usearch);
        if (U_FAILURE(status)) {
            icu_error_set(error, FATAL, status, "usearch_next");
            return FALSE;
        }
    } else {
        if (NULL != p->ubrk) {
            ubrk_setText(p->ubrk, subject->ptr, subject->len, &status);
            if (U_FAILURE(status)) {
                icu_error_set(error, FATAL, status, "ubrk_setText");
                return FALSE;
            }
        }
        /* <X> */
        if (NULL == intervals) {
            UChar *m;
            int32_t u;

            u = 0;
            while (NULL != (m = u_strFindFirst(subject->ptr + u, subject->len - u, p->pattern->ptr, p->pattern->len))) {
                u = m - subject->ptr;
                if (NULL == p->ubrk || (ubrk_isBoundary(p->ubrk, u) && ubrk_isBoundary(p->ubrk, u + p->pattern->len))) {
                    add_match(array, subject, l, u);
                }
                l = u = u + p->pattern->len;
            }
            add_match(array, subject, l, subject->len);
        } else {
            /* </X> */
            for (el = intervals->head; NULL != el; el = el->next) {
                FETCH_DATA(el->data, i, interval_t);

                if (i->lower_limit > 0) {
                    if (!binary_fwd_n(p->ubrk, p->pattern, subject, NULL, i->lower_limit - lastU, &l)) {
                        break;
                    }
                }
                if (!binary_fwd_n(p->ubrk, p->pattern, subject, array, i->upper_limit - i->lower_limit, &l)) {
                    break;
                }
                lastU = i->upper_limit;
            }
            /* <X> */
        }
        /* </X> */
        ubrk_unbindText(p->ubrk);
    }

    return TRUE;
}
예제 #11
0
파일: fixed.c 프로젝트: julp/ugrep
static engine_return_t engine_fixed_match_all(error_t **error, void *data, const UString *subject, interval_list_t *intervals)
{
    int32_t matches;
    UErrorCode status;
    FETCH_DATA(data, p, fixed_pattern_t);

    matches = 0;
    status = U_ZERO_ERROR;
    if (ustring_empty(p->pattern)) {
        if (IS_WORD_BOUNDED(p->flags)) {
            if (ustring_empty(subject)) {
                return ENGINE_MATCH_FOUND;
            } else {
                int32_t l, u, lastState, state;

                ubrk_setText(p->ubrk, subject->ptr, subject->len, &status);
                if (U_FAILURE(status)) {
                    icu_error_set(error, FATAL, status, "ubrk_setText");
                    return ENGINE_FAILURE;
                }
                if (UBRK_DONE != (l = ubrk_first(p->ubrk))) {
                    lastState = ubrk_getRuleStatus(p->ubrk);
                    while (UBRK_DONE != (u = ubrk_next(p->ubrk))) {
                        state = ubrk_getRuleStatus(p->ubrk);
                        if (UBRK_WORD_NONE == lastState && lastState == state) {
                            return ENGINE_MATCH_FOUND;
                        }
                        lastState = state;
                        l = u;
                    }
                }
                return ENGINE_NO_MATCH;
            }
        } else {
            return ENGINE_MATCH_FOUND;
        }
    } else if (NULL != p->usearch) {
        int32_t l, u;

        if (subject->len > 0) {
            usearch_setText(p->usearch, subject->ptr, subject->len, &status);
            if (U_FAILURE(status)) {
                icu_error_set(error, FATAL, status, "usearch_setText");
                return ENGINE_FAILURE;
            }
            for (l = usearch_first(p->usearch, &status); U_SUCCESS(status) && USEARCH_DONE != l; l = usearch_next(p->usearch, &status)) {
                matches++;
                u = l + usearch_getMatchedLength(p->usearch);
                if (interval_list_add(intervals, subject->len, l, u)) {
                    return ENGINE_WHOLE_LINE_MATCH;
                }
            }
            if (U_FAILURE(status)) {
                icu_error_set(error, FATAL, status, "usearch_[first|next]");
                return ENGINE_FAILURE;
            }
            usearch_unbindText(p->usearch);

            return (matches ? ENGINE_MATCH_FOUND : ENGINE_NO_MATCH);
        } else {
            return ENGINE_NO_MATCH;
        }
    } else {
        UChar *m;
        int32_t pos;

        pos = 0;
        if (NULL != p->ubrk) {
            ubrk_setText(p->ubrk, subject->ptr, subject->len, &status);
            if (U_FAILURE(status)) {
                icu_error_set(error, FATAL, status, "ubrk_setText");
                return ENGINE_FAILURE;
            }
        }
        while (NULL != (m = u_strFindFirst(subject->ptr + pos, subject->len - pos, p->pattern->ptr, p->pattern->len))) {
            pos = m - subject->ptr;
            if (NULL == p->ubrk || (ubrk_isBoundary(p->ubrk, pos) && ubrk_isBoundary(p->ubrk, pos + p->pattern->len))) {
                matches++;
                if (interval_list_add(intervals, subject->len, pos, pos + p->pattern->len)) {
                    return ENGINE_WHOLE_LINE_MATCH;
                }
            }
            pos += p->pattern->len;
        }
        ubrk_unbindText(p->ubrk);

        return (matches ? ENGINE_MATCH_FOUND : ENGINE_NO_MATCH);
    }
}