Esempio n. 1
0
StringSearch::StringSearch(const UnicodeString     &pattern, 
                                 CharacterIterator &text,
                           const Locale            &locale, 
                                 BreakIterator     *breakiter,
                                 UErrorCode        &status) :
                           SearchIterator(text, breakiter), 
                           m_collator_(),
                           m_pattern_(pattern)
{
    if (U_FAILURE(status)) {
        m_strsrch_ = NULL;
        return;
    }
    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 
                              m_text_.getBuffer(), m_text_.length(), 
                              locale.getName(), (UBreakIterator *)breakiter, 
                              &status);
    uprv_free(m_search_);
    m_search_ = NULL;

    if (U_SUCCESS(status)) {
              int32_t  length;
        const UChar   *rules = ucol_getRules(m_strsrch_->collator, &length);
        m_collation_rules_.setTo(rules, length);
        m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
                                 &m_collation_rules_);
        // m_search_ has been created by the base SearchIterator class
        m_search_ = m_strsrch_->search;
    }
}
Esempio n. 2
0
StringSearch::StringSearch(const UnicodeString &pattern,
                           const UnicodeString &text,
                           const Locale        &locale,
                                 BreakIterator *breakiter,
                                 UErrorCode    &status) :
                           SearchIterator(text, breakiter),
                           m_pattern_(pattern)
{
    if (U_FAILURE(status)) {
        m_strsrch_ = NULL;
        return;
    }

    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
                              m_text_.getBuffer(), m_text_.length(),
                              locale.getName(), (UBreakIterator *)breakiter,
                              &status);
    uprv_free(m_search_);
    m_search_ = NULL;

    if (U_SUCCESS(status)) {
        // m_search_ has been created by the base SearchIterator class
        m_search_        = m_strsrch_->search;
    }
}
Esempio n. 3
0
static UStringSearch* createSearcher()
{
    // Provide a non-empty pattern and non-empty text so usearch_open will not fail,
    // but it doesn't matter exactly what it is, since we don't perform any searches
    // without setting both the pattern and the text.
    UErrorCode status = U_ZERO_ERROR;
    String searchCollatorName = currentSearchLocaleID() + String("@collation=search");
    UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, searchCollatorName.utf8().data(), 0, &status);
    ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
    return searcher;
}
Esempio n. 4
0
File: fixed.c Progetto: julp/ugrep
static void *engine_fixed_compile(error_t **error, UString *ustr, uint32_t flags)
{
    UErrorCode status;
    fixed_pattern_t *p;

    p = mem_new(*p);
    p->pattern = ustr; // not needed with usearch ?
    p->flags = flags;
    p->ubrk = NULL;
    p->usearch = NULL;
    status = U_ZERO_ERROR;
    if (ustring_empty(ustr)) {
        if (IS_WORD_BOUNDED(flags)) {
            p->ubrk = ubrk_open(UBRK_WORD, NULL, NULL, 0, &status);
        }
    } else {
        if (!IS_WHOLE_LINE(flags)) {
            if (IS_WORD_BOUNDED(flags)) {
                p->ubrk = ubrk_open(UBRK_WORD, NULL, NULL, 0, &status);
            } else if (WITH_GRAPHEME()) {
                p->ubrk = ubrk_open(UBRK_CHARACTER, NULL, NULL, 0, &status);
            }
            if (U_FAILURE(status)) {
                fixed_pattern_destroy(p);
                icu_error_set(error, FATAL, status, "ubrk_open");
                return NULL;
            }
        }
        if (IS_WORD_BOUNDED(flags) || (IS_CASE_INSENSITIVE(flags) && !IS_WHOLE_LINE(flags))) {
            p->usearch = usearch_open(ustr->ptr, ustr->len, USEARCH_FAKE_USTR, uloc_getDefault(), p->ubrk, &status);
            if (U_FAILURE(status)) {
                if (NULL != p->ubrk) {
                    ubrk_close(p->ubrk);
                }
                fixed_pattern_destroy(p);
                icu_error_set(error, FATAL, status, "usearch_open");
                return NULL;
            }
            if (IS_CASE_INSENSITIVE(flags)) {
                UCollator *ucol;

                ucol = usearch_getCollator(p->usearch);
                ucol_setStrength(ucol, (flags & ~OPT_MASK) > 1 ? UCOL_SECONDARY : UCOL_PRIMARY);
            }
        }
    }

    return p;
}
Esempio n. 5
0
StringSearch::StringSearch(const UnicodeString &pattern, 
                           const UnicodeString &text,
                           const Locale        &locale,       
                                 BreakIterator *breakiter,
                                 UErrorCode    &status) :
                           SearchIterator(text, breakiter), 
                           m_collator_(),
                           m_pattern_(pattern)
{
    if (U_FAILURE(status)) {
        m_strsrch_ = NULL;
        return;
    }

    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), 
                              m_text_.getBuffer(), m_text_.length(), 
                              locale.getName(), (UBreakIterator *)breakiter, 
                              &status);
    uprv_free(m_search_);
    m_search_ = NULL;

	// !!! dlf m_collator_ is an odd beast.  basically it is an aliasing
	// wrapper around the internal collator and rules, which (here) are
	// owned by this stringsearch object.  this means 1) it's destructor
	// _should not_ delete the ucollator or rules, and 2) changes made
	// to the exposed collator (setStrength etc) _should_ modify the 
	// ucollator.  thus the collator is not a copy-on-write alias, and it
	// needs to distinguish itself not merely from 'stand alone' colators
	// but also from copy-on-write ones.  it needs additional state, which
	// setUCollator should set.

    if (U_SUCCESS(status)) {
              int32_t  length;
        const UChar   *rules = ucol_getRules(m_strsrch_->collator, &length);
        m_collation_rules_.setTo(rules, length);
        m_collator_.setUCollator((UCollator *)m_strsrch_->collator,
                                 &m_collation_rules_);
        // m_search_ has been created by the base SearchIterator class
        m_search_        = m_strsrch_->search;
    }
}
Esempio n. 6
0
StringSearchPerformanceTest::StringSearchPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
:UPerfTest(argc,argv,status){
    int32_t start, end;

#ifdef TEST_BOYER_MOORE_SEARCH
    bms = NULL;
#else
    srch = NULL;
#endif

    pttrn = NULL;
    if(status== U_ILLEGAL_ARGUMENT_ERROR || line_mode){
       fprintf(stderr,gUsageString, "strsrchperf");
       return;
    }
    /* Get the Text */
    src = getBuffer(srcLen, status);

#if 0
    /* Get a word to find. Do this by selecting a random word with a word breakiterator. */
    UBreakIterator* brk = ubrk_open(UBRK_WORD, locale, src, srcLen, &status);
    if(U_FAILURE(status)){
        fprintf(stderr, "FAILED to create pattern for searching. Error: %s\n", u_errorName(status));
        return;
    }
    start = ubrk_preceding(brk, 1000);
    end = ubrk_following(brk, start);
    pttrnLen = end - start;
    UChar* temp = (UChar*)malloc(sizeof(UChar)*(pttrnLen));
    for (int i = 0; i < pttrnLen; i++) {
        temp[i] = src[start++];
    }
    pttrn = temp; /* store word in pttrn */
    ubrk_close(brk);
#else
    /* The first line of the file contains the pattern */
    start = 0;

    for(end = start; ; end += 1) {
        UChar ch = src[end];

        if (ch == 0x000A || ch == 0x000D || ch == 0x2028) {
            break;
        }
    }

    pttrnLen = end - start;
    UChar* temp = (UChar*)malloc(sizeof(UChar)*(pttrnLen));
    for (int i = 0; i < pttrnLen; i++) {
        temp[i] = src[start++];
    }
    pttrn = temp; /* store word in pttrn */
#endif
    
#ifdef TEST_BOYER_MOORE_SEARCH
    UnicodeString patternString(pttrn, pttrnLen);
    UCollator *coll = ucol_open(locale, &status);
    CollData *data = CollData::open(coll, status);

    targetString = new UnicodeString(src, srcLen);
    bms = new BoyerMooreSearch(data, patternString, targetString, status);
#else
    /* Create the StringSearch object to be use in performance test. */
    srch = usearch_open(pttrn, pttrnLen, src, srcLen, locale, NULL, &status);
#endif

    if(U_FAILURE(status)){
        fprintf(stderr, "FAILED to create UPerfTest object. Error: %s\n", u_errorName(status));
        return;
    }
    
}
Esempio n. 7
0
/* {{{ grapheme_strpos_utf16 - strrpos using utf16*/
int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, size_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last)
{
	UChar *uhaystack = NULL, *uneedle = NULL;
	int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0;
	unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
	UBreakIterator* bi = NULL;
	UErrorCode status;
	UStringSearch* src = NULL;
	UCollator *coll;

	if(puchar_pos) {
		*puchar_pos = -1;
	}
	/* convert the strings to UTF-16. */

	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&uhaystack, &uhaystack_len, haystack, haystack_len, &status );
	STRPOS_CHECK_STATUS(status, "Error converting input string to UTF-16");

	status = U_ZERO_ERROR;
	intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, needle, needle_len, &status );
	STRPOS_CHECK_STATUS(status, "Error converting needle string to UTF-16");

	/* get a pointer to the haystack taking into account the offset */
	status = U_ZERO_ERROR;
	bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status );
	STRPOS_CHECK_STATUS(status, "Failed to get iterator");
	status = U_ZERO_ERROR;
	ubrk_setText(bi, uhaystack, uhaystack_len, &status);
	STRPOS_CHECK_STATUS(status, "Failed to set up iterator");

	status = U_ZERO_ERROR;
	src = usearch_open(uneedle, uneedle_len, uhaystack, uhaystack_len, "", bi, &status);
	STRPOS_CHECK_STATUS(status, "Error creating search object");

	if(f_ignore_case) {
		coll = usearch_getCollator(src);
		status = U_ZERO_ERROR;
		ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_SECONDARY, &status);
		STRPOS_CHECK_STATUS(status, "Error setting collation strength");
		usearch_reset(src);
	}

	if(offset != 0) {
		offset_pos = grapheme_get_haystack_offset(bi, offset);
		if(offset_pos == -1) {
			status = U_ILLEGAL_ARGUMENT_ERROR;
			STRPOS_CHECK_STATUS(status, "Invalid search offset");	
		}
		status = U_ZERO_ERROR;
		usearch_setOffset(src, offset_pos, &status);	
		STRPOS_CHECK_STATUS(status, "Invalid search offset");
	}


	if(last) {
		char_pos = usearch_last(src, &status);
		if(char_pos < offset_pos) {
			/* last one is beyound our start offset */
			char_pos = USEARCH_DONE;
		}
	} else {
		char_pos = usearch_next(src, &status);
	}
	STRPOS_CHECK_STATUS(status, "Error looking up string");
	if(char_pos != USEARCH_DONE && ubrk_isBoundary(bi, char_pos)) {
		ret_pos = grapheme_count_graphemes(bi, uhaystack,char_pos);
		if(puchar_pos) {
			*puchar_pos = char_pos;
		}
	} else {
		ret_pos = -1;
	}

	if (uhaystack) {
		efree( uhaystack );
	}
	if (uneedle) {
		efree( uneedle );
	}
	ubrk_close (bi);
	usearch_close (src);

	return ret_pos;
}