Ejemplo n.º 1
0
static void TestPrimary( )
{
    int32_t len,i;
    UCollator *myCollation;
    UErrorCode status=U_ZERO_ERROR;
    static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";   
    UChar rules[sizeof(str)];
    len = strlen(str);
    u_uastrcpy(rules, str);

    myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
    if(U_FAILURE(status)){
        log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
        return;
    }
    ucol_setStrength(myCollation, UCOL_PRIMARY);
    
    for (i = 17; i < 26 ; i++)
    {
        
        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    }
    ucol_close(myCollation);
    myCollation = 0;
}
Ejemplo n.º 2
0
static void TestSecondary()
{
    UCollationResult expected=UCOL_EQUAL;
    int32_t i,j, testAcuteSize;
    UErrorCode status = U_ZERO_ERROR;
    myCollation = ucol_open("en_US", &status);
    if(U_FAILURE(status)){
        log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
        return;
    }
    ucol_setStrength(myCollation, UCOL_SECONDARY);
    log_verbose("Testing English Collation with Secondary strength\n");
    for (i = 43; i < 49 ; i++)
    {
        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    }
    

    /*test acute and grave ordering (compare to french collation) */
    testAcuteSize = sizeof(testAcute) / sizeof(testAcute[0]);
    for (i = 0; i < testAcuteSize; i++)
    {
        for (j = 0; j < testAcuteSize; j++)
        {
            if (i <  j) expected = UCOL_LESS;
            if (i == j) expected = UCOL_EQUAL;
            if (i >  j) expected = UCOL_GREATER;
            doTest(myCollation, testAcute[i], testAcute[j], expected );
        }
    }
ucol_close(myCollation);
}
Ejemplo n.º 3
0
// Opens an ICU collator for `locale`, switches normalization on, and applies
// the ICU strength that corresponds to `level`. Returns MojErrNone once the
// collator is configured; ICU failures are routed through MojUnicodeErrCheck.
MojErr MojDbTextCollator::init(const MojChar* locale, MojDbCollationStrength level)
{
	MojAssert(locale);
	MojAssert(!m_ucol);

	// Translate the MojDb level into ICU's strength. An unrecognized level
	// hits MojAssertNotReached and leaves the primary default in place.
	UCollationStrength icuStrength = UCOL_PRIMARY;
	if (level == MojDbCollationPrimary) {
		icuStrength = UCOL_PRIMARY;
	} else if (level == MojDbCollationSecondary) {
		icuStrength = UCOL_SECONDARY;
	} else if (level == MojDbCollationTertiary) {
		icuStrength = UCOL_TERTIARY;
	} else if (level == MojDbCollationIdentical) {
		icuStrength = UCOL_IDENTICAL;
	} else {
		MojAssertNotReached();
	}

	UErrorCode icuStatus = U_ZERO_ERROR;

	m_ucol = ucol_open(locale, &icuStatus);
	MojUnicodeErrCheck(icuStatus);
	MojAssert(m_ucol);

	ucol_setAttribute(m_ucol, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
	MojUnicodeErrCheck(icuStatus);

	ucol_setStrength(m_ucol, icuStrength);
	return MojErrNone;
}
Ejemplo n.º 4
0
/*
 * With the custom rule set at tertiary strength, checks that every entry of
 * testCases compares as less than every entry that follows it.
 */
static void TestExtra()
{
    static const char ruleText[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
    UChar ruleBuffer[sizeof(ruleText)];
    UErrorCode status = U_ZERO_ERROR;
    UCollator *myCollation;
    int32_t ruleLength;
    int32_t lower, upper;

    /* Copy the char rule text into the UChar buffer handed to the rule parser. */
    ruleLength = strlen(ruleText);
    u_uastrcpy(ruleBuffer, ruleText);

    myCollation = ucol_openRules(ruleBuffer, ruleLength, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status));
        return;
    }

    ucol_setStrength(myCollation, UCOL_TERTIARY);

    lower = 0;
    while (lower < COUNT_TEST_CASES-1) {
        upper = lower + 1;
        while (upper < COUNT_TEST_CASES) {
            doTest(myCollation, testCases[lower], testCases[upper], UCOL_LESS);
            upper += 1;
        }
        lower++;
    }

    ucol_close(myCollation);
    myCollation = 0;
}
Ejemplo n.º 5
0
// Applies `strength` to the wrapped ICU collator. Returns true on success;
// warns and returns false when no collator has been created yet.
bool c_Collator::t_setstrength(int64_t strength) {
  if (m_ucoll) {
    ucol_setStrength(m_ucoll, (UCollationStrength)strength);
    return true;
  }
  raise_warning("setstrength called on uninitialized Collator object");
  return false;
}
Ejemplo n.º 6
0
// Applies `strength` to the wrapped ICU collator. Returns true on success;
// warns and returns false when no collator has been created yet.
bool c_Collator::t_setstrength(int64 strength) {
  INSTANCE_METHOD_INJECTION_BUILTIN(Collator, Collator::setstrength);
  if (m_ucoll) {
    ucol_setStrength(m_ucoll, (UCollationStrength)strength);
    return true;
  }
  raise_warning("setstrength called on uninitialized Collator object");
  return false;
}
Ejemplo n.º 7
0
Archivo: icu.c Proyecto: IvoNet/calibre
/*
 * Attribute setter that forwards an integer strength to the wrapped ICU
 * collator.
 *
 * Returns 0 on success, or -1 with a Python TypeError set when the attribute
 * is being deleted or the new value is not an integer.
 */
static int
icu_Collator_set_strength(icu_Collator *self, PyObject *val, void *closure) {
    /* Setters are called with a NULL value for `del obj.attr`; reject that
       before PyInt_Check, which would otherwise dereference the NULL. */
    if (!val) {
        PyErr_SetString(PyExc_TypeError, "Cannot delete the strength attribute.");
        return -1;
    }
    if (!PyInt_Check(val)) {
        PyErr_SetString(PyExc_TypeError, "Strength must be an integer.");
        return -1;
    }
    ucol_setStrength(self->collator, (int)PyInt_AS_LONG(val));
    return 0;
}
Ejemplo n.º 8
0
void TextSearcherICU::setCaseSensitivity(bool caseSensitive) {
  const UCollationStrength strength =
      caseSensitive ? UCOL_TERTIARY : UCOL_PRIMARY;

  UCollator* const collator = usearch_getCollator(m_searcher);
  if (ucol_getStrength(collator) == strength)
    return;

  ucol_setStrength(collator, strength);
  usearch_reset(m_searcher);
}
Ejemplo n.º 9
0
// Builds a search buffer for |target|: stores a folded copy of the target,
// sizes the internal buffer, then locks the shared ICU searcher and configures
// its collator strength and pattern according to |options|.
inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
    : m_options(options)
    , m_prefixLength(0)
    , m_numberOfCharactersJustAppended(0)
    , m_atBreak(true)
    , m_needsMoreContext(options & AtWordStarts)
    , m_targetRequiresKanaWorkaround(containsKanaLetters(target))
{
    ASSERT(!target.isEmpty());
    target.appendTo(m_target);

    // FIXME: We'd like to tailor the searcher to fold quote marks for us instead
    // of doing it in a separate replacement pass here, but ICU doesn't offer a way
    // to add tailoring on top of the locale-specific tailoring as of this writing.
    foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());

    // Reserve at least eight times the target length (but never less than the
    // minimum buffer size); a quarter of the resulting capacity is the overlap.
    size_t targetLength = m_target.size();
    m_buffer.reserveInitialCapacity(std::max(targetLength * 8, minimumSearchBufferSize));
    m_overlap = m_buffer.capacity() / 4;

    if ((m_options & AtWordStarts) && targetLength) {
        UChar32 targetFirstCharacter;
        U16_GET(m_target.data(), 0, 0, targetLength, targetFirstCharacter);
        // Characters in the separator category never really occur at the beginning of a word,
        // so if the target begins with such a character, we just ignore the AtWordStart option.
        if (isSeparator(targetFirstCharacter)) {
            m_options &= ~AtWordStarts;
            m_needsMoreContext = false;
        }
    }

    // Grab the single global searcher.
    // If we ever have a reason to use more than one search buffer at once, we'll have
    // to move to multiple searchers.
    lockSearcher();

    UStringSearch* searcher = blink::searcher();
    UCollator* collator = usearch_getCollator(searcher);

    // Case-insensitive searches compare at primary strength, all others at
    // tertiary; the searcher is reset only when the strength has to change.
    UCollationStrength strength = m_options & CaseInsensitive ? UCOL_PRIMARY : UCOL_TERTIARY;
    if (ucol_getStrength(collator) != strength) {
        ucol_setStrength(collator, strength);
        usearch_reset(searcher);
    }

    UErrorCode status = U_ZERO_ERROR;
    usearch_setPattern(searcher, m_target.data(), targetLength, &status);
    ASSERT(status == U_ZERO_ERROR);

    // The kana workaround requires a normalized copy of the target string.
    if (m_targetRequiresKanaWorkaround)
        normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget);
}
Ejemplo n.º 10
0
/*
 * Lua binding for a collator's strength. Called with the collator alone it
 * pushes the current strength; called with a second argument it sets the
 * strength and leaves the collator itself as the single return value.
 */
static int icu_collator_strength(lua_State *L) {
	UCollator *collator;

	luaL_argcheck(L, lua_getmetatable(L,1) && lua_rawequal(L,-1,COLLATOR_UV_META), 1, "expecting collator");
	lua_pop(L,1);
	collator = *(UCollator**)lua_touserdata(L,1);

	if (lua_gettop(L) != 1) {
		/* Setter form: apply the new strength, then trim the stack to the collator. */
		ucol_setStrength(collator, (UCollationStrength)luaL_checknumber(L,2));
		lua_settop(L,1);
		return 1;
	}

	/* Getter form. */
	lua_pushnumber(L, ucol_getStrength(collator));
	return 1;
}
Ejemplo n.º 11
0
/*
 * Benchmark driver: opens a collator (null locale argument), sets it to
 * primary strength, and compares INPUT1 with INPUT2 PASSES times.
 *
 * Returns 0 after a completed run and 1 when the collator could not be
 * opened, so a run that measured nothing is not reported as a success.
 */
int main() {
	UErrorCode status = U_ZERO_ERROR;
	UCollator *coll = ucol_open(0, &status);
	if (coll == 0) {
		/* Opening failed; every later call would be a no-op. */
		return 1;
	}
	ucol_setStrength(coll, UCOL_PRIMARY);

	for (int i = 0; i < PASSES; ++i) {
		UCollationResult coll_res = ucol_strcollUTF8(coll, INPUT1, -1, INPUT2, -1, &status);
		/* The result is deliberately discarded; only the call itself is being timed. */
		(void)(coll_res);
	}

	ucol_close(coll);

	return 0;
}
Ejemplo n.º 12
0
static void TestJB581(void)
{
    UChar       dispName    [100]; 
    int32_t     bufferLen   = 0;
    UChar       source      [100];
    UChar       target      [100];
    UCollationResult result     = UCOL_EQUAL;
    uint8_t     sourceKeyArray  [100];
    uint8_t     targetKeyArray  [100]; 
    int32_t     sourceKeyOut    = 0, 
                targetKeyOut    = 0;
    UCollator   *myCollator = 0;
    UErrorCode status = U_ZERO_ERROR;

    /*u_uastrcpy(source, "This is a test.");*/
    /*u_uastrcpy(target, "THISISATEST.");*/
    u_uastrcpy(source, "THISISATEST.");
    u_uastrcpy(target, "Thisisatest.");

    myCollator = ucol_open("en_US", &status);
    if (U_FAILURE(status)){
        bufferLen = uloc_getDisplayName("en_US", 0, dispName, 100, &status);
        /*Report the error with display name... */
        log_err("ERROR: Failed to create the collator for : \"%s\"\n", dispName);
        return;
    }
    result = ucol_strcoll(myCollator, source, -1, target, -1);
    /* result is 1, secondary differences only for ignorable space characters*/
    if (result != 1)
    {
        log_err("Comparing two strings with only secondary differences in C failed.\n");
    }
    /* To compare them with just primary differences */
    ucol_setStrength(myCollator, UCOL_PRIMARY);
    result = ucol_strcoll(myCollator, source, -1, target, -1);
    /* result is 0 */
    if (result != 0)
    {
        log_err("Comparing two strings with no differences in C failed.\n");
    }
    /* Now, do the same comparison with keys */
    sourceKeyOut = ucol_getSortKey(myCollator, source, -1, sourceKeyArray, 100);
    targetKeyOut = ucol_getSortKey(myCollator, target, -1, targetKeyArray, 100);
    bufferLen = ((targetKeyOut > 100) ? 100 : targetKeyOut);
    if (memcmp(sourceKeyArray, targetKeyArray, bufferLen) != 0)
    {
        log_err("Comparing two strings with sort keys in C failed.\n");
    }
    ucol_close(myCollator);
}
Ejemplo n.º 13
0
Archivo: fixed.c Proyecto: julp/ugrep
/*
 * Compiles a fixed-string pattern.
 *
 * Allocates a fixed_pattern_t for `ustr` and, depending on `flags`, opens an
 * ICU break iterator (word or character) and/or an ICU string search whose
 * collator strength is lowered for case-insensitive matching.
 *
 * Returns the new pattern, or NULL with `*error` set when an ICU object
 * could not be opened.
 */
static void *engine_fixed_compile(error_t **error, UString *ustr, uint32_t flags)
{
    UErrorCode status;
    fixed_pattern_t *p;

    p = mem_new(*p);
    p->pattern = ustr; // not needed with usearch ?
    p->flags = flags;
    p->ubrk = NULL;
    p->usearch = NULL;
    status = U_ZERO_ERROR;
    if (ustring_empty(ustr)) {
        if (IS_WORD_BOUNDED(flags)) {
            p->ubrk = ubrk_open(UBRK_WORD, NULL, NULL, 0, &status);
            /* Report a failed open here too, as the non-empty branch does,
               instead of returning a pattern without its break iterator. */
            if (U_FAILURE(status)) {
                fixed_pattern_destroy(p);
                icu_error_set(error, FATAL, status, "ubrk_open");
                return NULL;
            }
        }
    } else {
        if (!IS_WHOLE_LINE(flags)) {
            if (IS_WORD_BOUNDED(flags)) {
                p->ubrk = ubrk_open(UBRK_WORD, NULL, NULL, 0, &status);
            } else if (WITH_GRAPHEME()) {
                p->ubrk = ubrk_open(UBRK_CHARACTER, NULL, NULL, 0, &status);
            }
            if (U_FAILURE(status)) {
                fixed_pattern_destroy(p);
                icu_error_set(error, FATAL, status, "ubrk_open");
                return NULL;
            }
        }
        if (IS_WORD_BOUNDED(flags) || (IS_CASE_INSENSITIVE(flags) && !IS_WHOLE_LINE(flags))) {
            p->usearch = usearch_open(ustr->ptr, ustr->len, USEARCH_FAKE_USTR, uloc_getDefault(), p->ubrk, &status);
            if (U_FAILURE(status)) {
                /* NOTE(review): confirm fixed_pattern_destroy does not also
                   close p->ubrk, otherwise the iterator is closed twice. */
                if (NULL != p->ubrk) {
                    ubrk_close(p->ubrk);
                }
                fixed_pattern_destroy(p);
                icu_error_set(error, FATAL, status, "usearch_open");
                return NULL;
            }
            if (IS_CASE_INSENSITIVE(flags)) {
                UCollator *ucol;

                /* Secondary strength when the non-option flag bits exceed 1,
                   primary strength otherwise. */
                ucol = usearch_getCollator(p->usearch);
                ucol_setStrength(ucol, (flags & ~OPT_MASK) > 1 ? UCOL_SECONDARY : UCOL_PRIMARY);
            }
        }
    }

    return p;
}
Ejemplo n.º 14
0
/*
 * Runs entry 4 of the shared test tables against the Finnish
 * ("fi_FI@collation=standard") collator at primary strength.
 */
static void TestPrimary()
{

    int32_t i;
    UErrorCode status = U_ZERO_ERROR;
    myCollation = ucol_open("fi_FI@collation=standard", &status);
    if(U_FAILURE(status)){
        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
        /* Without a collator there is nothing to test; stop here instead of
           running the comparisons against an unusable collator. */
        return;
    }
    log_verbose("Testing Finnish Collation with Primary strength\n");
    ucol_setStrength(myCollation, UCOL_PRIMARY);
    for (i = 4; i < 5; i++)
    {
        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    }
    ucol_close(myCollation);
}
Ejemplo n.º 15
0
/*
 * Runs entries 38..42 of the shared test tables against the en_US collator
 * at primary strength.
 */
static void TestPrimary()
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t caseIndex = 38;

    myCollation = ucol_open("en_US", &status);
    if (U_FAILURE(status)) {
        log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
        return;
    }

    ucol_setStrength(myCollation, UCOL_PRIMARY);
    log_verbose("Testing English Collation with Primary strength\n");

    while (caseIndex < 43) {
        doTest(myCollation, testSourceCases[caseIndex], testTargetCases[caseIndex], results[caseIndex]);
        caseIndex++;
    }

    ucol_close(myCollation);
}
Ejemplo n.º 16
0
/*
 * Runs the first five entries of the shared test tables against the es_ES
 * collator at tertiary strength.
 */
static void TestTertiary( )
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t caseIndex = 0;

    myCollation = ucol_open("es_ES", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: %s: in creation of rule based collator: %s\n", __FILE__, myErrorName(status));
        return;
    }

    log_verbose("Testing Spanish Collation with Tertiary strength\n");
    ucol_setStrength(myCollation, UCOL_TERTIARY);

    while (caseIndex < 5) {
        doTest(myCollation, testSourceCases[caseIndex], testTargetCases[caseIndex], results[caseIndex]);
        ++caseIndex;
    }

    ucol_close(myCollation);
}
Ejemplo n.º 17
0
/*
 * Runs the first six entries of the shared test tables against the ja_JP
 * collator at tertiary strength with the case level switched on.
 */
static void TestTertiary( )
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t caseIndex = 0;

    myCollation = ucol_open("ja_JP", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
        return;
    }

    log_verbose("Testing Kanna(Japan) Collation with Tertiary strength\n");
    ucol_setStrength(myCollation, UCOL_TERTIARY);
    ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, UCOL_ON, &status);

    while (caseIndex < 6) {
        doTest(myCollation, testSourceCases[caseIndex], testTargetCases[caseIndex], results[caseIndex]);
        ++caseIndex;
    }

    ucol_close(myCollation);
}
Ejemplo n.º 18
0
/*
 * Base-letter test: at primary strength with the ja_JP collator, each of the
 * first three testBaseCases entries must compare as less than its successor.
 */
static void TestBase()
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t next;

    myCollation = ucol_open("ja_JP", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of rule based collator: %s\n",
            myErrorName(status));
        return;
    }

    log_verbose("Testing Japanese Base Characters Collation\n");
    ucol_setStrength(myCollation, UCOL_PRIMARY);

    /* Walk the adjacent pairs (0,1), (1,2), (2,3). */
    for (next = 1; next <= 3; next++) {
        doTest(myCollation, testBaseCases[next - 1], testBaseCases[next], UCOL_LESS);
    }

    ucol_close(myCollation);
}
Ejemplo n.º 19
0
static void TestTertiary( )
{
    int32_t testMoreSize;
    UCollationResult expected=UCOL_EQUAL;
    int32_t i,j;
    UErrorCode status = U_ZERO_ERROR;
    myCollation = ucol_open("en_US", &status);
    if(U_FAILURE(status)){
        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
        return;
    }
    log_verbose("Testing English Collation with Tertiary strength\n");

    ucol_setStrength(myCollation, UCOL_TERTIARY);
    for (i = 0; i < 38 ; i++)
    {
        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    }
  

    j = 0;
   for (i = 0; i < 10; i++)
    {
        for (j = i+1; j < 10; j++)
        {
            doTest(myCollation, testBugs[i], testBugs[j], UCOL_LESS);
        }
   }
    /*test more interesting cases */
    testMoreSize = UPRV_LENGTHOF(testMore);
    for (i = 0; i < testMoreSize; i++)
    {
        for (j = 0; j < testMoreSize; j++)
        {
            if (i <  j) expected = UCOL_LESS;
            if (i == j) expected = UCOL_EQUAL;
            if (i >  j) expected = UCOL_GREATER;
            doTest(myCollation, testMore[i], testMore[j], expected );
        }
    }
    ucol_close(myCollation);
}
Ejemplo n.º 20
0
// Creates the ICU collator for `locale` and configures it for `level`:
// normalization is always switched on, numeric collation is added for the
// identical level, and the matching ICU strength is applied last.
// Returns MojErrNone once the collator is configured; ICU failures are
// routed through MojUnicodeErrCheck.
MojErr MojDbTextCollator::init(const MojChar* locale, MojDbCollationStrength level)
{
	LOG_TRACE("Entering function %s", __FUNCTION__);
	MojAssert(locale);
	MojAssert(!m_ucol);

	// Translate the MojDb level into ICU's strength. An unrecognized level
	// hits MojAssertNotReached and leaves the primary default in place.
	UCollationStrength icuStrength = UCOL_PRIMARY;
	if (level == MojDbCollationPrimary) {
		icuStrength = UCOL_PRIMARY;
	} else if (level == MojDbCollationSecondary) {
		icuStrength = UCOL_SECONDARY;
	} else if (level == MojDbCollationTertiary) {
		icuStrength = UCOL_TERTIARY;
	} else if (level == MojDbCollationQuaternary) {
		icuStrength = UCOL_QUATERNARY;
	} else if (level == MojDbCollationIdentical) {
		icuStrength = UCOL_IDENTICAL;
	} else {
		MojAssertNotReached();
	}

	UErrorCode icuStatus = U_ZERO_ERROR;

	m_ucol = ucol_open(locale, &icuStatus);
	MojUnicodeErrCheck(icuStatus);
	MojAssert(m_ucol);

	ucol_setAttribute(m_ucol, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
	const bool wantsNumeric = (level == MojDbCollationIdentical);
	if (wantsNumeric) {
		// IDENTICAL combined with the NUMERIC option covers full-width comparison
		// and orders ["001","01","1"]; NUMERIC turns digit characters into a
		// number, e.g. "a021" -> ["a",21].
		ucol_setAttribute(m_ucol, UCOL_NUMERIC_COLLATION, UCOL_ON, &icuStatus);
	}
	MojUnicodeErrCheck(icuStatus);

	ucol_setStrength(m_ucol, icuStrength);
	return MojErrNone;
}
Ejemplo n.º 21
0
/*
 * With the fr_CA collator at tertiary strength, checks that each of the
 * first ten testBugs entries compares as less than every later one.
 */
static void TestExtra()
{
    int32_t i, j;
    UErrorCode status = U_ZERO_ERROR;
    myCollation = ucol_open("fr_CA", &status);
    if(U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
        return;
    }
    /* The message names the strength that is actually set below. */
    log_verbose("Testing fr_CA Collation extra with tertiary strength\n");
    ucol_setStrength(myCollation, UCOL_TERTIARY);
    for (i = 0; i < 9 ; i++)
    {
        for (j = i + 1; j < 10; j += 1)
        {
            doTest(myCollation, testBugs[i], testBugs[j], UCOL_LESS);
        }
    }
    ucol_close(myCollation);
}
Ejemplo n.º 22
0
/*
 * Katakana/Hiragana test: with the ja_JP collator at quaternary strength and
 * the case level switched on, each of the first three
 * testKatakanaHiraganaCases entries must compare as less than its successor.
 */
static void TestKatakanaHiragana(void)
{
    UErrorCode status = U_ZERO_ERROR;
    int32_t next;

    myCollation = ucol_open("ja_JP", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of rule based collator: %s\n",
            myErrorName(status));
        return;
    }

    log_verbose("Testing Japanese Katakana, Hiragana Characters Collation\n");
    ucol_setStrength(myCollation, UCOL_QUATERNARY);
    ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, UCOL_ON, &status);

    /* Walk the adjacent pairs (0,1), (1,2), (2,3). */
    for (next = 1; next <= 3; next++) {
        doTest(myCollation, testKatakanaHiraganaCases[next - 1],
            testKatakanaHiraganaCases[next], UCOL_LESS);
    }

    ucol_close(myCollation);
}
Ejemplo n.º 23
0
static void TestTertiary( )
{

    int32_t i;
    UErrorCode status = U_ZERO_ERROR;
    myCollation = ucol_open("fr_CA", &status);
    if(U_FAILURE(status) || !myCollation) {
        log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status));
        return;
    }

    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    if(U_FAILURE(status)) {
        log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status));
        return;
    }
    log_verbose("Testing fr_CA Collation with Tertiary strength\n");
    ucol_setStrength(myCollation, UCOL_QUATERNARY);
    for (i = 0; i < 12 ; i++)
    {
        doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
    }
    ucol_close(myCollation);
}
Ejemplo n.º 24
0
// Randomized ("monkey") search test.
//
// Opens a collator from the short string "S1", builds letter / contraction /
// expansion generators, and then, for each selected strength, generates
// loopCount random pattern, prefix and suffix strings and runs monkeyTestCase
// on four combinations of them, accumulating the not-found count.
//
// params: optional option string. Recognized options are "loop" and "seed"
//         (read with getIntParam) and "strength=primary|secondary|tertiary",
//         which restricts the run to that one strength. Anything left over is
//         reported as an error and the test returns early. Parameters are
//         ignored when built with UCONFIG_NO_REGULAR_EXPRESSIONS.
void SSearchTest::monkeyTest(char *params)
{
    // ook!
    UErrorCode status = U_ZERO_ERROR;
  //UCollator *coll = ucol_open(NULL, &status);
    UCollator *coll = ucol_openFromShortString("S1", FALSE, NULL, &status);

    if (U_FAILURE(status)) {
        errcheckln(status, "Failed to create collator in MonkeyTest! - %s", u_errorName(status));
        return;
    }

    CollData  *monkeyData = new CollData(coll, status);

    USet *expansions   = uset_openEmpty();
    USet *contractions = uset_openEmpty();

    ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status);

    U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
    U_STRING_INIT(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39);
    USet *letters = uset_openPattern(letter_pattern, 39, &status);
    SetMonkey letterMonkey(letters);
    StringSetMonkey contractionMonkey(contractions, coll, monkeyData);
    StringSetMonkey expansionMonkey(expansions, coll, monkeyData);
    UnicodeString testCase;
    UnicodeString alternate;
    UnicodeString pattern, altPattern;
    UnicodeString prefix, altPrefix;
    UnicodeString suffix, altSuffix;

    // The contraction and expansion generators are listed four times each,
    // the plain letter generator once.
    Monkey *monkeys[] = {
        &letterMonkey,
        &contractionMonkey,
        &expansionMonkey,
        &contractionMonkey,
        &expansionMonkey,
        &contractionMonkey,
        &expansionMonkey,
        &contractionMonkey,
        &expansionMonkey};
    int32_t monkeyCount = sizeof(monkeys) / sizeof(monkeys[0]);
    // int32_t nonMatchCount = 0;

    UCollationStrength strengths[] = {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY};
    const char *strengthNames[] = {"primary", "secondary", "tertiary"};
    int32_t strengthCount = sizeof(strengths) / sizeof(strengths[0]);
    int32_t loopCount = quick? 1000 : 10000;
    int32_t firstStrength = 0;
    int32_t lastStrength  = strengthCount - 1; //*/ 0;

    if (params != NULL) {
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
        UnicodeString p(params);

        loopCount = getIntParam("loop", p, loopCount);
        m_seed    = getIntParam("seed", p, m_seed);

        RegexMatcher m(" *strength *= *(primary|secondary|tertiary) *", p, 0, status);
        if (m.find()) {
            UnicodeString breakType = m.group(1, status);

            for (int32_t s = 0; s < strengthCount; s += 1) {
                if (breakType == strengthNames[s]) {
                    firstStrength = lastStrength = s;
                    break;
                }
            }

            m.reset();
            p = m.replaceFirst("", status);
        }

        if (RegexMatcher("\\S", p, 0, status).find()) {
            // Each option is stripped out of the option string as it is processed.
            // All options have been checked.  The option string should have been completely emptied..
            char buf[100];
            p.extract(buf, sizeof(buf), NULL, status);
            buf[sizeof(buf)-1] = 0;
            errln("Unrecognized or extra parameter:  %s\n", buf);

            // Release everything acquired above before bailing out; this
            // mirrors the cleanup at the end of the function.
            uset_close(contractions);
            uset_close(expansions);
            uset_close(letters);
            delete monkeyData;
            ucol_close(coll);
            return;
        }
#else
        infoln("SSearchTest built with UCONFIG_NO_REGULAR_EXPRESSIONS: ignoring parameters.");
#endif
    }

    for(int32_t s = firstStrength; s <= lastStrength; s += 1) {
        int32_t notFoundCount = 0;

        logln("Setting strength to %s.", strengthNames[s]);
        ucol_setStrength(coll, strengths[s]);

        // TODO: try alternate prefix and suffix too?
        // TODO: alternates are only equal at primary strength. Is this OK?
        for(int32_t t = 0; t < loopCount; t += 1) {
            // Capture the seed before generating, so it can be passed along
            // with each test case of this iteration.
            uint32_t seed = m_seed;
            // int32_t  nmc = 0;

            generateTestCase(coll, monkeys, monkeyCount, pattern, altPattern);
            generateTestCase(coll, monkeys, monkeyCount, prefix,  altPrefix);
            generateTestCase(coll, monkeys, monkeyCount, suffix,  altSuffix);

            // pattern
            notFoundCount += monkeyTestCase(coll, pattern, pattern, altPattern, "pattern", strengthNames[s], seed);

            testCase.remove();
            testCase.append(prefix);
            testCase.append(/*alt*/pattern);

            // prefix + pattern
            notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern", strengthNames[s], seed);

            testCase.append(suffix);

            // prefix + pattern + suffix
            notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern + suffix", strengthNames[s], seed);

            testCase.remove();
            testCase.append(pattern);
            testCase.append(suffix);

            // pattern + suffix
            notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "pattern + suffix", strengthNames[s], seed);
        }

       logln("For strength %s the not found count is %d.", strengthNames[s], notFoundCount);
    }

    uset_close(contractions);
    uset_close(expansions);
    uset_close(letters);
    delete monkeyData;

    ucol_close(coll);
}
Ejemplo n.º 25
0
// Data-driven search test.
//
// Parses "ssearch.xml" and, for every <test-case> element, builds a collator
// and a string search from the element's attributes (strength, norm,
// alternate_handling, locale) and children (pattern, pre, m, post). It then
// checks that a forward search, and afterwards a backward search, report the
// match range implied by the position of the <m> text inside the target.
// The whole body is compiled out when regular expressions or file I/O are
// disabled in the ICU configuration.
void SSearchTest::searchTest()
{
#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FILE_IO
    UErrorCode status = U_ZERO_ERROR;
    char path[PATH_BUFFER_SIZE];
    const char *testFilePath = getPath(path, "ssearch.xml");

    if (testFilePath == NULL) {
        return; /* Couldn't get path: error message already output. */
    }

    LocalPointer<UXMLParser> parser(UXMLParser::createParser(status));
    TEST_ASSERT_SUCCESS(status);
    LocalPointer<UXMLElement> root(parser->parseFile(testFilePath, status));
    TEST_ASSERT_SUCCESS(status);
    if (U_FAILURE(status)) {
        return;
    }

    // An optional "debug" attribute on the root names the single test case to run.
    const UnicodeString *debugTestCase = root->getAttribute("debug");
    if (debugTestCase != NULL) {
//       setenv("USEARCH_DEBUG", "1", 1);
    }


    const UXMLElement *testCase;
    int32_t tc = 0;

    while((testCase = root->nextChildElement(tc)) != NULL) {

        if (testCase->getTagName().compare("test-case") != 0) {
            errln("ssearch, unrecognized XML Element in test file");
            continue;
        }
        // Copy the test case id (if any) into testId; it is cleared first.
        const UnicodeString *id       = testCase->getAttribute("id");
        *testId = 0;
        if (id != NULL) {
            id->extract(0, id->length(), testId,  sizeof(testId), US_INV);
        }

        // If debugging test case has been specified and this is not it, skip to next.
        if (id!=NULL && debugTestCase!=NULL && *id != *debugTestCase) {
            continue;
        }
        //
        //  Get the requested collation strength.
        //    Default is tertiary if the XML attribute is missing from the test case.
        //
        const UnicodeString *strength = testCase->getAttribute("strength");
        UColAttributeValue collatorStrength = UCOL_PRIMARY;
        if      (strength==NULL)          { collatorStrength = UCOL_TERTIARY;}
        else if (*strength=="PRIMARY")    { collatorStrength = UCOL_PRIMARY;}
        else if (*strength=="SECONDARY")  { collatorStrength = UCOL_SECONDARY;}
        else if (*strength=="TERTIARY")   { collatorStrength = UCOL_TERTIARY;}
        else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;}
        else if (*strength=="IDENTICAL")  { collatorStrength = UCOL_IDENTICAL;}
        else {
            // Bogus value supplied for strength.  Shouldn't happen, even from
            //  typos, if the  XML source has been validated.
            //  This assert is a little deceiving in that strength can be
            //   any of the allowed values, not just TERTIARY, but it will
            //   do the job of getting the error output.
            TEST_ASSERT(*strength=="TERTIARY")
        }

        //
        // Get the collator normalization flag.  Default is UCOL_OFF.
        //
        UColAttributeValue normalize = UCOL_OFF;
        const UnicodeString *norm = testCase->getAttribute("norm");
        TEST_ASSERT (norm==NULL || *norm=="ON" || *norm=="OFF");
        if (norm!=NULL && *norm=="ON") {
            normalize = UCOL_ON;
        }

        //
        // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE.
        //
        UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE;
        const UnicodeString *alt = testCase->getAttribute("alternate_handling");
        TEST_ASSERT (alt == NULL || *alt == "SHIFTED" || *alt == "NON_IGNORABLE");
        if (alt != NULL && *alt == "SHIFTED") {
            alternateHandling = UCOL_SHIFTED;
        }

        // Get the locale; a missing or empty attribute falls back to "en".
        const UnicodeString defLocale("en");
        char  clocale[100];
        const UnicodeString *locale   = testCase->getAttribute("locale");
        if (locale == NULL || locale->length()==0) {
            locale = &defLocale;
        };
        locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL);


        // Assemble the pattern and the target text. The target is the
        // concatenation of the optional <pre>, <m> and <post> children; the
        // expected match range is the span occupied by the <m> text, and
        // stays at -1/-1 when there is no <m> child.
        UnicodeString  text;
        UnicodeString  target;
        UnicodeString  pattern;
        int32_t        expectedMatchStart = -1;
        int32_t        expectedMatchLimit = -1;
        const UXMLElement  *n;
        int32_t                nodeCount = 0;

        n = testCase->getChildElement("pattern");
        TEST_ASSERT(n != NULL);
        if (n==NULL) {
            continue;
        }
        text = n->getText(FALSE);
        text = text.unescape();
        pattern.append(text);
        nodeCount++;

        n = testCase->getChildElement("pre");
        if (n!=NULL) {
            text = n->getText(FALSE);
            text = text.unescape();
            target.append(text);
            nodeCount++;
        }

        n = testCase->getChildElement("m");
        if (n!=NULL) {
            expectedMatchStart = target.length();
            text = n->getText(FALSE);
            text = text.unescape();
            target.append(text);
            expectedMatchLimit = target.length();
            nodeCount++;
        }

        n = testCase->getChildElement("post");
        if (n!=NULL) {
            text = n->getText(FALSE);
            text = text.unescape();
            target.append(text);
            nodeCount++;
        }

        //  Check that there weren't extra things in the XML
        TEST_ASSERT(nodeCount == testCase->countChildren());

        // Open a collator and StringSearch based on the parameters
        //   obtained from the XML.
        //
        status = U_ZERO_ERROR;
        LocalUCollatorPointer collator(ucol_open(clocale, &status));
        ucol_setStrength(collator.getAlias(), collatorStrength);
        ucol_setAttribute(collator.getAlias(), UCOL_NORMALIZATION_MODE, normalize, &status);
        ucol_setAttribute(collator.getAlias(), UCOL_ALTERNATE_HANDLING, alternateHandling, &status);
        LocalUStringSearchPointer uss(usearch_openFromCollator(pattern.getBuffer(), pattern.length(),
                                                               target.getBuffer(), target.length(),
                                                               collator.getAlias(),
                                                               NULL,     // the break iterator
                                                               &status));

        TEST_ASSERT_SUCCESS(status);
        if (U_FAILURE(status)) {
            continue;
        }

        int32_t foundStart = 0;
        int32_t foundLimit = 0;
        UBool   foundMatch;

        //
        // Do the search, check the match result against the expected results.
        //
        foundMatch= usearch_search(uss.getAlias(), 0, &foundStart, &foundLimit, &status);
        TEST_ASSERT_SUCCESS(status);
        if ((foundMatch && expectedMatchStart<0) ||
            (foundStart != expectedMatchStart)   ||
            (foundLimit != expectedMatchLimit)) {
                TEST_ASSERT(FALSE);   //  output generic error position
                infoln("Found, expected match start = %d, %d \n"
                       "Found, expected match limit = %d, %d",
                foundStart, expectedMatchStart, foundLimit, expectedMatchLimit);
        }

        // In case there are other matches...
        // (should we only do this if the test case passed?)
        // Each further match found overwrites the expected range, so the
        // backward search below is checked against the last forward match.
        while (foundMatch) {
            expectedMatchStart = foundStart;
            expectedMatchLimit = foundLimit;

            foundMatch = usearch_search(uss.getAlias(), foundLimit, &foundStart, &foundLimit, &status);
        }

        // Replace the search object with a freshly opened one before searching backwards.
        uss.adoptInstead(usearch_openFromCollator(pattern.getBuffer(), pattern.length(),
            target.getBuffer(), target.length(),
            collator.getAlias(),
            NULL,
            &status));

        //
        // Do the backwards search, check the match result against the expected results.
        //
        foundMatch= usearch_searchBackwards(uss.getAlias(), target.length(), &foundStart, &foundLimit, &status);
        TEST_ASSERT_SUCCESS(status);
        if ((foundMatch && expectedMatchStart<0) ||
            (foundStart != expectedMatchStart)   ||
            (foundLimit != expectedMatchLimit)) {
                TEST_ASSERT(FALSE);   //  output generic error position
                infoln("Found, expected backwards match start = %d, %d \n"
                       "Found, expected backwards match limit = %d, %d",
                foundStart, expectedMatchStart, foundLimit, expectedMatchLimit);
        }
    }
#endif
}
Ejemplo n.º 26
0
/**
 * call-seq:
 *     collator.strength = new_strength
 *
 * Applies +new_strength+ as the collation strength of the UCollator obtained
 * through UCOLLATOR(self). The strength influences how strings are compared.
 *
 * +new_strength+ is type-checked as a Fixnum before it is converted to a C
 * int and handed to ucol_setStrength(). Always returns nil.
 **/
VALUE icu4r_col_set_strength(VALUE self, VALUE obj)
{
    int new_strength;

    /* Reject anything that is not a Fixnum before converting it. */
    Check_Type(obj, T_FIXNUM);
    new_strength = FIX2INT(obj);

    ucol_setStrength(UCOLLATOR(self), new_strength);
    return Qnil;
}
Ejemplo n.º 27
0
/**
 * Collator::setStrength: forwards the requested strength to the ICU
 * collator held by this object and reports success.
 *
 * NOTE(review): FETCH_COL is defined elsewhere; it presumably binds `data`
 * to this object's collator state and returns `false` from this method when
 * that state is unusable -- confirm against the macro definition.
 */
static bool HHVM_METHOD(Collator, setStrength, int64_t strength) {
  FETCH_COL(data, this_, false);
  // The value is converted to the ICU enum as-is; no range check is done here.
  const auto level = static_cast<UCollationStrength>(strength);
  ucol_setStrength(data->collator(), level);
  return true;
}
Ejemplo n.º 28
0
/**
* Tests the [variable top] tag in rule syntax. Since the default [alternate]
* tag has the value shifted, any codepoints before [variable top] should give
* a primary ce of 0.
*/
static void TestVariableTop(void)
{
#if 0
    /*
     * Starting with ICU 53, setting the variable top via a pseudo relation string
     * is not supported any more.
     * It was replaced by the [maxVariable symbol] setting.
     * See ICU tickets #9958 and #8032.
     */
    static const char       str[]          = "&z = [variable top]";
          int         len          = strlen(str);
          UChar      rules[sizeof(str)];
          UCollator  *myCollation;
          UCollator  *enCollation;
          UErrorCode  status       = U_ZERO_ERROR;
          UChar       source[1];
          UChar       ch;
          uint8_t     result[20];
          uint8_t     expected[20];

    u_uastrcpy(rules, str);

    enCollation = ucol_open("en_US", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of collator :%s\n", 
                myErrorName(status));
        return;
    }
    myCollation = ucol_openRules(rules, len, UCOL_OFF, 
                                 UCOL_PRIMARY,NULL, &status);
    if (U_FAILURE(status)) {
        ucol_close(enCollation);
        log_err("ERROR: in creation of rule based collator :%s\n", 
                myErrorName(status));
        return;
    }

    ucol_setStrength(enCollation, UCOL_PRIMARY);
    ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
        
    if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
        UCOL_SHIFTED || U_FAILURE(status)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    uprv_memset(expected, 0, 20);

    /* space is supposed to be a variable */
    source[0] = ' ';
    len = ucol_getSortKey(enCollation, source, 1, result, 
                          sizeof(result));

    if (uprv_memcmp(expected, result, len) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    ch = 'a';
    while (ch < 'z') {
        source[0] = ch;
        len = ucol_getSortKey(myCollation, source, 1, result,
                              sizeof(result));
        if (uprv_memcmp(expected, result, len) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n", 
                    ch);
        }
        ch ++;
    }
  
    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
#endif
}
Ejemplo n.º 29
0
/**
* Tests the [variable top] tag in rule syntax. Since the default [alternate]
* tag has the value shifted, any codepoints before [variable top] should give
* a primary ce of 0.
*/
static void TestVariableTop(void)
{
    const char       *str          = "&z = [variable top]";
          int         len          = strlen(str);
          UChar      *rules;
          UCollator  *myCollation;
          UCollator  *enCollation;
          UErrorCode  status       = U_ZERO_ERROR;
          UChar       source[1];
          UChar       ch;
          uint8_t     result[20];
          uint8_t     expected[20];

    rules = (UChar*)malloc(sizeof(UChar*) * (len + 1));
    u_uastrcpy(rules, str);

    enCollation = ucol_open("en_US", &status);
    myCollation = ucol_openRules(rules, len, UCOL_OFF, 
                                 UCOL_PRIMARY,NULL, &status);
    if (U_FAILURE(status)) {
        log_err("ERROR: in creation of rule based collator :%s\n", 
                myErrorName(status));
        return;
    }

    ucol_setStrength(enCollation, UCOL_PRIMARY);
    ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
                      &status);
        
    if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
        UCOL_SHIFTED || U_FAILURE(status)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    uprv_memset(expected, 0, 20);

    /* space is supposed to be a variable */
    source[0] = ' ';
    len = ucol_getSortKey(enCollation, source, 1, result, 
                          sizeof(result));

    if (uprv_memcmp(expected, result, len) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    ch = 'a';
    while (ch < 'z') {
        source[0] = ch;
        len = ucol_getSortKey(myCollation, source, 1, result,
                              sizeof(result));
        if (uprv_memcmp(expected, result, len) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n", 
                    ch);
        }
        ch ++;
    }
  
    free(rules);
    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
}