static void TestPrimary( ) { int32_t len,i; UCollator *myCollation; UErrorCode status=U_ZERO_ERROR; static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; UChar rules[sizeof(str)]; len = strlen(str); u_uastrcpy(rules, str); myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); if(U_FAILURE(status)){ log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status)); return; } ucol_setStrength(myCollation, UCOL_PRIMARY); for (i = 17; i < 26 ; i++) { doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); } ucol_close(myCollation); myCollation = 0; }
static void TestSecondary() { UCollationResult expected=UCOL_EQUAL; int32_t i,j, testAcuteSize; UErrorCode status = U_ZERO_ERROR; myCollation = ucol_open("en_US", &status); if(U_FAILURE(status)){ log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status)); return; } ucol_setStrength(myCollation, UCOL_SECONDARY); log_verbose("Testing English Collation with Secondary strength\n"); for (i = 43; i < 49 ; i++) { doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); } /*test acute and grave ordering (compare to french collation) */ testAcuteSize = sizeof(testAcute) / sizeof(testAcute[0]); for (i = 0; i < testAcuteSize; i++) { for (j = 0; j < testAcuteSize; j++) { if (i < j) expected = UCOL_LESS; if (i == j) expected = UCOL_EQUAL; if (i > j) expected = UCOL_GREATER; doTest(myCollation, testAcute[i], testAcute[j], expected ); } } ucol_close(myCollation); }
// Opens the ICU collator for `locale`, turns normalization on and applies the
// ICU strength that corresponds to `level`.
// Must only be called while m_ucol is still unset (asserted below).
MojErr MojDbTextCollator::init(const MojChar* locale, MojDbCollationStrength level)
{
	MojAssert(locale);
	MojAssert(!m_ucol);

	// Translate the MojDb strength into its ICU counterpart; an unknown
	// level trips the assert and leaves the primary default in place.
	UCollationStrength icuStrength = UCOL_PRIMARY;
	if (level == MojDbCollationPrimary) {
		icuStrength = UCOL_PRIMARY;
	} else if (level == MojDbCollationSecondary) {
		icuStrength = UCOL_SECONDARY;
	} else if (level == MojDbCollationTertiary) {
		icuStrength = UCOL_TERTIARY;
	} else if (level == MojDbCollationIdentical) {
		icuStrength = UCOL_IDENTICAL;
	} else {
		MojAssertNotReached();
	}

	UErrorCode icuStatus = U_ZERO_ERROR;
	m_ucol = ucol_open(locale, &icuStatus);
	MojUnicodeErrCheck(icuStatus);
	MojAssert(m_ucol);

	ucol_setAttribute(m_ucol, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);
	MojUnicodeErrCheck(icuStatus);

	ucol_setStrength(m_ucol, icuStrength);
	return MojErrNone;
}
static void TestExtra() { int32_t i, j; int32_t len; UCollator *myCollation; UErrorCode status = U_ZERO_ERROR; static const char str[]="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 "; UChar rules[sizeof(str)]; len = strlen(str); u_uastrcpy(rules, str); myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status); if(U_FAILURE(status)){ log_err_status(status, "ERROR: in creation of rule based collator :%s\n", myErrorName(status)); return; } ucol_setStrength(myCollation, UCOL_TERTIARY); for (i = 0; i < COUNT_TEST_CASES-1 ; i++) { for (j = i + 1; j < COUNT_TEST_CASES; j += 1) { doTest(myCollation, testCases[i], testCases[j], UCOL_LESS); } } ucol_close(myCollation); myCollation = 0; }
// Applies `strength` to the wrapped ICU collator.
// Returns true on success; warns and returns false when the collator has not
// been created yet.
bool c_Collator::t_setstrength(int64_t strength) {
  if (m_ucoll) {
    ucol_setStrength(m_ucoll, (UCollationStrength)strength);
    return true;
  }
  raise_warning("setstrength called on uninitialized Collator object");
  return false;
}
// Applies `strength` to the wrapped ICU collator.
// Returns false (after raising a warning) if no collator is attached,
// true once the strength has been handed to ICU.
bool c_Collator::t_setstrength(int64 strength) {
  INSTANCE_METHOD_INJECTION_BUILTIN(Collator, Collator::setstrength);

  const bool initialized = m_ucoll ? true : false;
  if (!initialized) {
    raise_warning("setstrength called on uninitialized Collator object");
  } else {
    ucol_setStrength(m_ucoll, (UCollationStrength)strength);
  }
  return initialized;
}
/*
 * Attribute setter for Collator.strength.
 *
 * self    - the Collator object whose ICU collator is updated.
 * val     - the new strength as a Python int, or NULL when the attribute is
 *           being deleted.
 * closure - unused.
 *
 * Returns 0 on success, or -1 with a TypeError set when val is missing or is
 * not an integer.
 */
static int
icu_Collator_set_strength(icu_Collator *self, PyObject *val, void *closure) {
    /* The interpreter calls a getset setter with a NULL value for
     * `del obj.strength`; PyInt_Check would dereference it. */
    if (val == NULL) {
        PyErr_SetString(PyExc_TypeError, "Cannot delete the strength attribute.");
        return -1;
    }

    if (!PyInt_Check(val)) {
        PyErr_SetString(PyExc_TypeError, "Strength must be an integer.");
        return -1;
    }

    ucol_setStrength(self->collator, (UCollationStrength)(int)PyInt_AS_LONG(val));
    return 0;
}
void TextSearcherICU::setCaseSensitivity(bool caseSensitive) { const UCollationStrength strength = caseSensitive ? UCOL_TERTIARY : UCOL_PRIMARY; UCollator* const collator = usearch_getCollator(m_searcher); if (ucol_getStrength(collator) == strength) return; ucol_setStrength(collator, strength); usearch_reset(m_searcher); }
// Prepares a search for |target|: stores a folded copy of the target, sizes
// the text buffer, takes the global ICU searcher and configures its collator
// strength and pattern according to |options|.
inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
    : m_options(options)
    , m_prefixLength(0)
    , m_numberOfCharactersJustAppended(0)
    , m_atBreak(true)
    , m_needsMoreContext(options & AtWordStarts)
    , m_targetRequiresKanaWorkaround(containsKanaLetters(target))
{
    ASSERT(!target.isEmpty());
    target.appendTo(m_target);

    // FIXME: We'd like to tailor the searcher to fold quote marks for us instead
    // of doing it in a separate replacement pass here, but ICU doesn't offer a way
    // to add tailoring on top of the locale-specific tailoring as of this writing.
    foldQuoteMarksAndSoftHyphens(m_target.data(), m_target.size());

    const size_t targetLength = m_target.size();
    m_buffer.reserveInitialCapacity(std::max(targetLength * 8, minimumSearchBufferSize));
    m_overlap = m_buffer.capacity() / 4;

    if ((m_options & AtWordStarts) && targetLength) {
        UChar32 firstCharacter;
        U16_GET(m_target.data(), 0, 0, targetLength, firstCharacter);
        // Characters in the separator category never really occur at the beginning of a word,
        // so if the target begins with such a character, we just ignore the AtWordStart option.
        if (isSeparator(firstCharacter)) {
            m_options &= ~AtWordStarts;
            m_needsMoreContext = false;
        }
    }

    // Grab the single global searcher.
    // If we ever have a reason to do more than once search buffer at once, we'll have
    // to move to multiple searchers.
    lockSearcher();

    UStringSearch* globalSearcher = blink::searcher();
    UCollator* searchCollator = usearch_getCollator(globalSearcher);

    // Case-insensitive searches compare at primary strength, all others at tertiary.
    const UCollationStrength wantedStrength = (m_options & CaseInsensitive) ? UCOL_PRIMARY : UCOL_TERTIARY;
    if (ucol_getStrength(searchCollator) != wantedStrength) {
        ucol_setStrength(searchCollator, wantedStrength);
        usearch_reset(globalSearcher);
    }

    UErrorCode status = U_ZERO_ERROR;
    usearch_setPattern(globalSearcher, m_target.data(), targetLength, &status);
    ASSERT(status == U_ZERO_ERROR);

    // The kana workaround requires a normalized copy of the target string.
    if (m_targetRequiresKanaWorkaround) {
        normalizeCharactersIntoNFCForm(m_target.data(), m_target.size(), m_normalizedTarget);
    }
}
/*
 * Lua binding: collator strength accessor.
 *
 * Called with the collator alone, pushes its current strength as a number.
 * Called with a second argument, sets the strength to that number and
 * leaves the collator itself on the stack as the result.
 * Argument 1 must carry the collator metatable, otherwise an argument error
 * is raised.
 */
static int icu_collator_strength(lua_State *L) {
	UCollator *collator;

	luaL_argcheck(L, lua_getmetatable(L,1) && lua_rawequal(L,-1,COLLATOR_UV_META), 1, "expecting collator");
	lua_pop(L,1);

	collator = *(UCollator**)lua_touserdata(L,1);

	if (lua_gettop(L) != 1) {
		/* Setter form: apply the new strength, then trim the stack back to the collator. */
		ucol_setStrength(collator, (UCollationStrength)luaL_checknumber(L,2));
		lua_settop(L,1);
		return 1;
	}

	/* Getter form. */
	lua_pushnumber(L, ucol_getStrength(collator));
	return 1;
}
int main() { UErrorCode status = U_ZERO_ERROR; UCollator *coll = ucol_open(0, &status); ucol_setStrength(coll, UCOL_PRIMARY); for (int i = 0; i < PASSES; ++i) { UCollationResult coll_res = ucol_strcollUTF8(coll, INPUT1, -1, INPUT2, -1, &status); (void)(coll_res); } ucol_close(coll); return 0; }
static void TestJB581(void) { UChar dispName [100]; int32_t bufferLen = 0; UChar source [100]; UChar target [100]; UCollationResult result = UCOL_EQUAL; uint8_t sourceKeyArray [100]; uint8_t targetKeyArray [100]; int32_t sourceKeyOut = 0, targetKeyOut = 0; UCollator *myCollator = 0; UErrorCode status = U_ZERO_ERROR; /*u_uastrcpy(source, "This is a test.");*/ /*u_uastrcpy(target, "THISISATEST.");*/ u_uastrcpy(source, "THISISATEST."); u_uastrcpy(target, "Thisisatest."); myCollator = ucol_open("en_US", &status); if (U_FAILURE(status)){ bufferLen = uloc_getDisplayName("en_US", 0, dispName, 100, &status); /*Report the error with display name... */ log_err("ERROR: Failed to create the collator for : \"%s\"\n", dispName); return; } result = ucol_strcoll(myCollator, source, -1, target, -1); /* result is 1, secondary differences only for ignorable space characters*/ if (result != 1) { log_err("Comparing two strings with only secondary differences in C failed.\n"); } /* To compare them with just primary differences */ ucol_setStrength(myCollator, UCOL_PRIMARY); result = ucol_strcoll(myCollator, source, -1, target, -1); /* result is 0 */ if (result != 0) { log_err("Comparing two strings with no differences in C failed.\n"); } /* Now, do the same comparison with keys */ sourceKeyOut = ucol_getSortKey(myCollator, source, -1, sourceKeyArray, 100); targetKeyOut = ucol_getSortKey(myCollator, target, -1, targetKeyArray, 100); bufferLen = ((targetKeyOut > 100) ? 100 : targetKeyOut); if (memcmp(sourceKeyArray, targetKeyArray, bufferLen) != 0) { log_err("Comparing two strings with sort keys in C failed.\n"); } ucol_close(myCollator); }
static void *engine_fixed_compile(error_t **error, UString *ustr, uint32_t flags) { UErrorCode status; fixed_pattern_t *p; p = mem_new(*p); p->pattern = ustr; // not needed with usearch ? p->flags = flags; p->ubrk = NULL; p->usearch = NULL; status = U_ZERO_ERROR; if (ustring_empty(ustr)) { if (IS_WORD_BOUNDED(flags)) { p->ubrk = ubrk_open(UBRK_WORD, NULL, NULL, 0, &status); } } else { if (!IS_WHOLE_LINE(flags)) { if (IS_WORD_BOUNDED(flags)) { p->ubrk = ubrk_open(UBRK_WORD, NULL, NULL, 0, &status); } else if (WITH_GRAPHEME()) { p->ubrk = ubrk_open(UBRK_CHARACTER, NULL, NULL, 0, &status); } if (U_FAILURE(status)) { fixed_pattern_destroy(p); icu_error_set(error, FATAL, status, "ubrk_open"); return NULL; } } if (IS_WORD_BOUNDED(flags) || (IS_CASE_INSENSITIVE(flags) && !IS_WHOLE_LINE(flags))) { p->usearch = usearch_open(ustr->ptr, ustr->len, USEARCH_FAKE_USTR, uloc_getDefault(), p->ubrk, &status); if (U_FAILURE(status)) { if (NULL != p->ubrk) { ubrk_close(p->ubrk); } fixed_pattern_destroy(p); icu_error_set(error, FATAL, status, "usearch_open"); return NULL; } if (IS_CASE_INSENSITIVE(flags)) { UCollator *ucol; ucol = usearch_getCollator(p->usearch); ucol_setStrength(ucol, (flags & ~OPT_MASK) > 1 ? UCOL_SECONDARY : UCOL_PRIMARY); } } } return p; }
static void TestPrimary() { int32_t i; UErrorCode status = U_ZERO_ERROR; myCollation = ucol_open("fi_FI@collation=standard", &status); if(U_FAILURE(status)){ log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); } log_verbose("Testing Finnish Collation with Primary strength\n"); ucol_setStrength(myCollation, UCOL_PRIMARY); for (i = 4; i < 5; i++) { doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); } ucol_close(myCollation); }
static void TestPrimary() { int32_t i; UErrorCode status = U_ZERO_ERROR; myCollation = ucol_open("en_US", &status); if(U_FAILURE(status)){ log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status)); return; } ucol_setStrength(myCollation, UCOL_PRIMARY); log_verbose("Testing English Collation with Primary strength\n"); for (i = 38; i < 43 ; i++) { doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); } ucol_close(myCollation); }
static void TestTertiary( ) { int32_t i; UErrorCode status = U_ZERO_ERROR; myCollation = ucol_open("es_ES", &status); if(U_FAILURE(status)){ log_err_status(status, "ERROR: %s: in creation of rule based collator: %s\n", __FILE__, myErrorName(status)); return; } log_verbose("Testing Spanish Collation with Tertiary strength\n"); ucol_setStrength(myCollation, UCOL_TERTIARY); for (i = 0; i < 5 ; i++) { doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); } ucol_close(myCollation); }
static void TestTertiary( ) { int32_t i; UErrorCode status = U_ZERO_ERROR; myCollation = ucol_open("ja_JP", &status); if(U_FAILURE(status)){ log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); return; } log_verbose("Testing Kanna(Japan) Collation with Tertiary strength\n"); ucol_setStrength(myCollation, UCOL_TERTIARY); ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, UCOL_ON, &status); for (i = 0; i < 6 ; i++) { doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); } ucol_close(myCollation); }
/* Testing base letters */ static void TestBase() { int32_t i; UErrorCode status = U_ZERO_ERROR; myCollation = ucol_open("ja_JP", &status); if (U_FAILURE(status)) { log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); return; } log_verbose("Testing Japanese Base Characters Collation\n"); ucol_setStrength(myCollation, UCOL_PRIMARY); for (i = 0; i < 3 ; i++) doTest(myCollation, testBaseCases[i], testBaseCases[i + 1], UCOL_LESS); ucol_close(myCollation); }
static void TestTertiary( ) { int32_t testMoreSize; UCollationResult expected=UCOL_EQUAL; int32_t i,j; UErrorCode status = U_ZERO_ERROR; myCollation = ucol_open("en_US", &status); if(U_FAILURE(status)){ log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); return; } log_verbose("Testing English Collation with Tertiary strength\n"); ucol_setStrength(myCollation, UCOL_TERTIARY); for (i = 0; i < 38 ; i++) { doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); } j = 0; for (i = 0; i < 10; i++) { for (j = i+1; j < 10; j++) { doTest(myCollation, testBugs[i], testBugs[j], UCOL_LESS); } } /*test more interesting cases */ testMoreSize = UPRV_LENGTHOF(testMore); for (i = 0; i < testMoreSize; i++) { for (j = 0; j < testMoreSize; j++) { if (i < j) expected = UCOL_LESS; if (i == j) expected = UCOL_EQUAL; if (i > j) expected = UCOL_GREATER; doTest(myCollation, testMore[i], testMore[j], expected ); } } ucol_close(myCollation); }
// Opens the ICU collator for `locale`, enables normalization (plus numeric
// collation for the identical level) and applies the ICU strength that
// corresponds to `level`.
// Must only be called while m_ucol is still unset (asserted below).
MojErr MojDbTextCollator::init(const MojChar* locale, MojDbCollationStrength level)
{
	LOG_TRACE("Entering function %s", __FUNCTION__);

	MojAssert(locale);
	MojAssert(!m_ucol);

	// Translate the MojDb strength into its ICU counterpart; an unknown
	// level trips the assert and leaves the primary default in place.
	UCollationStrength icuStrength = UCOL_PRIMARY;
	switch (level) {
	case MojDbCollationPrimary:    icuStrength = UCOL_PRIMARY;    break;
	case MojDbCollationSecondary:  icuStrength = UCOL_SECONDARY;  break;
	case MojDbCollationTertiary:   icuStrength = UCOL_TERTIARY;   break;
	case MojDbCollationQuaternary: icuStrength = UCOL_QUATERNARY; break;
	case MojDbCollationIdentical:  icuStrength = UCOL_IDENTICAL;  break;
	default:
		MojAssertNotReached();
	}

	UErrorCode icuStatus = U_ZERO_ERROR;
	m_ucol = ucol_open(locale, &icuStatus);
	MojUnicodeErrCheck(icuStatus);
	MojAssert(m_ucol);

	ucol_setAttribute(m_ucol, UCOL_NORMALIZATION_MODE, UCOL_ON, &icuStatus);

	const bool identicalLevel = (level == MojDbCollationIdentical);
	if (identicalLevel) {
		// Combination of IDENTICAL and NUMERIC option cover full-width comparison and ["001","01","1"] ordering.
		// NUMERIC option converts number character to numeric "a021" -> ["a",21]
		ucol_setAttribute(m_ucol, UCOL_NUMERIC_COLLATION, UCOL_ON, &icuStatus);
	}
	// One check covers both attribute calls above.
	MojUnicodeErrCheck(icuStatus);

	ucol_setStrength(m_ucol, icuStrength);
	return MojErrNone;
}
static void TestExtra() { int32_t i, j; UErrorCode status = U_ZERO_ERROR; myCollation = ucol_open("fr_CA", &status); if(U_FAILURE(status)) { log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); return; } log_verbose("Testing fr_CA Collation extra with secondary strength\n"); ucol_setStrength(myCollation, UCOL_TERTIARY); for (i = 0; i < 9 ; i++) { for (j = i + 1; j < 10; j += 1) { doTest(myCollation, testBugs[i], testBugs[j], UCOL_LESS); } } ucol_close(myCollation); }
/* * Test Katakana, Hiragana letters */ static void TestKatakanaHiragana(void) { int32_t i; UErrorCode status = U_ZERO_ERROR; myCollation = ucol_open("ja_JP", &status); if (U_FAILURE(status)) { log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); return; } log_verbose("Testing Japanese Katakana, Hiragana Characters Collation\n"); ucol_setStrength(myCollation, UCOL_QUATERNARY); ucol_setAttribute(myCollation, UCOL_CASE_LEVEL, UCOL_ON, &status); for (i = 0; i < 3 ; i++) { doTest(myCollation, testKatakanaHiraganaCases[i], testKatakanaHiraganaCases[i + 1], UCOL_LESS); } ucol_close(myCollation); }
static void TestTertiary( ) { int32_t i; UErrorCode status = U_ZERO_ERROR; myCollation = ucol_open("fr_CA", &status); if(U_FAILURE(status) || !myCollation) { log_err_status(status, "ERROR: in creation of rule based collator: %s\n", myErrorName(status)); return; } ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); if(U_FAILURE(status)) { log_err("ERROR: in creation of rule based collator: %s\n", myErrorName(status)); return; } log_verbose("Testing fr_CA Collation with Tertiary strength\n"); ucol_setStrength(myCollation, UCOL_QUATERNARY); for (i = 0; i < 12 ; i++) { doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]); } ucol_close(myCollation); }
void SSearchTest::monkeyTest(char *params) { // ook! UErrorCode status = U_ZERO_ERROR; //UCollator *coll = ucol_open(NULL, &status); UCollator *coll = ucol_openFromShortString("S1", FALSE, NULL, &status); if (U_FAILURE(status)) { errcheckln(status, "Failed to create collator in MonkeyTest! - %s", u_errorName(status)); return; } CollData *monkeyData = new CollData(coll, status); USet *expansions = uset_openEmpty(); USet *contractions = uset_openEmpty(); ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status); U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39); U_STRING_INIT(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39); USet *letters = uset_openPattern(letter_pattern, 39, &status); SetMonkey letterMonkey(letters); StringSetMonkey contractionMonkey(contractions, coll, monkeyData); StringSetMonkey expansionMonkey(expansions, coll, monkeyData); UnicodeString testCase; UnicodeString alternate; UnicodeString pattern, altPattern; UnicodeString prefix, altPrefix; UnicodeString suffix, altSuffix; Monkey *monkeys[] = { &letterMonkey, &contractionMonkey, &expansionMonkey, &contractionMonkey, &expansionMonkey, &contractionMonkey, &expansionMonkey, &contractionMonkey, &expansionMonkey}; int32_t monkeyCount = sizeof(monkeys) / sizeof(monkeys[0]); // int32_t nonMatchCount = 0; UCollationStrength strengths[] = {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY}; const char *strengthNames[] = {"primary", "secondary", "tertiary"}; int32_t strengthCount = sizeof(strengths) / sizeof(strengths[0]); int32_t loopCount = quick? 
1000 : 10000; int32_t firstStrength = 0; int32_t lastStrength = strengthCount - 1; //*/ 0; if (params != NULL) { #if !UCONFIG_NO_REGULAR_EXPRESSIONS UnicodeString p(params); loopCount = getIntParam("loop", p, loopCount); m_seed = getIntParam("seed", p, m_seed); RegexMatcher m(" *strength *= *(primary|secondary|tertiary) *", p, 0, status); if (m.find()) { UnicodeString breakType = m.group(1, status); for (int32_t s = 0; s < strengthCount; s += 1) { if (breakType == strengthNames[s]) { firstStrength = lastStrength = s; break; } } m.reset(); p = m.replaceFirst("", status); } if (RegexMatcher("\\S", p, 0, status).find()) { // Each option is stripped out of the option string as it is processed. // All options have been checked. The option string should have been completely emptied.. char buf[100]; p.extract(buf, sizeof(buf), NULL, status); buf[sizeof(buf)-1] = 0; errln("Unrecognized or extra parameter: %s\n", buf); return; } #else infoln("SSearchTest built with UCONFIG_NO_REGULAR_EXPRESSIONS: ignoring parameters."); #endif } for(int32_t s = firstStrength; s <= lastStrength; s += 1) { int32_t notFoundCount = 0; logln("Setting strength to %s.", strengthNames[s]); ucol_setStrength(coll, strengths[s]); // TODO: try alternate prefix and suffix too? // TODO: alterntaes are only equal at primary strength. Is this OK? 
for(int32_t t = 0; t < loopCount; t += 1) { uint32_t seed = m_seed; // int32_t nmc = 0; generateTestCase(coll, monkeys, monkeyCount, pattern, altPattern); generateTestCase(coll, monkeys, monkeyCount, prefix, altPrefix); generateTestCase(coll, monkeys, monkeyCount, suffix, altSuffix); // pattern notFoundCount += monkeyTestCase(coll, pattern, pattern, altPattern, "pattern", strengthNames[s], seed); testCase.remove(); testCase.append(prefix); testCase.append(/*alt*/pattern); // prefix + pattern notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern", strengthNames[s], seed); testCase.append(suffix); // prefix + pattern + suffix notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern + suffix", strengthNames[s], seed); testCase.remove(); testCase.append(pattern); testCase.append(suffix); // pattern + suffix notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "pattern + suffix", strengthNames[s], seed); } logln("For strength %s the not found count is %d.", strengthNames[s], notFoundCount); } uset_close(contractions); uset_close(expansions); uset_close(letters); delete monkeyData; ucol_close(coll); }
void SSearchTest::searchTest() { #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FILE_IO UErrorCode status = U_ZERO_ERROR; char path[PATH_BUFFER_SIZE]; const char *testFilePath = getPath(path, "ssearch.xml"); if (testFilePath == NULL) { return; /* Couldn't get path: error message already output. */ } LocalPointer<UXMLParser> parser(UXMLParser::createParser(status)); TEST_ASSERT_SUCCESS(status); LocalPointer<UXMLElement> root(parser->parseFile(testFilePath, status)); TEST_ASSERT_SUCCESS(status); if (U_FAILURE(status)) { return; } const UnicodeString *debugTestCase = root->getAttribute("debug"); if (debugTestCase != NULL) { // setenv("USEARCH_DEBUG", "1", 1); } const UXMLElement *testCase; int32_t tc = 0; while((testCase = root->nextChildElement(tc)) != NULL) { if (testCase->getTagName().compare("test-case") != 0) { errln("ssearch, unrecognized XML Element in test file"); continue; } const UnicodeString *id = testCase->getAttribute("id"); *testId = 0; if (id != NULL) { id->extract(0, id->length(), testId, sizeof(testId), US_INV); } // If debugging test case has been specified and this is not it, skip to next. if (id!=NULL && debugTestCase!=NULL && *id != *debugTestCase) { continue; } // // Get the requested collation strength. // Default is tertiary if the XML attribute is missing from the test case. // const UnicodeString *strength = testCase->getAttribute("strength"); UColAttributeValue collatorStrength = UCOL_PRIMARY; if (strength==NULL) { collatorStrength = UCOL_TERTIARY;} else if (*strength=="PRIMARY") { collatorStrength = UCOL_PRIMARY;} else if (*strength=="SECONDARY") { collatorStrength = UCOL_SECONDARY;} else if (*strength=="TERTIARY") { collatorStrength = UCOL_TERTIARY;} else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;} else if (*strength=="IDENTICAL") { collatorStrength = UCOL_IDENTICAL;} else { // Bogus value supplied for strength. Shouldn't happen, even from // typos, if the XML source has been validated. 
// This assert is a little deceiving in that strength can be // any of the allowed values, not just TERTIARY, but it will // do the job of getting the error output. TEST_ASSERT(*strength=="TERTIARY") } // // Get the collator normalization flag. Default is UCOL_OFF. // UColAttributeValue normalize = UCOL_OFF; const UnicodeString *norm = testCase->getAttribute("norm"); TEST_ASSERT (norm==NULL || *norm=="ON" || *norm=="OFF"); if (norm!=NULL && *norm=="ON") { normalize = UCOL_ON; } // // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE. // UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE; const UnicodeString *alt = testCase->getAttribute("alternate_handling"); TEST_ASSERT (alt == NULL || *alt == "SHIFTED" || *alt == "NON_IGNORABLE"); if (alt != NULL && *alt == "SHIFTED") { alternateHandling = UCOL_SHIFTED; } const UnicodeString defLocale("en"); char clocale[100]; const UnicodeString *locale = testCase->getAttribute("locale"); if (locale == NULL || locale->length()==0) { locale = &defLocale; }; locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL); UnicodeString text; UnicodeString target; UnicodeString pattern; int32_t expectedMatchStart = -1; int32_t expectedMatchLimit = -1; const UXMLElement *n; int32_t nodeCount = 0; n = testCase->getChildElement("pattern"); TEST_ASSERT(n != NULL); if (n==NULL) { continue; } text = n->getText(FALSE); text = text.unescape(); pattern.append(text); nodeCount++; n = testCase->getChildElement("pre"); if (n!=NULL) { text = n->getText(FALSE); text = text.unescape(); target.append(text); nodeCount++; } n = testCase->getChildElement("m"); if (n!=NULL) { expectedMatchStart = target.length(); text = n->getText(FALSE); text = text.unescape(); target.append(text); expectedMatchLimit = target.length(); nodeCount++; } n = testCase->getChildElement("post"); if (n!=NULL) { text = n->getText(FALSE); text = text.unescape(); target.append(text); nodeCount++; } // Check that there weren't extra things in the XML 
TEST_ASSERT(nodeCount == testCase->countChildren()); // Open a collator and StringSearch based on the parameters // obtained from the XML. // status = U_ZERO_ERROR; LocalUCollatorPointer collator(ucol_open(clocale, &status)); ucol_setStrength(collator.getAlias(), collatorStrength); ucol_setAttribute(collator.getAlias(), UCOL_NORMALIZATION_MODE, normalize, &status); ucol_setAttribute(collator.getAlias(), UCOL_ALTERNATE_HANDLING, alternateHandling, &status); LocalUStringSearchPointer uss(usearch_openFromCollator(pattern.getBuffer(), pattern.length(), target.getBuffer(), target.length(), collator.getAlias(), NULL, // the break iterator &status)); TEST_ASSERT_SUCCESS(status); if (U_FAILURE(status)) { continue; } int32_t foundStart = 0; int32_t foundLimit = 0; UBool foundMatch; // // Do the search, check the match result against the expected results. // foundMatch= usearch_search(uss.getAlias(), 0, &foundStart, &foundLimit, &status); TEST_ASSERT_SUCCESS(status); if ((foundMatch && expectedMatchStart<0) || (foundStart != expectedMatchStart) || (foundLimit != expectedMatchLimit)) { TEST_ASSERT(FALSE); // ouput generic error position infoln("Found, expected match start = %d, %d \n" "Found, expected match limit = %d, %d", foundStart, expectedMatchStart, foundLimit, expectedMatchLimit); } // In case there are other matches... // (should we only do this if the test case passed?) while (foundMatch) { expectedMatchStart = foundStart; expectedMatchLimit = foundLimit; foundMatch = usearch_search(uss.getAlias(), foundLimit, &foundStart, &foundLimit, &status); } uss.adoptInstead(usearch_openFromCollator(pattern.getBuffer(), pattern.length(), target.getBuffer(), target.length(), collator.getAlias(), NULL, &status)); // // Do the backwards search, check the match result against the expected results. 
// foundMatch= usearch_searchBackwards(uss.getAlias(), target.length(), &foundStart, &foundLimit, &status); TEST_ASSERT_SUCCESS(status); if ((foundMatch && expectedMatchStart<0) || (foundStart != expectedMatchStart) || (foundLimit != expectedMatchLimit)) { TEST_ASSERT(FALSE); // ouput generic error position infoln("Found, expected backwards match start = %d, %d \n" "Found, expected backwards match limit = %d, %d", foundStart, expectedMatchStart, foundLimit, expectedMatchLimit); } } #endif }
/**
 * call-seq:
 *     collator.strength = new_strength
 *
 * Sets the collation strength used in a UCollator.
 * The strength influences how strings are compared.
 * +new_strength+ must be a Fixnum. Always returns nil.
 **/
VALUE icu4r_col_set_strength(VALUE self, VALUE obj)
{
    int new_strength;

    Check_Type(obj, T_FIXNUM);
    new_strength = FIX2INT(obj);
    ucol_setStrength(UCOLLATOR(self), new_strength);

    return Qnil;
}
// Collator::setStrength(int $strength): forwards the strength to the ICU
// collator held by this object and returns true.
// FETCH_COL is given `false` as its third argument.
// NOTE(review): presumably that is the value returned when the collator
// cannot be fetched - confirm against the macro.
static bool HHVM_METHOD(Collator, setStrength, int64_t strength) {
  FETCH_COL(data, this_, false);

  const UCollationStrength icuStrength = (UCollationStrength)strength;
  ucol_setStrength(data->collator(), icuStrength);

  return true;
}
/**
 * Tests the [variable top] tag in rule syntax. Since the default [alternate]
 * tag has the value shifted, any codepoints before [variable top] should give
 * a primary ce of 0.
 *
 * The whole body is currently compiled out with #if 0 (see the note inside),
 * so this function does nothing.
 */
static void TestVariableTop(void)
{
#if 0
    /*
     * Starting with ICU 53, setting the variable top via a pseudo relation string
     * is not supported any more.
     * It was replaced by the [maxVariable symbol] setting.
     * See ICU tickets #9958 and #8032.
     */
    static const char str[] = "&z = [variable top]";
    int len = strlen(str);
    UChar rules[sizeof(str)];
    UCollator *myCollation;
    UCollator *enCollation;
    UErrorCode status = U_ZERO_ERROR;
    UChar source[1];
    UChar ch;
    uint8_t result[20];
    uint8_t expected[20];

    u_uastrcpy(rules, str);

    enCollation = ucol_open("en_US", &status);
    if (U_FAILURE(status)) {
        log_err_status(status, "ERROR: in creation of collator :%s\n", myErrorName(status));
        return;
    }

    myCollation = ucol_openRules(rules, len, UCOL_OFF, UCOL_PRIMARY,NULL, &status);
    if (U_FAILURE(status)) {
        ucol_close(enCollation);
        log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
        return;
    }

    ucol_setStrength(enCollation, UCOL_PRIMARY);
    ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
    ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);

    if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) != UCOL_SHIFTED || U_FAILURE(status)) {
        log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
    }

    uprv_memset(expected, 0, 20);

    /* space is supposed to be a variable */
    source[0] = ' ';
    len = ucol_getSortKey(enCollation, source, 1, result, sizeof(result));
    if (uprv_memcmp(expected, result, len) != 0) {
        log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
    }

    /* Every letter from 'a' up to (not including) 'z' is checked the same way. */
    for (ch = 'a'; ch < 'z'; ch ++) {
        source[0] = ch;
        len = ucol_getSortKey(myCollation, source, 1, result, sizeof(result));
        if (uprv_memcmp(expected, result, len) != 0) {
            log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n", ch);
        }
    }

    ucol_close(enCollation);
    ucol_close(myCollation);
    enCollation = NULL;
    myCollation = NULL;
#endif
}
/** * Tests the [variable top] tag in rule syntax. Since the default [alternate] * tag has the value shifted, any codepoints before [variable top] should give * a primary ce of 0. */ static void TestVariableTop(void) { const char *str = "&z = [variable top]"; int len = strlen(str); UChar *rules; UCollator *myCollation; UCollator *enCollation; UErrorCode status = U_ZERO_ERROR; UChar source[1]; UChar ch; uint8_t result[20]; uint8_t expected[20]; rules = (UChar*)malloc(sizeof(UChar*) * (len + 1)); u_uastrcpy(rules, str); enCollation = ucol_open("en_US", &status); myCollation = ucol_openRules(rules, len, UCOL_OFF, UCOL_PRIMARY,NULL, &status); if (U_FAILURE(status)) { log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status)); return; } ucol_setStrength(enCollation, UCOL_PRIMARY); ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) != UCOL_SHIFTED || U_FAILURE(status)) { log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n"); } uprv_memset(expected, 0, 20); /* space is supposed to be a variable */ source[0] = ' '; len = ucol_getSortKey(enCollation, source, 1, result, sizeof(result)); if (uprv_memcmp(expected, result, len) != 0) { log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n"); } ch = 'a'; while (ch < 'z') { source[0] = ch; len = ucol_getSortKey(myCollation, source, 1, result, sizeof(result)); if (uprv_memcmp(expected, result, len) != 0) { log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n", ch); } ch ++; } free(rules); ucol_close(enCollation); ucol_close(myCollation); enCollation = NULL; myCollation = NULL; }