void SSearchTest::sharpSTest() { UErrorCode status = U_ZERO_ERROR; UCollator *coll = NULL; UnicodeString lp = "fuss"; UnicodeString sp = "fu\\u00DF"; UnicodeString targets[] = {"fu\\u00DF", "fu\\u00DFball", "1fu\\u00DFball", "12fu\\u00DFball", "123fu\\u00DFball", "1234fu\\u00DFball", "ffu\\u00DF", "fufu\\u00DF", "fusfu\\u00DF", "fuss", "ffuss", "fufuss", "fusfuss", "1fuss", "12fuss", "123fuss", "1234fuss", "fu\\u00DF", "1fu\\u00DF", "12fu\\u00DF", "123fu\\u00DF", "1234fu\\u00DF"}; int32_t start = -1, end = -1; coll = ucol_openFromShortString("LEN_S1", FALSE, NULL, &status); TEST_ASSERT_SUCCESS(status); UnicodeString lpUnescaped = lp.unescape(); UnicodeString spUnescaped = sp.unescape(); LocalUStringSearchPointer ussLong(usearch_openFromCollator(lpUnescaped.getBuffer(), lpUnescaped.length(), lpUnescaped.getBuffer(), lpUnescaped.length(), // actual test data will be set later coll, NULL, // the break iterator &status)); LocalUStringSearchPointer ussShort(usearch_openFromCollator(spUnescaped.getBuffer(), spUnescaped.length(), spUnescaped.getBuffer(), spUnescaped.length(), // actual test data will be set later coll, NULL, // the break iterator &status)); TEST_ASSERT_SUCCESS(status); for (uint32_t t = 0; t < (sizeof(targets)/sizeof(targets[0])); t += 1) { UBool bFound; UnicodeString target = targets[t].unescape(); start = end = -1; usearch_setText(ussLong.getAlias(), target.getBuffer(), target.length(), &status); bFound = usearch_search(ussLong.getAlias(), 0, &start, &end, &status); TEST_ASSERT_SUCCESS(status); if (bFound) { logln("Test %d: found long pattern at [%d, %d].", t, start, end); } else { dataerrln("Test %d: did not find long pattern.", t); } usearch_setText(ussShort.getAlias(), target.getBuffer(), target.length(), &status); bFound = usearch_search(ussShort.getAlias(), 0, &start, &end, &status); TEST_ASSERT_SUCCESS(status); if (bFound) { logln("Test %d: found long pattern at [%d, %d].", t, start, end); } else { dataerrln("Test %d: did not find long pattern.", t); } } ucol_close(coll); }
void CollationRegressionTest::TestT7189() { UErrorCode status = U_ZERO_ERROR; UCollator *coll; uint32_t i; static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = { // "Achter De Hoven" { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 }, // "ABC" { 0x41, 0x42, 0x43, 0x00 }, // "HELLO world!" { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 } }; static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = { // "Achter de Hoven" { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 }, // "abc" { 0x61, 0x62, 0x63, 0x00 }, // "hello world!" { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 } }; // Open the collator coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status); if (U_FAILURE(status)) { errln("Failed to create a collator for short string EO_S1"); return; } for (i = 0; i < sizeof(text1) / (CollationRegressionTest::MAX_TOKEN_LEN * sizeof(UChar)); i++) { uint8_t key1[100], key2[100]; int32_t len1, len2; len1 = calcKeyIncremental(coll, text1[i], -1, key1, sizeof(key1), status); if (U_FAILURE(status)) { errln(UnicodeString("Failed to get a partial collation key for ") + text1[i]); break; } len2 = calcKeyIncremental(coll, text2[i], -1, key2, sizeof(key2), status); if (U_FAILURE(status)) { errln(UnicodeString("Failed to get a partial collation key for ") + text2[i]); break; } if (len1 == len2 && uprv_memcmp(key1, key2, len1) == 0) { errln(UnicodeString("Failed: Identical key\n") + " text1: " + text1[i] + "\n" + " text2: " + text2[i] + "\n" + " key : " + TestUtility::hex(key1, len1)); } else { logln(UnicodeString("Keys produced -\n") + " text1: " + text1[i] + "\n" + " key1 : " + TestUtility::hex(key1, len1) + "\n" + " text2: " + text2[i] + "\n" + " key2 : " + TestUtility::hex(key2, len2)); } } ucol_close(coll); }
void SSearchTest::monkeyTest(char *params) { // ook! UErrorCode status = U_ZERO_ERROR; //UCollator *coll = ucol_open(NULL, &status); UCollator *coll = ucol_openFromShortString("S1", FALSE, NULL, &status); if (U_FAILURE(status)) { errcheckln(status, "Failed to create collator in MonkeyTest! - %s", u_errorName(status)); return; } CollData *monkeyData = new CollData(coll, status); USet *expansions = uset_openEmpty(); USet *contractions = uset_openEmpty(); ucol_getContractionsAndExpansions(coll, contractions, expansions, FALSE, &status); U_STRING_DECL(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39); U_STRING_INIT(letter_pattern, "[[:letter:]-[:ideographic:]-[:hangul:]]", 39); USet *letters = uset_openPattern(letter_pattern, 39, &status); SetMonkey letterMonkey(letters); StringSetMonkey contractionMonkey(contractions, coll, monkeyData); StringSetMonkey expansionMonkey(expansions, coll, monkeyData); UnicodeString testCase; UnicodeString alternate; UnicodeString pattern, altPattern; UnicodeString prefix, altPrefix; UnicodeString suffix, altSuffix; Monkey *monkeys[] = { &letterMonkey, &contractionMonkey, &expansionMonkey, &contractionMonkey, &expansionMonkey, &contractionMonkey, &expansionMonkey, &contractionMonkey, &expansionMonkey}; int32_t monkeyCount = sizeof(monkeys) / sizeof(monkeys[0]); // int32_t nonMatchCount = 0; UCollationStrength strengths[] = {UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY}; const char *strengthNames[] = {"primary", "secondary", "tertiary"}; int32_t strengthCount = sizeof(strengths) / sizeof(strengths[0]); int32_t loopCount = quick? 1000 : 10000; int32_t firstStrength = 0; int32_t lastStrength = strengthCount - 1; //*/ 0; if (params != NULL) { #if !UCONFIG_NO_REGULAR_EXPRESSIONS UnicodeString p(params); loopCount = getIntParam("loop", p, loopCount); m_seed = getIntParam("seed", p, m_seed); RegexMatcher m(" *strength *= *(primary|secondary|tertiary) *", p, 0, status); if (m.find()) { UnicodeString breakType = m.group(1, status); for (int32_t s = 0; s < strengthCount; s += 1) { if (breakType == strengthNames[s]) { firstStrength = lastStrength = s; break; } } m.reset(); p = m.replaceFirst("", status); } if (RegexMatcher("\\S", p, 0, status).find()) { // Each option is stripped out of the option string as it is processed. // All options have been checked. The option string should have been completely emptied.. char buf[100]; p.extract(buf, sizeof(buf), NULL, status); buf[sizeof(buf)-1] = 0; errln("Unrecognized or extra parameter: %s\n", buf); return; } #else infoln("SSearchTest built with UCONFIG_NO_REGULAR_EXPRESSIONS: ignoring parameters."); #endif } for(int32_t s = firstStrength; s <= lastStrength; s += 1) { int32_t notFoundCount = 0; logln("Setting strength to %s.", strengthNames[s]); ucol_setStrength(coll, strengths[s]); // TODO: try alternate prefix and suffix too? // TODO: alterntaes are only equal at primary strength. Is this OK? for(int32_t t = 0; t < loopCount; t += 1) { uint32_t seed = m_seed; // int32_t nmc = 0; generateTestCase(coll, monkeys, monkeyCount, pattern, altPattern); generateTestCase(coll, monkeys, monkeyCount, prefix, altPrefix); generateTestCase(coll, monkeys, monkeyCount, suffix, altSuffix); // pattern notFoundCount += monkeyTestCase(coll, pattern, pattern, altPattern, "pattern", strengthNames[s], seed); testCase.remove(); testCase.append(prefix); testCase.append(/*alt*/pattern); // prefix + pattern notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern", strengthNames[s], seed); testCase.append(suffix); // prefix + pattern + suffix notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "prefix + pattern + suffix", strengthNames[s], seed); testCase.remove(); testCase.append(pattern); testCase.append(suffix); // pattern + suffix notFoundCount += monkeyTestCase(coll, testCase, pattern, altPattern, "pattern + suffix", strengthNames[s], seed); } logln("For strength %s the not found count is %d.", strengthNames[s], notFoundCount); } uset_close(contractions); uset_close(expansions); uset_close(letters); delete monkeyData; ucol_close(coll); }
static void TestGetSortKey() { /* This is meant to test a buffer reallocation crash while using French secondary sorting with a large buffer. The fact that Japanese characters are used is irrelevant. */ static const UChar pucUTF16[] = { 0x3049,0x30b9,0x3088,0xfffd,0xfffd,0x308f,0xfffd,0x3042, 0xfffd,0xfffd,0x305e,0xfffd,0x30b6,0x30bb,0x305b,0x30b1, 0x3050,0x30af,0x304e,0x30bd,0xfffd,0x30c6,0xfffd,0xfffd, 0x30e1,0xfffd,0xfffd,0x30d9,0xfffd,0x3092,0x3075,0x304a, 0x3074,0x3070,0x30f5,0x30c4,0x306e,0x30df,0x3053,0xfffd, 0x30a6,0x30b6,0x30e0,0xfffd,0x30bc,0x30ef,0x3087,0x30cc, 0x305f,0x30de,0xfffd,0x3090,0x3063,0x30dc,0x30b6,0x30b9, 0x30d2,0x3072,0x3061,0xfffd,0xfffd,0xfffd,0x307b,0x3092, 0x30a5,0x30a9,0x30b1,0x30e7,0xfffd,0xfffd,0xfffd,0xfffd, 0xfffd,0x305e,0xfffd,0x30c7,0x30ae,0x305b,0x308b,0x30c0, 0x30f5,0xfffd,0xfffd,0xfffd,0x307d,0x304e,0xfffd,0xfffd, 0x30c0,0x30c8,0x306f,0x307a,0x30dd,0x30e4,0x3084,0xfffd, 0x308c,0x30f1,0xfffd,0x30c6,0xfffd,0x307a,0xfffd,0x3052, 0x3056,0x305d,0x30b7,0xfffd,0x305b,0x30b0,0x30b9,0xfffd, 0x30b2,0x306d,0x3044,0xfffd,0x3073,0xfffd,0x30be,0x30cf, 0x3080,0xfffd,0x30a8,0x30f5,0x30a5,0x30c7,0x307c,0xfffd, 0x30d1,0x305f,0x30b2,0xfffd,0x3053,0x30ca,0xfffd,0x30dd, 0x3058,0x30c0,0x305d,0x30e1,0xfffd,0x30bb,0x305f,0x30d1, 0x30f2,0x3058,0x3086,0x30ce,0x30db,0x30cb,0x30e9,0xfffd, 0x308c,0xfffd,0xfffd,0x30af,0x30c4,0x3076,0x304c,0x30f5, 0x30e8,0x308c,0xfffd,0x30e2,0x3073,0x30a3,0x304e,0x30ea, 0xfffd,0x304f,0xfffd,0x306c,0x3044,0xfffd,0xfffd,0x30c9, 0xfffd,0x30f5,0xfffd,0xfffd,0xfffd,0x30eb,0x30a8,0xfffd, 0x306d,0x307d,0x30d8,0x3069,0xfffd,0xfffd,0x3086,0x30a9, 0xfffd,0x3076,0x30e9,0x30cc,0x3074,0x30e0,0xfffd,0xfffd, 0xfffd,0x30f0,0x3086,0x30ac,0x3076,0x3068,0x30c7,0xfffd, 0x30b7,0x30d2,0x3048,0x308e,0x30e8,0x30d9,0x30ce,0x30d0, 0x308b,0x30ee,0x30e6,0x3079,0x30f3,0x30af,0xfffd,0x3079, 0xfffd,0xfffd,0x30ca,0x30bf,0xfffd,0x30b5,0xfffd,0xfffd, 0x3093,0xfffd,0x30ba,0xfffd,0x3076,0x3047,0x304a,0xfffd, 0xfffd,0x3086,0xfffd,0x3081,0xfffd,0x30f6,0x3066,0xfffd, 0xfffd,0x30b6,0x30ef,0x30e2,0x30bf,0xfffd,0x3053,0x304a, 0xfffd,0xfffd,0x304a,0x30e8,0xfffd,0x30e2,0xfffd,0xfffd, 0x305c,0x3081,0x30c6,0xfffd,0x3091,0x3046,0x306a,0x3059, 0xfffd,0xfffd,0x30dd,0x30d1,0x308a,0x30ee,0xfffd,0xfffd, 0x308a,0x3042,0x30da,0xfffd,0x3064,0x30ef,0x305c,0x306b, 0xfffd,0x30ca,0x3085,0x3067,0x30ea,0x30c2,0x30c8,0xfffd, 0x30f5,0xfffd,0xfffd,0xfffd,0x30ca,0xfffd,0x3050,0x30f1, 0x3050,0x3053,0x3072,0xfffd,0xfffd,0xfffd,0x3074,0xfffd, 0x304b,0x30dd,0x306d,0xfffd,0x3049,0x30a1,0x30cc,0x30de, 0x30ae,0x307b,0x308a,0xfffd,0x3065,0xfffd,0xfffd,0x30c0, 0xfffd,0x3048,0x30dc,0x304f,0x3085,0x3059,0x304b,0x30d3, 0x30eb,0x30a4,0x3073,0xfffd,0x30ba,0x308f,0x30a7,0x30c3, 0x3074,0x30cf,0x306c,0x3053,0x30c0,0xfffd,0x3066,0xfffd, 0x308f,0xfffd,0x30b5,0xfffd,0x3092,0x30c4,0xfffd,0x30d6, 0x3056,0x30ad,0x30d2,0x30ba,0xfffd,0x30e6,0x304c,0x3088, 0x30b6,0x3048,0x3077,0x30d1,0xfffd,0x3050,0xfffd,0x3042, 0xfffd,0xfffd,0x308f,0xfffd,0x30c1,0xfffd,0x3074,0x3061, 0x3056,0x30e5,0xfffd,0xfffd,0x3057,0xfffd,0xfffd,0xfffd, 0xfffd,0x30bd,0x30b3,0x30ee,0xfffd,0x30f2,0x3084,0x3050, 0xfffd,0x30e7,0xfffd,0xfffd,0x3060,0x3049,0x30f2,0x30ad, 0x30bf,0x30f1,0x30a2,0xfffd,0x30af,0xfffd,0x3060,0x30a1, 0x30e9,0x30c3,0xfffd,0x3072,0x3093,0x3070,0xfffd,0x308f, 0x3060,0xfffd,0x3067,0x306f,0x3082,0x308b,0x3051,0xfffd, 0x3058,0xfffd,0xfffd,0x30a8,0x3051,0x3054,0x30ad,0x30f0, 0x3053,0xfffd,0x30e1,0x30d7,0x308d,0x307f,0x30be,0x30b0, 0xfffd,0x30db,0xfffd,0x30d1,0xfffd,0x3054,0x30a5,0xfffd, 0x306a,0xfffd,0x305c,0xfffd,0x3052,0x3088,0xfffd,0x306e, 0xfffd,0x30a9,0x30a1,0x30b4,0x3083,0x30bd,0xfffd,0xfffd, 0x306a,0x3070,0x30cd,0xfffd,0x3072,0x30ed,0x30c6,0x30be, 0x30c4,0x305e,0x30b3,0x30e1,0x308a,0xfffd,0x305b,0xfffd, 0x3042,0x3088,0xfffd,0x304c,0xfffd,0x3089,0x3071,0xfffd, 0xfffd,0x30c6,0x3062,0x3079,0xfffd,0x304b,0x304a,0xfffd, 0x30ad,0x3045,0x3045,0x3087,0xfffd,0x306a,0x308b,0x0000, 0x30bd,0x3065,0x30b8,0x3086,0x30d3,0x3076,0xfffd,0xfffd, 0x308f,0x3053,0x307c,0x3053,0x3084,0x30ae,0x30c4,0x3045, 0x30a8,0x30d0,0x30e1,0x308c,0x30e6,0x30b7,0xfffd,0xfffd, 0xfffd,0x3046,0x305f,0xfffd,0x3086,0x30ab,0xfffd,0xfffd, 0x30c8,0xfffd,0x30a1,0x3052,0x3059,0xfffd,0x30a4,0xfffd, 0xfffd,0x308c,0x3085,0x30ab,0x30b5,0x3091,0x30bf,0x30e3, 0xfffd,0xfffd,0x3087,0xfffd,0x30f6,0x3051,0x30bd,0x3092, 0x3063,0xfffd,0x30a9,0x3063,0x306e,0xfffd,0xfffd,0xfffd, 0x306c,0xfffd,0x307e,0x30ad,0x3077,0x30c2,0x30e9,0x30d5, 0xfffd,0xfffd,0x30c6,0x305c,0xfffd,0xfffd,0x3089,0xfffd, 0x3048,0x30cb,0x308c,0xfffd,0xfffd,0x3044,0xfffd,0x3080, 0x3063,0x3079,0xfffd,0x308a,0x30cb,0x3042,0x3057,0xfffd, 0x307c,0x30c1,0x30a8,0x30cf,0xfffd,0x3083,0xfffd,0xfffd, 0x306c,0xfffd,0x305e,0x3092,0xfffd,0x30dc,0x30b0,0x3081, 0x30e3,0x30f0,0x304e,0x30cc,0x308e,0x30c4,0x30ad }; UErrorCode status = U_ZERO_ERROR; UCollator *pCollator; int32_t lenActualSortKey; uint8_t pucSortKey[4096]; static const int32_t LENSORTKEY = (int32_t)sizeof(pucSortKey); ucol_prepareShortStringOpen("LFR_AN_CX_EX_FO_HX_NX_S3", 0, NULL, &status); pCollator = ucol_openFromShortString("LFR_AN_CX_EX_FO_HX_NX_S3", 0, NULL, &status); if (U_FAILURE(status)) { log_data_err("error opening collator -> %s. (Are you missing data?)\n", u_errorName(status)); return; } lenActualSortKey = ucol_getSortKey(pCollator, (const UChar *)pucUTF16, UPRV_LENGTHOF(pucUTF16), pucSortKey, LENSORTKEY); if (lenActualSortKey > LENSORTKEY) { log_err("sort key too big for original buffer. Got: %d Expected: %d\n", lenActualSortKey, LENSORTKEY); return; } /* If the test didn't crash, then the test succeeded. */ ucol_close(pCollator); }
int main(int /* argc*/ , const char * /*argv*/ []) { UErrorCode status = U_ZERO_ERROR; int diffs = 0; int gbaddiffs =0; setup(status); if(U_FAILURE(status)) return 1; int expected = PROVIDER_COUNT; for(int l=0;l<LOCALE_COUNT;l++) { printf("\n"); uint8_t oldBytes[200]; int32_t oldLen = -1; for(int v=0;v<=expected;v++) { // Construct the locale ID char locID[200]; strcpy(locID, locale[l]); if((v!=expected)) { // -1 = no version strcat(locID, "@sp=icu"); strcat(locID, provider_version[v]); } printf("%-28s = ", locID); UErrorCode subStatus = U_ZERO_ERROR; uint8_t bytes[200]; uint8_t bytesb[200]; #define USE_CXX 0 #if USE_CXX Collator *col = Collator::createInstance(Locale(locID),subStatus); if(U_FAILURE(subStatus)) { printf("ERR: %s\n", u_errorName(subStatus)); continue; } int32_t len = col->getSortKey(stuff, -1, bytes, 200); #else #if 1 char xbuf2[200]; strcpy(xbuf2,"X/"); strcat(xbuf2,locID); strcat(xbuf2,"/"); //printf(" -> %s\n", xbuf2); UCollator *col = ucol_openFromShortString(xbuf2, FALSE,NULL, &subStatus); #else UCollator *col = ucol_open(locID, &subStatus); #endif if(U_FAILURE(subStatus)) { printf("ERR: %s\n", u_errorName(subStatus)); continue; } char xbuf3[200]; { int32_t def = ucol_getShortDefinitionString(col,locID/*NULL*/,xbuf3,200,&subStatus); if(U_FAILURE(subStatus)) { printf("Err getting short string name: %s\n", u_errorName(subStatus)); } else { printf(" --> %s\n", xbuf3); } } int32_t len = ucol_getSortKey(col, stuff, -1, bytes, 200); #endif printf(" "); int tdiffs=0; for(int i=0;i<len;i++) { if(i<oldLen&&bytes[i]!=oldBytes[i]) { diffs++; printf("*"); } else { printf(" "); } printf("%02X", (0xFF&bytes[i])); } printf("\n"); char xbuf4[200]; UCollator *col2 = ucol_openFromShortString(xbuf3, FALSE, NULL, &subStatus); if(U_FAILURE(subStatus)) { printf("Err opening from new short string : %s\n", u_errorName(subStatus)); continue; } else { int32_t def4 = ucol_getShortDefinitionString(col,locID/*NULL*/,xbuf4,200,&subStatus); if(strcmp(xbuf4,xbuf3)) { printf(" --> reopened = %s (%s)\n", xbuf4, u_errorName(subStatus)); } } int32_t len2 = ucol_getSortKey(col2, stuff, -1, bytesb, 200); int baddiffs=0; for(int i=0;i<len;i++) { if(i<len&&bytes[i]!=bytesb[i]) { baddiffs++; printf("!"); } else { // printf(" "); } // printf("%02X", (0xFF&bytesb[i])); } if(baddiffs>0) { printf(" - ERR! Diffs from %s in %d places\n", xbuf2,baddiffs); gbaddiffs+=baddiffs; } else { //printf(" OK.\n"); } // printf("\n"); #if USE_CXX delete col; #else ucol_close(col); #endif oldLen = len; memcpy(oldBytes, bytes, len); } } if(diffs==0) { #if (U_ICU_VERSION_MAJOR_NUM < 49) printf("ERROR: 0 differences found between platforms. ICU " U_ICU_VERSION " does not support collator plugins properly (not until 49)\n"); #else printf("ERROR: 0 differences found between platforms.. are the platforms installed? Try 'icuinfo -L'\n"); #endif return 1; } else { printf("%d differences found among provider versions!\n", diffs); } if(gbaddiffs>0) { printf("ERROR: %d diffs found between a collator and it's reopened (from shortstring) variant.\n", gbaddiffs); return 2; } else { printf("Collator and reopened (shortstring) are OK.\n"); } printf("Success!\n"); return 0; }