/**
 * Checks that the test index contains an entry for `id` under the collated
 * form of `str`.
 *
 * The expected key is built in two parts: the sort key that an "en_US",
 * primary-strength MojDbTextCollator produces for `str`, followed by the bytes
 * obtained by visiting `id` with a MojObjectWriter. The lookup itself is
 * delegated to assertContains().
 *
 * @param ti   index under test, passed through to assertContains()
 * @param id   object id whose written form is appended to the key
 * @param str  text to run through the collator
 * @return MojErrNone when every step succeeds; failures are handled by the
 *         MojErrCheck / MojAllocCheck / MojTestErrCheck macros.
 */
MojErr MojDbIndexTest::assertContainsText(TestIndex& ti, MojObject id, const MojChar* str)
{
    MojString text;
    MojErr err = text.assign(str);
    MojErrCheck(err);

    // Collate the text into the leading part of the expected key.
    MojRefCountedPtr<MojDbTextCollator> textCollator(new MojDbTextCollator);
    MojAllocCheck(textCollator.get());
    err = textCollator->init(_T("en_US"), MojDbCollationPrimary);
    MojTestErrCheck(err);

    MojDbKey expectedKey;
    err = textCollator->sortKey(text, expectedKey);
    MojTestErrCheck(err);

    // Write out the id and fetch the raw bytes it produced.
    MojObjectWriter idWriter;
    err = id.visit(idWriter);
    MojTestErrCheck(err);

    const MojByte* idBytes = NULL;
    MojSize idLength = 0;
    err = idWriter.buf().data(idBytes, idLength);
    MojTestErrCheck(err);

    // Expected key = collated text followed by the id bytes.
    err = expectedKey.byteVec().append(idBytes, idBytes + idLength);
    MojTestErrCheck(err);

    err = assertContains(ti, id, expectedKey);
    MojTestErrCheck(err);

    return MojErrNone;
}
// A numeric sort field must come back unchanged even though a string-reversing
// mock collator is attached to the generator.
TEST(SortKeyGeneratorTest, CollatorHasNoEffectWhenExtractingNonStringSortKey) {
    CollatorInterfaceMock reversingCollator(CollatorInterfaceMock::MockType::kReverseString);
    const auto sortPattern = BSON("a" << 1);
    auto generator = stdx::make_unique<SortKeyGenerator>(sortPattern, &reversingCollator);

    const auto document = fromjson("{_id: 0, z: 10, a: 6, b: 16}");
    auto extracted = generator->getSortKey(document, nullptr);

    ASSERT_OK(extracted.getStatus());
    ASSERT_BSONOBJ_EQ(extracted.getValue(), BSON("" << 6));
}
// A string sort field is expected back in the collator's comparison-key form:
// with the reversing mock, 'thing2' becomes "2gniht".
TEST(SortKeyGeneratorTest, ExtractStringSortKeyWithCollatorUsesComparisonKey) {
    CollatorInterfaceMock reversingCollator(CollatorInterfaceMock::MockType::kReverseString);
    const auto sortPattern = BSON("a" << 1);
    auto generator = stdx::make_unique<SortKeyGenerator>(sortPattern, &reversingCollator);

    const auto document = fromjson("{_id: 0, z: 'thing1', a: 'thing2', b: 16}");
    auto extracted = generator->getSortKey(document, nullptr);

    ASSERT_OK(extracted.getStatus());
    ASSERT_BSONOBJ_EQ(extracted.getValue(),
                      BSON(""
                           << "2gniht"));
}
// For an array-valued sort field with the reversing mock collator attached,
// the expected sort key is "ayy" (the reversed form of the element 'yya').
TEST(SortKeyGeneratorTest, EnsureSortKeyGenerationForArraysRespectsCollation) {
    CollatorInterfaceMock reversingCollator(CollatorInterfaceMock::MockType::kReverseString);
    const auto sortPattern = BSON("a" << 1);
    auto generator = stdx::make_unique<SortKeyGenerator>(sortPattern, &reversingCollator);

    const auto document = fromjson("{_id: 0, a: ['aaz', 'zza', 'yya', 'zzb']}");
    auto extracted = generator->getSortKey(document, nullptr);

    ASSERT_OK(extracted.getStatus());
    ASSERT_BSONOBJ_EQ(extracted.getValue(),
                      BSON(""
                           << "ayy"));
}
/**
 * Sorts the cursor's items by the order property (m_orderProp).
 *
 * A text collator is initialised with the index locale when an index is
 * attached (otherwise the cursor's own locale) and with the cursor's collation
 * strength, falling back to primary when none was set. Each item then gets the
 * key set extracted from its object for the order property, and m_items is
 * sorted.
 *
 * @return MojErrNone on success; failures are handled by the MojErrCheck /
 *         MojAllocCheck macros.
 */
MojErr MojDbSearchCursor::sort()
{
    LOG_TRACE("Entering function %s", __FUNCTION__);
    MojAssert(!m_orderProp.empty());

    // TODO: instead of parsing all objects, find the serialized field in the
    // object and compare it directly

    MojRefCountedPtr<MojDbTextCollator> collator(new MojDbTextCollator);
    MojAllocCheck(collator.get());

    // An attached index overrides the cursor's own locale.
    MojString sortLocale = m_locale;
    if (m_dbIndex) {
        sortLocale = m_dbIndex->locale();
    }

    // An unset collation strength defaults to primary.
    const MojDbCollationStrength strength =
        (m_collation == MojDbCollationInvalid) ? MojDbCollationPrimary : m_collation;

    MojErr err = collator->init(sortLocale, strength);
    MojErrCheck(err);

    // Extractor that pulls the order property out of each object.
    MojDbPropExtractor extractor;
    extractor.collator(collator.get());
    err = extractor.prop(m_orderProp);
    MojErrCheck(err);

    // NOTE(review): builder is never used below -- candidate for removal.
    MojDbKeyBuilder builder;

    // Attach freshly extracted sort keys to every item.
    ItemVec::Iterator item;
    err = m_items.begin(item);
    MojErrCheck(err);
    for (; item != m_items.end(); ++item) {
        KeySet extracted;
        err = extractor.vals((*item)->obj(), extracted);
        MojErrCheck(err);
        (*item)->sortKeys(extracted);
    }

    err = m_items.sort();
    MojErrCheck(err);

    return MojErrNone;
}
// // searchTime() A quick and dirty performance test for string search. // Probably doesn't really belong as part of intltest, but it // does check that the search succeeds, and gets the right result, // so it serves as a functionality test also. // // To run as a perf test, up the loop count, select by commenting // and uncommenting in the code the operation to be measured, // rebuild, and measure the running time of this test alone. // // time LD_LIBRARY_PATH=whatever ./intltest collate/SSearchTest/searchTime // void SSearchTest::searchTime() { static const char *longishText = "Whylom, as olde stories tellen us,\n" "Ther was a duk that highte Theseus:\n" "Of Athenes he was lord and governour,\n" "And in his tyme swich a conquerour,\n" "That gretter was ther noon under the sonne.\n" "Ful many a riche contree hadde he wonne;\n" "What with his wisdom and his chivalrye,\n" "He conquered al the regne of Femenye,\n" "That whylom was y-cleped Scithia;\n" "And weddede the quene Ipolita,\n" "And broghte hir hoom with him in his contree\n" "With muchel glorie and greet solempnitee,\n" "And eek hir yonge suster Emelye.\n" "And thus with victorie and with melodye\n" "Lete I this noble duk to Athenes ryde,\n" "And al his hoost, in armes, him bisyde.\n" "And certes, if it nere to long to here,\n" "I wolde han told yow fully the manere,\n" "How wonnen was the regne of Femenye\n" "By Theseus, and by his chivalrye;\n" "And of the grete bataille for the nones\n" "Bitwixen Athen's and Amazones;\n" "And how asseged was Ipolita,\n" "The faire hardy quene of Scithia;\n" "And of the feste that was at hir weddinge,\n" "And of the tempest at hir hoom-cominge;\n" "But al that thing I moot as now forbere.\n" "I have, God woot, a large feeld to ere,\n" "And wayke been the oxen in my plough.\n" "The remenant of the tale is long y-nough.\n" "I wol nat letten eek noon of this route;\n" "Lat every felawe telle his tale aboute,\n" "And lat see now who shal the soper winne;\n" "And ther I lefte, 
I wol ageyn biginne.\n" "This duk, of whom I make mencioun,\n" "When he was come almost unto the toun,\n" "In al his wele and in his moste pryde,\n" "He was war, as he caste his eye asyde,\n" "Wher that ther kneled in the hye weye\n" "A companye of ladies, tweye and tweye,\n" "Ech after other, clad in clothes blake; \n" "But swich a cry and swich a wo they make,\n" "That in this world nis creature livinge,\n" "That herde swich another weymentinge;\n" "And of this cry they nolde never stenten,\n" "Til they the reynes of his brydel henten.\n" "'What folk ben ye, that at myn hoomcominge\n" "Perturben so my feste with cryinge'?\n" "Quod Theseus, 'have ye so greet envye\n" "Of myn honour, that thus compleyne and crye? \n" "Or who hath yow misboden, or offended?\n" "And telleth me if it may been amended;\n" "And why that ye ben clothed thus in blak'?\n" "The eldest lady of hem alle spak,\n" "When she hadde swowned with a deedly chere,\n" "That it was routhe for to seen and here,\n" "And seyde: 'Lord, to whom Fortune hath yiven\n" "Victorie, and as a conquerour to liven,\n" "Noght greveth us your glorie and your honour;\n" "But we biseken mercy and socour.\n" "Have mercy on our wo and our distresse.\n" "Som drope of pitee, thurgh thy gentilesse,\n" "Up-on us wrecched wommen lat thou falle.\n" "For certes, lord, ther nis noon of us alle,\n" "That she nath been a duchesse or a quene;\n" "Now be we caitifs, as it is wel sene:\n" "Thanked be Fortune, and hir false wheel,\n" "That noon estat assureth to be weel.\n" "And certes, lord, t'abyden your presence,\n" "Here in the temple of the goddesse Clemence\n" "We han ben waytinge al this fourtenight;\n" "Now help us, lord, sith it is in thy might.\n" "I wrecche, which that wepe and waille thus,\n" "Was whylom wyf to king Capaneus,\n" "That starf at Thebes, cursed be that day!\n" "And alle we, that been in this array,\n" "And maken al this lamentacioun,\n" "We losten alle our housbondes at that toun,\n" "Whyl that the sege 
ther-aboute lay.\n" "And yet now th'olde Creon, weylaway!\n" "The lord is now of Thebes the citee, \n" "Fulfild of ire and of iniquitee,\n" "He, for despyt, and for his tirannye,\n" "To do the dede bodyes vileinye,\n" "Of alle our lordes, whiche that ben slawe,\n" "Hath alle the bodyes on an heep y-drawe,\n" "And wol nat suffren hem, by noon assent,\n" "Neither to been y-buried nor y-brent,\n" "But maketh houndes ete hem in despyt. zet'\n"; const char *cPattern = "maketh houndes ete hem"; //const char *cPattern = "Whylom"; //const char *cPattern = "zet"; const char *testId = "searchTime()"; // for error macros. UnicodeString target = longishText; UErrorCode status = U_ZERO_ERROR; LocalUCollatorPointer collator(ucol_open("en", &status)); //ucol_setStrength(collator.getAlias(), collatorStrength); //ucol_setAttribute(collator.getAlias(), UCOL_NORMALIZATION_MODE, normalize, &status); UnicodeString uPattern = cPattern; LocalUStringSearchPointer uss(usearch_openFromCollator(uPattern.getBuffer(), uPattern.length(), target.getBuffer(), target.length(), collator.getAlias(), NULL, // the break iterator &status)); TEST_ASSERT_SUCCESS(status); // int32_t foundStart; // int32_t foundEnd; UBool found; // Find the match position usgin strstr const char *pm = strstr(longishText, cPattern); TEST_ASSERT_M(pm!=NULL, "No pattern match with strstr"); int32_t refMatchPos = (int32_t)(pm - longishText); int32_t icuMatchPos; int32_t icuMatchEnd; usearch_search(uss.getAlias(), 0, &icuMatchPos, &icuMatchEnd, &status); TEST_ASSERT_SUCCESS(status); TEST_ASSERT_M(refMatchPos == icuMatchPos, "strstr and icu give different match positions."); int32_t i; // int32_t j=0; // Try loopcounts around 100000 to some millions, depending on the operation, // to get runtimes of at least several seconds. for (i=0; i<10000; i++) { found = usearch_search(uss.getAlias(), 0, &icuMatchPos, &icuMatchEnd, &status); (void)found; // Suppress set but not used warning. 
//TEST_ASSERT_SUCCESS(status); //TEST_ASSERT(found); // usearch_setOffset(uss.getAlias(), 0, &status); // icuMatchPos = usearch_next(uss.getAlias(), &status); // The i+j stuff is to confuse the optimizer and get it to actually leave the // call to strstr in place. //pm = strstr(longishText+j, cPattern); //j = (j + i)%5; } //printf("%ld, %d\n", pm-longishText, j); }
void SSearchTest::searchTest() { #if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FILE_IO UErrorCode status = U_ZERO_ERROR; char path[PATH_BUFFER_SIZE]; const char *testFilePath = getPath(path, "ssearch.xml"); if (testFilePath == NULL) { return; /* Couldn't get path: error message already output. */ } LocalPointer<UXMLParser> parser(UXMLParser::createParser(status)); TEST_ASSERT_SUCCESS(status); LocalPointer<UXMLElement> root(parser->parseFile(testFilePath, status)); TEST_ASSERT_SUCCESS(status); if (U_FAILURE(status)) { return; } const UnicodeString *debugTestCase = root->getAttribute("debug"); if (debugTestCase != NULL) { // setenv("USEARCH_DEBUG", "1", 1); } const UXMLElement *testCase; int32_t tc = 0; while((testCase = root->nextChildElement(tc)) != NULL) { if (testCase->getTagName().compare("test-case") != 0) { errln("ssearch, unrecognized XML Element in test file"); continue; } const UnicodeString *id = testCase->getAttribute("id"); *testId = 0; if (id != NULL) { id->extract(0, id->length(), testId, sizeof(testId), US_INV); } // If debugging test case has been specified and this is not it, skip to next. if (id!=NULL && debugTestCase!=NULL && *id != *debugTestCase) { continue; } // // Get the requested collation strength. // Default is tertiary if the XML attribute is missing from the test case. // const UnicodeString *strength = testCase->getAttribute("strength"); UColAttributeValue collatorStrength = UCOL_PRIMARY; if (strength==NULL) { collatorStrength = UCOL_TERTIARY;} else if (*strength=="PRIMARY") { collatorStrength = UCOL_PRIMARY;} else if (*strength=="SECONDARY") { collatorStrength = UCOL_SECONDARY;} else if (*strength=="TERTIARY") { collatorStrength = UCOL_TERTIARY;} else if (*strength=="QUATERNARY") { collatorStrength = UCOL_QUATERNARY;} else if (*strength=="IDENTICAL") { collatorStrength = UCOL_IDENTICAL;} else { // Bogus value supplied for strength. Shouldn't happen, even from // typos, if the XML source has been validated. 
// This assert is a little deceiving in that strength can be // any of the allowed values, not just TERTIARY, but it will // do the job of getting the error output. TEST_ASSERT(*strength=="TERTIARY") } // // Get the collator normalization flag. Default is UCOL_OFF. // UColAttributeValue normalize = UCOL_OFF; const UnicodeString *norm = testCase->getAttribute("norm"); TEST_ASSERT (norm==NULL || *norm=="ON" || *norm=="OFF"); if (norm!=NULL && *norm=="ON") { normalize = UCOL_ON; } // // Get the alternate_handling flag. Default is UCOL_NON_IGNORABLE. // UColAttributeValue alternateHandling = UCOL_NON_IGNORABLE; const UnicodeString *alt = testCase->getAttribute("alternate_handling"); TEST_ASSERT (alt == NULL || *alt == "SHIFTED" || *alt == "NON_IGNORABLE"); if (alt != NULL && *alt == "SHIFTED") { alternateHandling = UCOL_SHIFTED; } const UnicodeString defLocale("en"); char clocale[100]; const UnicodeString *locale = testCase->getAttribute("locale"); if (locale == NULL || locale->length()==0) { locale = &defLocale; }; locale->extract(0, locale->length(), clocale, sizeof(clocale), NULL); UnicodeString text; UnicodeString target; UnicodeString pattern; int32_t expectedMatchStart = -1; int32_t expectedMatchLimit = -1; const UXMLElement *n; int32_t nodeCount = 0; n = testCase->getChildElement("pattern"); TEST_ASSERT(n != NULL); if (n==NULL) { continue; } text = n->getText(FALSE); text = text.unescape(); pattern.append(text); nodeCount++; n = testCase->getChildElement("pre"); if (n!=NULL) { text = n->getText(FALSE); text = text.unescape(); target.append(text); nodeCount++; } n = testCase->getChildElement("m"); if (n!=NULL) { expectedMatchStart = target.length(); text = n->getText(FALSE); text = text.unescape(); target.append(text); expectedMatchLimit = target.length(); nodeCount++; } n = testCase->getChildElement("post"); if (n!=NULL) { text = n->getText(FALSE); text = text.unescape(); target.append(text); nodeCount++; } // Check that there weren't extra things in the XML 
TEST_ASSERT(nodeCount == testCase->countChildren()); // Open a collator and StringSearch based on the parameters // obtained from the XML. // status = U_ZERO_ERROR; LocalUCollatorPointer collator(ucol_open(clocale, &status)); ucol_setStrength(collator.getAlias(), collatorStrength); ucol_setAttribute(collator.getAlias(), UCOL_NORMALIZATION_MODE, normalize, &status); ucol_setAttribute(collator.getAlias(), UCOL_ALTERNATE_HANDLING, alternateHandling, &status); LocalUStringSearchPointer uss(usearch_openFromCollator(pattern.getBuffer(), pattern.length(), target.getBuffer(), target.length(), collator.getAlias(), NULL, // the break iterator &status)); TEST_ASSERT_SUCCESS(status); if (U_FAILURE(status)) { continue; } int32_t foundStart = 0; int32_t foundLimit = 0; UBool foundMatch; // // Do the search, check the match result against the expected results. // foundMatch= usearch_search(uss.getAlias(), 0, &foundStart, &foundLimit, &status); TEST_ASSERT_SUCCESS(status); if ((foundMatch && expectedMatchStart<0) || (foundStart != expectedMatchStart) || (foundLimit != expectedMatchLimit)) { TEST_ASSERT(FALSE); // ouput generic error position infoln("Found, expected match start = %d, %d \n" "Found, expected match limit = %d, %d", foundStart, expectedMatchStart, foundLimit, expectedMatchLimit); } // In case there are other matches... // (should we only do this if the test case passed?) while (foundMatch) { expectedMatchStart = foundStart; expectedMatchLimit = foundLimit; foundMatch = usearch_search(uss.getAlias(), foundLimit, &foundStart, &foundLimit, &status); } uss.adoptInstead(usearch_openFromCollator(pattern.getBuffer(), pattern.length(), target.getBuffer(), target.length(), collator.getAlias(), NULL, &status)); // // Do the backwards search, check the match result against the expected results. 
// foundMatch= usearch_searchBackwards(uss.getAlias(), target.length(), &foundStart, &foundLimit, &status); TEST_ASSERT_SUCCESS(status); if ((foundMatch && expectedMatchStart<0) || (foundStart != expectedMatchStart) || (foundLimit != expectedMatchLimit)) { TEST_ASSERT(FALSE); // ouput generic error position infoln("Found, expected backwards match start = %d, %d \n" "Found, expected backwards match limit = %d, %d", foundStart, expectedMatchStart, foundLimit, expectedMatchLimit); } } #endif }
// Based on default implementation from libxslt 1.1.22 and xsltICUSort.c example. void xsltUnicodeSortFunction(xsltTransformContextPtr ctxt, xmlNodePtr *sorts, int nbsorts) { #ifdef XSLT_REFACTORED xsltStyleItemSortPtr comp; #else xsltStylePreCompPtr comp; #endif xmlXPathObjectPtr *resultsTab[XSLT_MAX_SORT]; xmlXPathObjectPtr *results = NULL, *res; xmlNodeSetPtr list = NULL; int descending, number, desc, numb; int len = 0; int i, j, incr; int tst; int depth; xmlNodePtr node; xmlXPathObjectPtr tmp; int tempstype[XSLT_MAX_SORT], temporder[XSLT_MAX_SORT]; if ((ctxt == NULL) || (sorts == NULL) || (nbsorts <= 0) || (nbsorts >= XSLT_MAX_SORT)) return; if (sorts[0] == NULL) return; comp = static_cast<xsltStylePreComp*>(sorts[0]->psvi); if (comp == NULL) return; list = ctxt->nodeList; if ((list == NULL) || (list->nodeNr <= 1)) return; /* nothing to do */ for (j = 0; j < nbsorts; j++) { comp = static_cast<xsltStylePreComp*>(sorts[j]->psvi); tempstype[j] = 0; if ((comp->stype == NULL) && (comp->has_stype != 0)) { comp->stype = xsltEvalAttrValueTemplate(ctxt, sorts[j], (const xmlChar *) "data-type", XSLT_NAMESPACE); if (comp->stype != NULL) { tempstype[j] = 1; if (xmlStrEqual(comp->stype, (const xmlChar *) "text")) comp->number = 0; else if (xmlStrEqual(comp->stype, (const xmlChar *) "number")) comp->number = 1; else { xsltTransformError(ctxt, NULL, sorts[j], "xsltDoSortFunction: no support for data-type = %s\n", comp->stype); comp->number = 0; /* use default */ } } } temporder[j] = 0; if ((comp->order == NULL) && (comp->has_order != 0)) { comp->order = xsltEvalAttrValueTemplate(ctxt, sorts[j], (const xmlChar *) "order", XSLT_NAMESPACE); if (comp->order != NULL) { temporder[j] = 1; if (xmlStrEqual(comp->order, (const xmlChar *) "ascending")) comp->descending = 0; else if (xmlStrEqual(comp->order, (const xmlChar *) "descending")) comp->descending = 1; else { xsltTransformError(ctxt, NULL, sorts[j], "xsltDoSortFunction: invalid value %s for order\n", comp->order); 
comp->descending = 0; /* use default */ } } } } len = list->nodeNr; resultsTab[0] = xsltComputeSortResult(ctxt, sorts[0]); for (i = 1;i < XSLT_MAX_SORT;i++) resultsTab[i] = NULL; results = resultsTab[0]; comp = static_cast<xsltStylePreComp*>(sorts[0]->psvi); descending = comp->descending; number = comp->number; if (results == NULL) return; // We are passing a language identifier to a function that expects a locale identifier. // The implementation of Collator should be lenient, and accept both "en-US" and "en_US", for example. // This lets an author specify sorting rules, e.g. "de_DE@collation=phonebook", which isn't // possible with language alone. Collator collator(comp->has_lang ? reinterpret_cast<const char*>(comp->lang) : "en", comp->lower_first); /* Shell's sort of node-set */ for (incr = len / 2; incr > 0; incr /= 2) { for (i = incr; i < len; i++) { j = i - incr; if (results[i] == NULL) continue; while (j >= 0) { if (results[j] == NULL) tst = 1; else { if (number) { /* We make NaN smaller than number in accordance with XSLT spec */ if (xmlXPathIsNaN(results[j]->floatval)) { if (xmlXPathIsNaN(results[j + incr]->floatval)) tst = 0; else tst = -1; } else if (xmlXPathIsNaN(results[j + incr]->floatval)) tst = 1; else if (results[j]->floatval == results[j + incr]->floatval) tst = 0; else if (results[j]->floatval > results[j + incr]->floatval) tst = 1; else tst = -1; } else tst = collator.collateUTF8(reinterpret_cast<const char*>(results[j]->stringval), reinterpret_cast<const char*>(results[j + incr]->stringval)); if (descending) tst = -tst; } if (tst == 0) { /* * Okay we need to use multi level sorts */ depth = 1; while (depth < nbsorts) { if (sorts[depth] == NULL) break; comp = static_cast<xsltStylePreComp*>(sorts[depth]->psvi); if (comp == NULL) break; desc = comp->descending; numb = comp->number; /* * Compute the result of the next level for the * full set, this might be optimized ... 
or not */ if (resultsTab[depth] == NULL) resultsTab[depth] = xsltComputeSortResult(ctxt, sorts[depth]); res = resultsTab[depth]; if (res == NULL) break; if (res[j] == NULL) { if (res[j+incr] != NULL) tst = 1; } else { if (numb) { /* We make NaN smaller than number in accordance with XSLT spec */ if (xmlXPathIsNaN(res[j]->floatval)) { if (xmlXPathIsNaN(res[j + incr]->floatval)) tst = 0; else tst = -1; } else if (xmlXPathIsNaN(res[j + incr]-> floatval)) tst = 1; else if (res[j]->floatval == res[j + incr]-> floatval) tst = 0; else if (res[j]->floatval > res[j + incr]->floatval) tst = 1; else tst = -1; } else tst = collator.collateUTF8(reinterpret_cast<const char*>(res[j]->stringval), reinterpret_cast<const char*>(res[j + incr]->stringval)); if (desc) tst = -tst; } /* * if we still can't differenciate at this level * try one level deeper. */ if (tst != 0) break; depth++; } } if (tst == 0) { tst = results[j]->index > results[j + incr]->index; } if (tst > 0) { tmp = results[j]; results[j] = results[j + incr]; results[j + incr] = tmp; node = list->nodeTab[j]; list->nodeTab[j] = list->nodeTab[j + incr]; list->nodeTab[j + incr] = node; depth = 1; while (depth < nbsorts) { if (sorts[depth] == NULL) break; if (resultsTab[depth] == NULL) break; res = resultsTab[depth]; tmp = res[j]; res[j] = res[j + incr]; res[j + incr] = tmp; depth++; } j -= incr; } else break; } } } for (j = 0; j < nbsorts; j++) { comp = static_cast<xsltStylePreComp*>(sorts[j]->psvi); if (tempstype[j] == 1) { /* The data-type needs to be recomputed each time */ xmlFree((void *)(comp->stype)); comp->stype = NULL; } if (temporder[j] == 1) { /* The order needs to be recomputed each time */ xmlFree((void *)(comp->order)); comp->order = NULL; } if (resultsTab[j] != NULL) { for (i = 0;i < len;i++) xmlXPathFreeObject(resultsTab[j][i]); xmlFree(resultsTab[j]); } } }