/**
 * Compares two strings by generating their sort keys piece by piece with
 * ucol_nextSortKeyPart() and comparing each pair of pieces bytewise.
 *
 * The iterators are re-created on every round on purpose: only the two-word
 * state arrays carry the position from one call to the next.
 *
 * @param coll      collator used to produce the partial sort keys
 * @param source    first string (UTF-16)
 * @param sLen      length of source as passed to uiter_setString()
 * @param target    second string (UTF-16)
 * @param tLen      length of target as passed to uiter_setString()
 * @param pieceSize number of sort key bytes requested per round; must be in
 *                  the range 1..sizeof(sBuf)
 * @param status    reset to U_ZERO_ERROR for a valid pieceSize; set to
 *                  U_ILLEGAL_ARGUMENT_ERROR for a non-positive pieceSize and to
 *                  U_BUFFER_OVERFLOW_ERROR when pieceSize exceeds the buffers
 * @return UCOL_LESS, UCOL_GREATER or UCOL_EQUAL depending on the first piece
 *         that differs; UCOL_EQUAL when pieceSize is rejected
 */
UCollationResult IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) {
    int32_t partialSKResult = 0;
    uint8_t sBuf[512], tBuf[512];
    UCharIterator sIter, tIter;
    uint32_t sState[2], tState[2];
    int32_t sSize = pieceSize, tSize = pieceSize;
    int32_t i = 0;

    // A zero piece size would never terminate the loop below, and a negative
    // one would reach memcmp() as a huge unsigned length.
    if(pieceSize <= 0) {
        errln("Partial sortkey piece size must be positive");
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return UCOL_EQUAL;
    }
    // ucol_nextSortKeyPart() is told it may write pieceSize bytes, so the
    // request has to fit into the stack buffers.
    if(pieceSize > (int32_t)sizeof(sBuf)) {
        errln("Partial sortkey size buffer too small. Please consider increasing the buffer!");
        status = U_BUFFER_OVERFLOW_ERROR;
        return UCOL_EQUAL;
    }

    status = U_ZERO_ERROR;
    sState[0] = 0; sState[1] = 0;
    tState[0] = 0; tState[1] = 0;

    // Keep going while both keys still deliver full pieces and no difference
    // has been found.
    while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
        uiter_setString(&sIter, source, sLen);
        uiter_setString(&tIter, target, tLen);
        sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status);
        tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status);

        if(sState[0] != 0 || tState[0] != 0) {
            log("State != 0 : %08X %08X\n", sState[0], tState[0]);
        }
        log("%i ", i++);

        // NOTE(review): the comparison always spans a whole piece, so it
        // depends on how ucol_nextSortKeyPart() leaves the tail of a short
        // final piece - confirm against the ICU documentation.
        partialSKResult = memcmp(sBuf, tBuf, pieceSize);
    }

    if(partialSKResult < 0) {
        return UCOL_LESS;
    } else if(partialSKResult > 0) {
        return UCOL_GREATER;
    } else {
        return UCOL_EQUAL;
    }
}
static void TestLenient8Iterator() { static const UChar text[]={ 0x61, 0x62, 0x63, /* dffd 107fd d801 dffd - in UTF-16, U+107fd=<d801 dffd> */ 0xdffd, 0xd801, 0xdffd, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0 }; static const uint8_t bytes[]={ 0x61, 0x62, 0x63, /* dffd 107fd d801 dffd - mixture */ 0xed, 0xbf, 0xbd, 0xf0, 0x90, 0x9f, 0xbd, 0xed, 0xa0, 0x81, 0xed, 0xbf, 0xbd, 0x78, 0x79, 0x7a, 0 }; UCharIterator iter1, iter2; UChar32 c1, c2; int32_t length; puts("test a UCharIterator for lenient 8-bit Unicode (accept single surrogates)"); /* compare the same string between UTF-16 and lenient-8 UCharIterators */ uiter_setString(&iter1, text, -1); uiter_setLenient8(&iter2, (const char *)bytes, sizeof(bytes)-1); compareIterators(&iter1, "UTF16Iterator", &iter2, "Lenient8Iterator"); /* try again with length=-1 */ uiter_setLenient8(&iter2, (const char *)bytes, -1); compareIterators(&iter1, "UTF16Iterator", &iter2, "Lenient8Iterator_1"); /* test get/set state */ length=LENGTHOF(text)-1; uiter_setLenient8(&iter1, bytes, -1); testIteratorState(&iter1, &iter2, "Lenient8IteratorState", length/2); testIteratorState(&iter1, &iter2, "Lenient8IteratorStatePlus1", length/2+1); /* ---------------------------------------------------------------------- */ puts("no output so far means that the lenient-8 iterator works fine"); puts("iterate forward:\nUTF-16\tlenient-8"); uiter_setString(&iter1, text, -1); iter1.move(&iter1, 0, UITER_START); iter2.move(&iter2, 0, UITER_START); for(;;) { c1=iter1.next(&iter1); c2=iter2.next(&iter2); if(c1<0 && c2<0) { break; } if(c1<0) { printf("\t%04x\n", c2); } else if(c2<0) { printf("%04x\n", c1); } else { printf("%04x\t%04x\n", c1, c2); } } }
U_CAPI void U_EXPORT2 uiter_setUTF16BE(UCharIterator * iter, const char * s, int32_t length) { if (iter != NULL) { /* allow only even-length strings (the input length counts bytes) */ if (s != NULL && (length == -1 || (length >= 0 && IS_EVEN(length)))) { /* length/=2, except that >>=1 also works for -1 (-1/2==0, -1>>1==-1) */ length >>= 1; if (U_IS_BIG_ENDIAN && IS_POINTER_EVEN(s)) { /* big-endian machine and 2-aligned UTF-16BE string: use normal UChar iterator */ uiter_setString(iter, (const UChar *)s, length); return; } *iter = utf16BEIterator; iter->context = s; if (length >= 0) { iter->length = length; } else { iter->length = utf16BE_strlen(s); } iter->limit = iter->length; } else {
// Builds a UCharIterator over the characters of |string|.
// 8-bit strings are handed to createLatin1Iterator(); 16-bit strings are
// wrapped directly with uiter_setString(). The iterator refers to the
// characters of |string| and does not copy them.
UCharIterator createIterator(StringView string)
{
    const auto length = string.length();

    if (!string.is8Bit()) {
        UCharIterator utf16Iterator;
        uiter_setString(&utf16Iterator, string.characters16(), length);
        return utf16Iterator;
    }

    return createLatin1Iterator(string.characters8(), length);
}
/*---------------------------------------------------------------------------------------------- Convert the Graphite character offset to the decomposed NFD character offset used internally by views code. ----------------------------------------------------------------------------------------------*/ int FwGrTxtSrc::GrToVwOffset(int grOffset) { if (!m_useNFC) { // the Graphite offset is a NFD offset return grOffset; } else { // convert NFC offsets to internal NFD offsets if (grOffset == 0) return 0; HRESULT hr; int cch; IgnoreHr(hr = m_qts->get_Length(&cch)); if (FAILED(hr)) throw; if (grOffset > cch) // grOffset points beyond the available text, i.e. is invalid. return cch + 10; // arbitrary number that is bigger than NFD text StrUni stuNfd; wchar_t* pchNfd; stuNfd.SetSize(cch + 1, &pchNfd); IgnoreHr(hr = m_qts->Fetch(0, cch, pchNfd)); if (FAILED(hr)) throw; pchNfd[cch] = '\0'; wchar_t szOut[kNFDBufferSize]; UCharIterator iter; uiter_setString(&iter, pchNfd, -1); int curGrOffset = 0; while (iter.hasNext(&iter)) { int index = iter.getIndex(&iter, UITER_CURRENT); if (curGrOffset >= grOffset) return index; UBool neededToNormalize; UErrorCode uerr = U_ZERO_ERROR; int outLen = unorm_next(&iter, szOut, kNFDBufferSize, UNORM_NFC, 0, TRUE, &neededToNormalize, &uerr); Assert(U_SUCCESS(uerr)); curGrOffset++; for (int i = 1; i < outLen; i++) { if (curGrOffset >= grOffset) return index + i; curGrOffset++; } } return iter.getIndex(&iter, UITER_CURRENT); } }
int FwGrTxtSrc::VwToGrOffset(int vwOffset, bool& badOffset) { badOffset = false; if (!m_useNFC) { // the NFD offset is a Graphite offset return vwOffset; } else { // convert internal NFD offsets to NFC offsets if (vwOffset == 0) return 0; HRESULT hr; int cch; IgnoreHr(hr = m_qts->get_Length(&cch)); if (FAILED(hr)) throw; if (vwOffset > cch) return vwOffset; StrUni stuNfd; wchar_t* pchNfd; stuNfd.SetSize(cch + 1, &pchNfd); IgnoreHr(hr = m_qts->Fetch(0, cch, pchNfd)); if (FAILED(hr)) throw; pchNfd[cch] = '\0'; wchar_t szOut[kNFDBufferSize]; UCharIterator iter; uiter_setString(&iter, pchNfd, -1); int curGrOffset = 0; while (iter.hasNext(&iter)) { int index = iter.getIndex(&iter, UITER_CURRENT); UBool neededToNormalize; UErrorCode uerr = U_ZERO_ERROR; int outLen = unorm_next(&iter, szOut, kNFDBufferSize, UNORM_NFC, 0, TRUE, &neededToNormalize, &uerr); Assert(U_SUCCESS(uerr)); for (int i = 0; i < outLen; i++) { if (index + i + 1 > vwOffset) return curGrOffset; curGrOffset++; } if (neededToNormalize && iter.getIndex(&iter, UITER_CURRENT) > vwOffset) badOffset = true; } return curGrOffset; } }
/*
 * Compares two strings by generating their sort keys piece by piece with
 * ucol_nextSortKeyPart() and comparing each pair of pieces bytewise.
 *
 * The iterators are re-created on every round on purpose: only the two-word
 * state arrays carry the position from one call to the next.
 *
 * coll       collator used to produce the partial sort keys
 * source     first string (UTF-16), sLen its length as given to uiter_setString()
 * target     second string (UTF-16), tLen its length as given to uiter_setString()
 * pieceSize  number of sort key bytes requested per round; must be in the
 *            range 1..sizeof(sBuf)
 * status     reset to U_ZERO_ERROR for a valid pieceSize; set to
 *            U_ILLEGAL_ARGUMENT_ERROR for a non-positive pieceSize and to
 *            U_BUFFER_OVERFLOW_ERROR when pieceSize exceeds the buffers
 *
 * Returns UCOL_LESS, UCOL_GREATER or UCOL_EQUAL depending on the first piece
 * that differs; UCOL_EQUAL when pieceSize is rejected.
 */
static UCollationResult compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode *status) {
  int32_t partialSKResult = 0;
  UCharIterator sIter, tIter;
  uint32_t sState[2], tState[2];
  int32_t sSize = pieceSize, tSize = pieceSize;
  uint8_t sBuf[16384], tBuf[16384];

  /*
   * A zero piece size would never terminate the loop below, and a negative
   * one would reach memcmp() as a huge unsigned length.
   */
  if(pieceSize <= 0) {
    log_err("Partial sortkey piece size must be positive (got %i)\n", pieceSize);
    *status = U_ILLEGAL_ARGUMENT_ERROR;
    return UCOL_EQUAL;
  }
  /* the request has to fit into the stack buffers */
  if(pieceSize > (int32_t)sizeof(sBuf)) {
    log_err("Partial sortkey size buffer too small. Please consider increasing the buffer!\n");
    *status = U_BUFFER_OVERFLOW_ERROR;
    return UCOL_EQUAL;
  }

  *status = U_ZERO_ERROR;
  sState[0] = 0; sState[1] = 0;
  tState[0] = 0; tState[1] = 0;

  /* keep going while both keys deliver full pieces and no difference was found */
  while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
    uiter_setString(&sIter, source, sLen);
    uiter_setString(&tIter, target, tLen);
    sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, status);
    tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, status);

    /*
     * NOTE(review): the comparison always spans a whole piece, so it depends
     * on how ucol_nextSortKeyPart() leaves the tail of a short final piece -
     * confirm against the ICU documentation.
     */
    partialSKResult = memcmp(sBuf, tBuf, pieceSize);
  }

  if(partialSKResult < 0) {
    return UCOL_LESS;
  } else if(partialSKResult > 0) {
    return UCOL_GREATER;
  } else {
    return UCOL_EQUAL;
  }
}
/// Compares the strings via the Unicode Collation Algorithm on the root locale. /// Results are the usual string comparison results: /// <0 the left string is less than the right string. /// ==0 the strings are equal according to their collation. /// >0 the left string is greater than the right string. int32_t swift::_swift_stdlib_unicode_compare_utf8_utf16(const char *LeftString, int32_t LeftLength, const uint16_t *RightString, int32_t RightLength) { UCharIterator LeftIterator; UCharIterator RightIterator; UErrorCode ErrorCode = U_ZERO_ERROR; uiter_setUTF8(&LeftIterator, LeftString, LeftLength); #if defined(__CYGWIN__) || defined(_MSC_VER) || defined(__MINGW32__) uiter_setString(&RightIterator, reinterpret_cast<const UChar *>(RightString), RightLength); #else uiter_setString(&RightIterator, RightString, RightLength); #endif uint32_t Diff = ucol_strcollIter(GetRootCollator(), &LeftIterator, &RightIterator, &ErrorCode); if (U_FAILURE(ErrorCode)) { swift::crash("ucol_strcollIter: Unexpected error doing utf8<->utf16 string comparison."); } return Diff; }
/*
 * Set up iter to iterate over the 8-bit string s using the lenient-8
 * iterator functions.
 *
 * iter    iterator structure to fill in; nothing happens if it is NULL
 * s       the bytes to iterate over; stored in iter->context, not copied
 * length  number of bytes, or -1 to use strlen(s)
 *
 * A NULL s or a length below -1 turns iter into a no-op iterator.
 */
U_CAPI void U_EXPORT2
uiter_setLenient8(UCharIterator *iter, const char *s, int32_t length) {
    if(iter==NULL) {
        return;
    }

    if(s==NULL || length<-1) {
        /* set no-op iterator */
        uiter_setString(iter, NULL, 0);
        return;
    }

    *iter=lenient8Iterator;
    iter->context=s;
    iter->limit= length>=0 ? length : (int32_t)strlen(s);

    /* the length is only stored right away for at most one byte, otherwise it is left at -1 */
    if(iter->limit<=1) {
        iter->length=iter->limit;
    } else {
        iter->length=-1;
    }
}
int32_t _swift_stdlib_unicode_compare_utf8_utf16(const char *LeftString, int32_t LeftLength, const uint16_t *RightString, int32_t RightLength) { UCharIterator LeftIterator; UCharIterator RightIterator; UErrorCode ErrorCode = U_ZERO_ERROR; uiter_setUTF8(&LeftIterator, LeftString, LeftLength); uiter_setString(&RightIterator, RightString, RightLength); uint32_t Diff = ucol_strcollIter(GetRootCollator(), &LeftIterator, &RightIterator, &ErrorCode); if (U_FAILURE(ErrorCode)) { swift::crash("ucol_strcollIter: Unexpected error doing utf8<->utf16 string comparison."); } return Diff; }
// Ticket 7189 // // nextSortKeyPart incorrect for EO_S1 collation static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) { UCharIterator uiter; uint32_t state[2] = { 0, 0 }; int32_t keyLen; int32_t count = 8; uiter_setString(&uiter, text, len); keyLen = 0; while (TRUE) { int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status); if (U_FAILURE(status)) { return -1; } if (keyPartLen == 0) { break; } keyLen += keyPartLen; } return keyLen; }
void CharIterTest::TestUCharIterator() { // test string of length 8 UnicodeString s=UnicodeString("a \\U00010001b\\U0010fffdz", "").unescape(); const char *const moves= "0+++++++++" // 10 moves per line "----0-----" ">>|>>>>>>>" "<<|<<<<<<<" "22+>8>-8+2"; StringCharacterIterator sci(s), compareCI(s); UCharIterator sIter, cIter, rIter; uiter_setString(&sIter, s.getBuffer(), s.length()); uiter_setCharacterIterator(&cIter, &sci); uiter_setReplaceable(&rIter, &s); TestUCharIterator(&sIter, compareCI, moves, "uiter_setString"); compareCI.setIndex(0); TestUCharIterator(&cIter, compareCI, moves, "uiter_setCharacterIterator"); compareCI.setIndex(0); TestUCharIterator(&rIter, compareCI, moves, "uiter_setReplaceable"); // test move & getIndex some more sIter.start=2; sIter.index=3; sIter.limit=5; if( sIter.getIndex(&sIter, UITER_ZERO)!=0 || sIter.getIndex(&sIter, UITER_START)!=2 || sIter.getIndex(&sIter, UITER_CURRENT)!=3 || sIter.getIndex(&sIter, UITER_LIMIT)!=5 || sIter.getIndex(&sIter, UITER_LENGTH)!=s.length() ) { errln("error: UCharIterator(string).getIndex returns wrong index"); } if( sIter.move(&sIter, 4, UITER_ZERO)!=4 || sIter.move(&sIter, 1, UITER_START)!=3 || sIter.move(&sIter, 3, UITER_CURRENT)!=5 || sIter.move(&sIter, -1, UITER_LIMIT)!=4 || sIter.move(&sIter, -5, UITER_LENGTH)!=3 || sIter.move(&sIter, 0, UITER_CURRENT)!=sIter.getIndex(&sIter, UITER_CURRENT) || sIter.getIndex(&sIter, UITER_CURRENT)!=3 ) { errln("error: UCharIterator(string).move sets/returns wrong index"); } sci=StringCharacterIterator(s, 2, 5, 3); uiter_setCharacterIterator(&cIter, &sci); if( cIter.getIndex(&cIter, UITER_ZERO)!=0 || cIter.getIndex(&cIter, UITER_START)!=2 || cIter.getIndex(&cIter, UITER_CURRENT)!=3 || cIter.getIndex(&cIter, UITER_LIMIT)!=5 || cIter.getIndex(&cIter, UITER_LENGTH)!=s.length() ) { errln("error: UCharIterator(character iterator).getIndex returns wrong index"); } if( cIter.move(&cIter, 4, UITER_ZERO)!=4 || cIter.move(&cIter, 1, UITER_START)!=3 || cIter.move(&cIter, 3, 
UITER_CURRENT)!=5 || cIter.move(&cIter, -1, UITER_LIMIT)!=4 || cIter.move(&cIter, -5, UITER_LENGTH)!=3 || cIter.move(&cIter, 0, UITER_CURRENT)!=cIter.getIndex(&cIter, UITER_CURRENT) || cIter.getIndex(&cIter, UITER_CURRENT)!=3 ) { errln("error: UCharIterator(character iterator).move sets/returns wrong index"); } if(cIter.getIndex(&cIter, (enum UCharIteratorOrigin)-1) != -1) { errln("error: UCharIterator(char iter).getIndex did not return error value"); } if(cIter.move(&cIter, 0, (enum UCharIteratorOrigin)-1) != -1) { errln("error: UCharIterator(char iter).move did not return error value"); } if(rIter.getIndex(&rIter, (enum UCharIteratorOrigin)-1) != -1) { errln("error: UCharIterator(repl iter).getIndex did not return error value"); } if(rIter.move(&rIter, 0, (enum UCharIteratorOrigin)-1) != -1) { errln("error: UCharIterator(repl iter).move did not return error value"); } if(sIter.getIndex(&sIter, (enum UCharIteratorOrigin)-1) != -1) { errln("error: UCharIterator(string iter).getIndex did not return error value"); } if(sIter.move(&sIter, 0, (enum UCharIteratorOrigin)-1) != -1) { errln("error: UCharIterator(string iter).move did not return error value"); } /* Testing function coverage on bad input */ UErrorCode status = U_ZERO_ERROR; uiter_setString(&sIter, NULL, 1); uiter_setState(&sIter, 1, &status); if (status != U_UNSUPPORTED_ERROR) { errln("error: uiter_setState returned %s instead of U_UNSUPPORTED_ERROR", u_errorName(status)); } status = U_ZERO_ERROR; uiter_setState(NULL, 1, &status); if (status != U_ILLEGAL_ARGUMENT_ERROR) { errln("error: uiter_setState returned %s instead of U_ILLEGAL_ARGUMENT_ERROR", u_errorName(status)); } if (uiter_getState(&sIter) != UITER_NO_STATE) { errln("error: uiter_getState did not return UITER_NO_STATE on bad input"); } }
void IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result) { UErrorCode status = U_ZERO_ERROR; UCollator *myCollation = col->toUCollator(); Collator::EComparisonResult compareResult = col->compare(source, target); CollationKey srckey, tgtkey; col->getCollationKey(source, srckey, status); col->getCollationKey(target, tgtkey, status); if (U_FAILURE(status)){ errln("Creation of collation keys failed\n"); } Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey); reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result); UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status); int32_t sLen = source.length(), tLen = target.length(); const UChar* src = source.getBuffer(); const UChar* trg = target.getBuffer(); UCollationResult compareResultIter = (UCollationResult)result; { UCharIterator sIter, tIter; uiter_setString(&sIter, src, sLen); uiter_setString(&tIter, trg, tLen); compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status); if(compareResultIter != (UCollationResult)result) { errln("Different result for iterative comparison "+source+" "+target); } } /* convert the strings to UTF-8 and do try comparing with char iterator */ if(!quick) { /*!QUICK*/ char utf8Source[256], utf8Target[256]; int32_t utf8SourceLen = 0, utf8TargetLen = 0; u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status); if(U_FAILURE(status)) { /* probably buffer is not big enough */ log("Src UTF-8 buffer too small! 
Will not compare!\n"); } else { u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status); if(U_SUCCESS(status)) { /* probably buffer is not big enough */ UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result; UCharIterator sIter, tIter; /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/ uiter_setUTF8(&sIter, utf8Source, utf8SourceLen); uiter_setUTF8(&tIter, utf8Target, utf8TargetLen); /*uiter_setString(&sIter, source, sLen); uiter_setString(&tIter, target, tLen);*/ compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); sIter.move(&sIter, 0, UITER_START); tIter.move(&tIter, 0, UITER_START); compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); if(compareResultUTF8 != compareResultIter) { errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target); } if(compareResultUTF8 != compareResultUTF8Norm) { errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target); } } else { log("Target UTF-8 buffer too small! Did not compare!\n"); } if(U_FAILURE(status)) { log("UTF-8 strcoll failed! 
Ignoring result\n"); } } } /* testing the partial sortkeys */ { /*!QUICK*/ int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */ int32_t partialSizesSize = 1; if(!quick) { partialSizesSize = 7; } int32_t i = 0; log("partial sortkey test piecesize="); for(i = 0; i < partialSizesSize; i++) { UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result; log("%i ", partialSizes[i]); partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status); if(partialSKResult != (UCollationResult)result) { errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")"); } if(norm != UCOL_ON && !quick) { log("N "); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status); ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status); if(partialSKResult != partialNormalizedSKResult) { errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")"); } } } log("\n"); } /* if (compareResult != result) { errln("String comparison failed in variant test\n"); } if (keyResult != result) { errln("Collation key comparison failed in variant test\n"); } */ }
/*
 * Runs one source/target pair through every comparison path of a collator
 * and reports each path that disagrees with the expected result:
 *   - ucol_strcollIter() over UTF-16 iterators,
 *   - ucol_strcollIter() over UTF-8 iterators, with and without
 *     normalization (only when QUICK <= 0),
 *   - piecewise sort keys via compareUsingPartials() for several piece sizes,
 *   - ucol_strcoll() with explicit and with -1 (NUL-terminated) lengths,
 *   - full sort keys, again with explicit and -1 lengths.
 * The collected results are handed to reportCResult() at the end.
 *
 * myCollation  collator under test
 * source       first NUL-terminated UTF-16 string
 * target       second NUL-terminated UTF-16 string
 * result       expected comparison result
 */
static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
{
    int32_t sortklen1, sortklen2, sortklenmax;
    int temp=0, gSortklen1=0,gSortklen2=0;
    UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result;
    uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a;
    /* the lengths are only ever passed to int32_t parameters */
    int32_t sLen = u_strlen(source);
    int32_t tLen = u_strlen(target);
    char buffer[256];
    uint32_t len;
    UErrorCode status = U_ZERO_ERROR;
    /* remembered so the mode can be restored after it is toggled below */
    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);

    UCharIterator sIter, tIter;
    uiter_setString(&sIter, source, sLen);
    uiter_setString(&tIter, target, tLen);
    compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    if(compareResultIter != result) {
        log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
    }

    /* convert the strings to UTF-8 and do try comparing with char iterator */
    if(QUICK <= 0) { /*!QUICK*/
        char utf8Source[256], utf8Target[256];
        int32_t utf8SourceLen = 0, utf8TargetLen = 0;
        u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status);
        if(U_FAILURE(status)) { /* probably buffer is not big enough */
            log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
        } else {
            u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status);
            if(U_SUCCESS(status)) {
                UCollationResult compareResultUTF8 = result, compareResultUTF8Norm = result;

                /* sIter and tIter are reused for the UTF-8 text */
                uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
                uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
                compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);

                /* same comparison again with normalization switched on */
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
                sIter.move(&sIter, 0, UITER_START);
                tIter.move(&tIter, 0, UITER_START);
                compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);

                if(compareResultUTF8 != compareResultIter) {
                    log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                }
                if(compareResultUTF8 != compareResultUTF8Norm) {
                    log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
                }
            } else { /* probably buffer is not big enough */
                log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
            }
            if(U_FAILURE(status)) {
                log_verbose("UTF-8 strcoll failed! Ignoring result\n");
            }
        }
    }

    /* testing the partial sortkeys */
    if(1) { /*!QUICK*/
        int32_t i = 0;
        int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
        int32_t partialSizesSize = 1;
        if(QUICK <= 0) {
            partialSizesSize = 7;
        }
        for(i = 0; i < partialSizesSize; i++) {
            UCollationResult partialSKResult = result, partialNormalizedSKResult = result;

            partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
            if(partialSKResult != result) {
                log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n",
                    partialSKResult, result,
                    aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
            }

            /* repeat with normalization on, unless it already is or we are in quick mode */
            if(QUICK <= 0 && norm != UCOL_ON) {
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
                partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
                ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
                if(partialSKResult != partialNormalizedSKResult) {
                    log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n",
                        aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
                }
            }
        }
    }

    compareResult  = ucol_strcoll(myCollation, source, sLen, target, tLen);
    compareResulta = ucol_strcoll(myCollation, source, -1,   target, -1);
    if (compareResult != compareResulta) {
        log_err("ucol_strcoll result from null terminated and explicit length strings differs.\n");
    }

    /* a preflight call (NULL buffer, capacity 0) returns the required key length */
    sortklen1=ucol_getSortKey(myCollation, source, sLen,  NULL, 0);
    sortklen2=ucol_getSortKey(myCollation, target, tLen, NULL, 0);

    sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2);

    sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    /* do not hand a NULL buffer to the sort key functions; free(NULL) is harmless */
    if(sortKey1 == NULL || sortKey1a == NULL || sortKey2 == NULL || sortKey2a == NULL) {
        log_err("Unable to allocate %i bytes for the sort key buffers\n", sortklenmax+1);
        free(sortKey1);
        free(sortKey1a);
        free(sortKey2);
        free(sortKey2a);
        return;
    }

    ucol_getSortKey(myCollation, source, sLen, sortKey1,  sortklen1+1);
    ucol_getSortKey(myCollation, source, -1,   sortKey1a, sortklen1+1);
    ucol_getSortKey(myCollation, target, tLen, sortKey2,  sortklen2+1);
    ucol_getSortKey(myCollation, target, -1,   sortKey2a, sortklen2+1);

    /* Check that sort key generated with null terminated string is identical  */
    /*  to that generated with a length specified.                             */
    if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 ||
        uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) {
        log_err("Sort Keys from null terminated and explicit length strings differ.\n");
    }

    /* the keys are compared and measured as NUL-terminated byte strings */
    temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2);
    gSortklen1 = uprv_strlen((const char *)sortKey1)+1;
    gSortklen2 = uprv_strlen((const char *)sortKey2)+1;
    if(sortklen1 != gSortklen1){
        log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey1, buffer, &len));
    }
    if(sortklen2!= gSortklen2){
        log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey2, buffer, &len));
    }

    if(temp < 0) {
        keyResult=UCOL_LESS;
    }
    else if(temp > 0) {
        keyResult= UCOL_GREATER;
    }
    else {
        keyResult = UCOL_EQUAL;
    }
    reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result );

    free(sortKey1);
    free(sortKey2);
    free(sortKey1a);
    free(sortKey2a);
}
void CharIterTest::TestUCharIterator() { // test string of length 8 UnicodeString s=UnicodeString("a \\U00010001b\\U0010fffdz", "").unescape(); const char *const moves= "0+++++++++" // 10 moves per line "----0-----" ">>|>>>>>>>" "<<|<<<<<<<" "22+>8>-8+2"; StringCharacterIterator sci(s), compareCI(s); UCharIterator sIter, cIter, rIter; uiter_setString(&sIter, s.getBuffer(), s.length()); uiter_setCharacterIterator(&cIter, &sci); uiter_setReplaceable(&rIter, &s); TestUCharIterator(&sIter, compareCI, moves, "uiter_setString"); compareCI.setIndex(0); TestUCharIterator(&cIter, compareCI, moves, "uiter_setCharacterIterator"); compareCI.setIndex(0); TestUCharIterator(&rIter, compareCI, moves, "uiter_setReplaceable"); // test move & getIndex some more sIter.start=2; sIter.index=3; sIter.limit=5; if( sIter.getIndex(&sIter, UITER_ZERO)!=0 || sIter.getIndex(&sIter, UITER_START)!=2 || sIter.getIndex(&sIter, UITER_CURRENT)!=3 || sIter.getIndex(&sIter, UITER_LIMIT)!=5 || sIter.getIndex(&sIter, UITER_LENGTH)!=s.length() ) { errln("error: UCharIterator(string).getIndex returns wrong index"); } if( sIter.move(&sIter, 4, UITER_ZERO)!=4 || sIter.move(&sIter, 1, UITER_START)!=3 || sIter.move(&sIter, 3, UITER_CURRENT)!=5 || sIter.move(&sIter, -1, UITER_LIMIT)!=4 || sIter.move(&sIter, -5, UITER_LENGTH)!=3 || sIter.move(&sIter, 0, UITER_CURRENT)!=sIter.getIndex(&sIter, UITER_CURRENT) || sIter.getIndex(&sIter, UITER_CURRENT)!=3 ) { errln("error: UCharIterator(string).move sets/returns wrong index"); } sci=StringCharacterIterator(s, 2, 5, 3); uiter_setCharacterIterator(&cIter, &sci); if( cIter.getIndex(&cIter, UITER_ZERO)!=0 || cIter.getIndex(&cIter, UITER_START)!=2 || cIter.getIndex(&cIter, UITER_CURRENT)!=3 || cIter.getIndex(&cIter, UITER_LIMIT)!=5 || cIter.getIndex(&cIter, UITER_LENGTH)!=s.length() ) { errln("error: UCharIterator(character iterator).getIndex returns wrong index"); } if( cIter.move(&cIter, 4, UITER_ZERO)!=4 || cIter.move(&cIter, 1, UITER_START)!=3 || cIter.move(&cIter, 3, 
UITER_CURRENT)!=5 || cIter.move(&cIter, -1, UITER_LIMIT)!=4 || cIter.move(&cIter, -5, UITER_LENGTH)!=3 || cIter.move(&cIter, 0, UITER_CURRENT)!=cIter.getIndex(&cIter, UITER_CURRENT) || cIter.getIndex(&cIter, UITER_CURRENT)!=3 ) { errln("error: UCharIterator(character iterator).move sets/returns wrong index"); } if(cIter.getIndex(&cIter, (enum UCharIteratorOrigin)-1) != -1) { errln("error: UCharIterator(char iter).getIndex did not return error value"); } if(cIter.move(&cIter, 0, (enum UCharIteratorOrigin)-1) != -1) { errln("error: UCharIterator(char iter).move did not return error value"); } if(rIter.getIndex(&rIter, (enum UCharIteratorOrigin)-1) != -1) { errln("error: UCharIterator(repl iter).getIndex did not return error value"); } if(rIter.move(&rIter, 0, (enum UCharIteratorOrigin)-1) != -1) { errln("error: UCharIterator(repl iter).move did not return error value"); } if(sIter.getIndex(&sIter, (enum UCharIteratorOrigin)-1) != -1) { errln("error: UCharIterator(string iter).getIndex did not return error value"); } if(sIter.move(&sIter, 0, (enum UCharIteratorOrigin)-1) != -1) { errln("error: UCharIterator(string iter).move did not return error value"); } }
/*
 * Thin forwarding wrapper: passes iter, s and length unchanged to
 * uiter_setString() and returns nothing.
 * NOTE(review): presumably exists to give foreign-language bindings a stable
 * symbol name to call - confirm against the binding code.
 */
void __hs_uiter_setString(UCharIterator *iter, const UChar *s, int32_t length) { uiter_setString(iter, s, length); }