Exemplo n.º 1
0
/**
 * Compares two strings by generating their sort keys incrementally with
 * ucol_nextSortKeyPart(), pieceSize bytes at a time, and comparing the pieces.
 *
 * @param coll      collator used to generate the partial sort keys
 * @param source    first string (UTF-16), sLen code units
 * @param sLen      length passed to uiter_setString() for source
 * @param target    second string (UTF-16), tLen code units
 * @param tLen      length passed to uiter_setString() for target
 * @param pieceSize number of sort key bytes requested per step; must be in
 *                  the range 1..512 (the size of the local piece buffers)
 * @param status    reset to U_ZERO_ERROR on entry for a valid pieceSize and then
 *                  passed to ICU; set to U_ILLEGAL_ARGUMENT_ERROR or
 *                  U_BUFFER_OVERFLOW_ERROR when pieceSize is out of range
 * @return UCOL_LESS / UCOL_GREATER / UCOL_EQUAL according to the first differing
 *         piece; UCOL_EQUAL when pieceSize is rejected
 */
UCollationResult 
IntlTestCollator::compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode &status) {
  int32_t partialSKResult = 0;
  // Zero-filled: the final piece may be shorter than pieceSize, yet memcmp()
  // below always looks at pieceSize bytes. Without this the bytes after a short
  // first piece would be indeterminate.
  uint8_t sBuf[512] = { 0 }, tBuf[512] = { 0 };
  UCharIterator sIter, tIter;
  uint32_t sState[2], tState[2];
  int32_t sSize = pieceSize, tSize = pieceSize;
  int32_t i = 0;

  // A non-positive piece size can never make progress, and a piece larger than
  // the local buffers would make ICU and memcmp() run past them.
  if(pieceSize <= 0 || pieceSize > (int32_t)sizeof(sBuf)) {
    log("Partial sortkey piece size %i is not supported by the piece buffers!\n", pieceSize);
    status = (pieceSize <= 0) ? U_ILLEGAL_ARGUMENT_ERROR : U_BUFFER_OVERFLOW_ERROR;
    return UCOL_EQUAL;
  }

  status = U_ZERO_ERROR;
  sState[0] = 0; sState[1] = 0;
  tState[0] = 0; tState[1] = 0;
  // Keep going while both keys still deliver full pieces and no difference was found.
  while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
    // The iterators are re-created for every piece; the position inside the
    // sort key is carried by sState/tState.
    uiter_setString(&sIter, source, sLen);
    uiter_setString(&tIter, target, tLen);
    sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, &status);
    tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, &status);
    
    if(sState[0] != 0 || tState[0] != 0) {
      log("State != 0 : %08X %08X\n", sState[0], tState[0]);
    }
    log("%i ", i++);
    
    partialSKResult = memcmp(sBuf, tBuf, pieceSize);
  }

  if(partialSKResult < 0) {
      return UCOL_LESS;
  } else if(partialSKResult > 0) {
    return UCOL_GREATER;
  } else {
    return UCOL_EQUAL;
  }
}
Exemplo n.º 2
0
/*
 * Exercises the lenient 8-bit UCharIterator (uiter_setLenient8) against the
 * standard UTF-16 string iterator.
 *
 * The same text is provided once as UTF-16 (text) and once as bytes (bytes);
 * both contain unpaired surrogates next to a supplementary code point. The
 * function compares the two iterators, tests state get/set, and finally prints
 * the code units both iterators return when iterating forward.
 */
static void
TestLenient8Iterator() {
    static const UChar text[]={
        0x61, 0x62, 0x63,
        /* dffd 107fd             d801    dffd - in UTF-16, U+107fd=<d801 dffd> */
        0xdffd, 0xd801, 0xdffd, 0xd801, 0xdffd, 
        0x78, 0x79, 0x7a, 0
    };
    static const uint8_t bytes[]={
        0x61, 0x62, 0x63,
        /* dffd            107fd                    d801               dffd - mixture */
        0xed, 0xbf, 0xbd,  0xf0, 0x90, 0x9f, 0xbd,  0xed, 0xa0, 0x81,  0xed, 0xbf, 0xbd,
        0x78, 0x79, 0x7a, 0
    };

    UCharIterator iter1, iter2;
    UChar32 c1, c2;
    int32_t length;

    puts("test a UCharIterator for lenient 8-bit Unicode (accept single surrogates)");

    /* compare the same string between UTF-16 and lenient-8 UCharIterators */
    uiter_setString(&iter1, text, -1);
    /* sizeof(bytes)-1: explicit length without the terminating 0 byte */
    uiter_setLenient8(&iter2, (const char *)bytes, sizeof(bytes)-1);
    compareIterators(&iter1, "UTF16Iterator", &iter2, "Lenient8Iterator");

    /* try again with length=-1 */
    uiter_setLenient8(&iter2, (const char *)bytes, -1);
    compareIterators(&iter1, "UTF16Iterator", &iter2, "Lenient8Iterator_1");

    /* test get/set state: both iterators are lenient-8 iterators over the same bytes */
    length=LENGTHOF(text)-1;
    /* cast like the calls above: bytes is uint8_t[], the parameter is const char * */
    uiter_setLenient8(&iter1, (const char *)bytes, -1);
    testIteratorState(&iter1, &iter2, "Lenient8IteratorState", length/2);
    testIteratorState(&iter1, &iter2, "Lenient8IteratorStatePlus1", length/2+1);

    /* ---------------------------------------------------------------------- */

    puts("no output so far means that the lenient-8 iterator works fine");

    puts("iterate forward:\nUTF-16\tlenient-8");
    uiter_setString(&iter1, text, -1);
    iter1.move(&iter1, 0, UITER_START);
    iter2.move(&iter2, 0, UITER_START);
    for(;;) {
        c1=iter1.next(&iter1);
        c2=iter2.next(&iter2);
        /* a negative value means that the iterator is exhausted */
        if(c1<0 && c2<0) {
            break;
        }
        /* print one column per iterator; leave a column empty once that iterator is done */
        if(c1<0) {
            printf("\t%04x\n", c2);
        } else if(c2<0) {
            printf("%04x\n", c1);
        } else {
            printf("%04x\t%04x\n", c1, c2);
        }
    }
}
Exemplo n.º 3
0
U_CAPI void U_EXPORT2
uiter_setUTF16BE(UCharIterator * iter, const char * s, int32_t length)
{
	if (iter != NULL)
	{
		/* allow only even-length strings (the input length counts bytes) */
		if (s != NULL && (length == -1 || (length >= 0 && IS_EVEN(length))))
		{
			/* length/=2, except that >>=1 also works for -1 (-1/2==0, -1>>1==-1) */
			length >>= 1;

			if (U_IS_BIG_ENDIAN && IS_POINTER_EVEN(s))
			{
				/* big-endian machine and 2-aligned UTF-16BE string: use normal UChar iterator */
				uiter_setString(iter, (const UChar *)s, length);
				return;
			}

			*iter = utf16BEIterator;
			iter->context = s;
			if (length >= 0)
			{
				iter->length = length;
			}
			else
			{
				iter->length = utf16BE_strlen(s);
			}
			iter->limit = iter->length;
		}
		else
		{
Exemplo n.º 4
0
// Builds an ICU UCharIterator over the characters of |string|.
// 16-bit strings are handed to uiter_setString() directly; 8-bit strings are
// delegated to createLatin1Iterator().
UCharIterator createIterator(StringView string)
{
    if (!string.is8Bit()) {
        UCharIterator utf16Iterator;
        uiter_setString(&utf16Iterator, string.characters16(), string.length());
        return utf16Iterator;
    }

    return createLatin1Iterator(string.characters8(), string.length());
}
Exemplo n.º 5
0
/*----------------------------------------------------------------------------------------------
	Convert the Graphite character offset to the decomposed NFD
	character offset used internally by views code.
----------------------------------------------------------------------------------------------*/
int FwGrTxtSrc::GrToVwOffset(int grOffset)
{
	if (!m_useNFC)
	{
		// the Graphite offset is a NFD offset
		return grOffset;
	}
	else
	{
		// convert NFC offsets to internal NFD offsets
		if (grOffset == 0)
			return 0;

		HRESULT hr;
		int cch;
		IgnoreHr(hr = m_qts->get_Length(&cch));
		if (FAILED(hr))
			throw;

		if (grOffset > cch)
			// grOffset points beyond the available text, i.e. is invalid.
			return cch + 10; // arbitrary number that is bigger than NFD text

		StrUni stuNfd;
		wchar_t* pchNfd;
		stuNfd.SetSize(cch + 1, &pchNfd);
		IgnoreHr(hr = m_qts->Fetch(0, cch, pchNfd));
		if (FAILED(hr))
			throw;
		pchNfd[cch] = '\0';

		wchar_t szOut[kNFDBufferSize];
		UCharIterator iter;
		uiter_setString(&iter, pchNfd, -1);
		int curGrOffset = 0;
		while (iter.hasNext(&iter))
		{
			int index = iter.getIndex(&iter, UITER_CURRENT);
			if (curGrOffset >= grOffset)
				return index;
			UBool neededToNormalize;
			UErrorCode uerr = U_ZERO_ERROR;
			int outLen = unorm_next(&iter, szOut, kNFDBufferSize, UNORM_NFC, 0, TRUE, &neededToNormalize, &uerr);
			Assert(U_SUCCESS(uerr));
			curGrOffset++;
			for (int i = 1; i < outLen; i++)
			{
				if (curGrOffset >= grOffset)
					return index + i;
				curGrOffset++;
			}
		}
		return iter.getIndex(&iter, UITER_CURRENT);
	}
}
Exemplo n.º 6
0
int FwGrTxtSrc::VwToGrOffset(int vwOffset, bool& badOffset)
{
	badOffset = false;
	if (!m_useNFC)
	{
		// the NFD offset is a Graphite offset
		return vwOffset;
	}
	else
	{
		// convert internal NFD offsets to NFC offsets
		if (vwOffset == 0)
			return 0;

		HRESULT hr;
		int cch;
		IgnoreHr(hr = m_qts->get_Length(&cch));
		if (FAILED(hr))
			throw;
		if (vwOffset > cch)
			return vwOffset;

		StrUni stuNfd;
		wchar_t* pchNfd;
		stuNfd.SetSize(cch + 1, &pchNfd);
		IgnoreHr(hr = m_qts->Fetch(0, cch, pchNfd));
		if (FAILED(hr))
			throw;
		pchNfd[cch] = '\0';

		wchar_t szOut[kNFDBufferSize];
		UCharIterator iter;
		uiter_setString(&iter, pchNfd, -1);
		int curGrOffset = 0;
		while (iter.hasNext(&iter))
		{
			int index = iter.getIndex(&iter, UITER_CURRENT);
			UBool neededToNormalize;
			UErrorCode uerr = U_ZERO_ERROR;
			int outLen = unorm_next(&iter, szOut, kNFDBufferSize, UNORM_NFC, 0, TRUE, &neededToNormalize, &uerr);
			Assert(U_SUCCESS(uerr));
			for (int i = 0; i < outLen; i++)
			{
				if (index + i + 1 > vwOffset)
					return curGrOffset;
				curGrOffset++;
			}
			if (neededToNormalize && iter.getIndex(&iter, UITER_CURRENT) > vwOffset)
				badOffset = true;
		}
		return curGrOffset;
	}
}
Exemplo n.º 7
0
/*
 * Compares two strings by generating their sort keys incrementally with
 * ucol_nextSortKeyPart(), pieceSize bytes at a time, and comparing the pieces.
 *
 * coll       collator used to generate the partial sort keys
 * source     first string, sLen code units (as passed to uiter_setString)
 * target     second string, tLen code units (as passed to uiter_setString)
 * pieceSize  number of sort key bytes requested per step; must be 1..16384
 * status     reset to U_ZERO_ERROR for a valid pieceSize and then passed to ICU;
 *            set to U_BUFFER_OVERFLOW_ERROR / U_ILLEGAL_ARGUMENT_ERROR when
 *            pieceSize is too large / not positive
 *
 * Returns UCOL_LESS, UCOL_GREATER or UCOL_EQUAL according to the first differing
 * piece; UCOL_EQUAL when pieceSize is rejected.
 */
static UCollationResult compareUsingPartials(UCollator *coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode *status) {
  int32_t partialSKResult = 0;
  UCharIterator sIter, tIter;
  uint32_t sState[2], tState[2];
  int32_t sSize = pieceSize, tSize = pieceSize;
  uint8_t sBuf[16384], tBuf[16384];
  if(pieceSize > 16384) {
    log_err("Partial sortkey size buffer too small. Please consider increasing the buffer!\n");
    *status = U_BUFFER_OVERFLOW_ERROR;
    return UCOL_EQUAL;
  }
  /* A non-positive piece size can never make progress and would turn into a
     huge size_t in memcmp() below. */
  if(pieceSize <= 0) {
    log_err("Partial sortkey piece size must be positive!\n");
    *status = U_ILLEGAL_ARGUMENT_ERROR;
    return UCOL_EQUAL;
  }
  *status = U_ZERO_ERROR;
  sState[0] = 0; sState[1] = 0;
  tState[0] = 0; tState[1] = 0;
  /* The last piece may be shorter than pieceSize while memcmp() always looks at
     pieceSize bytes; clear that range once so a short first piece is never
     compared against indeterminate bytes. */
  memset(sBuf, 0, (size_t)pieceSize);
  memset(tBuf, 0, (size_t)pieceSize);
  /* Keep going while both keys still deliver full pieces and no difference was found. */
  while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
    /* The iterators are re-created for every piece; the position inside the
       sort key is carried by sState/tState. */
    uiter_setString(&sIter, source, sLen);
    uiter_setString(&tIter, target, tLen);
    sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, status);
    tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, status);
    
    partialSKResult = memcmp(sBuf, tBuf, pieceSize);
  }

  if(partialSKResult < 0) {
      return UCOL_LESS;
  } else if(partialSKResult > 0) {
    return UCOL_GREATER;
  } else {
    return UCOL_EQUAL;
  }
}
/// Compares the strings via the Unicode Collation Algorithm on the root locale.
///
/// \param LeftString   UTF-8 encoded left operand.
/// \param LeftLength   Length handed to uiter_setUTF8() for LeftString.
/// \param RightString  UTF-16 encoded right operand.
/// \param RightLength  Length handed to uiter_setString() for RightString.
///
/// Results are the usual string comparison results:
///  <0 the left string is less than the right string.
/// ==0 the strings are equal according to their collation.
///  >0 the left string is greater than the right string.
///
/// Crashes via swift::crash() when ICU reports a failure.
int32_t
swift::_swift_stdlib_unicode_compare_utf8_utf16(const char *LeftString,
                                                int32_t LeftLength,
                                                const uint16_t *RightString,
                                                int32_t RightLength) {
  UCharIterator LeftIterator;
  UCharIterator RightIterator;
  UErrorCode ErrorCode = U_ZERO_ERROR;

  uiter_setUTF8(&LeftIterator, LeftString, LeftLength);
#if defined(__CYGWIN__) || defined(_MSC_VER) || defined(__MINGW32__)
  // On these toolchains the uint16_t pointer is not accepted as a UChar pointer
  // without a cast.
  uiter_setString(&RightIterator, reinterpret_cast<const UChar *>(RightString),
                  RightLength);
#else
  uiter_setString(&RightIterator, RightString, RightLength);
#endif

  // Keep the result signed: ucol_strcollIter() can report a negative value
  // ("less"), which must not take a detour through an unsigned type.
  int32_t Diff = ucol_strcollIter(GetRootCollator(),
    &LeftIterator, &RightIterator, &ErrorCode);
  if (U_FAILURE(ErrorCode)) {
    swift::crash("ucol_strcollIter: Unexpected error doing utf8<->utf16 string comparison.");
  }
  return Diff;
}
Exemplo n.º 9
0
/*
 * Sets up iter as a lenient-8 iterator over the byte string s.
 *
 * iter    iterator to initialize; nothing happens when it is NULL
 * s       byte string; when NULL, iter becomes a no-op iterator
 * length  number of bytes in s, or -1 to use strlen(s); any value below -1
 *         also yields the no-op iterator
 */
U_CAPI void U_EXPORT2
uiter_setLenient8(UCharIterator *iter, const char *s, int32_t length) {
    if(iter==NULL) {
        return;
    }

    if(s==NULL || length<-1) {
        /* set no-op iterator */
        uiter_setString(iter, NULL, 0);
        return;
    }

    *iter=lenient8Iterator;
    iter->context=s;
    iter->limit= length>=0 ? length : (int32_t)strlen(s);

    /* the length field is only filled in for inputs of at most one byte, otherwise it is -1 */
    if(iter->limit<=1) {
        iter->length=iter->limit;
    } else {
        iter->length=-1;
    }
}
Exemplo n.º 10
0
int32_t _swift_stdlib_unicode_compare_utf8_utf16(const char *LeftString,
                                                 int32_t LeftLength,
                                                 const uint16_t *RightString,
                                                 int32_t RightLength) {
  UCharIterator LeftIterator;
  UCharIterator RightIterator;
  UErrorCode ErrorCode = U_ZERO_ERROR;

  uiter_setUTF8(&LeftIterator, LeftString, LeftLength);
  uiter_setString(&RightIterator, RightString, RightLength);

  uint32_t Diff = ucol_strcollIter(GetRootCollator(),
    &LeftIterator, &RightIterator, &ErrorCode);
  if (U_FAILURE(ErrorCode)) {
    swift::crash("ucol_strcollIter: Unexpected error doing utf8<->utf16 string comparison.");
  }
  return Diff;
}
Exemplo n.º 11
0
// Ticket 7189
//
// nextSortKeyPart incorrect for EO_S1 collation
static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t /*keyBufLen*/, UErrorCode& status) {
    UCharIterator uiter;
    uint32_t state[2] = { 0, 0 };
    int32_t keyLen;
    int32_t count = 8;

    uiter_setString(&uiter, text, len);
    keyLen = 0;
    while (TRUE) {
        int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status);
        if (U_FAILURE(status)) {
            return -1;
        }
        if (keyPartLen == 0) {
            break;
        }
        keyLen += keyPartLen;
    }
    return keyLen;
}
Exemplo n.º 12
0
// Tests the three C-level UCharIterator flavours (over a string buffer, over a
// CharacterIterator and over a Replaceable) built on the same UnicodeString:
// first through the scripted TestUCharIterator(iter, ci, moves, which) overload,
// then with direct getIndex()/move() checks, invalid origins, and finally
// uiter_setState()/uiter_getState() on bad input.
void CharIterTest::TestUCharIterator() {
    // test string of length 8
    UnicodeString s=UnicodeString("a \\U00010001b\\U0010fffdz", "").unescape();
    // Script of iterator operations that is interpreted by the
    // TestUCharIterator() overload called below (not part of this function).
    const char *const moves=
        "0+++++++++" // 10 moves per line
        "----0-----"
        ">>|>>>>>>>"
        "<<|<<<<<<<"
        "22+>8>-8+2";

    StringCharacterIterator sci(s), compareCI(s);

    UCharIterator sIter, cIter, rIter;

    uiter_setString(&sIter, s.getBuffer(), s.length());
    uiter_setCharacterIterator(&cIter, &sci);
    uiter_setReplaceable(&rIter, &s);

    // compareCI is reset to index 0 before each further run.
    TestUCharIterator(&sIter, compareCI, moves, "uiter_setString");
    compareCI.setIndex(0);
    TestUCharIterator(&cIter, compareCI, moves, "uiter_setCharacterIterator");
    compareCI.setIndex(0);
    TestUCharIterator(&rIter, compareCI, moves, "uiter_setReplaceable");

    // test move & getIndex some more
    // Restrict the string iterator to [2, 5) with the current index at 3 by
    // writing its fields directly.
    sIter.start=2;
    sIter.index=3;
    sIter.limit=5;
    if( sIter.getIndex(&sIter, UITER_ZERO)!=0 ||
        sIter.getIndex(&sIter, UITER_START)!=2 ||
        sIter.getIndex(&sIter, UITER_CURRENT)!=3 ||
        sIter.getIndex(&sIter, UITER_LIMIT)!=5 ||
        sIter.getIndex(&sIter, UITER_LENGTH)!=s.length()
    ) {
        errln("error: UCharIterator(string).getIndex returns wrong index");
    }

    // The move() calls are evaluated left to right and each one starts from the
    // position left by the previous one, so their order matters.
    if( sIter.move(&sIter, 4, UITER_ZERO)!=4 ||
        sIter.move(&sIter, 1, UITER_START)!=3 ||
        sIter.move(&sIter, 3, UITER_CURRENT)!=5 ||
        sIter.move(&sIter, -1, UITER_LIMIT)!=4 ||
        sIter.move(&sIter, -5, UITER_LENGTH)!=3 ||
        sIter.move(&sIter, 0, UITER_CURRENT)!=sIter.getIndex(&sIter, UITER_CURRENT) ||
        sIter.getIndex(&sIter, UITER_CURRENT)!=3
    ) {
        errln("error: UCharIterator(string).move sets/returns wrong index");
    }

    // Same checks for the CharacterIterator-backed iterator, using a
    // StringCharacterIterator constructed with the arguments (s, 2, 5, 3).
    sci=StringCharacterIterator(s, 2, 5, 3);
    uiter_setCharacterIterator(&cIter, &sci);
    if( cIter.getIndex(&cIter, UITER_ZERO)!=0 ||
        cIter.getIndex(&cIter, UITER_START)!=2 ||
        cIter.getIndex(&cIter, UITER_CURRENT)!=3 ||
        cIter.getIndex(&cIter, UITER_LIMIT)!=5 ||
        cIter.getIndex(&cIter, UITER_LENGTH)!=s.length()
    ) {
        errln("error: UCharIterator(character iterator).getIndex returns wrong index");
    }

    if( cIter.move(&cIter, 4, UITER_ZERO)!=4 ||
        cIter.move(&cIter, 1, UITER_START)!=3 ||
        cIter.move(&cIter, 3, UITER_CURRENT)!=5 ||
        cIter.move(&cIter, -1, UITER_LIMIT)!=4 ||
        cIter.move(&cIter, -5, UITER_LENGTH)!=3 ||
        cIter.move(&cIter, 0, UITER_CURRENT)!=cIter.getIndex(&cIter, UITER_CURRENT) ||
        cIter.getIndex(&cIter, UITER_CURRENT)!=3
    ) {
        errln("error: UCharIterator(character iterator).move sets/returns wrong index");
    }


    // An invalid origin value (-1) is expected to make getIndex() and move()
    // return -1 for every iterator flavour.
    if(cIter.getIndex(&cIter, (enum UCharIteratorOrigin)-1) != -1)
    {
        errln("error: UCharIterator(char iter).getIndex did not return error value");
    }

    if(cIter.move(&cIter, 0, (enum UCharIteratorOrigin)-1) != -1)
    {
        errln("error: UCharIterator(char iter).move did not return error value");
    }


    if(rIter.getIndex(&rIter, (enum UCharIteratorOrigin)-1) != -1)
    {
        errln("error: UCharIterator(repl iter).getIndex did not return error value");
    }

    if(rIter.move(&rIter, 0, (enum UCharIteratorOrigin)-1) != -1)
    {
        errln("error: UCharIterator(repl iter).move did not return error value");
    }


    if(sIter.getIndex(&sIter, (enum UCharIteratorOrigin)-1) != -1)
    {
        errln("error: UCharIterator(string iter).getIndex did not return error value");
    }

    if(sIter.move(&sIter, 0, (enum UCharIteratorOrigin)-1) != -1)
    {
        errln("error: UCharIterator(string iter).move did not return error value");
    }

    /* Testing function coverage on bad input */
    UErrorCode status = U_ZERO_ERROR;
    // Deliberately inconsistent arguments: NULL text with a length of 1.
    uiter_setString(&sIter, NULL, 1);
    uiter_setState(&sIter, 1, &status);
    if (status != U_UNSUPPORTED_ERROR) {
        errln("error: uiter_setState returned %s instead of U_UNSUPPORTED_ERROR", u_errorName(status));
    }
    status = U_ZERO_ERROR;
    uiter_setState(NULL, 1, &status);
    if (status != U_ILLEGAL_ARGUMENT_ERROR) {
        errln("error: uiter_setState returned %s instead of U_ILLEGAL_ARGUMENT_ERROR", u_errorName(status));
    }
    if (uiter_getState(&sIter) != UITER_NO_STATE) {
        errln("error: uiter_getState did not return UITER_NO_STATE on bad input");
    }
}
Exemplo n.º 13
0
// Runs one source/target comparison through several collation code paths and
// reports an error whenever a path disagrees with the expected result:
//   - Collator::compare() and CollationKey comparison (handed to reportCResult()),
//   - ucol_strcollIter() with UTF-16 string iterators,
//   - ucol_strcollIter() with UTF-8 iterators, with and without normalization
//     (skipped in quick mode),
//   - partial sort keys via compareUsingPartials() for several piece sizes.
//
// col     collator under test
// source  left operand
// target  right operand
// result  expected comparison result
void 
IntlTestCollator::doTestVariant(Collator* col, const UnicodeString &source, const UnicodeString &target, Collator::EComparisonResult result)
{   
  UErrorCode status = U_ZERO_ERROR;

  UCollator *myCollation = col->toUCollator();

  Collator::EComparisonResult compareResult = col->compare(source, target);

  CollationKey srckey, tgtkey;
  col->getCollationKey(source, srckey, status);
  col->getCollationKey(target, tgtkey, status);
  if (U_FAILURE(status)){
    errln("Creation of collation keys failed\n");
  }
  Collator::EComparisonResult keyResult = srckey.compareTo(tgtkey);

  reportCResult(source, target, srckey, tgtkey, compareResult, keyResult, result, result);

    // Remember the current normalization mode so that it can be restored after
    // the sections below switch normalization on temporarily.
    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);

    int32_t sLen = source.length(), tLen = target.length();
    const UChar* src = source.getBuffer();
    const UChar* trg = target.getBuffer();
    UCollationResult compareResultIter = (UCollationResult)result;

    // Iterative comparison over the UTF-16 buffers.
    {
      UCharIterator sIter, tIter;
      uiter_setString(&sIter, src, sLen);
      uiter_setString(&tIter, trg, tLen);
      compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
      if(compareResultIter != (UCollationResult)result) {
        errln("Different result for iterative comparison "+source+" "+target);
      }
    }
    /* convert the strings to UTF-8 and do try comparing with char iterator */
    if(!quick) { /*!QUICK*/
      char utf8Source[256], utf8Target[256];
      int32_t utf8SourceLen = 0, utf8TargetLen = 0;
      u_strToUTF8(utf8Source, 256, &utf8SourceLen, src, sLen, &status);
      if(U_FAILURE(status)) { /* probably buffer is not big enough */
        log("Src UTF-8 buffer too small! Will not compare!\n");
      } else {
        u_strToUTF8(utf8Target, 256, &utf8TargetLen, trg, tLen, &status);
        if(U_SUCCESS(status)) { /* both conversions succeeded */
          UCollationResult compareResultUTF8 = (UCollationResult)result, compareResultUTF8Norm = (UCollationResult)result;
          UCharIterator sIter, tIter;
          /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
          uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
          uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
       /*uiter_setString(&sIter, source, sLen);
      uiter_setString(&tIter, target, tLen);*/
          compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
          // Second pass with normalization switched on; the iterators are
          // rewound first and the saved mode is restored afterwards.
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
          sIter.move(&sIter, 0, UITER_START);
          tIter.move(&tIter, 0, UITER_START);
          compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
          if(compareResultUTF8 != compareResultIter) {
            errln("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. "+source+", "+target);
          }
          if(compareResultUTF8 != compareResultUTF8Norm) {
            errln("different results in iterative when normalization is turned on with UTF-8 strings. "+source+", "+target);
          }
        } else {
          log("Target UTF-8 buffer too small! Did not compare!\n");
        }
        // NOTE(review): this is also reached when the target conversion above
        // failed, so the message below follows "Target UTF-8 buffer too small!".
        if(U_FAILURE(status)) {
          log("UTF-8 strcoll failed! Ignoring result\n");
        }
      }
    }

    /* testing the partial sortkeys */
    { /*!QUICK*/
      int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
      int32_t partialSizesSize = 1;
      if(!quick) {
        partialSizesSize = 7;
      }
      int32_t i = 0;
      log("partial sortkey test piecesize=");
      for(i = 0; i < partialSizesSize; i++) {
        UCollationResult partialSKResult = (UCollationResult)result, partialNormalizedSKResult = (UCollationResult)result;
        log("%i ", partialSizes[i]);

        partialSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
        if(partialSKResult != (UCollationResult)result) {
          errln("Partial sortkey comparison returned wrong result: "+source+", "+target+" (size "+partialSizes[i]+")");           
        }

        // Repeat with normalization on (full mode only, and only when it is not
        // on already); the saved mode is restored right after.
        if(norm != UCOL_ON && !quick) {
          log("N ");
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
          partialNormalizedSKResult = compareUsingPartials(myCollation, src, sLen, trg, tLen, partialSizes[i], status);
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
          if(partialSKResult != partialNormalizedSKResult) {
            errln("Partial sortkey comparison gets different result when normalization is on: "+source+", "+target+" (size "+partialSizes[i]+")");           
          }
        }
      }
      log("\n");
    }
/*
  if (compareResult != result) {
    errln("String comparison failed in variant test\n");
  }
  if (keyResult != result) {
    errln("Collation key comparison failed in variant test\n");
  }
*/
}
Exemplo n.º 14
0
/*
 * Runs one source/target comparison through several collation code paths and
 * logs an error whenever a path disagrees with the expected result:
 *   - ucol_strcollIter() with UTF-16 string iterators,
 *   - ucol_strcollIter() with UTF-8 iterators, with and without normalization
 *     (only when QUICK <= 0),
 *   - partial sort keys via compareUsingPartials() for several piece sizes,
 *   - ucol_strcoll() with explicit lengths and with -1 (NUL-terminated),
 *   - full sort keys from ucol_getSortKey(), compared as C strings.
 * The individual results are finally handed to reportCResult().
 *
 * myCollation  collator under test
 * source       left operand, NUL-terminated (its length is taken with u_strlen)
 * target       right operand, NUL-terminated
 * result       expected comparison result
 */
static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
{
    int32_t sortklen1, sortklen2, sortklenmax, sortklenmin;
    int temp=0, gSortklen1=0,gSortklen2=0;
    UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result;
    uint8_t *sortKey1, *sortKey2, *sortKey1a, *sortKey2a;
    uint32_t sLen = u_strlen(source);
    uint32_t tLen = u_strlen(target);
    char buffer[256];
    uint32_t len;
    UErrorCode status = U_ZERO_ERROR;
    /* Remember the current normalization mode so it can be restored after the
       sections below switch normalization on temporarily. */
    UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);

    /* Iterative comparison over the UTF-16 strings. */
    UCharIterator sIter, tIter;
    uiter_setString(&sIter, source, sLen);
    uiter_setString(&tIter, target, tLen);
    compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
    if(compareResultIter != result) {
      log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
    }

    /* convert the strings to UTF-8 and do try comparing with char iterator */
    if(QUICK <= 0) { /*!QUICK*/
      char utf8Source[256], utf8Target[256];
      int32_t utf8SourceLen = 0, utf8TargetLen = 0;
      u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status);
      if(U_FAILURE(status)) { /* probably buffer is not big enough */
        log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
      } else {
        u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status);
        if(U_SUCCESS(status)) { /* both conversions succeeded */
          UCollationResult compareResultUTF8 = result, compareResultUTF8Norm = result;
          /*UCharIterator sIter, tIter;*/
          /*log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));*/
          /* The iterators declared above are re-targeted at the UTF-8 copies. */
          uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
          uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
       /*uiter_setString(&sIter, source, sLen);
      uiter_setString(&tIter, target, tLen);*/
          compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
          /* Second pass with normalization switched on; the iterators are
             rewound first and the saved mode is restored afterwards. */
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
          sIter.move(&sIter, 0, UITER_START);
          tIter.move(&tIter, 0, UITER_START);
          compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
          if(compareResultUTF8 != compareResultIter) {
            log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
          }
          if(compareResultUTF8 != compareResultUTF8Norm) {
            log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
          }
        } else {
          log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
        }
        /* NOTE(review): this is also reached when the target conversion above
           failed, so the message below follows "Target UTF-8 buffer too small!". */
        if(U_FAILURE(status)) {
          log_verbose("UTF-8 strcoll failed! Ignoring result\n");
        }
      }
    }

    /* testing the partial sortkeys */
    if(1) { /*!QUICK*/
      int32_t i = 0;
      int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
      int32_t partialSizesSize = 1;
      if(QUICK <= 0) {
        partialSizesSize = 7;
      }
      /*log_verbose("partial sortkey test piecesize=");*/
      for(i = 0; i < partialSizesSize; i++) {
        UCollationResult partialSKResult = result, partialNormalizedSKResult = result;
        /*log_verbose("%i ", partialSizes[i]);*/

        partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
        if(partialSKResult != result) {
          log_err("Partial sortkey comparison returned wrong result (%i exp. %i): %s, %s (size %i)\n", 
            partialSKResult, result,
            aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
        }

        /* Repeat with normalization on (full mode only, and only when it is
           not on already); the saved mode is restored right after. */
        if(QUICK <= 0 && norm != UCOL_ON) {
          /*log_verbose("N ");*/
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
          partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
          ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
          if(partialSKResult != partialNormalizedSKResult) {
            log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n", 
              aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
          }
        }
      }
      /*log_verbose("\n");*/
    }

    
    /* ucol_strcoll() must agree between explicit lengths and -1 lengths. */
    compareResult  = ucol_strcoll(myCollation, source, sLen, target, tLen);
    compareResulta = ucol_strcoll(myCollation, source, -1,   target, -1); 
    if (compareResult != compareResulta) {
        log_err("ucol_strcoll result from null terminated and explicit length strings differs.\n");
    }

    /* First pass with a NULL buffer only asks for the required key lengths. */
    sortklen1=ucol_getSortKey(myCollation, source, sLen,  NULL, 0);
    sortklen2=ucol_getSortKey(myCollation, target, tLen,  NULL, 0);

    sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2);
    sortklenmin = (sortklen1<sortklen2?sortklen1:sortklen2);

    /* NOTE(review): the malloc() results are used without a NULL check. */
    sortKey1 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey1a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    ucol_getSortKey(myCollation, source, sLen, sortKey1,  sortklen1+1);
    ucol_getSortKey(myCollation, source, -1,   sortKey1a, sortklen1+1);
    
    sortKey2 =(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    sortKey2a=(uint8_t*)malloc(sizeof(uint8_t) * (sortklenmax+1));
    ucol_getSortKey(myCollation, target, tLen, sortKey2,  sortklen2+1);
    ucol_getSortKey(myCollation, target, -1,   sortKey2a, sortklen2+1);

    /* Check that sort key generated with null terminated string is identical  */
    /*  to that generated with a length specified.                             */
    /* The keys are compared as C strings, i.e. up to their first zero byte.   */
    if (uprv_strcmp((const char *)sortKey1, (const char *)sortKey1a) != 0 ||
        uprv_strcmp((const char *)sortKey2, (const char *)sortKey2a) != 0 ) {
        log_err("Sort Keys from null terminated and explicit length strings differ.\n");
    }

    /*memcmp(sortKey1, sortKey2,sortklenmax);*/
    temp= uprv_strcmp((const char *)sortKey1, (const char *)sortKey2);
    /* The length reported by ucol_getSortKey() is expected to equal the C
       string length of the key plus one for the terminating zero byte. */
    gSortklen1 = uprv_strlen((const char *)sortKey1)+1;
    gSortklen2 = uprv_strlen((const char *)sortKey2)+1;
    if(sortklen1 != gSortklen1){
        log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey1, buffer, &len));
    }
    if(sortklen2!= gSortklen2){
        log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2);
        log_verbose("Generated sortkey: %s\n", sortKeyToString(myCollation, sortKey2, buffer, &len));
    }

    /* Map the strcmp-style result onto a UCollationResult. */
    if(temp < 0) {
        keyResult=UCOL_LESS;
    }
    else if(temp > 0) {
        keyResult= UCOL_GREATER;
    }
    else {
        keyResult = UCOL_EQUAL;
    }
    reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result );
    free(sortKey1);
    free(sortKey2);
    free(sortKey1a);
    free(sortKey2a);

}
Exemplo n.º 15
0
// Tests the three C-level UCharIterator flavours (over a string buffer, over a
// CharacterIterator and over a Replaceable) built on the same UnicodeString:
// first through the scripted TestUCharIterator(iter, ci, moves, which) overload,
// then with direct getIndex()/move() checks and invalid origin values.
void CharIterTest::TestUCharIterator() {
    // test string of length 8
    UnicodeString s=UnicodeString("a \\U00010001b\\U0010fffdz", "").unescape();
    // Script of iterator operations that is interpreted by the
    // TestUCharIterator() overload called below (not part of this function).
    const char *const moves=
        "0+++++++++" // 10 moves per line
        "----0-----"
        ">>|>>>>>>>"
        "<<|<<<<<<<"
        "22+>8>-8+2";

    StringCharacterIterator sci(s), compareCI(s);

    UCharIterator sIter, cIter, rIter;

    uiter_setString(&sIter, s.getBuffer(), s.length());
    uiter_setCharacterIterator(&cIter, &sci);
    uiter_setReplaceable(&rIter, &s);

    // compareCI is reset to index 0 before each further run.
    TestUCharIterator(&sIter, compareCI, moves, "uiter_setString");
    compareCI.setIndex(0);
    TestUCharIterator(&cIter, compareCI, moves, "uiter_setCharacterIterator");
    compareCI.setIndex(0);
    TestUCharIterator(&rIter, compareCI, moves, "uiter_setReplaceable");

    // test move & getIndex some more
    // Restrict the string iterator to [2, 5) with the current index at 3 by
    // writing its fields directly.
    sIter.start=2;
    sIter.index=3;
    sIter.limit=5;
    if( sIter.getIndex(&sIter, UITER_ZERO)!=0 ||
        sIter.getIndex(&sIter, UITER_START)!=2 ||
        sIter.getIndex(&sIter, UITER_CURRENT)!=3 ||
        sIter.getIndex(&sIter, UITER_LIMIT)!=5 ||
        sIter.getIndex(&sIter, UITER_LENGTH)!=s.length()
    ) {
        errln("error: UCharIterator(string).getIndex returns wrong index");
    }

    // The move() calls are evaluated left to right and each one starts from the
    // position left by the previous one, so their order matters.
    if( sIter.move(&sIter, 4, UITER_ZERO)!=4 ||
        sIter.move(&sIter, 1, UITER_START)!=3 ||
        sIter.move(&sIter, 3, UITER_CURRENT)!=5 ||
        sIter.move(&sIter, -1, UITER_LIMIT)!=4 ||
        sIter.move(&sIter, -5, UITER_LENGTH)!=3 ||
        sIter.move(&sIter, 0, UITER_CURRENT)!=sIter.getIndex(&sIter, UITER_CURRENT) ||
        sIter.getIndex(&sIter, UITER_CURRENT)!=3
    ) {
        errln("error: UCharIterator(string).move sets/returns wrong index");
    }

    // Same checks for the CharacterIterator-backed iterator, using a
    // StringCharacterIterator constructed with the arguments (s, 2, 5, 3).
    sci=StringCharacterIterator(s, 2, 5, 3);
    uiter_setCharacterIterator(&cIter, &sci);
    if( cIter.getIndex(&cIter, UITER_ZERO)!=0 ||
        cIter.getIndex(&cIter, UITER_START)!=2 ||
        cIter.getIndex(&cIter, UITER_CURRENT)!=3 ||
        cIter.getIndex(&cIter, UITER_LIMIT)!=5 ||
        cIter.getIndex(&cIter, UITER_LENGTH)!=s.length()
    ) {
        errln("error: UCharIterator(character iterator).getIndex returns wrong index");
    }

    if( cIter.move(&cIter, 4, UITER_ZERO)!=4 ||
        cIter.move(&cIter, 1, UITER_START)!=3 ||
        cIter.move(&cIter, 3, UITER_CURRENT)!=5 ||
        cIter.move(&cIter, -1, UITER_LIMIT)!=4 ||
        cIter.move(&cIter, -5, UITER_LENGTH)!=3 ||
        cIter.move(&cIter, 0, UITER_CURRENT)!=cIter.getIndex(&cIter, UITER_CURRENT) ||
        cIter.getIndex(&cIter, UITER_CURRENT)!=3
    ) {
        errln("error: UCharIterator(character iterator).move sets/returns wrong index");
    }


    // An invalid origin value (-1) is expected to make getIndex() and move()
    // return -1 for every iterator flavour.
    if(cIter.getIndex(&cIter, (enum UCharIteratorOrigin)-1) != -1)
    {
      errln("error: UCharIterator(char iter).getIndex did not return error value");
    }

    if(cIter.move(&cIter, 0, (enum UCharIteratorOrigin)-1) != -1)
    {
      errln("error: UCharIterator(char iter).move did not return error value");
    }


    if(rIter.getIndex(&rIter, (enum UCharIteratorOrigin)-1) != -1)
    {
      errln("error: UCharIterator(repl iter).getIndex did not return error value");
    }

    if(rIter.move(&rIter, 0, (enum UCharIteratorOrigin)-1) != -1)
    {
      errln("error: UCharIterator(repl iter).move did not return error value");
    }


    if(sIter.getIndex(&sIter, (enum UCharIteratorOrigin)-1) != -1)
    {
      errln("error: UCharIterator(string iter).getIndex did not return error value");
    }

    if(sIter.move(&sIter, 0, (enum UCharIteratorOrigin)-1) != -1)
    {
      errln("error: UCharIterator(string iter).move did not return error value");
    }

}
Exemplo n.º 16
0
/* Out-of-line wrapper that forwards its arguments unchanged to uiter_setString(). */
void __hs_uiter_setString(UCharIterator *iter, const UChar *s, int32_t length)
{
    uiter_setString(iter, s, length);
}