static int32_t _iterate(UCharIterator *src, UBool forward, UChar *dest, int32_t destCapacity, const Normalizer2 *n2, UBool doNormalize, UBool *pNeededToNormalize, UErrorCode *pErrorCode) { if(U_FAILURE(*pErrorCode)) { return 0; } if(destCapacity<0 || (dest==NULL && destCapacity>0) || src==NULL) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } if(pNeededToNormalize!=NULL) { *pNeededToNormalize=FALSE; } if(!(forward ? src->hasNext(src) : src->hasPrevious(src))) { return u_terminateUChars(dest, destCapacity, 0, pErrorCode); } UnicodeString buffer; UChar32 c; if(forward) { /* get one character and ignore its properties */ buffer.append(uiter_next32(src)); /* get all following characters until we see a boundary */ while((c=uiter_next32(src))>=0) { if(n2->hasBoundaryBefore(c)) { /* back out the latest movement to stop at the boundary */ src->move(src, -U16_LENGTH(c), UITER_CURRENT); break; } else { buffer.append(c); } } } else { while((c=uiter_previous32(src))>=0) { /* always write this character to the front of the buffer */ buffer.insert(0, c); /* stop if this just-copied character is a boundary */ if(n2->hasBoundaryBefore(c)) { break; } } } UnicodeString destString(dest, 0, destCapacity); if(buffer.length()>0 && doNormalize) { n2->normalize(buffer, destString, *pErrorCode).extract(dest, destCapacity, *pErrorCode); if(pNeededToNormalize!=NULL && U_SUCCESS(*pErrorCode)) { *pNeededToNormalize= destString!=buffer; } return destString.length(); } else { /* just copy the source characters */ return buffer.extract(dest, destCapacity, *pErrorCode); } }
/**
 * Walks backward from the current iterator position to find the previous
 * text segment, checking it against the FCD condition as it goes.
 *
 * If the check passes, the iterator is moved forward again by the length of
 * the collected text, start is set, and the state becomes
 * ITER_IN_FCD_SEGMENT. If the check fails, the text back to the previous FCD
 * boundary is collected, put into text order and passed to normalize();
 * start/limit are set around it, the state becomes IN_NORM_ITER_AT_START and
 * pos is set to the end of the normalized string.
 *
 * @param errorCode in/out error code; the function returns FALSE at once if
 *                  it already indicates a failure
 * @return TRUE if a segment was set up, FALSE on a prior failure or if
 *         normalize() returns FALSE
 */
UBool
FCDUIterCollationIterator::previousSegment(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    U_ASSERT(state == ITER_CHECK_BWD);
    // The input text [(iter index)..limit[ has already passed the FCD check.
    pos = iter.getIndex(&iter, UITER_CURRENT);
    // Code points visited so far (in backward order), kept in case they must be normalized.
    UnicodeString visited;
    // Upper byte of the fcd16 value of the code point that follows in the text (0 at first).
    uint8_t followingCC = 0;
    for(;;) {
        // Read the preceding code point and look up its fcd16 value.
        const UChar32 cp = uiter_previous32(&iter);
        if(cp < 0) {
            break;
        }
        uint16_t fcd16 = nfcImpl.getFCD16(cp);
        const uint8_t trailCC = static_cast<uint8_t>(fcd16);
        if(trailCC == 0 && !visited.isEmpty()) {
            // There is an FCD boundary after this code point: step back over it and stop.
            uiter_next32(&iter);
            break;
        }
        visited.append(cp);

        // The FCD check fails on an out-of-order pair or on a Tibetan composite vowel.
        bool failsFCD = false;
        if(trailCC != 0) {
            failsFCD = (followingCC != 0 && trailCC > followingCC) ||
                       CollationFCD::isFCD16OfTibetanCompositeVowel(fcd16);
        }
        if(failsFCD) {
            // Extend backward to the previous FCD boundary, then normalize.
            while(fcd16 > 0xff) {
                const UChar32 earlier = uiter_previous32(&iter);
                if(earlier < 0) {
                    break;
                }
                fcd16 = nfcImpl.getFCD16(earlier);
                if(fcd16 == 0) {
                    (void)uiter_next32(&iter);
                    break;
                }
                visited.append(earlier);
            }
            // The text was collected back to front; restore text order.
            visited.reverse();
            if(!normalize(visited, errorCode)) {
                return FALSE;
            }
            limit = pos;
            start = pos - visited.length();
            state = IN_NORM_ITER_AT_START;
            pos = normalized.length();
            return TRUE;
        }

        followingCC = static_cast<uint8_t>(fcd16 >> 8);
        if(followingCC == 0) {
            // There is an FCD boundary before the code point just read.
            break;
        }
    }
    start = pos - visited.length();
    U_ASSERT(pos != start);
    // Return the iterator to where the scan began.
    iter.move(&iter, visited.length(), UITER_CURRENT);
    state = ITER_IN_FCD_SEGMENT;
    return TRUE;
}
/**
 * Walks forward from the current iterator position to find the next text
 * segment, checking it against the FCD condition as it goes.
 *
 * If the check passes, the iterator is moved back by the length of the
 * collected text, limit is set, and the state becomes ITER_IN_FCD_SEGMENT.
 * If the check fails, the text up to the next FCD boundary is collected and
 * passed to normalize(); start/limit are set around it, the state becomes
 * IN_NORM_ITER_AT_LIMIT and pos is reset to 0.
 *
 * @param errorCode in/out error code; the function returns FALSE at once if
 *                  it already indicates a failure
 * @return TRUE if a segment was set up, FALSE on a prior failure or if
 *         normalize() returns FALSE
 */
UBool
FCDUIterCollationIterator::nextSegment(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return FALSE;
    }
    U_ASSERT(state == ITER_CHECK_FWD);
    // The input text [start..(iter index)[ has already passed the FCD check.
    pos = iter.getIndex(&iter, UITER_CURRENT);
    // Code points visited so far, kept in case they must be normalized.
    UnicodeString visited;
    // Lower byte of the fcd16 value of the code point read before this one (0 at first).
    uint8_t precedingCC = 0;
    for(;;) {
        // Read the following code point and look up its fcd16 value.
        const UChar32 cp = uiter_next32(&iter);
        if(cp < 0) {
            break;
        }
        const uint16_t fcd16 = nfcImpl.getFCD16(cp);
        const uint8_t leadCC = static_cast<uint8_t>(fcd16 >> 8);
        if(leadCC == 0 && !visited.isEmpty()) {
            // There is an FCD boundary before this code point: step back over it and stop.
            uiter_previous32(&iter);
            break;
        }
        visited.append(cp);

        // The FCD check fails on an out-of-order pair or on a Tibetan composite vowel.
        bool failsFCD = false;
        if(leadCC != 0) {
            failsFCD = precedingCC > leadCC ||
                       CollationFCD::isFCD16OfTibetanCompositeVowel(fcd16);
        }
        if(failsFCD) {
            // Extend forward to the next FCD boundary, then normalize.
            for(;;) {
                const UChar32 later = uiter_next32(&iter);
                if(later < 0) {
                    break;
                }
                if(nfcImpl.getFCD16(later) <= 0xff) {
                    // Upper byte is zero: do not consume this code point.
                    uiter_previous32(&iter);
                    break;
                }
                visited.append(later);
            }
            if(!normalize(visited, errorCode)) {
                return FALSE;
            }
            start = pos;
            limit = pos + visited.length();
            state = IN_NORM_ITER_AT_LIMIT;
            pos = 0;
            return TRUE;
        }

        precedingCC = static_cast<uint8_t>(fcd16);
        if(precedingCC == 0) {
            // There is an FCD boundary after the code point just read.
            break;
        }
    }
    limit = pos + visited.length();
    U_ASSERT(pos != limit);
    // Return the iterator to where the scan began.
    iter.move(&iter, -visited.length(), UITER_CURRENT);
    state = ITER_IN_FCD_SEGMENT;
    return TRUE;
}
/**
 * Returns the next code point, reading either directly from the underlying
 * iterator or from the normalized string, depending on the current state.
 *
 * In the ITER_CHECK_FWD state a code unit is read and tested with the
 * CollationFCD quick checks; if those indicate a possible problem, the unit
 * is pushed back and nextSegment() is called before trying again.
 *
 * @param errorCode in/out error code, passed on to nextSegment()
 * @return the next code point; a negative value when the underlying
 *         iterator has no more text; U_SENTINEL if nextSegment() fails
 */
UChar32
FCDUIterCollationIterator::nextCodePoint(UErrorCode &errorCode) {
    for(;;) {
        if(state == ITER_CHECK_FWD) {
            const UChar32 unit = iter.next(&iter);
            if(unit < 0) {
                return unit;
            }
            if(CollationFCD::hasTccc(unit) &&
                    (CollationFCD::maybeTibetanCompositeVowel(unit) ||
                     CollationFCD::hasLccc(iter.current(&iter)))) {
                // Needs the full segment check: push the unit back and retry afterwards.
                iter.previous(&iter);
                if(!nextSegment(errorCode)) {
                    return U_SENTINEL;
                }
                continue;
            }
            if(!U16_IS_LEAD(unit)) {
                return unit;
            }
            // Try to pair the lead surrogate with a following trail surrogate.
            const UChar32 trail = iter.next(&iter);
            if(U16_IS_TRAIL(trail)) {
                return U16_GET_SUPPLEMENTARY(unit, trail);
            }
            if(trail >= 0) {
                // Not a trail surrogate: put it back and return the lone lead.
                iter.previous(&iter);
            }
            return unit;
        }

        if(state == ITER_IN_FCD_SEGMENT && pos != limit) {
            // Inside a checked segment: read straight from the iterator.
            const UChar32 cp = uiter_next32(&iter);
            pos += U16_LENGTH(cp);
            U_ASSERT(cp >= 0);
            return cp;
        }

        if(state >= IN_NORM_ITER_AT_LIMIT && pos != normalized.length()) {
            // Read from the normalized string.
            const UChar32 cp = normalized.char32At(pos);
            pos += U16_LENGTH(cp);
            return cp;
        }

        switchToForward();
    }
}
/**
 * Advances the underlying iterator by up to num code points, stopping early
 * if uiter_next32() returns a negative value.
 *
 * @param num number of code points to skip; nothing happens if it is not positive
 * The error code parameter is unused.
 */
void
UIterCollationIterator::forwardNumCodePoints(int32_t num, UErrorCode & /*errorCode*/) {
    for(; num > 0; --num) {
        if(uiter_next32(&iter) < 0) {
            break;
        }
    }
}
/**
 * Reads the next code point from the underlying iterator.
 *
 * The error code parameter is unused.
 * @return the value returned by uiter_next32() for the wrapped iterator
 */
UChar32
UIterCollationIterator::nextCodePoint(UErrorCode & /*errorCode*/) {
    const UChar32 cp = uiter_next32(&iter);
    return cp;
}
void CharIterTest::TestUCharIterator(UCharIterator *iter, CharacterIterator &ci, const char *moves, const char *which) { int32_t m; UChar32 c, c2; UBool h, h2; for(m=0;; ++m) { // move both iter and s[index] switch(moves[m]) { case '0': h=iter->hasNext(iter); h2=ci.hasNext(); c=iter->current(iter); c2=ci.current(); break; case '|': h=iter->hasNext(iter); h2=ci.hasNext(); c=uiter_current32(iter); c2=ci.current32(); break; case '+': h=iter->hasNext(iter); h2=ci.hasNext(); c=iter->next(iter); c2=ci.nextPostInc(); break; case '>': h=iter->hasNext(iter); h2=ci.hasNext(); c=uiter_next32(iter); c2=ci.next32PostInc(); break; case '-': h=iter->hasPrevious(iter); h2=ci.hasPrevious(); c=iter->previous(iter); c2=ci.previous(); break; case '<': h=iter->hasPrevious(iter); h2=ci.hasPrevious(); c=uiter_previous32(iter); c2=ci.previous32(); break; case '2': h=h2=FALSE; c=(UChar32)iter->move(iter, 2, UITER_CURRENT); c2=(UChar32)ci.move(2, CharacterIterator::kCurrent); break; case '8': h=h2=FALSE; c=(UChar32)iter->move(iter, -2, UITER_CURRENT); c2=(UChar32)ci.move(-2, CharacterIterator::kCurrent); break; case 0: return; default: errln("error: unexpected move character '%c' in \"%s\"", moves[m], moves); return; } // compare results if(c2==0xffff) { c2=(UChar32)-1; } if(c!=c2 || h!=h2 || ci.getIndex()!=iter->getIndex(iter, UITER_CURRENT)) { errln("error: UCharIterator(%s) misbehaving at \"%s\"[%d]='%c'", which, moves, m, moves[m]); } } }