示例#1
0
UBool
FCDUIterCollationIterator::previousSegment(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return FALSE; }
    U_ASSERT(state == ITER_CHECK_BWD);
    // The input text [(iter index)..limit[ passes the FCD check.
    pos = iter.getIndex(&iter, UITER_CURRENT);
    // Collect the characters being checked, in case they need to be normalized.
    UnicodeString s;
    uint8_t nextCC = 0;
    for(;;) {
        // Fetch the previous character and its fcd16 value.
        UChar32 c = uiter_previous32(&iter);
        if(c < 0) { break; }
        uint16_t fcd16 = nfcImpl.getFCD16(c);
        uint8_t trailCC = (uint8_t)fcd16;
        if(trailCC == 0 && !s.isEmpty()) {
            // FCD boundary after this character.
            uiter_next32(&iter);
            break;
        }
        s.append(c);
        if(trailCC != 0 && ((nextCC != 0 && trailCC > nextCC) ||
                            CollationFCD::isFCD16OfTibetanCompositeVowel(fcd16))) {
            // Fails FCD check. Find the previous FCD boundary and normalize.
            while(fcd16 > 0xff) {
                c = uiter_previous32(&iter);
                if(c < 0) { break; }
                fcd16 = nfcImpl.getFCD16(c);
                if(fcd16 == 0) {
                    (void)uiter_next32(&iter);
                    break;
                }
                s.append(c);
            }
            s.reverse();
            if(!normalize(s, errorCode)) { return FALSE; }
            limit = pos;
            start = pos - s.length();
            state = IN_NORM_ITER_AT_START;
            pos = normalized.length();
            return TRUE;
        }
        nextCC = (uint8_t)(fcd16 >> 8);
        if(nextCC == 0) {
            // FCD boundary before the following character.
            break;
        }
    }
    start = pos - s.length();
    U_ASSERT(pos != start);
    iter.move(&iter, s.length(), UITER_CURRENT);
    state = ITER_IN_FCD_SEGMENT;
    return TRUE;
}
示例#2
0
UBool
FCDUIterCollationIterator::nextSegment(UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return FALSE; }
    U_ASSERT(state == ITER_CHECK_FWD);
    // The input text [start..(iter index)[ passes the FCD check.
    pos = iter.getIndex(&iter, UITER_CURRENT);
    // Collect the characters being checked, in case they need to be normalized.
    UnicodeString s;
    uint8_t prevCC = 0;
    for(;;) {
        // Fetch the next character and its fcd16 value.
        UChar32 c = uiter_next32(&iter);
        if(c < 0) { break; }
        uint16_t fcd16 = nfcImpl.getFCD16(c);
        uint8_t leadCC = (uint8_t)(fcd16 >> 8);
        if(leadCC == 0 && !s.isEmpty()) {
            // FCD boundary before this character.
            uiter_previous32(&iter);
            break;
        }
        s.append(c);
        if(leadCC != 0 && (prevCC > leadCC || CollationFCD::isFCD16OfTibetanCompositeVowel(fcd16))) {
            // Fails FCD check. Find the next FCD boundary and normalize.
            for(;;) {
                c = uiter_next32(&iter);
                if(c < 0) { break; }
                if(nfcImpl.getFCD16(c) <= 0xff) {
                    uiter_previous32(&iter);
                    break;
                }
                s.append(c);
            }
            if(!normalize(s, errorCode)) { return FALSE; }
            start = pos;
            limit = pos + s.length();
            state = IN_NORM_ITER_AT_LIMIT;
            pos = 0;
            return TRUE;
        }
        prevCC = (uint8_t)fcd16;
        if(prevCC == 0) {
            // FCD boundary after the last character.
            break;
        }
    }
    limit = pos + s.length();
    U_ASSERT(pos != limit);
    iter.move(&iter, -s.length(), UITER_CURRENT);
    state = ITER_IN_FCD_SEGMENT;
    return TRUE;
}
示例#3
0
static int32_t
_iterate(UCharIterator *src, UBool forward,
              UChar *dest, int32_t destCapacity,
              const Normalizer2 *n2,
              UBool doNormalize, UBool *pNeededToNormalize,
              UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(destCapacity<0 || (dest==NULL && destCapacity>0) || src==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if(pNeededToNormalize!=NULL) {
        *pNeededToNormalize=FALSE;
    }
    if(!(forward ? src->hasNext(src) : src->hasPrevious(src))) {
        return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
    }

    UnicodeString buffer;
    UChar32 c;
    if(forward) {
        /* get one character and ignore its properties */
        buffer.append(uiter_next32(src));
        /* get all following characters until we see a boundary */
        while((c=uiter_next32(src))>=0) {
            if(n2->hasBoundaryBefore(c)) {
                /* back out the latest movement to stop at the boundary */
                src->move(src, -U16_LENGTH(c), UITER_CURRENT);
                break;
            } else {
                buffer.append(c);
            }
        }
    } else {
        while((c=uiter_previous32(src))>=0) {
            /* always write this character to the front of the buffer */
            buffer.insert(0, c);
            /* stop if this just-copied character is a boundary */
            if(n2->hasBoundaryBefore(c)) {
                break;
            }
        }
    }

    UnicodeString destString(dest, 0, destCapacity);
    if(buffer.length()>0 && doNormalize) {
        n2->normalize(buffer, destString, *pErrorCode).extract(dest, destCapacity, *pErrorCode);
        if(pNeededToNormalize!=NULL && U_SUCCESS(*pErrorCode)) {
            *pNeededToNormalize= destString!=buffer;
        }
        return destString.length();
    } else {
        /* just copy the source characters */
        return buffer.extract(dest, destCapacity, *pErrorCode);
    }
}
示例#4
0
UChar32
FCDUIterCollationIterator::previousCodePoint(UErrorCode &errorCode) {
    UChar32 c;
    for(;;) {
        if(state == ITER_CHECK_BWD) {
            c = iter.previous(&iter);
            if(c < 0) {
                start = pos = 0;
                state = ITER_IN_FCD_SEGMENT;
                return U_SENTINEL;
            }
            if(CollationFCD::hasLccc(c)) {
                UChar32 prev = U_SENTINEL;
                if(CollationFCD::maybeTibetanCompositeVowel(c) ||
                        CollationFCD::hasTccc(prev = iter.previous(&iter))) {
                    iter.next(&iter);
                    if(prev >= 0) {
                        iter.next(&iter);
                    }
                    if(!previousSegment(errorCode)) {
                        return U_SENTINEL;
                    }
                    continue;
                }
                // hasLccc(trail)=true for all trail surrogates
                if(U16_IS_TRAIL(c)) {
                    if(prev < 0) {
                        prev = iter.previous(&iter);
                    }
                    if(U16_IS_LEAD(prev)) {
                        return U16_GET_SUPPLEMENTARY(prev, c);
                    }
                }
                if(prev >= 0) {
                    iter.next(&iter);
                }
            }
            return c;
        } else if(state == ITER_IN_FCD_SEGMENT && pos != start) {
            c = uiter_previous32(&iter);
            pos -= U16_LENGTH(c);
            U_ASSERT(c >= 0);
            return c;
        } else if(state >= IN_NORM_ITER_AT_LIMIT && pos != 0) {
            c = normalized.char32At(pos - 1);
            pos -= U16_LENGTH(c);
            return c;
        } else {
            switchToBackward();
        }
    }
}
示例#5
0
void
UIterCollationIterator::backwardNumCodePoints(int32_t num, UErrorCode & /*errorCode*/) {
    while(num > 0 && (uiter_previous32(&iter)) >= 0) {
        --num;
    }
}
示例#6
0
UChar32
UIterCollationIterator::previousCodePoint(UErrorCode & /*errorCode*/) {
    return uiter_previous32(&iter);
}
void CharIterTest::TestUCharIterator(UCharIterator *iter, CharacterIterator &ci,
                                     const char *moves, const char *which) {
    int32_t m;
    UChar32 c, c2;
    UBool h, h2;

    for(m=0;; ++m) {
        // move both iter and s[index]
        switch(moves[m]) {
        case '0':
            h=iter->hasNext(iter);
            h2=ci.hasNext();
            c=iter->current(iter);
            c2=ci.current();
            break;
        case '|':
            h=iter->hasNext(iter);
            h2=ci.hasNext();
            c=uiter_current32(iter);
            c2=ci.current32();
            break;

        case '+':
            h=iter->hasNext(iter);
            h2=ci.hasNext();
            c=iter->next(iter);
            c2=ci.nextPostInc();
            break;
        case '>':
            h=iter->hasNext(iter);
            h2=ci.hasNext();
            c=uiter_next32(iter);
            c2=ci.next32PostInc();
            break;

        case '-':
            h=iter->hasPrevious(iter);
            h2=ci.hasPrevious();
            c=iter->previous(iter);
            c2=ci.previous();
            break;
        case '<':
            h=iter->hasPrevious(iter);
            h2=ci.hasPrevious();
            c=uiter_previous32(iter);
            c2=ci.previous32();
            break;

        case '2':
            h=h2=FALSE;
            c=(UChar32)iter->move(iter, 2, UITER_CURRENT);
            c2=(UChar32)ci.move(2, CharacterIterator::kCurrent);
            break;

        case '8':
            h=h2=FALSE;
            c=(UChar32)iter->move(iter, -2, UITER_CURRENT);
            c2=(UChar32)ci.move(-2, CharacterIterator::kCurrent);
            break;

        case 0:
            return;
        default:
            errln("error: unexpected move character '%c' in \"%s\"", moves[m], moves);
            return;
        }

        // compare results
        if(c2==0xffff) {
            c2=(UChar32)-1;
        }
        if(c!=c2 || h!=h2 || ci.getIndex()!=iter->getIndex(iter, UITER_CURRENT)) {
            errln("error: UCharIterator(%s) misbehaving at \"%s\"[%d]='%c'", which, moves, m, moves[m]);
        }
    }
}