Esempio n. 1
0
/**
 * Implements {@link Transliterator#handleTransliterate}.
 */
void UppercaseTransliterator::handleTransliterate(Replaceable& text,
                                 UTransPosition& offsets, 
                                 UBool isIncremental) const {
    int32_t textPos = offsets.start;
    if (textPos >= offsets.limit) return;

    // get string for context
    
    UnicodeString original;
    text.extractBetween(offsets.contextStart, offsets.contextLimit, original);
    
    UCharIterator iter;
    uiter_setReplaceable(&iter, &text);
    iter.start = offsets.contextStart;
    iter.limit = offsets.contextLimit;
            
    // Walk through original string
    // If there is a case change, modify corresponding position in replaceable
    
    int32_t i = textPos - offsets.contextStart;
    int32_t limit = offsets.limit - offsets.contextStart;
    UChar32 cp;
    int32_t oldLen;
    
    for (; i < limit; ) {
        UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
        oldLen = UTF_CHAR_LENGTH(cp);
        i += oldLen;
        iter.index = i; // Point _past_ current char
        int32_t newLen = u_internalToUpper(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName());
        if (newLen >= 0) {
            UnicodeString temp(buffer, newLen);
            text.handleReplaceBetween(textPos, textPos + oldLen, temp);
            if (newLen != oldLen) {
                textPos += newLen;
                offsets.limit += newLen - oldLen;
                offsets.contextLimit += newLen - oldLen;
                continue;
            }
        }
        textPos += oldLen;
    }
    offsets.start = offsets.limit;
}
void CharIterTest::TestUCharIterator() {
    // test string of length 8
    UnicodeString s=UnicodeString("a \\U00010001b\\U0010fffdz", "").unescape();
    const char *const moves=
        "0+++++++++" // 10 moves per line
        "----0-----"
        ">>|>>>>>>>"
        "<<|<<<<<<<"
        "22+>8>-8+2";

    StringCharacterIterator sci(s), compareCI(s);

    UCharIterator sIter, cIter, rIter;

    uiter_setString(&sIter, s.getBuffer(), s.length());
    uiter_setCharacterIterator(&cIter, &sci);
    uiter_setReplaceable(&rIter, &s);

    TestUCharIterator(&sIter, compareCI, moves, "uiter_setString");
    compareCI.setIndex(0);
    TestUCharIterator(&cIter, compareCI, moves, "uiter_setCharacterIterator");
    compareCI.setIndex(0);
    TestUCharIterator(&rIter, compareCI, moves, "uiter_setReplaceable");

    // test move & getIndex some more
    sIter.start=2;
    sIter.index=3;
    sIter.limit=5;
    if( sIter.getIndex(&sIter, UITER_ZERO)!=0 ||
        sIter.getIndex(&sIter, UITER_START)!=2 ||
        sIter.getIndex(&sIter, UITER_CURRENT)!=3 ||
        sIter.getIndex(&sIter, UITER_LIMIT)!=5 ||
        sIter.getIndex(&sIter, UITER_LENGTH)!=s.length()
    ) {
        errln("error: UCharIterator(string).getIndex returns wrong index");
    }

    if( sIter.move(&sIter, 4, UITER_ZERO)!=4 ||
        sIter.move(&sIter, 1, UITER_START)!=3 ||
        sIter.move(&sIter, 3, UITER_CURRENT)!=5 ||
        sIter.move(&sIter, -1, UITER_LIMIT)!=4 ||
        sIter.move(&sIter, -5, UITER_LENGTH)!=3 ||
        sIter.move(&sIter, 0, UITER_CURRENT)!=sIter.getIndex(&sIter, UITER_CURRENT) ||
        sIter.getIndex(&sIter, UITER_CURRENT)!=3
    ) {
        errln("error: UCharIterator(string).move sets/returns wrong index");
    }

    sci=StringCharacterIterator(s, 2, 5, 3);
    uiter_setCharacterIterator(&cIter, &sci);
    if( cIter.getIndex(&cIter, UITER_ZERO)!=0 ||
        cIter.getIndex(&cIter, UITER_START)!=2 ||
        cIter.getIndex(&cIter, UITER_CURRENT)!=3 ||
        cIter.getIndex(&cIter, UITER_LIMIT)!=5 ||
        cIter.getIndex(&cIter, UITER_LENGTH)!=s.length()
    ) {
        errln("error: UCharIterator(character iterator).getIndex returns wrong index");
    }

    if( cIter.move(&cIter, 4, UITER_ZERO)!=4 ||
        cIter.move(&cIter, 1, UITER_START)!=3 ||
        cIter.move(&cIter, 3, UITER_CURRENT)!=5 ||
        cIter.move(&cIter, -1, UITER_LIMIT)!=4 ||
        cIter.move(&cIter, -5, UITER_LENGTH)!=3 ||
        cIter.move(&cIter, 0, UITER_CURRENT)!=cIter.getIndex(&cIter, UITER_CURRENT) ||
        cIter.getIndex(&cIter, UITER_CURRENT)!=3
    ) {
        errln("error: UCharIterator(character iterator).move sets/returns wrong index");
    }


    if(cIter.getIndex(&cIter, (enum UCharIteratorOrigin)-1) != -1)
    {
        errln("error: UCharIterator(char iter).getIndex did not return error value");
    }

    if(cIter.move(&cIter, 0, (enum UCharIteratorOrigin)-1) != -1)
    {
        errln("error: UCharIterator(char iter).move did not return error value");
    }


    if(rIter.getIndex(&rIter, (enum UCharIteratorOrigin)-1) != -1)
    {
        errln("error: UCharIterator(repl iter).getIndex did not return error value");
    }

    if(rIter.move(&rIter, 0, (enum UCharIteratorOrigin)-1) != -1)
    {
        errln("error: UCharIterator(repl iter).move did not return error value");
    }


    if(sIter.getIndex(&sIter, (enum UCharIteratorOrigin)-1) != -1)
    {
        errln("error: UCharIterator(string iter).getIndex did not return error value");
    }

    if(sIter.move(&sIter, 0, (enum UCharIteratorOrigin)-1) != -1)
    {
        errln("error: UCharIterator(string iter).move did not return error value");
    }

    /* Testing function coverage on bad input */
    UErrorCode status = U_ZERO_ERROR;
    uiter_setString(&sIter, NULL, 1);
    uiter_setState(&sIter, 1, &status);
    if (status != U_UNSUPPORTED_ERROR) {
        errln("error: uiter_setState returned %s instead of U_UNSUPPORTED_ERROR", u_errorName(status));
    }
    status = U_ZERO_ERROR;
    uiter_setState(NULL, 1, &status);
    if (status != U_ILLEGAL_ARGUMENT_ERROR) {
        errln("error: uiter_setState returned %s instead of U_ILLEGAL_ARGUMENT_ERROR", u_errorName(status));
    }
    if (uiter_getState(&sIter) != UITER_NO_STATE) {
        errln("error: uiter_getState did not return UITER_NO_STATE on bad input");
    }
}
Esempio n. 3
0
void CharIterTest::TestUCharIterator() {
    // test string of length 8
    UnicodeString s=UnicodeString("a \\U00010001b\\U0010fffdz", "").unescape();
    const char *const moves=
        "0+++++++++" // 10 moves per line
        "----0-----"
        ">>|>>>>>>>"
        "<<|<<<<<<<"
        "22+>8>-8+2";

    StringCharacterIterator sci(s), compareCI(s);

    UCharIterator sIter, cIter, rIter;

    uiter_setString(&sIter, s.getBuffer(), s.length());
    uiter_setCharacterIterator(&cIter, &sci);
    uiter_setReplaceable(&rIter, &s);

    TestUCharIterator(&sIter, compareCI, moves, "uiter_setString");
    compareCI.setIndex(0);
    TestUCharIterator(&cIter, compareCI, moves, "uiter_setCharacterIterator");
    compareCI.setIndex(0);
    TestUCharIterator(&rIter, compareCI, moves, "uiter_setReplaceable");

    // test move & getIndex some more
    sIter.start=2;
    sIter.index=3;
    sIter.limit=5;
    if( sIter.getIndex(&sIter, UITER_ZERO)!=0 ||
        sIter.getIndex(&sIter, UITER_START)!=2 ||
        sIter.getIndex(&sIter, UITER_CURRENT)!=3 ||
        sIter.getIndex(&sIter, UITER_LIMIT)!=5 ||
        sIter.getIndex(&sIter, UITER_LENGTH)!=s.length()
    ) {
        errln("error: UCharIterator(string).getIndex returns wrong index");
    }

    if( sIter.move(&sIter, 4, UITER_ZERO)!=4 ||
        sIter.move(&sIter, 1, UITER_START)!=3 ||
        sIter.move(&sIter, 3, UITER_CURRENT)!=5 ||
        sIter.move(&sIter, -1, UITER_LIMIT)!=4 ||
        sIter.move(&sIter, -5, UITER_LENGTH)!=3 ||
        sIter.move(&sIter, 0, UITER_CURRENT)!=sIter.getIndex(&sIter, UITER_CURRENT) ||
        sIter.getIndex(&sIter, UITER_CURRENT)!=3
    ) {
        errln("error: UCharIterator(string).move sets/returns wrong index");
    }

    sci=StringCharacterIterator(s, 2, 5, 3);
    uiter_setCharacterIterator(&cIter, &sci);
    if( cIter.getIndex(&cIter, UITER_ZERO)!=0 ||
        cIter.getIndex(&cIter, UITER_START)!=2 ||
        cIter.getIndex(&cIter, UITER_CURRENT)!=3 ||
        cIter.getIndex(&cIter, UITER_LIMIT)!=5 ||
        cIter.getIndex(&cIter, UITER_LENGTH)!=s.length()
    ) {
        errln("error: UCharIterator(character iterator).getIndex returns wrong index");
    }

    if( cIter.move(&cIter, 4, UITER_ZERO)!=4 ||
        cIter.move(&cIter, 1, UITER_START)!=3 ||
        cIter.move(&cIter, 3, UITER_CURRENT)!=5 ||
        cIter.move(&cIter, -1, UITER_LIMIT)!=4 ||
        cIter.move(&cIter, -5, UITER_LENGTH)!=3 ||
        cIter.move(&cIter, 0, UITER_CURRENT)!=cIter.getIndex(&cIter, UITER_CURRENT) ||
        cIter.getIndex(&cIter, UITER_CURRENT)!=3
    ) {
        errln("error: UCharIterator(character iterator).move sets/returns wrong index");
    }


    if(cIter.getIndex(&cIter, (enum UCharIteratorOrigin)-1) != -1)
    {
      errln("error: UCharIterator(char iter).getIndex did not return error value");
    }

    if(cIter.move(&cIter, 0, (enum UCharIteratorOrigin)-1) != -1)
    {
      errln("error: UCharIterator(char iter).move did not return error value");
    }


    if(rIter.getIndex(&rIter, (enum UCharIteratorOrigin)-1) != -1)
    {
      errln("error: UCharIterator(repl iter).getIndex did not return error value");
    }

    if(rIter.move(&rIter, 0, (enum UCharIteratorOrigin)-1) != -1)
    {
      errln("error: UCharIterator(repl iter).move did not return error value");
    }


    if(sIter.getIndex(&sIter, (enum UCharIteratorOrigin)-1) != -1)
    {
      errln("error: UCharIterator(string iter).getIndex did not return error value");
    }

    if(sIter.move(&sIter, 0, (enum UCharIteratorOrigin)-1) != -1)
    {
      errln("error: UCharIterator(string iter).move did not return error value");
    }

}
Esempio n. 4
0
void NormalizationTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
                                                      UBool isIncremental) const {
    // start and limit of the input range
    int32_t start = offsets.start;
    int32_t limit = offsets.limit;
    int32_t length, delta;

    if(start >= limit) {
        return;
    }

    // a C code unit iterator, implemented around the Replaceable
    UCharIterator iter;
    uiter_setReplaceable(&iter, &text);

    // the output string and buffer pointer
    UnicodeString output;
    UChar *buffer;
    UBool neededToNormalize;

    UErrorCode errorCode;

    /*
     * Normalize as short chunks at a time as possible even in
     * bulk mode, so that styled text is minimally disrupted.
     * In incremental mode, a chunk that ends with offsets.limit
     * must not be normalized.
     *
     * If it was known that the input text is not styled, then
     * a bulk mode normalization could look like this:
     *

    UChar staticChars[256];
    UnicodeString input;

    length = limit - start;
    input.setTo(staticChars, 0, sizeof(staticChars)/U_SIZEOF_UCHAR); // writable alias

    _Replaceable_extractBetween(text, start, limit, input.getBuffer(length));
    input.releaseBuffer(length);

    UErrorCode status = U_ZERO_ERROR;
    Normalizer::normalize(input, fMode, options, output, status);

    text.handleReplaceBetween(start, limit, output);

    int32_t delta = output.length() - length;
    offsets.contextLimit += delta;
    offsets.limit += delta;
    offsets.start = limit + delta;

     *
     */
    while(start < limit) {
        // set the iterator limits for the remaining input range
        // this is a moving target because of the replacements in the text object
        iter.start = iter.index = start;
        iter.limit = limit;

        // incrementally normalize a small chunk of the input
        buffer = output.getBuffer(-1);
        errorCode = U_ZERO_ERROR;
        length = unorm_next(&iter, buffer, output.getCapacity(),
                            fMode, 0,
                            TRUE, &neededToNormalize,
                            &errorCode);
        output.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);

        if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
            // use a larger output string buffer and do it again from the start
            iter.index = start;
            buffer = output.getBuffer(length);
            errorCode = U_ZERO_ERROR;
            length = unorm_next(&iter, buffer, output.getCapacity(),
                                fMode, 0,
                                TRUE, &neededToNormalize,
                                &errorCode);
            output.releaseBuffer(U_SUCCESS(errorCode) ? length : 0);
        }

        if(U_FAILURE(errorCode)) {
            break;
        }

        limit = iter.index;
        if(isIncremental && limit == iter.limit) {
            // stop in incremental mode when we reach the input limit
            // in case there are additional characters that could change the
            // normalization result

            // UNLESS all characters in the result of the normalization of
            // the last run are in the skippable set
            const UChar *s=output.getBuffer();
            int32_t i=0, outLength=output.length();
            UChar32 c;

            while(i<outLength) {
                U16_NEXT(s, i, outLength, c);
                if(!unorm_isNFSkippable(c, fMode)) {
                    outLength=-1; // I wish C++ had labeled loops and break outer; ...
                    break;
                }
            }
            if (outLength<0) {
                break;
            }
        }

        if(neededToNormalize) {
            // replace the input chunk with its normalized form
            text.handleReplaceBetween(start, limit, output);

            // update all necessary indexes accordingly
            delta = length - (limit - start);   // length change in the text object
            start = limit += delta;             // the next chunk starts where this one ends, with adjustment
            limit = offsets.limit += delta;     // set the iteration limit to the adjusted end of the input range
            offsets.contextLimit += delta;
        } else {
            // delta == 0
            start = limit;
            limit = offsets.limit;
        }
    }

    offsets.start = start;
}
Esempio n. 5
0
/**
 * Implements {@link Transliterator#handleTransliterate}.
 */
void TitlecaseTransliterator::handleTransliterate(
                                  Replaceable& text, UTransPosition& offsets,
                                  UBool isIncremental) const {
    if (SKIP == NULL) {
        return;
    }

    // Our mode; we are either converting letter toTitle or
    // toLower.
    UBool doTitle = TRUE;
    
    // Determine if there is a preceding context of CASED SKIP*,
    // in which case we want to start in toLower mode.  If the
    // prior context is anything else (including empty) then start
    // in toTitle mode.
    UChar32 c;
    int32_t start;
    for (start = offsets.start - 1; start >= offsets.contextStart; start -= UTF_CHAR_LENGTH(c)) {
        c = text.char32At(start);
        if (SKIP->contains(c)) {
            continue;
        }
        doTitle = !CASED->contains(c);
        break;
    }
    
    // Convert things after a CASED character toLower; things
    // after a non-CASED, non-SKIP character toTitle.  SKIP
    // characters are copied directly and do not change the mode.
    int32_t textPos = offsets.start;
    if (textPos >= offsets.limit) return;

    UnicodeString original;
    text.extractBetween(offsets.contextStart, offsets.contextLimit, original);

    UCharIterator iter;
    uiter_setReplaceable(&iter, &text);
    iter.start = offsets.contextStart;
    iter.limit = offsets.contextLimit;

    // Walk through original string
    // If there is a case change, modify corresponding position in replaceable

    int32_t i = textPos - offsets.contextStart;
    int32_t limit = offsets.limit - offsets.contextStart;
    UChar32 cp;
    int32_t oldLen;
    int32_t newLen;

    for (; i < limit; ) {
        UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
        oldLen = UTF_CHAR_LENGTH(cp);
        i += oldLen;
        iter.index = i; // Point _past_ current char
        if (!SKIP->contains(cp)) {
            if (doTitle) {
                newLen = u_internalToTitle(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName());
            } else {
                newLen = u_internalToLower(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName());
            }
            doTitle = !CASED->contains(cp);
            if (newLen >= 0) {
                UnicodeString temp(buffer, newLen);
                text.handleReplaceBetween(textPos, textPos + oldLen, temp);
                if (newLen != oldLen) {
                    textPos += newLen;
                    offsets.limit += newLen - oldLen;
                    offsets.contextLimit += newLen - oldLen;
                    continue;
                }
            }
        }
        textPos += oldLen;
    }
    offsets.start = offsets.limit;
}