Example #1
0
UChar32
UCharCharacterIterator::current32() const {
    if (pos >= begin && pos < end) {
        UChar32 c;
        UTF_GET_CHAR(text, begin, pos, end, c);
        return c;
    } else {
        return DONE;
    }
}
Example #2
0
/**
 * Implements {@link Transliterator#handleTransliterate}.
 */
void UppercaseTransliterator::handleTransliterate(Replaceable& text,
                                 UTransPosition& offsets, 
                                 UBool isIncremental) const {
    int32_t textPos = offsets.start;
    if (textPos >= offsets.limit) return;

    // get string for context
    
    UnicodeString original;
    text.extractBetween(offsets.contextStart, offsets.contextLimit, original);
    
    UCharIterator iter;
    uiter_setReplaceable(&iter, &text);
    iter.start = offsets.contextStart;
    iter.limit = offsets.contextLimit;
            
    // Walk through original string
    // If there is a case change, modify corresponding position in replaceable
    
    int32_t i = textPos - offsets.contextStart;
    int32_t limit = offsets.limit - offsets.contextStart;
    UChar32 cp;
    int32_t oldLen;
    
    for (; i < limit; ) {
        UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
        oldLen = UTF_CHAR_LENGTH(cp);
        i += oldLen;
        iter.index = i; // Point _past_ current char
        int32_t newLen = u_internalToUpper(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName());
        if (newLen >= 0) {
            UnicodeString temp(buffer, newLen);
            text.handleReplaceBetween(textPos, textPos + oldLen, temp);
            if (newLen != oldLen) {
                textPos += newLen;
                offsets.limit += newLen - oldLen;
                offsets.contextLimit += newLen - oldLen;
                continue;
            }
        }
        textPos += oldLen;
    }
    offsets.start = offsets.limit;
}
Example #3
0
Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) {

    if (U_FAILURE(status)) {
        return NULL;
    }

    //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(segment)));

    UnicodeString toPut(segment, segLen);

    fillinResult->put(toPut, new UnicodeString(toPut), status);

    UnicodeSet starts;

    // cycle through all the characters
    UChar32 cp;
    for (int32_t i = 0; i < segLen; i += UTF16_CHAR_LENGTH(cp)) {
        // see if any character is at the start of some decomposition
        UTF_GET_CHAR(segment, 0, i, segLen, cp);
        if (!nfcImpl.getCanonStartSet(cp, starts)) {
            continue;
        }
        // if so, see which decompositions match
        UnicodeSetIterator iter(starts);
        while (iter.next()) {
            UChar32 cp2 = iter.getCodepoint();
            Hashtable remainder(status);
            remainder.setValueDeleter(uhash_deleteUnicodeString);
            if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
                continue;
            }

            // there were some matches, so add all the possibilities to the set.
            UnicodeString prefix(segment, i);
            prefix += cp2;

            int32_t el = -1;
            const UHashElement *ne = remainder.nextElement(el);
            while (ne != NULL) {
                UnicodeString item = *((UnicodeString *)(ne->value.pointer));
                UnicodeString *toAdd = new UnicodeString(prefix);
                /* test for NULL */
                if (toAdd == 0) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    return NULL;
                }
                *toAdd += item;
                fillinResult->put(*toAdd, toAdd, status);

                //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(*toAdd)));

                ne = remainder.nextElement(el);
            }
        }
    }

    /* Test for buffer overflows */
    if(U_FAILURE(status)) {
        return NULL;
    }
    return fillinResult;
}
Example #4
0
/**
 * Implements {@link Transliterator#handleTransliterate}.
 */
void TitlecaseTransliterator::handleTransliterate(
                                  Replaceable& text, UTransPosition& offsets,
                                  UBool isIncremental) const {
    if (SKIP == NULL) {
        return;
    }

    // Our mode; we are either converting letter toTitle or
    // toLower.
    UBool doTitle = TRUE;
    
    // Determine if there is a preceding context of CASED SKIP*,
    // in which case we want to start in toLower mode.  If the
    // prior context is anything else (including empty) then start
    // in toTitle mode.
    UChar32 c;
    int32_t start;
    for (start = offsets.start - 1; start >= offsets.contextStart; start -= UTF_CHAR_LENGTH(c)) {
        c = text.char32At(start);
        if (SKIP->contains(c)) {
            continue;
        }
        doTitle = !CASED->contains(c);
        break;
    }
    
    // Convert things after a CASED character toLower; things
    // after a non-CASED, non-SKIP character toTitle.  SKIP
    // characters are copied directly and do not change the mode.
    int32_t textPos = offsets.start;
    if (textPos >= offsets.limit) return;

    UnicodeString original;
    text.extractBetween(offsets.contextStart, offsets.contextLimit, original);

    UCharIterator iter;
    uiter_setReplaceable(&iter, &text);
    iter.start = offsets.contextStart;
    iter.limit = offsets.contextLimit;

    // Walk through original string
    // If there is a case change, modify corresponding position in replaceable

    int32_t i = textPos - offsets.contextStart;
    int32_t limit = offsets.limit - offsets.contextStart;
    UChar32 cp;
    int32_t oldLen;
    int32_t newLen;

    for (; i < limit; ) {
        UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp);
        oldLen = UTF_CHAR_LENGTH(cp);
        i += oldLen;
        iter.index = i; // Point _past_ current char
        if (!SKIP->contains(cp)) {
            if (doTitle) {
                newLen = u_internalToTitle(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName());
            } else {
                newLen = u_internalToLower(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName());
            }
            doTitle = !CASED->contains(cp);
            if (newLen >= 0) {
                UnicodeString temp(buffer, newLen);
                text.handleReplaceBetween(textPos, textPos + oldLen, temp);
                if (newLen != oldLen) {
                    textPos += newLen;
                    offsets.limit += newLen - oldLen;
                    offsets.contextLimit += newLen - oldLen;
                    continue;
                }
            }
        }
        textPos += oldLen;
    }
    offsets.start = offsets.limit;
}