/**
 * Returns the code point that UTF_GET_CHAR reads at the current position,
 * or DONE when the position lies outside the range [begin, end).
 */
UChar32 UCharCharacterIterator::current32() const {
    // Guard: there is nothing to read when the index is out of range.
    if (pos < begin || pos >= end) {
        return DONE;
    }

    UChar32 codePoint;
    UTF_GET_CHAR(text, begin, pos, end, codePoint);
    return codePoint;
}
/**
 * Implements {@link Transliterator#handleTransliterate}.
 *
 * Walks the code points between offsets.start and offsets.limit, asks
 * u_internalToUpper for the mapping of each one, and writes the result back
 * into the replaceable text.  A negative result from u_internalToUpper is
 * treated as "leave the text alone".
 *
 * @param text          the text being transliterated; modified in place
 * @param offsets       on entry, the range to process and its context; on
 *                      exit, limit and contextLimit are shifted by the total
 *                      length change and start is set to the new limit
 * @param isIncremental not referenced by this implementation
 */
void UppercaseTransliterator::handleTransliterate(Replaceable& text,
                                                  UTransPosition& offsets,
                                                  UBool isIncremental) const
{
    // Position in the (changing) replaceable text where the next result goes.
    int32_t writePos = offsets.start;
    if (writePos >= offsets.limit) {
        return;
    }

    // Snapshot of the context; the loop reads from this copy, so edits to
    // the replaceable text do not disturb the positions being read.
    UnicodeString original;
    text.extractBetween(offsets.contextStart, offsets.contextLimit, original);

    // Iterator over the context, handed to the case-mapping function.
    UCharIterator contextIter;
    uiter_setReplaceable(&contextIter, &text);
    contextIter.start = offsets.contextStart;
    contextIter.limit = offsets.contextLimit;

    // Read positions are relative to the start of the snapshot.
    int32_t readPos = writePos - offsets.contextStart;
    const int32_t readLimit = offsets.limit - offsets.contextStart;

    while (readPos < readLimit) {
        UChar32 cp;
        UTF_GET_CHAR(original.getBuffer(), 0, readPos, original.length(), cp);
        const int32_t oldLen = UTF_CHAR_LENGTH(cp);
        readPos += oldLen;
        contextIter.index = readPos; // Point _past_ current char

        const int32_t newLen = u_internalToUpper(cp, &contextIter, buffer,
                                                 u_getMaxCaseExpansion(),
                                                 loc.getName());
        if (newLen < 0) {
            // Nothing to replace: just step over the code point.
            writePos += oldLen;
            continue;
        }

        UnicodeString replacement(buffer, newLen);
        text.handleReplaceBetween(writePos, writePos + oldLen, replacement);

        // Keep the caller-visible limits in step with any length change.
        const int32_t delta = newLen - oldLen;
        if (delta != 0) {
            offsets.limit += delta;
            offsets.contextLimit += delta;
        }
        writePos += newLen;
    }

    offsets.start = offsets.limit;
}
/**
 * Adds to fillinResult the segment itself and, for every code point of the
 * segment that nfcImpl.getCanonStartSet() reports a start set for, every
 * string of the form
 *     (segment up to that code point) + cp2 + (one remainder from extract())
 * where cp2 ranges over the start set and extract() reports at least one
 * match for it.
 *
 * @param fillinResult table receiving the strings; each string is stored as
 *                     both key and heap-allocated value
 * @param segment      UTF-16 text to expand
 * @param segLen       number of UChars in segment
 * @param status       in/out error code; set to U_MEMORY_ALLOCATION_ERROR
 *                     when a UnicodeString cannot be allocated
 * @return fillinResult on success, NULL if status indicates a failure
 */
Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const UChar *segment, int32_t segLen, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return NULL;
    }

    //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(segment)));

    UnicodeString toPut(segment, segLen);

    // The segment is always part of its own equivalence set.  Check this
    // allocation the same way as the one in the inner loop below, instead of
    // handing a possibly-NULL value to the table.
    UnicodeString *segmentCopy = new UnicodeString(toPut);
    if (segmentCopy == 0) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    fillinResult->put(toPut, segmentCopy, status);

    UnicodeSet starts;

    // cycle through all the characters
    UChar32 cp;
    for (int32_t i = 0; i < segLen; i += UTF16_CHAR_LENGTH(cp)) {
        // see if any character is at the start of some decomposition
        UTF_GET_CHAR(segment, 0, i, segLen, cp);
        if (!nfcImpl.getCanonStartSet(cp, starts)) {
            continue;
        }

        // if so, see which decompositions match
        UnicodeSetIterator iter(starts);
        while (iter.next()) {
            // A failed status makes this function return NULL at the end
            // anyway, so stop instead of building further tables with it.
            if (U_FAILURE(status)) {
                return NULL;
            }

            UChar32 cp2 = iter.getCodepoint();
            Hashtable remainder(status);
            remainder.setValueDeleter(uhash_deleteUnicodeString);
            if (extract(&remainder, cp2, segment, segLen, i, status) == NULL) {
                continue;
            }

            // there were some matches, so add all the possibilities to the set.
            UnicodeString prefix(segment, i);
            prefix += cp2;

            int32_t el = -1;
            const UHashElement *ne = remainder.nextElement(el);
            while (ne != NULL) {
                // Borrow the stored remainder; it is only appended, so no
                // temporary copy is needed.
                const UnicodeString &item =
                    *static_cast<const UnicodeString *>(ne->value.pointer);

                UnicodeString *toAdd = new UnicodeString(prefix);
                /* test for NULL */
                if (toAdd == 0) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    return NULL;
                }
                *toAdd += item;
                fillinResult->put(*toAdd, toAdd, status);

                //if (PROGRESS) printf("Adding: %s\n", UToS(Tr(*toAdd)));

                ne = remainder.nextElement(el);
            }
        }
    }

    /* Test for buffer overflows */
    if (U_FAILURE(status)) {
        return NULL;
    }
    return fillinResult;
}
/** * Implements {@link Transliterator#handleTransliterate}. */ void TitlecaseTransliterator::handleTransliterate( Replaceable& text, UTransPosition& offsets, UBool isIncremental) const { if (SKIP == NULL) { return; } // Our mode; we are either converting letter toTitle or // toLower. UBool doTitle = TRUE; // Determine if there is a preceding context of CASED SKIP*, // in which case we want to start in toLower mode. If the // prior context is anything else (including empty) then start // in toTitle mode. UChar32 c; int32_t start; for (start = offsets.start - 1; start >= offsets.contextStart; start -= UTF_CHAR_LENGTH(c)) { c = text.char32At(start); if (SKIP->contains(c)) { continue; } doTitle = !CASED->contains(c); break; } // Convert things after a CASED character toLower; things // after a non-CASED, non-SKIP character toTitle. SKIP // characters are copied directly and do not change the mode. int32_t textPos = offsets.start; if (textPos >= offsets.limit) return; UnicodeString original; text.extractBetween(offsets.contextStart, offsets.contextLimit, original); UCharIterator iter; uiter_setReplaceable(&iter, &text); iter.start = offsets.contextStart; iter.limit = offsets.contextLimit; // Walk through original string // If there is a case change, modify corresponding position in replaceable int32_t i = textPos - offsets.contextStart; int32_t limit = offsets.limit - offsets.contextStart; UChar32 cp; int32_t oldLen; int32_t newLen; for (; i < limit; ) { UTF_GET_CHAR(original.getBuffer(), 0, i, original.length(), cp); oldLen = UTF_CHAR_LENGTH(cp); i += oldLen; iter.index = i; // Point _past_ current char if (!SKIP->contains(cp)) { if (doTitle) { newLen = u_internalToTitle(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName()); } else { newLen = u_internalToLower(cp, &iter, buffer, u_getMaxCaseExpansion(), loc.getName()); } doTitle = !CASED->contains(cp); if (newLen >= 0) { UnicodeString temp(buffer, newLen); text.handleReplaceBetween(textPos, textPos + oldLen, 
temp); if (newLen != oldLen) { textPos += newLen; offsets.limit += newLen - oldLen; offsets.contextLimit += newLen - oldLen; continue; } } } textPos += oldLen; } offsets.start = offsets.limit; }