/** * This is the implementation function for next(). */ int32_t DictionaryBasedBreakIterator::handleNext() { UErrorCode status = U_ZERO_ERROR; // if there are no cached break positions, or if we've just moved // off the end of the range covered by the cache, we have to dump // and possibly regenerate the cache if (cachedBreakPositions == NULL || positionInCache == numCachedBreakPositions - 1) { // start by using the inherited handleNext() to find a tentative return // value. dictionaryCharCount tells us how many dictionary characters // we passed over on our way to the tentative return value int32_t startPos = fText->getIndex(); fDictionaryCharCount = 0; int32_t result = RuleBasedBreakIterator::handleNext(); // if we passed over more than one dictionary character, then we use // divideUpDictionaryRange() to regenerate the cached break positions // for the new range if (fDictionaryCharCount > 1 && result - startPos > 1) { divideUpDictionaryRange(startPos, result, status); U_ASSERT(U_SUCCESS(status)); if (U_FAILURE(status)) { // Something went badly wrong, an internal error. // We have no way from here to report it to caller. // Treat as if this is if the dictionary did not apply to range. reset(); return result; } } // otherwise, the value we got back from the inherited fuction // is our return value, and we can dump the cache else { reset(); return result; } } // if the cache of break positions has been regenerated (or existed all // along), then just advance to the next break position in the cache // and return it if (cachedBreakPositions != NULL) { ++positionInCache; fText->setIndex(cachedBreakPositions[positionInCache]); return cachedBreakPositions[positionInCache]; } return -9999; // SHOULD NEVER GET HERE! }
int32_t DictionaryBreakEngine::findBreaks( UText *text, int32_t startPos, int32_t endPos, UBool reverse, int32_t breakType, UStack &foundBreaks ) const { int32_t result = 0; // Find the span of characters included in the set. int32_t start = (int32_t)utext_getNativeIndex(text); int32_t current; int32_t rangeStart; int32_t rangeEnd; UChar32 c = utext_current32(text); if (reverse) { UBool isDict = fSet.contains(c); while((current = (int32_t)utext_getNativeIndex(text)) > startPos && isDict) { c = utext_previous32(text); isDict = fSet.contains(c); } rangeStart = (current < startPos) ? startPos : current+(isDict ? 0 : 1); rangeEnd = start + 1; } else { while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) { utext_next32(text); // TODO: recast loop for postincrement c = utext_current32(text); } rangeStart = start; rangeEnd = current; } if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) { result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks); utext_setNativeIndex(text, current); } return result; }