Пример #1
0
/**
 * This is the implementation function for next().
 */
int32_t
DictionaryBasedBreakIterator::handleNext()
{
    UErrorCode status = U_ZERO_ERROR;
    // if there are no cached break positions, or if we've just moved
    // off the end of the range covered by the cache, we have to dump
    // and possibly regenerate the cache
    if (cachedBreakPositions == NULL || positionInCache == numCachedBreakPositions - 1) {

        // start by using the inherited handleNext() to find a tentative return
        // value.   dictionaryCharCount tells us how many dictionary characters
        // we passed over on our way to the tentative return value
        int32_t startPos = fText->getIndex();
        fDictionaryCharCount = 0;
        int32_t result = RuleBasedBreakIterator::handleNext();

        // if we passed over more than one dictionary character, then we use
        // divideUpDictionaryRange() to regenerate the cached break positions
        // for the new range
        if (fDictionaryCharCount > 1 && result - startPos > 1) {
            divideUpDictionaryRange(startPos, result, status);
            U_ASSERT(U_SUCCESS(status));
            if (U_FAILURE(status)) {
                // Something went badly wrong, an internal error.
                // We have no way from here to report it to caller.
                // Treat as if this is if the dictionary did not apply to range.
                reset();
                return result;
            }
        }

        // otherwise, the value we got back from the inherited fuction
        // is our return value, and we can dump the cache
        else {
            reset();
            return result;
        }
    }

    // if the cache of break positions has been regenerated (or existed all
    // along), then just advance to the next break position in the cache
    // and return it
    if (cachedBreakPositions != NULL) {
        ++positionInCache;
        fText->setIndex(cachedBreakPositions[positionInCache]);
        return cachedBreakPositions[positionInCache];
    }
    return -9999;   // SHOULD NEVER GET HERE!
}
Пример #2
0
int32_t
DictionaryBreakEngine::findBreaks( UText *text,
                                 int32_t startPos,
                                 int32_t endPos,
                                 UBool reverse,
                                 int32_t breakType,
                                 UStack &foundBreaks ) const {
    int32_t result = 0;

    // Find the span of characters included in the set.
    int32_t start = (int32_t)utext_getNativeIndex(text);
    int32_t current;
    int32_t rangeStart;
    int32_t rangeEnd;
    UChar32 c = utext_current32(text);
    if (reverse) {
        UBool   isDict = fSet.contains(c);
        while((current = (int32_t)utext_getNativeIndex(text)) > startPos && isDict) {
            c = utext_previous32(text);
            isDict = fSet.contains(c);
        }
        rangeStart = (current < startPos) ? startPos : current+(isDict ? 0 : 1);
        rangeEnd = start + 1;
    }
    else {
        while((current = (int32_t)utext_getNativeIndex(text)) < endPos && fSet.contains(c)) {
            utext_next32(text);         // TODO:  recast loop for postincrement
            c = utext_current32(text);
        }
        rangeStart = start;
        rangeEnd = current;
    }
    if (breakType >= 0 && breakType < 32 && (((uint32_t)1 << breakType) & fTypes)) {
        result = divideUpDictionaryRange(text, rangeStart, rangeEnd, foundBreaks);
        utext_setNativeIndex(text, current);
    }
    
    return result;
}