/**
 * Adds every script in the Script_Extensions of a code point to this set.
 *
 * The scripts are fetched with uscript_getScriptExtensions() into a scratch
 * buffer that starts on the stack and is grown on demand when the lookup
 * reports U_BUFFER_OVERFLOW_ERROR.
 *
 * @param codePoint the code point whose script extensions are looked up.
 * @param status    in/out error code. Nothing is done if it already indicates
 *                  a failure on entry. Set to U_MEMORY_ALLOCATION_ERROR if the
 *                  scratch buffer cannot be grown, to the lookup's own error
 *                  code if the lookup fails for any reason other than buffer
 *                  overflow, or to whatever set() reports for a script.
 */
void ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) {
    if (U_FAILURE(status)) { return; }
    static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 5;
    MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts;
    UErrorCode internalStatus = U_ZERO_ERROR;
    int32_t script_count = -1;

    while (TRUE) {
        // Pass the buffer's *current* capacity, not the initial guess: after a
        // resize the larger size must be reported, otherwise the lookup would
        // overflow again on every retry and this loop would never terminate.
        script_count = uscript_getScriptExtensions(
            codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus);
        if (internalStatus == U_BUFFER_OVERFLOW_ERROR) {
            // Need to allocate more space; script_count is used as the new size.
            if (scripts.resize(script_count) == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            // Clear the overflow so the retry starts from a clean status.
            internalStatus = U_ZERO_ERROR;
        } else {
            break;
        }
    }

    // Check if we failed for some reason other than buffer overflow
    if (U_FAILURE(internalStatus)) {
        status = internalStatus;
        return;
    }

    // Load the scripts into the ScriptSet and return
    for (int32_t i = 0; i < script_count; i++) {
        this->set(scripts[i], status);
        if (U_FAILURE(status)) { return; }
    }
}
// Example #2
// 0
/**
 * Builds the cached sample tables (mSamples, mSampleInfo, mSampleInfoCount)
 * the first time it is called; later calls return immediately once mSamples
 * is set.
 *
 * Candidate values 0, 0.5, 1, 1.5, ... are tested against the rules in chain
 * order and each value is credited to the first rule that accepts it, or to
 * 'other' when none does. On success:
 *   - mSamples holds the collected values grouped by rule index, in the order
 *     they were discovered within each rule;
 *   - mSampleInfo[i] holds the cumulative number of samples up to and
 *     including rule i, OR-ed with LIMIT_MASK when the rule is limited (or
 *     yielded fewer than MAX_SAMPLES values);
 *   - mSampleInfoCount is the number of rule slots, including an implicit
 *     trailing 'other' slot when no explicit 'other' rule exists.
 *
 * @param status in/out error code. Nothing is done if it already indicates a
 *               failure on entry; set to U_MEMORY_ALLOCATION_ERROR when any of
 *               the working buffers cannot be allocated.
 */
void
PluralRules::initSamples(UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }
    Mutex lock(&pluralMutex);

    // Already initialized by an earlier call.
    if (mSamples) {
        return;
    }

    // The original design allowed several rules to share one keyword. Our data
    // never does that, and other functions here do not fully support it either
    // (the returned keywords form a list, not a set). Consequently, samples and
    // "all values" queries describe only the first rule carrying a keyword.

    // Pass 1: count the rules that have a header and note where the first
    // explicit 'other' rule sits.
    int32_t slotCount = 0;
    int32_t otherSlot = -1; // -1: no explicit 'other'; it gets an extra slot at the end
    for (RuleChain* chain = mRules; chain != NULL; chain = chain->next) {
        if (chain->ruleHeader == NULL) {
            continue;
        }
        if (otherSlot == -1 && 0 == chain->keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
            otherSlot = slotCount;
        }
        ++slotCount;
    }
    const bool otherIsImplicit = (otherSlot == -1);
    if (otherIsImplicit) {
        ++slotCount;
    }

    LocalMemory<int32_t> slotInfo;
    if (NULL == slotInfo.allocateInsteadAndCopy(slotCount)) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    const int32_t LIMIT_MASK = 0x1 << 31;

    // Pass 2: seed each slot with a zero sample count, plus the limit flag for
    // rules that report themselves as limited.
    int32_t slot = 0;
    for (RuleChain* chain = mRules; chain != NULL; chain = chain->next) {
        if (chain->ruleHeader != NULL) {
            if (chain->ruleHeader->isLimited()) {
                slotInfo[slot] = LIMIT_MASK;
            } else {
                slotInfo[slot] = 0;
            }
            ++slot;
        }
    }
    if (otherIsImplicit) {
        slotInfo[slotCount - 1] = 0; // unlimited
    }

    MaybeStackArray<SampleRecord, 10> gathered;
    int32_t gatheredCount = 0;

    // Upper bound on the number of candidate values to try (never below 10).
    int32_t probeLimit = getRepeatLimit() * MAX_SAMPLES * 2;
    if (probeLimit < 10) {
        probeLimit = 10;
    }

    // Pass 3: walk the candidates until every slot has hit MAX_SAMPLES or the
    // probe budget runs out.
    int32_t slotsStillOpen = slotCount;
    for (int probe = 0; slotsStillOpen > 0 && probe < probeLimit; ++probe) {
        const double candidate = probe / 2.0;

        // The first rule (in chain order) accepting the candidate claims it.
        int32_t hit = -1;
        int32_t position = 0;
        for (RuleChain* chain = mRules; chain != NULL; chain = chain->next) {
            if (chain->ruleHeader == NULL) {
                continue;
            }
            if (chain->ruleHeader->isFulfilled(candidate)) {
                hit = position;
                break;
            }
            ++position;
        }
        if (hit == -1) {
            // Falls to 'other'. With an explicit 'other' rule nothing should
            // leak through to here (that would indicate a bad rule set), but
            // no error is reported for it.
            if (otherIsImplicit) {
                hit = slotCount - 1;
            } else {
                hit = otherSlot;
            }
        }

        // Exactly MAX_SAMPLES means: limit flag clear and quota reached.
        if (slotInfo[hit] == MAX_SAMPLES) {
            continue;
        }
        slotInfo[hit] += 1; // the count lives in the low bits; the flag is untouched

        if (gatheredCount == gathered.getCapacity()) {
            int32_t grownCapacity;
            if (gatheredCount < 20) {
                grownCapacity = 128;
            } else {
                grownCapacity = gatheredCount * 2;
            }
            if (NULL == gathered.resize(grownCapacity, gatheredCount)) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
        }
        gathered[gatheredCount].ruleIndex = hit;
        gathered[gatheredCount].value = candidate;
        ++gatheredCount;

        // This slot just filled its quota (only possible with the flag clear).
        if (slotInfo[hit] == MAX_SAMPLES) {
            --slotsStillOpen;
        }
    }

    // Group the values by rule index, keeping discovery order within each
    // rule. A simple repeated scan is used rather than a real sort.
    LocalMemory<double> grouped;
    if (NULL == grouped.allocateInsteadAndCopy(gatheredCount)) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    int writePos = 0;
    for (int rule = 0; rule < slotCount; ++rule) {
        for (int k = 0; k < gatheredCount; ++k) {
            if (gathered[k].ruleIndex == rule) {
                grouped[writePos++] = gathered[k].value;
            }
        }
    }

    // Turn each slot from (flag | count) into (flag | cumulative end offset).
    int32_t runningTotal = 0;
    for (int rule = 0; rule < slotCount; ++rule) {
        const int32_t packed = slotInfo[rule];
        const int32_t count = packed & ~LIMIT_MASK;
        runningTotal += count;
        // An 'unlimited' rule that produced fewer than MAX_SAMPLES values is
        // not really unlimited, so it is flagged as limited as well.
        int32_t flag;
        if (count < MAX_SAMPLES) {
            flag = LIMIT_MASK;
        } else {
            flag = packed & LIMIT_MASK;
        }
        slotInfo[rule] = runningTotal | flag;
    }

    // Everything succeeded: hand ownership of the buffers to the object.
    mSamples = grouped.orphan();
    mSampleInfo = slotInfo.orphan();
    mSampleInfoCount = slotCount;
}