Ejemplo n.º 1
0
void ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) {
    if (U_FAILURE(status)) { return; }
    static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 5;
    MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts;
    UErrorCode internalStatus = U_ZERO_ERROR;
    int32_t script_count = -1;

    while (TRUE) {
        script_count = uscript_getScriptExtensions(
            codePoint, scripts.getAlias(), FIRST_GUESS_SCRIPT_CAPACITY, &internalStatus);
        if (internalStatus == U_BUFFER_OVERFLOW_ERROR) {
            // Need to allocate more space
            if (scripts.resize(script_count) == NULL) {
                status = U_MEMORY_ALLOCATION_ERROR;
                return;
            }
            internalStatus = U_ZERO_ERROR;
        } else {
            break;
        }
    }

    // Check if we failed for some reason other than buffer overflow
    if (U_FAILURE(internalStatus)) {
        status = internalStatus;
        return;
    }

    // Load the scripts into the ScriptSet and return
    for (int32_t i = 0; i < script_count; i++) {
        this->set(scripts[i], status);
        if (U_FAILURE(status)) { return; }
    }
}
Ejemplo n.º 2
0
IdentifierInfo &IdentifierInfo::setIdentifier(const UnicodeString &identifier, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return *this;
    }
    *fIdentifier = identifier;
    clear();
    ScriptSet scriptsForCP;
    UChar32 cp;
    for (int32_t i = 0; i < identifier.length(); i += U16_LENGTH(cp)) {
        cp = identifier.char32At(i);
        // Store a representative character for each kind of decimal digit
        if (u_charType(cp) == U_DECIMAL_DIGIT_NUMBER) {
            // Just store the zero character as a representative for comparison. Unicode guarantees it is cp - value
            fNumerics->add(cp - (UChar32)u_getNumericValue(cp));
        }
        UScriptCode extensions[500];
        int32_t extensionsCount = uscript_getScriptExtensions(cp, extensions, UPRV_LENGTHOF(extensions), &status);
        if (U_FAILURE(status)) {
            return *this;
        }
        scriptsForCP.resetAll();
        for (int32_t j=0; j<extensionsCount; j++) {
            scriptsForCP.set(extensions[j], status);
        }
        scriptsForCP.reset(USCRIPT_COMMON, status);
        scriptsForCP.reset(USCRIPT_INHERITED, status);
        switch (scriptsForCP.countMembers()) {
          case 0: break;
          case 1:
            // Single script, record it.
            fRequiredScripts->Union(scriptsForCP);
            break;
          default:
            if (!fRequiredScripts->intersects(scriptsForCP) 
                    && !uhash_geti(fScriptSetSet, &scriptsForCP)) {
                // If the set hasn't been added already, add it
                //    (Add a copy, fScriptSetSet takes ownership of the copy.)
                uhash_puti(fScriptSetSet, new ScriptSet(scriptsForCP), 1, &status);
            }
            break;
        }
    }
    // Now make a final pass through ScriptSetSet to remove alternates that came before singles.
    // [Kana], [Kana Hira] => [Kana]
    // This is relatively infrequent, so doesn't have to be optimized.
    // We also compute any commonalities among the alternates.
    if (uhash_count(fScriptSetSet) > 0) {
        fCommonAmongAlternates->setAll();
        for (int32_t it = UHASH_FIRST;;) {
            const UHashElement *nextHashEl = uhash_nextElement(fScriptSetSet, &it);
            if (nextHashEl == NULL) {
                break;
            }
            ScriptSet *next = static_cast<ScriptSet *>(nextHashEl->key.pointer);
            // [Kana], [Kana Hira] => [Kana]
            if (fRequiredScripts->intersects(*next)) {
                uhash_removeElement(fScriptSetSet, nextHashEl);
            } else {
                fCommonAmongAlternates->intersect(*next);
                // [[Arab Syrc Thaa]; [Arab Syrc]] => [[Arab Syrc]]
                for (int32_t otherIt = UHASH_FIRST;;) {
                    const UHashElement *otherHashEl = uhash_nextElement(fScriptSetSet, &otherIt);
                    if (otherHashEl == NULL) {
                        break;
                    }
                    ScriptSet *other = static_cast<ScriptSet *>(otherHashEl->key.pointer);
                    if (next != other && next->contains(*other)) {
                        uhash_removeElement(fScriptSetSet, nextHashEl);
                        break;
                    }
                }
            }
        }
    }
    if (uhash_count(fScriptSetSet) == 0) {
        fCommonAmongAlternates->resetAll();
    }
    return *this;
}
Ejemplo n.º 3
0
void TestGetScriptExtensions() {
    UScriptCode scripts[20];
    int32_t length;
    UErrorCode errorCode;

    /* errors and overflows */
    errorCode=U_PARSE_ERROR;
    length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
    if(errorCode!=U_PARSE_ERROR) {
        log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
              u_errorName(errorCode));
    }
    errorCode=U_ZERO_ERROR;
    length=uscript_getScriptExtensions(0x0640, NULL, UPRV_LENGTHOF(scripts), &errorCode);
    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
        log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
              u_errorName(errorCode));
    }
    errorCode=U_ZERO_ERROR;
    length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
    if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
        log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
              u_errorName(errorCode));
    }
    errorCode=U_ZERO_ERROR;
    length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
        log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s\n",
              (int)length, u_errorName(errorCode));
    }
    errorCode=U_ZERO_ERROR;
    length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
        log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s\n",
              (int)length, u_errorName(errorCode));
    }
    /* U+063F has only a Script code, no Script_Extensions. */
    errorCode=U_ZERO_ERROR;
    length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
    if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
        log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
              (int)length, u_errorName(errorCode));
    }

    /* invalid code points */
    errorCode=U_ZERO_ERROR;
    length=uscript_getScriptExtensions(-1, scripts, UPRV_LENGTHOF(scripts), &errorCode);
    if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
        log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
              (int)length, u_errorName(errorCode));
    }
    errorCode=U_ZERO_ERROR;
    length=uscript_getScriptExtensions(0x110000, scripts, UPRV_LENGTHOF(scripts), &errorCode);
    if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
        log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
              (int)length, u_errorName(errorCode));
    }

    /* normal usage */
    errorCode=U_ZERO_ERROR;
    length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
    if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
        log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
              (int)length, u_errorName(errorCode));
    }
    errorCode=U_ZERO_ERROR;
    length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
    if(U_FAILURE(errorCode) || length<3 ||
            !scriptsContain(scripts, length, USCRIPT_ARABIC) ||
            !scriptsContain(scripts, length, USCRIPT_SYRIAC) ||
            !scriptsContain(scripts, length, USCRIPT_MANDAIC)) {
        log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
              (int)length, u_errorName(errorCode));
    }
    errorCode=U_ZERO_ERROR;
    length=uscript_getScriptExtensions(0xfdf2, scripts, UPRV_LENGTHOF(scripts), &errorCode);
    if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
        log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
              (int)length, u_errorName(errorCode));
    }
    errorCode=U_ZERO_ERROR;
    length=uscript_getScriptExtensions(0xff65, scripts, UPRV_LENGTHOF(scripts), &errorCode);
    if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
        log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
              (int)length, u_errorName(errorCode));
    }
}