Exemple #1
0
// Computes the augmented script set for a code point, according to UTS 39 section 5.1.
void SpoofImpl::getAugmentedScriptSet(UChar32 codePoint, ScriptSet& result, UErrorCode& status) {
    result.resetAll();
    result.setScriptExtensions(codePoint, status);
    if (U_FAILURE(status)) { return; }

    // Section 5.1 step 1
    if (result.test(USCRIPT_HAN, status)) {
        result.set(USCRIPT_HAN_WITH_BOPOMOFO, status);
        result.set(USCRIPT_JAPANESE, status);
        result.set(USCRIPT_KOREAN, status);
    }
    if (result.test(USCRIPT_HIRAGANA, status)) {
        result.set(USCRIPT_JAPANESE, status);
    }
    if (result.test(USCRIPT_KATAKANA, status)) {
        result.set(USCRIPT_JAPANESE, status);
    }
    if (result.test(USCRIPT_HANGUL, status)) {
        result.set(USCRIPT_KOREAN, status);
    }
    if (result.test(USCRIPT_BOPOMOFO, status)) {
        result.set(USCRIPT_HAN_WITH_BOPOMOFO, status);
    }

    // Section 5.1 step 2
    if (result.test(USCRIPT_COMMON, status) || result.test(USCRIPT_INHERITED, status)) {
        result.setAll();
    }
}
Exemple #2
0
// Computes the restriction level of a string, according to UTS 39 section 5.2.
URestrictionLevel SpoofImpl::getRestrictionLevel(const UnicodeString& input, UErrorCode& status) const {
    // Section 5.2 step 1:
    if (!fAllowedCharsSet->containsAll(input)) {
        return USPOOF_UNRESTRICTIVE;
    }

    // Section 5.2 step 2
    // Java use a static UnicodeSet for this test.  In C++, avoid the static variable
    // and just do a simple for loop.
    UBool allASCII = TRUE;
    for (int32_t i=0, length=input.length(); i<length; i++) {
        if (input.charAt(i) > 0x7f) {
            allASCII = FALSE;
            break;
        }
    }
    if (allASCII) {
        return USPOOF_ASCII;
    }

    // Section 5.2 steps 3:
    ScriptSet resolvedScriptSet;
    getResolvedScriptSet(input, resolvedScriptSet, status);
    if (U_FAILURE(status)) { return USPOOF_UNRESTRICTIVE; }

    // Section 5.2 step 4:
    if (!resolvedScriptSet.isEmpty()) {
        return USPOOF_SINGLE_SCRIPT_RESTRICTIVE;
    }

    // Section 5.2 step 5:
    ScriptSet resolvedNoLatn;
    getResolvedScriptSetWithout(input, USCRIPT_LATIN, resolvedNoLatn, status);
    if (U_FAILURE(status)) { return USPOOF_UNRESTRICTIVE; }

    // Section 5.2 step 6:
    if (resolvedNoLatn.test(USCRIPT_HAN_WITH_BOPOMOFO, status)
            || resolvedNoLatn.test(USCRIPT_JAPANESE, status)
            || resolvedNoLatn.test(USCRIPT_KOREAN, status)) {
        return USPOOF_HIGHLY_RESTRICTIVE;
    }

    // Section 5.2 step 7:
    if (!resolvedNoLatn.isEmpty()
            && !resolvedNoLatn.test(USCRIPT_CYRILLIC, status)
            && !resolvedNoLatn.test(USCRIPT_GREEK, status)
            && !resolvedNoLatn.test(USCRIPT_CHEROKEE, status)) {
        return USPOOF_MODERATELY_RESTRICTIVE;
    }

    // Section 5.2 step 8:
    return USPOOF_MINIMALLY_RESTRICTIVE;
}
Exemple #3
0
// Computes the resolved script set for a string, omitting characters having the specified script.
// If USCRIPT_CODE_LIMIT is passed as the second argument, all characters are included.
void SpoofImpl::getResolvedScriptSetWithout(const UnicodeString& input, UScriptCode script, ScriptSet& result, UErrorCode& status) const {
    result.setAll();

    ScriptSet temp;
    UChar32 codePoint;
    for (int32_t i = 0; i < input.length(); i += U16_LENGTH(codePoint)) {
        codePoint = input.char32At(i);

        // Compute the augmented script set for the character
        getAugmentedScriptSet(codePoint, temp, status);
        if (U_FAILURE(status)) { return; }

        // Intersect the augmented script set with the resolved script set, but only if the character doesn't
        // have the script specified in the function call
        if (script == USCRIPT_CODE_LIMIT || !temp.test(script, status)) {
            result.intersect(temp);
        }
    }
}