C++ (Cpp) ScriptSet::countMembers примеры использования

Язык программирования: C++ (Cpp)

Класс/Тип: ScriptSet

Метод/Функция: countMembers

Примеров на hotexamples.com: 3

C++ (Cpp) ScriptSet::countMembers - 3 примера найдено. Это лучшие примеры C++ (Cpp) кода для ScriptSet::countMembers, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

set(3)

countMembers(3)

test(3)

contains(2)

resetAll(2)

setAll(2)

AddScript(1)

RemScript(1)

displayScripts(1)

intersect(1)

intersects(1)

isEmpty(1)

reset(1)

setScriptExtensions(1)

Пример #1

Показать файл

Файл: uspoof.cpp Проект: BrunoReX/palemoon

U_CAPI int32_t U_EXPORT2
uspoof_check(const USpoofChecker *sc,
             const UChar *text, int32_t length,
             int32_t *position,
             UErrorCode *status) {
             
    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (This == NULL) {
        return 0;
    }
    if (length < -1) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if (length == -1) {
        // It's not worth the bother to handle nul terminated strings everywhere.
        //   Just get the length and be done with it.
        length = u_strlen(text);
    }

    int32_t result = 0;
    int32_t failPos = 0x7fffffff;   // TODO: do we have a #define for max int32?

    // A count of the number of non-Common or inherited scripts.
    // Needed for both the SINGLE_SCRIPT and the WHOLE/MIXED_SCIRPT_CONFUSABLE tests.
    // Share the computation when possible.  scriptCount == -1 means that we haven't
    // done it yet.
    int32_t scriptCount = -1;

    if ((This->fChecks) & USPOOF_SINGLE_SCRIPT) {
        scriptCount = This->scriptScan(text, length, failPos, *status);
        // printf("scriptCount (clipped to 2) = %d\n", scriptCount);
        if ( scriptCount >= 2) {
            // Note: scriptCount == 2 covers all cases of the number of scripts >= 2
            result |= USPOOF_SINGLE_SCRIPT;
        }
    }

    if (This->fChecks & USPOOF_CHAR_LIMIT) {
        int32_t i;
        UChar32 c;
        for (i=0; i<length ;) {
            U16_NEXT(text, i, length, c);
            if (!This->fAllowedCharsSet->contains(c)) {
                result |= USPOOF_CHAR_LIMIT;
                if (i < failPos) {
                    failPos = i;
                }
                break;
            }
        }
    }

    if (This->fChecks & 
        (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_INVISIBLE)) {
        // These are the checks that need to be done on NFD input
        NFDBuffer   normalizedInput(text, length, *status);
        const UChar  *nfdText = normalizedInput.getBuffer();
        int32_t      nfdLength = normalizedInput.getLength();

        if (This->fChecks & USPOOF_INVISIBLE) {
           
            // scan for more than one occurence of the same non-spacing mark
            // in a sequence of non-spacing marks.
            int32_t     i;
            UChar32     c;
            UChar32     firstNonspacingMark = 0;
            UBool       haveMultipleMarks = FALSE;  
            UnicodeSet  marksSeenSoFar;   // Set of combining marks in a single combining sequence.
            
            for (i=0; i<nfdLength ;) {
                U16_NEXT(nfdText, i, nfdLength, c);
                if (u_charType(c) != U_NON_SPACING_MARK) {
                    firstNonspacingMark = 0;
                    if (haveMultipleMarks) {
                        marksSeenSoFar.clear();
                        haveMultipleMarks = FALSE;
                    }
                    continue;
                }
                if (firstNonspacingMark == 0) {
                    firstNonspacingMark = c;
                    continue;
                }
                if (!haveMultipleMarks) {
                    marksSeenSoFar.add(firstNonspacingMark);
                    haveMultipleMarks = TRUE;
                }
                if (marksSeenSoFar.contains(c)) {
                    // report the error, and stop scanning.
                    // No need to find more than the first failure.
                    result |= USPOOF_INVISIBLE;
                    failPos = i;
                    // TODO: Bug 8655: failPos is the position in the NFD buffer, but what we want
                    //       to give back to our caller is a position in the original input string.
                    if (failPos > length) {
                        failPos = length;
                    }
                    break;
                }
                marksSeenSoFar.add(c);
            }
        }
       
        
        if (This->fChecks & (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE)) {
            // The basic test is the same for both whole and mixed script confusables.
            // Compute the set of scripts that every input character has a confusable in.
            // For this computation an input character is always considered to be
            //    confusable with itself in its own script.
            // If the number of such scripts is two or more, and the input consisted of
            //   characters all from a single script, we have a whole script confusable.
            //   (The two scripts will be the original script and the one that is confusable)
            // If the number of such scripts >= one, and the original input contained characters from
            //   more than one script, we have a mixed script confusable.  (We can transform
            //   some of the characters, and end up with a visually similar string all in
            //   one script.)

            if (scriptCount == -1) {
                int32_t t;
                scriptCount = This->scriptScan(text, length, t, *status);
            }
            
            ScriptSet scripts;
            This->wholeScriptCheck(nfdText, nfdLength, &scripts, *status);
            int32_t confusableScriptCount = scripts.countMembers();
            //printf("confusableScriptCount = %d\n", confusableScriptCount);
            
            if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) &&
                confusableScriptCount >= 2 &&
                scriptCount == 1) {
                result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
            }
        
            if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) &&
                confusableScriptCount >= 1 &&
                scriptCount > 1) {
                result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
            }
        }
    }
    if (position != NULL && failPos != 0x7fffffff) {
        *position = failPos;
    }
    return result;
}

Пример #2

Показать файл

Файл: identifier_info.cpp Проект: icu-project/icu4c

IdentifierInfo &IdentifierInfo::setIdentifier(const UnicodeString &identifier, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return *this;
    }
    *fIdentifier = identifier;
    clear();
    ScriptSet scriptsForCP;
    UChar32 cp;
    for (int32_t i = 0; i < identifier.length(); i += U16_LENGTH(cp)) {
        cp = identifier.char32At(i);
        // Store a representative character for each kind of decimal digit
        if (u_charType(cp) == U_DECIMAL_DIGIT_NUMBER) {
            // Just store the zero character as a representative for comparison. Unicode guarantees it is cp - value
            fNumerics->add(cp - (UChar32)u_getNumericValue(cp));
        }
        UScriptCode extensions[500];
        int32_t extensionsCount = uscript_getScriptExtensions(cp, extensions, UPRV_LENGTHOF(extensions), &status);
        if (U_FAILURE(status)) {
            return *this;
        }
        scriptsForCP.resetAll();
        for (int32_t j=0; j<extensionsCount; j++) {
            scriptsForCP.set(extensions[j], status);
        }
        scriptsForCP.reset(USCRIPT_COMMON, status);
        scriptsForCP.reset(USCRIPT_INHERITED, status);
        switch (scriptsForCP.countMembers()) {
          case 0: break;
          case 1:
            // Single script, record it.
            fRequiredScripts->Union(scriptsForCP);
            break;
          default:
            if (!fRequiredScripts->intersects(scriptsForCP) 
                    && !uhash_geti(fScriptSetSet, &scriptsForCP)) {
                // If the set hasn't been added already, add it
                //    (Add a copy, fScriptSetSet takes ownership of the copy.)
                uhash_puti(fScriptSetSet, new ScriptSet(scriptsForCP), 1, &status);
            }
            break;
        }
    }
    // Now make a final pass through ScriptSetSet to remove alternates that came before singles.
    // [Kana], [Kana Hira] => [Kana]
    // This is relatively infrequent, so doesn't have to be optimized.
    // We also compute any commonalities among the alternates.
    if (uhash_count(fScriptSetSet) > 0) {
        fCommonAmongAlternates->setAll();
        for (int32_t it = UHASH_FIRST;;) {
            const UHashElement *nextHashEl = uhash_nextElement(fScriptSetSet, &it);
            if (nextHashEl == NULL) {
                break;
            }
            ScriptSet *next = static_cast<ScriptSet *>(nextHashEl->key.pointer);
            // [Kana], [Kana Hira] => [Kana]
            if (fRequiredScripts->intersects(*next)) {
                uhash_removeElement(fScriptSetSet, nextHashEl);
            } else {
                fCommonAmongAlternates->intersect(*next);
                // [[Arab Syrc Thaa]; [Arab Syrc]] => [[Arab Syrc]]
                for (int32_t otherIt = UHASH_FIRST;;) {
                    const UHashElement *otherHashEl = uhash_nextElement(fScriptSetSet, &otherIt);
                    if (otherHashEl == NULL) {
                        break;
                    }
                    ScriptSet *other = static_cast<ScriptSet *>(otherHashEl->key.pointer);
                    if (next != other && next->contains(*other)) {
                        uhash_removeElement(fScriptSetSet, nextHashEl);
                        break;
                    }
                }
            }
        }
    }
    if (uhash_count(fScriptSetSet) == 0) {
        fCommonAmongAlternates->resetAll();
    }
    return *this;
}

Пример #3

Показать файл

Файл: uspoof.cpp Проект: eyoung-father/libicu_full

U_CAPI int32_t U_EXPORT2
uspoof_checkUnicodeString(const USpoofChecker *sc,
                          const icu::UnicodeString &id, 
                          int32_t *position,
                          UErrorCode *status) {
    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (This == NULL) {
        return 0;
    }
    int32_t result = 0;

    IdentifierInfo *identifierInfo = NULL;
    if ((This->fChecks) & (USPOOF_RESTRICTION_LEVEL | USPOOF_MIXED_NUMBERS)) {
        identifierInfo = This->getIdentifierInfo(*status);
        if (U_FAILURE(*status)) {
            goto cleanupAndReturn;
        }
        identifierInfo->setIdentifier(id, *status);
        identifierInfo->setIdentifierProfile(*This->fAllowedCharsSet);
    }


    if ((This->fChecks) & USPOOF_RESTRICTION_LEVEL) {
        URestrictionLevel idRestrictionLevel = identifierInfo->getRestrictionLevel(*status);
        if (idRestrictionLevel > This->fRestrictionLevel) {
            result |= USPOOF_RESTRICTION_LEVEL;
        }
        if (This->fChecks & USPOOF_AUX_INFO) {
            result |= idRestrictionLevel;
        }
    }

    if ((This->fChecks) & USPOOF_MIXED_NUMBERS) {
        const UnicodeSet *numerics = identifierInfo->getNumerics();
        if (numerics->size() > 1) {
            result |= USPOOF_MIXED_NUMBERS;
        }

        // TODO: ICU4J returns the UnicodeSet of the numerics found in the identifier.
        //       We have no easy way to do the same in C.
        // if (checkResult != null) {
        //     checkResult.numerics = numerics;
        // }
    }


    if (This->fChecks & (USPOOF_CHAR_LIMIT)) {
        int32_t i;
        UChar32 c;
        int32_t length = id.length();
        for (i=0; i<length ;) {
            c = id.char32At(i);
            i += U16_LENGTH(c);
            if (!This->fAllowedCharsSet->contains(c)) {
                result |= USPOOF_CHAR_LIMIT;
                break;
            }
        }
    }

    if (This->fChecks & 
        (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_INVISIBLE)) {
        // These are the checks that need to be done on NFD input
        UnicodeString nfdText;
        gNfdNormalizer->normalize(id, nfdText, *status);
        int32_t nfdLength = nfdText.length();

        if (This->fChecks & USPOOF_INVISIBLE) {
           
            // scan for more than one occurence of the same non-spacing mark
            // in a sequence of non-spacing marks.
            int32_t     i;
            UChar32     c;
            UChar32     firstNonspacingMark = 0;
            UBool       haveMultipleMarks = FALSE;  
            UnicodeSet  marksSeenSoFar;   // Set of combining marks in a single combining sequence.
            
            for (i=0; i<nfdLength ;) {
                c = nfdText.char32At(i);
                i += U16_LENGTH(c);
                if (u_charType(c) != U_NON_SPACING_MARK) {
                    firstNonspacingMark = 0;
                    if (haveMultipleMarks) {
                        marksSeenSoFar.clear();
                        haveMultipleMarks = FALSE;
                    }
                    continue;
                }
                if (firstNonspacingMark == 0) {
                    firstNonspacingMark = c;
                    continue;
                }
                if (!haveMultipleMarks) {
                    marksSeenSoFar.add(firstNonspacingMark);
                    haveMultipleMarks = TRUE;
                }
                if (marksSeenSoFar.contains(c)) {
                    // report the error, and stop scanning.
                    // No need to find more than the first failure.
                    result |= USPOOF_INVISIBLE;
                    break;
                }
                marksSeenSoFar.add(c);
            }
        }
       
        
        if (This->fChecks & (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE)) {
            // The basic test is the same for both whole and mixed script confusables.
            // Compute the set of scripts that every input character has a confusable in.
            // For this computation an input character is always considered to be
            // confusable with itself in its own script.
            //
            // If the number of such scripts is two or more, and the input consisted of
            // characters all from a single script, we have a whole script confusable.
            // (The two scripts will be the original script and the one that is confusable)
            //
            // If the number of such scripts >= one, and the original input contained characters from
            // more than one script, we have a mixed script confusable.  (We can transform
            // some of the characters, and end up with a visually similar string all in
            // one script.)

            if (identifierInfo == NULL) {
                identifierInfo = This->getIdentifierInfo(*status);
                if (U_FAILURE(*status)) {
                    goto cleanupAndReturn;
                }
                identifierInfo->setIdentifier(id, *status);
            }

            int32_t scriptCount = identifierInfo->getScriptCount();
            
            ScriptSet scripts;
            This->wholeScriptCheck(nfdText, &scripts, *status);
            int32_t confusableScriptCount = scripts.countMembers();
            //printf("confusableScriptCount = %d\n", confusableScriptCount);
            
            if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) &&
                confusableScriptCount >= 2 &&
                scriptCount == 1) {
                result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
            }
        
            if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) &&
                confusableScriptCount >= 1 &&
                scriptCount > 1) {
                result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
            }
        }
    }

cleanupAndReturn:
    This->releaseIdentifierInfo(identifierInfo);
    if (position != NULL) {
        *position = 0;
    }
    return result;
}