C++ (Cpp) IdentifierInfo::setIdentifier Examples

Programming Language: C++ (Cpp)

Class/Type: IdentifierInfo

Method/Function: setIdentifier

Examples at hotexamples.com: 2

C++ (Cpp) IdentifierInfo::setIdentifier - 2 examples found. These are the top rated real world C++ (Cpp) examples of IdentifierInfo::setIdentifier extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

getName(13)

isStr(8)

hasMacroDefinition(8)

getTokenID(6)

getNameStart(5)

getBuiltinID(4)

isOutOfDate(4)

getLength(4)

isPoisoned(3)

isHandleIdentifierCase(3)

setIdentifier(2)

getScriptCount(2)

setChangedSinceDeserialization(2)

isFromAST(2)

setIdentifierProfile(1)

setFETokenInfoChangedSinceDeserialization(1)

isCPlusPlusOperatorKeyword(1)

isFutureCompatKeyword(1)

getTokenKind(1)

getScripts(1)

getRestrictionLevel(1)

getPPKeywordID(1)

getNumerics(1)

setIsPoisoned(1)

Example #1

Show file

File: uspoof.cpp Project: eyoung-father/libicu_full

U_CAPI int32_t U_EXPORT2
uspoof_checkUnicodeString(const USpoofChecker *sc,
                          const icu::UnicodeString &id, 
                          int32_t *position,
                          UErrorCode *status) {
    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (This == NULL) {
        return 0;
    }
    int32_t result = 0;

    IdentifierInfo *identifierInfo = NULL;
    if ((This->fChecks) & (USPOOF_RESTRICTION_LEVEL | USPOOF_MIXED_NUMBERS)) {
        identifierInfo = This->getIdentifierInfo(*status);
        if (U_FAILURE(*status)) {
            goto cleanupAndReturn;
        }
        identifierInfo->setIdentifier(id, *status);
        identifierInfo->setIdentifierProfile(*This->fAllowedCharsSet);
    }


    if ((This->fChecks) & USPOOF_RESTRICTION_LEVEL) {
        URestrictionLevel idRestrictionLevel = identifierInfo->getRestrictionLevel(*status);
        if (idRestrictionLevel > This->fRestrictionLevel) {
            result |= USPOOF_RESTRICTION_LEVEL;
        }
        if (This->fChecks & USPOOF_AUX_INFO) {
            result |= idRestrictionLevel;
        }
    }

    if ((This->fChecks) & USPOOF_MIXED_NUMBERS) {
        const UnicodeSet *numerics = identifierInfo->getNumerics();
        if (numerics->size() > 1) {
            result |= USPOOF_MIXED_NUMBERS;
        }

        // TODO: ICU4J returns the UnicodeSet of the numerics found in the identifier.
        //       We have no easy way to do the same in C.
        // if (checkResult != null) {
        //     checkResult.numerics = numerics;
        // }
    }


    if (This->fChecks & (USPOOF_CHAR_LIMIT)) {
        int32_t i;
        UChar32 c;
        int32_t length = id.length();
        for (i=0; i<length ;) {
            c = id.char32At(i);
            i += U16_LENGTH(c);
            if (!This->fAllowedCharsSet->contains(c)) {
                result |= USPOOF_CHAR_LIMIT;
                break;
            }
        }
    }

    if (This->fChecks & 
        (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_INVISIBLE)) {
        // These are the checks that need to be done on NFD input
        UnicodeString nfdText;
        gNfdNormalizer->normalize(id, nfdText, *status);
        int32_t nfdLength = nfdText.length();

        if (This->fChecks & USPOOF_INVISIBLE) {
           
            // scan for more than one occurence of the same non-spacing mark
            // in a sequence of non-spacing marks.
            int32_t     i;
            UChar32     c;
            UChar32     firstNonspacingMark = 0;
            UBool       haveMultipleMarks = FALSE;  
            UnicodeSet  marksSeenSoFar;   // Set of combining marks in a single combining sequence.
            
            for (i=0; i<nfdLength ;) {
                c = nfdText.char32At(i);
                i += U16_LENGTH(c);
                if (u_charType(c) != U_NON_SPACING_MARK) {
                    firstNonspacingMark = 0;
                    if (haveMultipleMarks) {
                        marksSeenSoFar.clear();
                        haveMultipleMarks = FALSE;
                    }
                    continue;
                }
                if (firstNonspacingMark == 0) {
                    firstNonspacingMark = c;
                    continue;
                }
                if (!haveMultipleMarks) {
                    marksSeenSoFar.add(firstNonspacingMark);
                    haveMultipleMarks = TRUE;
                }
                if (marksSeenSoFar.contains(c)) {
                    // report the error, and stop scanning.
                    // No need to find more than the first failure.
                    result |= USPOOF_INVISIBLE;
                    break;
                }
                marksSeenSoFar.add(c);
            }
        }
       
        
        if (This->fChecks & (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE)) {
            // The basic test is the same for both whole and mixed script confusables.
            // Compute the set of scripts that every input character has a confusable in.
            // For this computation an input character is always considered to be
            // confusable with itself in its own script.
            //
            // If the number of such scripts is two or more, and the input consisted of
            // characters all from a single script, we have a whole script confusable.
            // (The two scripts will be the original script and the one that is confusable)
            //
            // If the number of such scripts >= one, and the original input contained characters from
            // more than one script, we have a mixed script confusable.  (We can transform
            // some of the characters, and end up with a visually similar string all in
            // one script.)

            if (identifierInfo == NULL) {
                identifierInfo = This->getIdentifierInfo(*status);
                if (U_FAILURE(*status)) {
                    goto cleanupAndReturn;
                }
                identifierInfo->setIdentifier(id, *status);
            }

            int32_t scriptCount = identifierInfo->getScriptCount();
            
            ScriptSet scripts;
            This->wholeScriptCheck(nfdText, &scripts, *status);
            int32_t confusableScriptCount = scripts.countMembers();
            //printf("confusableScriptCount = %d\n", confusableScriptCount);
            
            if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) &&
                confusableScriptCount >= 2 &&
                scriptCount == 1) {
                result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
            }
        
            if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) &&
                confusableScriptCount >= 1 &&
                scriptCount > 1) {
                result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
            }
        }
    }

cleanupAndReturn:
    This->releaseIdentifierInfo(identifierInfo);
    if (position != NULL) {
        *position = 0;
    }
    return result;
}

Example #2

Show file

File: uspoof.cpp Project: eyoung-father/libicu_full

U_CAPI int32_t U_EXPORT2
uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
                                  const icu::UnicodeString &id1,
                                  const icu::UnicodeString &id2,
                                  UErrorCode *status) {
    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    // 
    // See section 4 of UAX 39 for the algorithm for checking whether two strings are confusable,
    //   and for definitions of the types (single, whole, mixed-script) of confusables.
    
    // We only care about a few of the check flags.  Ignore the others.
    // If no tests relavant to this function have been specified, return an error.
    // TODO:  is this really the right thing to do?  It's probably an error on the caller's part,
    //        but logically we would just return 0 (no error).
    if ((This->fChecks & (USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | 
                          USPOOF_WHOLE_SCRIPT_CONFUSABLE)) == 0) {
        *status = U_INVALID_STATE_ERROR;
        return 0;
    }
    int32_t  flagsForSkeleton = This->fChecks & USPOOF_ANY_CASE;

    int32_t  result = 0;
    IdentifierInfo *identifierInfo = This->getIdentifierInfo(*status);
    if (U_FAILURE(*status)) {
        return 0;
    }
    identifierInfo->setIdentifier(id1, *status);
    int32_t id1ScriptCount = identifierInfo->getScriptCount();
    int32_t id1FirstScript = identifierInfo->getScripts()->nextSetBit(0);
    identifierInfo->setIdentifier(id2, *status);
    int32_t id2ScriptCount = identifierInfo->getScriptCount();
    int32_t id2FirstScript = identifierInfo->getScripts()->nextSetBit(0);
    This->releaseIdentifierInfo(identifierInfo);
    identifierInfo = NULL;

    if (This->fChecks & USPOOF_SINGLE_SCRIPT_CONFUSABLE) {
        UnicodeString   id1Skeleton;
        UnicodeString   id2Skeleton;
        if (id1ScriptCount <= 1 && id2ScriptCount <= 1 && id1FirstScript == id2FirstScript) {
            flagsForSkeleton |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
            uspoof_getSkeletonUnicodeString(sc, flagsForSkeleton, id1, id1Skeleton, status);
            uspoof_getSkeletonUnicodeString(sc, flagsForSkeleton, id2, id2Skeleton, status);
            if (id1Skeleton == id2Skeleton) {
                result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE;
            }
        }
    }

    if (result & USPOOF_SINGLE_SCRIPT_CONFUSABLE) {
         // If the two inputs are single script confusable they cannot also be
         // mixed or whole script confusable, according to the UAX39 definitions.
         // So we can skip those tests.
         return result;
    }

    // Two identifiers are whole script confusable if each is of a single script 
    // and they are mixed script confusable.
    UBool possiblyWholeScriptConfusables = 
        id1ScriptCount <= 1 && id2ScriptCount <= 1 && (This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE);

    //
    // Mixed Script Check
    //
    if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) || possiblyWholeScriptConfusables ) {
        // For getSkeleton(), resetting the USPOOF_SINGLE_SCRIPT_CONFUSABLE flag will get us
        // the mixed script table skeleton, which is what we want.
        // The Any Case / Lower Case bit in the skelton flags was set at the top of the function.
        UnicodeString id1Skeleton;
        UnicodeString id2Skeleton;
        flagsForSkeleton &= ~USPOOF_SINGLE_SCRIPT_CONFUSABLE;
        uspoof_getSkeletonUnicodeString(sc, flagsForSkeleton, id1, id1Skeleton, status);
        uspoof_getSkeletonUnicodeString(sc, flagsForSkeleton, id2, id2Skeleton, status);
        if (id1Skeleton == id2Skeleton) {
            result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
            if (possiblyWholeScriptConfusables) {
                result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
            }
        }
    }

    return result;
}