Ejemplo n.º 1
0
U_CAPI int32_t U_EXPORT2
uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
                                  const icu::UnicodeString &s1,
                                  const icu::UnicodeString &s2,
                                  UErrorCode *status) {

    const UChar *u1  = s1.getBuffer();
    int32_t  length1 = s1.length();
    const UChar *u2  = s2.getBuffer();
    int32_t  length2 = s2.length();

    int32_t results  = uspoof_areConfusable(sc, u1, length1, u2, length2, status);
    return results;
}
Ejemplo n.º 2
0
	void FStringConverter::ConvertString(const icu::UnicodeString& Source, const int32 SourceStartIndex, const int32 SourceLen, FString& Destination)
	{
		if (Source.length() > 0)
		{
			UErrorCode ICUStatus = U_ZERO_ERROR;

			ucnv_reset(ICUConverter);
			
			// Get the internal buffer of the string, we're going to use it as scratch space
			TArray<TCHAR>& InternalStringBuffer = Destination.GetCharArray();
				
			// Work out the maximum size required and resize the buffer so it can hold enough data
			const int32_t DestinationCapacityBytes = UCNV_GET_MAX_BYTES_FOR_STRING(SourceLen, ucnv_getMaxCharSize(ICUConverter));
			const int32 DestinationCapacityTCHARs = DestinationCapacityBytes / sizeof(TCHAR);
			InternalStringBuffer.SetNumUninitialized(DestinationCapacityTCHARs);

			// Perform the conversion into the string buffer, and then null terminate the FString and size it back down to the correct size
			const int32_t DestinationSizeBytes = ucnv_fromUChars(ICUConverter, reinterpret_cast<char*>(InternalStringBuffer.GetData()), DestinationCapacityBytes, Source.getBuffer() + SourceStartIndex, SourceLen, &ICUStatus);
			const int32 DestinationSizeTCHARs = DestinationSizeBytes / sizeof(TCHAR);
			InternalStringBuffer[DestinationSizeTCHARs] = 0;
			InternalStringBuffer.SetNum(DestinationSizeTCHARs + 1, /*bAllowShrinking*/false); // the array size includes null

			check(U_SUCCESS(ICUStatus));
		}
		else
		{
			Destination.Empty();
		}
	}
Ejemplo n.º 3
0
U_CAPI int32_t U_EXPORT2
uspoof_checkUnicodeString(const USpoofChecker *sc,
                          const icu::UnicodeString &text, 
                          int32_t *position,
                          UErrorCode *status) {
    int32_t result = uspoof_check(sc, text.getBuffer(), text.length(), position, status);
    return result;
}
Ejemplo n.º 4
0
	int32 GetUnicodeStringLengthImpl(const TCHAR* Source, const int32 InSourceStartIndex, const int32 InSourceLength)
	{
		if (InSourceLength > 0)
		{
			const icu::UnicodeString TmpStr = ConvertString(Source, InSourceStartIndex, InSourceLength);
			return TmpStr.length();
		}
		return 0;
	}
Ejemplo n.º 5
0
/**
 * Write an Unitex file content (to system filesystem or filespace)
 * it write from two buffer (prefix and suffix). This is useful for writing both header and footer (or BOM and text...)
 */
    UNITEX_FUNC int UNITEX_CALL WriteUnicodeUnitexFile(const char*filename, icu::UnicodeString const& uString)
    {
        UChar uBom = 0xfeff;

        const UChar * uBuffer = uString.getBuffer();
        int32_t uLength = uString.length();

        bool result = WriteUnitexFile(filename, &uBom, sizeof(UChar), uBuffer, uLength * sizeof(UChar)) == 0;

        return result;
    }
Ejemplo n.º 6
0
static void
printLine(UChar32 start, UChar32 end, Status status, const icu::UnicodeString &mapping) {
    if(start==end) {
        printf("%04lX          ", (long)start);
    } else {
        printf("%04lX..%04lX    ", (long)start, (long)end);
    }
    printf("; %s", statusNames[status]);
    if(status==MAPPED || status==DEVIATION || !mapping.isEmpty()) {
        printf(" ;");
        const UChar *buffer=mapping.getBuffer();
        int32_t length=mapping.length();
        int32_t i=0;
        UChar32 c;
        while(i<length) {
            U16_NEXT(buffer, i, length, c);
            printf(" %04lX", (long)c);
        }
    }
    puts("");
}
Ejemplo n.º 7
0
// --------------------------------------------------------------------------
icu::UnicodeString
processor::normalize_enum (
 icu::UnicodeString const& value
)
// --------------------------------------------------------------------------
{
  icu::UnicodeString normalized;
  int32_t pos;
  bool space_before = false;
  bool leading = true;

  for (pos=0; pos<value.length(); ++pos)
  {
    if (value[pos] == ' ')
    {
      space_before = true;
    }
    else
    {
      if (space_before)
      {
        if (leading)
        {
          leading = false;
        }
        else
        {
          normalized += ' ';
        }

        space_before = false;
      }

      normalized += value[pos];
    }
  }

  return normalized;
}
Ejemplo n.º 8
0
void alignedNormalizeUnicodeString(icu::UnicodeString const& u, IcuNormalizer2Ptr normalizer,
                                   ITakeAlignedChars& out) {
    // TODO: test
    Position start = 0;
    int32 len = u.length(), pos;
    UErrorCode err = U_ZERO_ERROR;
    int nfcPrefixLen = normalizer->spanQuickCheckYes(u, err);
    assert(U_SUCCESS(err));
    assert(len >= 0 && nfcPrefixLen >= 0);
    TokenSpan span;
    span.first = 0;
    icu::StringCharacterIterator it(u);
    while ((pos = it.getIndex()) < nfcPrefixLen) {
        assert(it.hasNext());
        Unicode c = it.next32PostInc();
        span.second = span.first + 1;
        out.takeWithSpan(c, span);
        ++span.first;
    }
    icu::UnicodeString remainder(u.tempSubString(nfcPrefixLen)), normalized;
    CharsFromUnicodeStringImpl chars(remainder);  // TODO: docs say normalizeSecondAndAppend
    IcuNormalizeByChunks<CharsFromUnicodeStringImpl> norm(chars, normalizer);
    norm.takeAllWithSpan(out);
}
Ejemplo n.º 9
0
	jobject operator()(icu::UnicodeString const& value) const {
		return env->NewString(value.getBuffer(), value.length());
	}
Ejemplo n.º 10
0
 inline
 cxxopts::UnicodeStringIterator
 end(const icu::UnicodeString& s)
 {
   return cxxopts::UnicodeStringIterator(&s, s.length());
 }
Ejemplo n.º 11
0
	void FStringConverter::ConvertString(const icu::UnicodeString& Source, FString& Destination)
	{
		return ConvertString(Source, 0, Source.length(), Destination);
	}
Ejemplo n.º 12
0
	int32 GetNativeStringLength(const icu::UnicodeString& Source)
	{
		return GetNativeStringLength(Source, 0, Source.length());
	}
Ejemplo n.º 13
0
U_CAPI int32_t U_EXPORT2
uspoof_checkUnicodeString(const USpoofChecker *sc,
                          const icu::UnicodeString &id, 
                          int32_t *position,
                          UErrorCode *status) {
    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (This == NULL) {
        return 0;
    }
    int32_t result = 0;

    IdentifierInfo *identifierInfo = NULL;
    if ((This->fChecks) & (USPOOF_RESTRICTION_LEVEL | USPOOF_MIXED_NUMBERS)) {
        identifierInfo = This->getIdentifierInfo(*status);
        if (U_FAILURE(*status)) {
            goto cleanupAndReturn;
        }
        identifierInfo->setIdentifier(id, *status);
        identifierInfo->setIdentifierProfile(*This->fAllowedCharsSet);
    }


    if ((This->fChecks) & USPOOF_RESTRICTION_LEVEL) {
        URestrictionLevel idRestrictionLevel = identifierInfo->getRestrictionLevel(*status);
        if (idRestrictionLevel > This->fRestrictionLevel) {
            result |= USPOOF_RESTRICTION_LEVEL;
        }
        if (This->fChecks & USPOOF_AUX_INFO) {
            result |= idRestrictionLevel;
        }
    }

    if ((This->fChecks) & USPOOF_MIXED_NUMBERS) {
        const UnicodeSet *numerics = identifierInfo->getNumerics();
        if (numerics->size() > 1) {
            result |= USPOOF_MIXED_NUMBERS;
        }

        // TODO: ICU4J returns the UnicodeSet of the numerics found in the identifier.
        //       We have no easy way to do the same in C.
        // if (checkResult != null) {
        //     checkResult.numerics = numerics;
        // }
    }


    if (This->fChecks & (USPOOF_CHAR_LIMIT)) {
        int32_t i;
        UChar32 c;
        int32_t length = id.length();
        for (i=0; i<length ;) {
            c = id.char32At(i);
            i += U16_LENGTH(c);
            if (!This->fAllowedCharsSet->contains(c)) {
                result |= USPOOF_CHAR_LIMIT;
                break;
            }
        }
    }

    if (This->fChecks & 
        (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_INVISIBLE)) {
        // These are the checks that need to be done on NFD input
        UnicodeString nfdText;
        gNfdNormalizer->normalize(id, nfdText, *status);
        int32_t nfdLength = nfdText.length();

        if (This->fChecks & USPOOF_INVISIBLE) {
           
            // scan for more than one occurence of the same non-spacing mark
            // in a sequence of non-spacing marks.
            int32_t     i;
            UChar32     c;
            UChar32     firstNonspacingMark = 0;
            UBool       haveMultipleMarks = FALSE;  
            UnicodeSet  marksSeenSoFar;   // Set of combining marks in a single combining sequence.
            
            for (i=0; i<nfdLength ;) {
                c = nfdText.char32At(i);
                i += U16_LENGTH(c);
                if (u_charType(c) != U_NON_SPACING_MARK) {
                    firstNonspacingMark = 0;
                    if (haveMultipleMarks) {
                        marksSeenSoFar.clear();
                        haveMultipleMarks = FALSE;
                    }
                    continue;
                }
                if (firstNonspacingMark == 0) {
                    firstNonspacingMark = c;
                    continue;
                }
                if (!haveMultipleMarks) {
                    marksSeenSoFar.add(firstNonspacingMark);
                    haveMultipleMarks = TRUE;
                }
                if (marksSeenSoFar.contains(c)) {
                    // report the error, and stop scanning.
                    // No need to find more than the first failure.
                    result |= USPOOF_INVISIBLE;
                    break;
                }
                marksSeenSoFar.add(c);
            }
        }
       
        
        if (This->fChecks & (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE)) {
            // The basic test is the same for both whole and mixed script confusables.
            // Compute the set of scripts that every input character has a confusable in.
            // For this computation an input character is always considered to be
            // confusable with itself in its own script.
            //
            // If the number of such scripts is two or more, and the input consisted of
            // characters all from a single script, we have a whole script confusable.
            // (The two scripts will be the original script and the one that is confusable)
            //
            // If the number of such scripts >= one, and the original input contained characters from
            // more than one script, we have a mixed script confusable.  (We can transform
            // some of the characters, and end up with a visually similar string all in
            // one script.)

            if (identifierInfo == NULL) {
                identifierInfo = This->getIdentifierInfo(*status);
                if (U_FAILURE(*status)) {
                    goto cleanupAndReturn;
                }
                identifierInfo->setIdentifier(id, *status);
            }

            int32_t scriptCount = identifierInfo->getScriptCount();
            
            ScriptSet scripts;
            This->wholeScriptCheck(nfdText, &scripts, *status);
            int32_t confusableScriptCount = scripts.countMembers();
            //printf("confusableScriptCount = %d\n", confusableScriptCount);
            
            if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) &&
                confusableScriptCount >= 2 &&
                scriptCount == 1) {
                result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
            }
        
            if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) &&
                confusableScriptCount >= 1 &&
                scriptCount > 1) {
                result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
            }
        }
    }

cleanupAndReturn:
    This->releaseIdentifierInfo(identifierInfo);
    if (position != NULL) {
        *position = 0;
    }
    return result;
}
Ejemplo n.º 14
0
QString
EnabledLocalesModel::unicodeStringToQString( const icu::UnicodeString& sourceStr )
{
    return QString( reinterpret_cast<const QChar*>( sourceStr.getBuffer() ),
                    sourceStr.length() );
}