/**
 * UnicodeString flavour of the confusability test.
 *
 * Hands the UTF-16 buffer and length of each string straight to
 * uspoof_areConfusable() and returns whatever that call returns.
 *
 * @param sc     The spoof checker, forwarded unchanged.
 * @param s1     First string to compare.
 * @param s2     Second string to compare.
 * @param status Error code pointer, forwarded unchanged.
 * @return The result of uspoof_areConfusable().
 */
U_CAPI int32_t U_EXPORT2
uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
                                  const icu::UnicodeString &s1,
                                  const icu::UnicodeString &s2,
                                  UErrorCode *status) {
    return uspoof_areConfusable(sc,
                                s1.getBuffer(), s1.length(),
                                s2.getBuffer(), s2.length(),
                                status);
}
/**
 * Converts a range of an ICU string into an FString using ICUConverter.
 *
 * The destination's internal character array is used as the conversion
 * scratch buffer: it is grown to the worst-case size, filled by
 * ucnv_fromUChars(), null terminated and then sized down to the converted
 * length plus the terminator.
 *
 * @param Source            The ICU string to convert from.
 * @param SourceStartIndex  Index of the first UTF-16 code unit of Source to convert.
 * @param SourceLen         Number of UTF-16 code units to convert.
 * @param Destination       Receives the converted text; emptied when Source is empty.
 */
void FStringConverter::ConvertString(const icu::UnicodeString& Source, const int32 SourceStartIndex, const int32 SourceLen, FString& Destination)
{
    if (Source.length() > 0)
    {
        UErrorCode ICUStatus = U_ZERO_ERROR;

        ucnv_reset(ICUConverter);

        // Get the internal buffer of the string, we're going to use it as scratch space
        TArray<TCHAR>& InternalStringBuffer = Destination.GetCharArray();

        // Work out the maximum size required and resize the buffer so it can hold enough data
        const int32_t DestinationCapacityBytes = UCNV_GET_MAX_BYTES_FOR_STRING(SourceLen, ucnv_getMaxCharSize(ICUConverter));
        const int32 DestinationCapacityTCHARs = DestinationCapacityBytes / sizeof(TCHAR);
        InternalStringBuffer.SetNumUninitialized(DestinationCapacityTCHARs);

        // Perform the conversion into the string buffer
        const int32_t DestinationSizeBytes = ucnv_fromUChars(ICUConverter, reinterpret_cast<char*>(InternalStringBuffer.GetData()), DestinationCapacityBytes, Source.getBuffer() + SourceStartIndex, SourceLen, &ICUStatus);

        // Validate the conversion BEFORE trusting the returned size: on failure the
        // byte count is not guaranteed to describe data that fits in the buffer, so
        // indexing with it first could write outside the allocation.
        check(U_SUCCESS(ICUStatus));

        // Null terminate the FString and size it back down to the correct size
        const int32 DestinationSizeTCHARs = DestinationSizeBytes / sizeof(TCHAR);
        InternalStringBuffer[DestinationSizeTCHARs] = 0;
        InternalStringBuffer.SetNum(DestinationSizeTCHARs + 1, /*bAllowShrinking*/false); // the array size includes null
    }
    else
    {
        Destination.Empty();
    }
}
/**
 * UnicodeString flavour of uspoof_check().
 *
 * Passes the string's UTF-16 buffer and length to uspoof_check() together
 * with the remaining arguments, and returns that call's result unchanged.
 *
 * @param sc       The spoof checker, forwarded unchanged.
 * @param text     The string to check.
 * @param position Output pointer, forwarded unchanged.
 * @param status   Error code pointer, forwarded unchanged.
 * @return The result of uspoof_check().
 */
U_CAPI int32_t U_EXPORT2
uspoof_checkUnicodeString(const USpoofChecker *sc,
                          const icu::UnicodeString &text,
                          int32_t *position,
                          UErrorCode *status) {
    const UChar *chars = text.getBuffer();
    const int32_t charCount = text.length();
    return uspoof_check(sc, chars, charCount, position, status);
}
/**
 * Returns the length of the ICU string produced by converting a range of a
 * TCHAR string.
 *
 * @param Source              The TCHAR string to measure.
 * @param InSourceStartIndex  Start index of the range, passed to ConvertString.
 * @param InSourceLength      Length of the range; values <= 0 yield 0 without converting.
 * @return The length() of the converted icu::UnicodeString, or 0 for an empty range.
 */
int32 GetUnicodeStringLengthImpl(const TCHAR* Source, const int32 InSourceStartIndex, const int32 InSourceLength)
{
    // Nothing to convert for an empty (or negative) range.
    if (InSourceLength <= 0)
    {
        return 0;
    }

    const icu::UnicodeString Converted = ConvertString(Source, InSourceStartIndex, InSourceLength);
    return Converted.length();
}
/**
 * Writes a UnicodeString to a Unitex file (system filesystem or filespace).
 *
 * WriteUnitexFile() takes two buffers; here the first one holds the
 * 0xfeff byte order mark and the second one holds the string's UTF-16
 * code units.
 *
 * @param filename Name of the file to write.
 * @param uString  Text to write after the byte order mark.
 * @return 1 when WriteUnitexFile() returned 0, otherwise 0.
 */
UNITEX_FUNC int UNITEX_CALL WriteUnicodeUnitexFile(const char*filename, icu::UnicodeString const& uString)
{
    UChar byteOrderMark = 0xfeff;

    const UChar * text = uString.getBuffer();
    int32_t textUnits = uString.length();

    // Sizes are passed in bytes, hence the sizeof(UChar) scaling.
    bool written = WriteUnitexFile(filename,
                                   &byteOrderMark, sizeof(UChar),
                                   text, textUnits * sizeof(UChar)) == 0;
    return written;
}
/**
 * Prints one output line for a code point or code point range.
 *
 * Format: "<start>[..<end>] ; <status name>[ ; <mapping code points>]",
 * with all code points in upper-case hex padded to at least four digits.
 * The mapping column is printed when status is MAPPED or DEVIATION, or
 * whenever the mapping string is non-empty.
 *
 * @param start   First code point of the range.
 * @param end     Last code point of the range; equal to start for a single code point.
 * @param status  Index into statusNames for the status column.
 * @param mapping Mapping string whose code points are listed in the last column.
 */
static void printLine(UChar32 start, UChar32 end, Status status, const icu::UnicodeString &mapping) {
    if(start!=end) {
        printf("%04lX..%04lX ", (long)start, (long)end);
    } else {
        printf("%04lX ", (long)start);
    }
    printf("; %s", statusNames[status]);

    const bool showMapping=(status==MAPPED || status==DEVIATION || !mapping.isEmpty());
    if(showMapping) {
        printf(" ;");
        const UChar *units=mapping.getBuffer();
        const int32_t unitCount=mapping.length();
        // U16_NEXT advances offset past each decoded code point.
        for(int32_t offset=0; offset<unitCount;) {
            UChar32 codePoint;
            U16_NEXT(units, offset, unitCount, codePoint);
            printf(" %04lX", (long)codePoint);
        }
    }
    puts("");
}
// -------------------------------------------------------------------------- icu::UnicodeString processor::normalize_enum ( icu::UnicodeString const& value ) // -------------------------------------------------------------------------- { icu::UnicodeString normalized; int32_t pos; bool space_before = false; bool leading = true; for (pos=0; pos<value.length(); ++pos) { if (value[pos] == ' ') { space_before = true; } else { if (space_before) { if (leading) { leading = false; } else { normalized += ' '; } space_before = false; } normalized += value[pos]; } } return normalized; }
/**
 * Normalizes u with the given normalizer, reporting each output character to
 * out together with a span.
 *
 * The prefix of u that spanQuickCheckYes() accepts is passed through
 * unchanged, one code point at a time, with spans [i, i+1) counted per code
 * point. The remainder is handed to IcuNormalizeByChunks, which emits the
 * rest via takeAllWithSpan().
 * NOTE(review): the spans produced for the remainder appear to restart
 * relative to the remainder string rather than to u - confirm against
 * IcuNormalizeByChunks.
 *
 * Unused locals from the previous version (start, normalized, pos) were
 * removed; behavior is unchanged.
 *
 * @param u          String to normalize.
 * @param normalizer Normalizer used for the quick check and for normalizing the remainder.
 * @param out        Receiver of the (character, span) pairs.
 */
void alignedNormalizeUnicodeString(icu::UnicodeString const& u, IcuNormalizer2Ptr normalizer,
                                   ITakeAlignedChars& out) {
  // TODO: test
  UErrorCode err = U_ZERO_ERROR;
  int nfcPrefixLen = normalizer->spanQuickCheckYes(u, err);
  assert(U_SUCCESS(err));
  assert(u.length() >= 0 && nfcPrefixLen >= 0);

  // Already-normalized prefix: forward each code point with a unit-width span.
  TokenSpan span;
  span.first = 0;
  icu::StringCharacterIterator it(u);
  while (it.getIndex() < nfcPrefixLen) {
    assert(it.hasNext());
    Unicode c = it.next32PostInc();
    span.second = span.first + 1;
    out.takeWithSpan(c, span);
    ++span.first;
  }

  // Everything after the prefix goes through the chunked normalizer.
  icu::UnicodeString remainder(u.tempSubString(nfcPrefixLen));
  CharsFromUnicodeStringImpl chars(remainder);
  // TODO: docs say normalizeSecondAndAppend
  IcuNormalizeByChunks<CharsFromUnicodeStringImpl> norm(chars, normalizer);
  norm.takeAllWithSpan(out);
}
/// Builds a Java string from the UTF-16 contents of an ICU string by calling
/// env->NewString() with the string's buffer and length.
jobject operator()(icu::UnicodeString const& value) const
{
    const int32_t unitCount = value.length();
    return env->NewString(value.getBuffer(), unitCount);
}
/// Returns the past-the-end iterator for s: an iterator over s positioned at
/// index s.length().
inline cxxopts::UnicodeStringIterator
end(const icu::UnicodeString& s)
{
    const int32_t pastLast = s.length();
    return cxxopts::UnicodeStringIterator(&s, pastLast);
}
/**
 * Converts an entire ICU string into an FString.
 *
 * Convenience overload that forwards to the range overload with a start
 * index of 0 and the full length of Source.
 *
 * @param Source       The ICU string to convert from.
 * @param Destination  Receives the converted text.
 */
void FStringConverter::ConvertString(const icu::UnicodeString& Source, FString& Destination)
{
    const int32 WholeLength = Source.length();
    ConvertString(Source, 0, WholeLength, Destination);
}
/**
 * Returns the native string length for an entire ICU string.
 *
 * Forwards to the range overload with a start index of 0 and the full
 * length of Source.
 *
 * @param Source  The ICU string to measure.
 * @return Whatever the range overload returns for the whole string.
 */
int32 GetNativeStringLength(const icu::UnicodeString& Source)
{
    const int32 FullLength = Source.length();
    return GetNativeStringLength(Source, 0, FullLength);
}
/**
 * Runs the checks enabled in the checker's fChecks mask against an identifier.
 *
 * Each enabled check that fails sets its own USPOOF_* bit in the result.
 * When USPOOF_AUX_INFO is enabled together with USPOOF_RESTRICTION_LEVEL,
 * the identifier's restriction level value is also OR-ed into the result.
 *
 * @param sc       The spoof checker; validated with SpoofImpl::validateThis().
 * @param id       The identifier to check.
 * @param position If non-NULL, always set to 0 on the paths that reach the
 *                 cleanup label (it is not written when validation of sc fails).
 * @param status   In/out error code, passed by reference to the helpers called here.
 * @return A bit mask of the checks that failed; 0 if validateThis() returned
 *         NULL. If obtaining the identifier info fails, the bits accumulated
 *         up to that point are returned.
 */
U_CAPI int32_t U_EXPORT2
uspoof_checkUnicodeString(const USpoofChecker *sc,
                          const icu::UnicodeString &id,
                          int32_t *position,
                          UErrorCode *status) {
    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (This == NULL) {
        return 0;
    }
    int32_t result = 0;

    // The identifier info is acquired lazily: here when a check that needs it
    // up front is enabled, or later inside the script-confusable section.
    // It is handed back at cleanupAndReturn in either case.
    IdentifierInfo *identifierInfo = NULL;
    if ((This->fChecks) & (USPOOF_RESTRICTION_LEVEL | USPOOF_MIXED_NUMBERS)) {
        identifierInfo = This->getIdentifierInfo(*status);
        if (U_FAILURE(*status)) {
            goto cleanupAndReturn;
        }
        // NOTE(review): status is not re-checked after setIdentifier().
        identifierInfo->setIdentifier(id, *status);
        identifierInfo->setIdentifierProfile(*This->fAllowedCharsSet);
    }

    // Restriction level: fail when the identifier's level exceeds the
    // level configured on the checker.
    if ((This->fChecks) & USPOOF_RESTRICTION_LEVEL) {
        URestrictionLevel idRestrictionLevel = identifierInfo->getRestrictionLevel(*status);
        if (idRestrictionLevel > This->fRestrictionLevel) {
            result |= USPOOF_RESTRICTION_LEVEL;
        }
        if (This->fChecks & USPOOF_AUX_INFO) {
            // Report the computed level itself in the result bits.
            result |= idRestrictionLevel;
        }
    }

    // Mixed numbers: fail when the numerics set reported for the identifier
    // has more than one member.
    if ((This->fChecks) & USPOOF_MIXED_NUMBERS) {
        const UnicodeSet *numerics = identifierInfo->getNumerics();
        if (numerics->size() > 1) {
            result |= USPOOF_MIXED_NUMBERS;
        }

        // TODO: ICU4J returns the UnicodeSet of the numerics found in the identifier.
        //       We have no easy way to do the same in C.
        // if (checkResult != null) {
        //     checkResult.numerics = numerics;
        // }
    }

    // Character limit: fail on the first code point that is not in the
    // allowed-characters set.
    if (This->fChecks & (USPOOF_CHAR_LIMIT)) {
        int32_t i;
        UChar32 c;
        int32_t length = id.length();
        for (i=0; i<length ;) {
            c = id.char32At(i);
            i += U16_LENGTH(c);   // advance by 1 or 2 UTF-16 code units
            if (!This->fAllowedCharsSet->contains(c)) {
                result |= USPOOF_CHAR_LIMIT;
                break;
            }
        }
    }

    if (This->fChecks &
        (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_INVISIBLE)) {
        // These are the checks that need to be done on NFD input
        // NOTE(review): status is not checked after normalize().
        UnicodeString nfdText;
        gNfdNormalizer->normalize(id, nfdText, *status);
        int32_t nfdLength = nfdText.length();

        if (This->fChecks & USPOOF_INVISIBLE) {

            // scan for more than one occurrence of the same non-spacing mark
            // in a sequence of non-spacing marks.
            int32_t i;
            UChar32 c;
            UChar32 firstNonspacingMark = 0;
            UBool haveMultipleMarks = FALSE;
            UnicodeSet marksSeenSoFar;   // Set of combining marks in a single combining sequence.

            for (i=0; i<nfdLength ;) {
                c = nfdText.char32At(i);
                i += U16_LENGTH(c);
                if (u_charType(c) != U_NON_SPACING_MARK) {
                    // A non-mark ends the current sequence; reset the state.
                    firstNonspacingMark = 0;
                    if (haveMultipleMarks) {
                        marksSeenSoFar.clear();
                        haveMultipleMarks = FALSE;
                    }
                    continue;
                }
                if (firstNonspacingMark == 0) {
                    // First mark of a sequence: remember it without touching
                    // the set, which is only populated once a second mark shows up.
                    firstNonspacingMark = c;
                    continue;
                }
                if (!haveMultipleMarks) {
                    marksSeenSoFar.add(firstNonspacingMark);
                    haveMultipleMarks = TRUE;
                }
                if (marksSeenSoFar.contains(c)) {
                    // report the error, and stop scanning.
                    // No need to find more than the first failure.
                    result |= USPOOF_INVISIBLE;
                    break;
                }
                marksSeenSoFar.add(c);
            }
        }

        if (This->fChecks & (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE)) {
            // The basic test is the same for both whole and mixed script confusables.
            // Compute the set of scripts that every input character has a confusable in.
            // For this computation an input character is always considered to be
            // confusable with itself in its own script.
            //
            // If the number of such scripts is two or more, and the input consisted of
            // characters all from a single script, we have a whole script confusable.
            // (The two scripts will be the original script and the one that is confusable)
            //
            // If the number of such scripts >= one, and the original input contained characters from
            // more than one script, we have a mixed script confusable.  (We can transform
            // some of the characters, and end up with a visually similar string all in
            // one script.)

            // Acquire the identifier info now if no earlier check needed it.
            if (identifierInfo == NULL) {
                identifierInfo = This->getIdentifierInfo(*status);
                if (U_FAILURE(*status)) {
                    goto cleanupAndReturn;
                }
                identifierInfo->setIdentifier(id, *status);
            }

            int32_t scriptCount = identifierInfo->getScriptCount();

            ScriptSet scripts;
            This->wholeScriptCheck(nfdText, &scripts, *status);
            int32_t confusableScriptCount = scripts.countMembers();
            //printf("confusableScriptCount = %d\n", confusableScriptCount);

            if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) &&
                confusableScriptCount >= 2 &&
                scriptCount == 1) {
                result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE;
            }

            if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) &&
                confusableScriptCount >= 1 &&
                scriptCount > 1) {
                result |= USPOOF_MIXED_SCRIPT_CONFUSABLE;
            }
        }
    }

cleanupAndReturn:
    // Single exit point: always hand the identifier info back (it may still
    // be NULL here) and report position 0 to the caller.
    This->releaseIdentifierInfo(identifierInfo);
    if (position != NULL) {
        *position = 0;
    }
    return result;
}
/**
 * Converts an ICU string to a QString.
 *
 * The ICU buffer is reinterpreted as an array of QChar and copied into a
 * new QString of sourceStr.length() characters.
 *
 * @param sourceStr The ICU string to convert.
 * @return A QString built from the buffer of sourceStr.
 */
QString
EnabledLocalesModel::unicodeStringToQString( const icu::UnicodeString& sourceStr )
{
    const QChar* utf16 = reinterpret_cast<const QChar*>( sourceStr.getBuffer() );
    return QString( utf16, sourceStr.length() );
}