C++ (Cpp) UnicodeSimpleCaseFold Examples

Programming Language: C++ (Cpp)

Method/Function: UnicodeSimpleCaseFold

Examples at hotexamples.com: 2

C++ (Cpp) UnicodeSimpleCaseFold - 2 examples found. These are the top rated real world C++ (Cpp) examples of UnicodeSimpleCaseFold extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: unicodeSimpleTransforms.c Project: nf-mlo/open-vm-tools

/*
 * Unicode_FoldCase --
 *
 *      Builds a case-folded copy of 'str'.  The input is converted to a
 *      freshly allocated, NUL-terminated UTF-16 buffer, every code unit
 *      is passed through UnicodeSimpleCaseFold() in place, and the
 *      result is turned back into a Unicode string with
 *      Unicode_AllocWithUTF16().  The temporary buffer is freed before
 *      returning.
 *
 *      Folding works on one UTF-16 code unit at a time, so it cannot
 *      produce a folding that needs more code units than the input.
 *
 *      NOTE(review): the UTF-16 buffer is dereferenced without a NULL
 *      check -- confirm Unicode_GetAllocBytes() cannot fail here.
 *
 * Results:
 *      The string returned by Unicode_AllocWithUTF16(); presumably the
 *      caller owns it and must release it -- verify against the API.
 */
Unicode
Unicode_FoldCase(ConstUnicode str) // IN
{
   utf16_t *units;
   utf16_t *cursor;
   Unicode result;

   ASSERT(str);

   units = Unicode_GetAllocBytes(str, STRING_ENCODING_UTF16);

   // Fold each code unit in place until the terminating NUL unit.
   for (cursor = units; *cursor != 0; cursor++) {
      *cursor = UnicodeSimpleCaseFold(*cursor);
   }

   result = Unicode_AllocWithUTF16(units);
   free(units);

   return result;
}

Example #2

Show file

File: unicodeSimpleOperations.c Project: raphaeldias/vmware

/*
 * Unicode_CompareRange --
 *
 *      Compares a range of 'str1' against a range of 'str2', optionally
 *      ignoring case via simple (one code unit at a time) case folding.
 *
 *      Both ranges are first passed through UnicodePinIndices()
 *      (presumably clamping them to the string bounds -- confirm),
 *      extracted with Unicode_Substr(), and converted to UTF-16.  The
 *      two UTF-16 buffers are then walked in lock-step until the first
 *      differing code unit or the shared terminator.
 *
 * Results:
 *       0 if both ranges are equal.
 *      -1 if the first differing code point of the str1 range is lower
 *         than that of the str2 range.
 *       1 if it is higher.
 *      -1 is also returned when a substring or its UTF-16 conversion
 *         comes back NULL, since 'result' starts out as -1.
 */
int
Unicode_CompareRange(ConstUnicode str1,       // IN
                     UnicodeIndex str1Start,  // IN
                     UnicodeIndex str1Length, // IN
                     ConstUnicode str2,       // IN
                     UnicodeIndex str2Start,  // IN
                     UnicodeIndex str2Length, // IN
                     Bool ignoreCase)         // IN
{
   int result = -1;
   Unicode sub1 = NULL;
   Unicode sub2 = NULL;
   utf16_t *buf1 = NULL;
   utf16_t *buf2 = NULL;
   UnicodeIndex pos = 0;
   UnicodeIndex decodePos;
   utf16_t unit1;
   utf16_t unit2;
   uint32 cp1;
   uint32 cp2;

   UnicodePinIndices(str1, &str1Start, &str1Length);
   UnicodePinIndices(str2, &str2Start, &str2Length);

   /*
    * TODO: Allocating substrings is a performance hit; ideally the
    * comparison would run in place.  (Walking UTF-8 directly needs
    * extra care, so UTF-16 copies are used instead.)
    */
   sub1 = Unicode_Substr(str1, str1Start, str1Length);
   if (sub1 == NULL) {
      goto out;
   }

   sub2 = Unicode_Substr(str2, str2Start, str2Length);
   if (sub2 == NULL) {
      goto out;
   }

   /*
    * XXX TODO: The incoming strings should be normalized to NFC or NFD
    * before they are compared.
    */
   buf1 = Unicode_GetAllocUTF16(sub1);
   if (buf1 == NULL) {
      goto out;
   }

   buf2 = Unicode_GetAllocUTF16(sub2);
   if (buf2 == NULL) {
      goto out;
   }

   /*
    * Walk both buffers in lock-step.  The loop is left with 'pos' at
    * the first differing code unit; reaching the terminator in both
    * buffers at once means the ranges are equal.
    */
   for (;;) {
      unit1 = buf1[pos];
      unit2 = buf2[pos];

      /*
       * TODO: Simple case folding cannot handle foldings whose result
       * needs more than one code unit.
       *
       * This means that German "straBe" (where B = sharp S, U+00DF)
       * will not match "STRASSE", even though the two strings are the
       * same.
       */
      if (ignoreCase) {
         unit1 = UnicodeSimpleCaseFold(unit1);
         unit2 = UnicodeSimpleCaseFold(unit2);
      }

      if (unit1 != unit2) {
         break;
      }

      if (unit1 == 0) {
         // Both strings ended at the same position: they are equal.
         result = 0;
         goto out;
      }

      pos++;
   }

   /*
    * The code units at 'pos' differ.  By default the code point is the
    * (possibly folded) code unit itself; when the unit is a surrogate,
    * re-read from the buffer so that a surrogate pair (values past
    * U+FFFF) is decoded into a full code point.
    */
   cp1 = unit1;
   if (U16_IS_SURROGATE(unit1)) {
      ssize_t len1 = Unicode_UTF16Strlen(buf1);

      // U16_NEXT advances its index argument, so hand it a copy.
      decodePos = pos;
      U16_NEXT(buf1, decodePos, len1, cp1);
   }

   cp2 = unit2;
   if (U16_IS_SURROGATE(unit2)) {
      ssize_t len2 = Unicode_UTF16Strlen(buf2);

      decodePos = pos;
      U16_NEXT(buf2, decodePos, len2, cp2);
   }

   if (cp1 == cp2) {
      // Equal strings already left through 'out' inside the loop.
      NOT_REACHED();
   } else {
      result = (cp1 < cp2) ? -1 : 1;
   }

  out:
   free(buf1);
   free(buf2);

   Unicode_Free(sub1);
   Unicode_Free(sub2);

   return result;
}