示例#1
0
/*
 * Unicode_ReplaceRange --
 *
 *      Builds a new string out of three pieces joined in order:
 *        1. the part of dest that precedes destStart,
 *        2. the (srcStart, srcLength) range of src,
 *        3. the part of dest that follows the replaced range.
 *      Neither dest nor src is modified.
 *
 *      The asserts accept -1 for every index/length.  A length of -1 is
 *      used in this function as "through the end of the string" (see the
 *      tail extraction).  When destLength is -1 the replaced range runs to
 *      the end of dest, and when destStart is -1 the head extraction (which
 *      receives destStart as a length) already covers all of dest; in both
 *      cases nothing of dest may follow the replacement, so no tail piece
 *      is produced.  Previously the tail was taken from
 *      destStart + destLength even then, which re-included part of dest.
 *
 * Results:
 *      The value returned by Unicode_Join.  Presumably a newly allocated
 *      string the caller must release -- confirm against Unicode_Join.
 */
char *
Unicode_ReplaceRange(const char *dest,         // IN:
                     UnicodeIndex destStart,   // IN:
                     UnicodeIndex destLength,  // IN:
                     const char *src,          // IN:
                     UnicodeIndex srcStart,    // IN:
                     UnicodeIndex srcLength)   // IN:
{
   char *result;
   char *stringOne;
   char *stringTwo;
   char *stringThree;

   ASSERT(dest);
   ASSERT((destStart >= 0) || (destStart == -1));
   ASSERT((destLength >= 0) || (destLength == -1));

   ASSERT(src);
   ASSERT((srcStart >= 0) || (srcStart == -1));
   ASSERT((srcLength >= 0) || (srcLength == -1));

   stringOne = Unicode_Substr(dest, 0, destStart);
   stringTwo = Unicode_Substr(src, srcStart, srcLength);

   if ((destStart == -1) || (destLength == -1)) {
      /*
       * The head or the replaced range extends to the end of dest, so
       * there is no remainder.  Adding destStart and destLength here
       * would yield a bogus start index (e.g. destStart - 1).
       */
      stringThree = NULL;
      result = Unicode_Join(stringOne, stringTwo, NULL);
   } else {
      stringThree = Unicode_Substr(dest, destStart + destLength, -1);
      result = Unicode_Join(stringOne, stringTwo, stringThree, NULL);
   }

   free(stringOne);
   free(stringTwo);
   free(stringThree);   // free(NULL) is a no-op when no tail was built.

   return result;
}
示例#2
0
/*
 * File_GetPathName --
 *
 *      Splits fullPath with File_SplitName and hands back the directory
 *      portion (prefixed by the volume, when the volume is non-empty) with
 *      any trailing directory separators removed, plus the base name.
 *
 *      Both outputs are optional.  When pathName is NULL only baseName is
 *      filled in (by File_SplitName) and no further work is done.
 *
 *      Strings stored through pathName / baseName come from the Unicode_*
 *      allocators; presumably the caller releases them with Unicode_Free --
 *      confirm against callers.
 */
void
File_GetPathName(ConstUnicode fullPath,  // IN:
                 Unicode *pathName,      // OUT (OPT):
                 Unicode *baseName)      // OUT (OPT):
{
   Unicode vol;
   UnicodeIndex fullLen;
   UnicodeIndex keep;

   File_SplitName(fullPath, &vol, pathName, baseName);

   /*
    * Re-attach the volume in front of the directory.  It may be empty, in
    * which case the directory is already complete.
    */

   if ((pathName != NULL) && !Unicode_IsEmpty(vol)) {
      Unicode withVolume = Unicode_Append(vol, *pathName);

      Unicode_Free(*pathName);
      *pathName = withVolume;
   }

   /* The volume is only a temporary here, whichever path was taken. */
   Unicode_Free(vol);

   if (pathName == NULL) {
      return;
   }

   /*
    * Walk backwards from the end for as long as the last retained code
    * point is a directory separator, i.e. the first separator located at
    * or after index (keep - 1) sits exactly at (keep - 1).
    */

   fullLen = Unicode_LengthInCodePoints(*pathName);

   for (keep = fullLen; keep > 0; keep--) {
      if (FileFirstSlashIndex(*pathName, keep - 1) != keep - 1) {
         break;
      }
   }

   /*
    * Only reallocate when at least one separator was actually dropped.
    */

   if (keep < fullLen) {
      Unicode trimmed = Unicode_Substr(*pathName, 0, keep);

      Unicode_Free(*pathName);
      *pathName = trimmed;
   }
}
示例#3
0
/*
 * File_SplitName --
 *
 *      Breaks pathName into three freshly allocated strings:
 *
 *        volume     On _WIN32 builds, either a UNC prefix ("\\foo\bar" or
 *                   "//foo/bar" up to, but excluding, the following
 *                   separator) or a two code point drive prefix whose
 *                   second code point is ':'.  Always "" on other builds.
 *        directory  Everything between the end of the volume and the start
 *                   of the base, including the final separator; "" when
 *                   that span is empty.
 *        base       Everything after the last separator, or "" when the
 *                   last separator lies inside the volume.
 *
 *      Each output pointer is optional; a component the caller did not ask
 *      for is released here with Unicode_Free.
 */
void
File_SplitName(ConstUnicode pathName,  // IN:
               Unicode *volume,        // OUT (OPT):
               Unicode *directory,     // OUT (OPT):
               Unicode *base)          // OUT (OPT):
{
   Unicode volPart;
   Unicode dirPart;
   Unicode basePart;
   UnicodeIndex volLimit = 0;   // One past the volume; 0 means "no volume".
   UnicodeIndex lastSlash;
   UnicodeIndex baseStart;
   UnicodeIndex dirLen;
   WIN32_ONLY(UnicodeIndex pathLen);

   ASSERT(pathName);

   /*
    * Volume component.
    */

#if defined(_WIN32)
   pathLen = Unicode_LengthInCodePoints(pathName);

   if ((pathLen > 2) &&
       (Unicode_StartsWith(pathName, "\\\\") ||
        Unicode_StartsWith(pathName, "//"))) {
      /*
       * UNC path.  Look for the separator that follows the first name; if
       * there is none we have something like \\foo, which is bogus, and
       * the volume stays empty.
       */
      UnicodeIndex firstSep = FileFirstSlashIndex(pathName, 2);

      if (firstSep != UNICODE_INDEX_NOT_FOUND) {
         /*
          * The volume ends at the next separator.  A path such as
          * \\foo\bar has none and is legal: the whole path is the volume.
          */
         UnicodeIndex secondSep = FileFirstSlashIndex(pathName, firstSep + 1);

         volLimit = (secondSep == UNICODE_INDEX_NOT_FOUND) ? pathLen
                                                           : secondSep;
      }
   } else if ((pathLen >= 2) &&
              (Unicode_FindSubstrInRange(pathName, 1, 1, ":", 0,
                                         1) != UNICODE_INDEX_NOT_FOUND)) {
      /* Drive-letter path: the volume is the first two code points. */
      volLimit = 2;
   }

   volPart = (volLimit > 0) ? Unicode_Substr(pathName, 0, volLimit)
                            : Unicode_Duplicate("");
#else
   volPart = Unicode_Duplicate("");
#endif /* _WIN32 */

   /*
    * Base component: starts right after the last separator, or at the very
    * beginning when the path has no separator at all.
    */

   lastSlash = FileLastSlashIndex(pathName, 0);
   baseStart = (lastSlash == UNICODE_INDEX_NOT_FOUND) ? 0 : lastSlash + 1;

   /* A base that would begin inside the volume is reported as empty. */
   basePart = (baseStart >= volLimit) ? Unicode_Substr(pathName, baseStart, -1)
                                      : Unicode_Duplicate("");

   /*
    * Directory component: whatever lies between volume and base.
    */

   dirLen = baseStart - volLimit;
   dirPart = (dirLen > 0) ? Unicode_Substr(pathName, volLimit, dirLen)
                          : Unicode_Duplicate("");

   /*
    * Hand each piece to the caller, or release it if it was not requested.
    */

   if (volume != NULL) {
      *volume = volPart;
   } else {
      Unicode_Free(volPart);
   }

   if (directory != NULL) {
      *directory = dirPart;
   } else {
      Unicode_Free(dirPart);
   }

   if (base != NULL) {
      *base = basePart;
   } else {
      Unicode_Free(basePart);
   }
}
/*
 * Unicode_CompareRange --
 *
 *      Compares the (str1Start, str1Length) range of str1 against the
 *      (str2Start, str2Length) range of str2, optionally after simple case
 *      folding of each UTF-16 code unit.
 *
 *      Both ranges are first passed through UnicodePinIndices (presumably
 *      clamping them to the strings' bounds -- confirm), copied out with
 *      Unicode_Substr and converted to UTF-16.
 *
 * Results:
 *       0  the two ranges are equal,
 *      -1  the str1 range orders before the str2 range by code point,
 *       1  the str1 range orders after the str2 range by code point.
 *      -1 is also returned when a substring or UTF-16 conversion comes back
 *      NULL, so that failure cannot be told apart from "less than".
 */
int
Unicode_CompareRange(ConstUnicode str1,       // IN
                     UnicodeIndex str1Start,  // IN
                     UnicodeIndex str1Length, // IN
                     ConstUnicode str2,       // IN
                     UnicodeIndex str2Start,  // IN
                     UnicodeIndex str2Length, // IN
                     Bool ignoreCase)         // IN
{
   int verdict = -1;
   Unicode lhs = NULL;
   Unicode rhs = NULL;
   utf16_t *lhsUnits = NULL;
   utf16_t *rhsUnits = NULL;
   UnicodeIndex pos;
   utf16_t unitL;
   utf16_t unitR;
   uint32 pointL;
   uint32 pointR;

   UnicodePinIndices(str1, &str1Start, &str1Length);
   UnicodePinIndices(str2, &str2Start, &str2Length);

   /*
    * TODO: Copying the ranges out costs allocations; the comparison could
    * be done in place.  (Walking UTF-8 directly needs more care, and
    * UTF-16 is simply easier to work with.)
    */
   lhs = Unicode_Substr(str1, str1Start, str1Length);
   if (lhs == NULL) {
      goto out;
   }

   rhs = Unicode_Substr(str2, str2Start, str2Length);
   if (rhs == NULL) {
      goto out;
   }

   /*
    * XXX TODO: The inputs should be normalized to NFC or NFD first.
    */
   lhsUnits = Unicode_GetAllocUTF16(lhs);
   if (lhsUnits == NULL) {
      goto out;
   }

   rhsUnits = Unicode_GetAllocUTF16(rhs);
   if (rhsUnits == NULL) {
      goto out;
   }

   /*
    * Advance through both buffers in lock step until the code units
    * disagree or the shared terminator is reached.
    */
   for (pos = 0; ; pos++) {
      unitL = lhsUnits[pos];
      unitR = rhsUnits[pos];

      /*
       * TODO: Simple case folding cannot cope with foldings that expand
       * to more than one code unit.  German "straBe" (B = sharp S,
       * U+00DF) therefore does not match "STRASSE" even though the two
       * are equivalent.
       */
      if (ignoreCase) {
         unitL = UnicodeSimpleCaseFold(unitL);
         unitR = UnicodeSimpleCaseFold(unitR);
      }

      if (unitL != unitR) {
         break;
      }

      if (unitL == 0) {
         // Both strings ended together without a mismatch: equal.
         verdict = 0;
         goto out;
      }
   }

   /*
    * The code units at pos differ.  Order is decided on full code points,
    * so a surrogate is run through U16_NEXT to pick up its partner (for
    * values past U+FFFF) before comparing.
    */
   if (U16_IS_SURROGATE(unitL)) {
      ssize_t unitCount = Unicode_UTF16Strlen(lhsUnits);
      // U16_NEXT advances the index it is given; hand it a scratch copy.
      UnicodeIndex cursor = pos;

      U16_NEXT(lhsUnits, cursor, unitCount, pointL);
   } else {
      // Not a surrogate: the code unit is the code point.
      pointL = unitL;
   }

   if (U16_IS_SURROGATE(unitR)) {
      ssize_t unitCount = Unicode_UTF16Strlen(rhsUnits);
      UnicodeIndex cursor = pos;

      U16_NEXT(rhsUnits, cursor, unitCount, pointR);
   } else {
      pointR = unitR;
   }

   if (pointL == pointR) {
      // Equal strings already left through 'out' from inside the loop.
      NOT_REACHED();
   } else {
      verdict = (pointL < pointR) ? -1 : 1;
   }

  out:
   free(lhsUnits);
   free(rhsUnits);

   Unicode_Free(lhs);
   Unicode_Free(rhs);

   return verdict;
}