char * Unicode_ReplaceRange(const char *dest, // IN: UnicodeIndex destStart, // IN: UnicodeIndex destLength, // IN: const char *src, // IN: UnicodeIndex srcStart, // IN: UnicodeIndex srcLength) // IN: { char *result; char *stringOne; char *stringTwo; char *stringThree; ASSERT(dest); ASSERT((destStart >= 0) || (destStart == -1)); ASSERT((destLength >= 0) || (destLength == -1)); ASSERT(src); ASSERT((srcStart >= 0) || (srcStart == -1)); ASSERT((srcLength >= 0) || (srcLength == -1)); stringOne = Unicode_Substr(dest, 0, destStart); stringTwo = Unicode_Substr(src, srcStart, srcLength); stringThree = Unicode_Substr(dest, destStart + destLength, -1); result = Unicode_Join(stringOne, stringTwo, stringThree, NULL); free(stringOne); free(stringTwo); free(stringThree); return result; }
void File_GetPathName(ConstUnicode fullPath, // IN: Unicode *pathName, // OUT (OPT): Unicode *baseName) // OUT (OPT): { Unicode volume; UnicodeIndex len; UnicodeIndex curLen; File_SplitName(fullPath, &volume, pathName, baseName); if (pathName == NULL) { Unicode_Free(volume); return; } /* * The volume component may be empty. */ if (!Unicode_IsEmpty(volume)) { Unicode temp = Unicode_Append(volume, *pathName); Unicode_Free(*pathName); *pathName = temp; } Unicode_Free(volume); /* * Remove any trailing directory separator characters. */ len = Unicode_LengthInCodePoints(*pathName); curLen = len; while ((curLen > 0) && (FileFirstSlashIndex(*pathName, curLen - 1) == curLen - 1)) { curLen--; } if (curLen < len) { Unicode temp = Unicode_Substr(*pathName, 0, curLen); Unicode_Free(*pathName); *pathName = temp; } }
void File_SplitName(ConstUnicode pathName, // IN: Unicode *volume, // OUT (OPT): Unicode *directory, // OUT (OPT): Unicode *base) // OUT (OPT): { Unicode vol; Unicode dir; Unicode bas; UnicodeIndex volEnd; UnicodeIndex length; UnicodeIndex baseBegin; WIN32_ONLY(UnicodeIndex pathLen); ASSERT(pathName); /* * Get volume. */ volEnd = 0; #if defined(_WIN32) pathLen = Unicode_LengthInCodePoints(pathName); if ((pathLen > 2) && (Unicode_StartsWith(pathName, "\\\\") || Unicode_StartsWith(pathName, "//"))) { /* UNC path */ volEnd = FileFirstSlashIndex(pathName, 2); if (volEnd == UNICODE_INDEX_NOT_FOUND) { /* we have \\foo, which is just bogus */ volEnd = 0; } else { volEnd = FileFirstSlashIndex(pathName, volEnd + 1); if (volEnd == UNICODE_INDEX_NOT_FOUND) { /* we have \\foo\bar, which is legal */ volEnd = pathLen; } } } else if ((pathLen >= 2) && (Unicode_FindSubstrInRange(pathName, 1, 1, ":", 0, 1) != UNICODE_INDEX_NOT_FOUND)) { /* drive-letter path */ volEnd = 2; } if (volEnd > 0) { vol = Unicode_Substr(pathName, 0, volEnd); } else { vol = Unicode_Duplicate(""); } #else vol = Unicode_Duplicate(""); #endif /* _WIN32 */ /* * Get base. */ baseBegin = FileLastSlashIndex(pathName, 0); baseBegin = (baseBegin == UNICODE_INDEX_NOT_FOUND) ? 0 : baseBegin + 1; if (baseBegin >= volEnd) { bas = Unicode_Substr(pathName, baseBegin, -1); } else { bas = Unicode_Duplicate(""); } /* * Get dir. */ length = baseBegin - volEnd; if (length > 0) { dir = Unicode_Substr(pathName, volEnd, length); } else { dir = Unicode_Duplicate(""); } /* * Return what needs to be returned. */ if (volume) { *volume = vol; } else { Unicode_Free(vol); } if (directory) { *directory = dir; } else { Unicode_Free(dir); } if (base) { *base = bas; } else { Unicode_Free(bas); } }
int Unicode_CompareRange(ConstUnicode str1, // IN UnicodeIndex str1Start, // IN UnicodeIndex str1Length, // IN ConstUnicode str2, // IN UnicodeIndex str2Start, // IN UnicodeIndex str2Length, // IN Bool ignoreCase) // IN { int result = -1; Unicode substr1 = NULL; Unicode substr2 = NULL; utf16_t *substr1UTF16 = NULL; utf16_t *substr2UTF16 = NULL; UnicodeIndex i = 0; UnicodeIndex utf16Index; utf16_t codeUnit1; utf16_t codeUnit2; uint32 codePoint1; uint32 codePoint2; UnicodePinIndices(str1, &str1Start, &str1Length); UnicodePinIndices(str2, &str2Start, &str2Length); /* * TODO: Allocating substrings is a performance hit. We should do * this search in-place. (However, searching UTF-8 requires tender loving * care, and it's just easier to search UTF-16.) */ substr1 = Unicode_Substr(str1, str1Start, str1Length); if (!substr1) { goto out; } substr2 = Unicode_Substr(str2, str2Start, str2Length); if (!substr2) { goto out; } /* * XXX TODO: Need to normalize the incoming strings to NFC or NFD. */ substr1UTF16 = Unicode_GetAllocUTF16(substr1); if (!substr1UTF16) { goto out; } substr2UTF16 = Unicode_GetAllocUTF16(substr2); if (!substr2UTF16) { goto out; } /* * TODO: This is the naive string search algorithm, which is * O(n * m). We can do better with KMP or Boyer-Moore if this * proves to be a bottleneck. */ while (TRUE) { codeUnit1 = *(substr1UTF16 + i); codeUnit2 = *(substr2UTF16 + i); /* * TODO: Simple case folding doesn't handle the situation where * more than one code unit is needed to store the result of the * case folding. * * This means that German "straBe" (where B = sharp S, U+00DF) * will not match "STRASSE", even though the two strings are the * same. */ if (ignoreCase) { codeUnit1 = UnicodeSimpleCaseFold(codeUnit1); codeUnit2 = UnicodeSimpleCaseFold(codeUnit2); } if (codeUnit1 != codeUnit2) { break; } if (codeUnit1 == 0) { // End of both strings reached: strings are equal. result = 0; goto out; } i++; } /* * The two UTF-16 code units differ. If they're the first code unit * of a surrogate pair (for Unicode values past U+FFFF), decode the * surrogate pair into a full Unicode code point. */ if (U16_IS_SURROGATE(codeUnit1)) { ssize_t substrUTF16Len = Unicode_UTF16Strlen(substr1UTF16); // U16_NEXT modifies the index, so let it work on a copy. utf16Index = i; // Decode the surrogate if needed. U16_NEXT(substr1UTF16, utf16Index, substrUTF16Len, codePoint1); } else { // Not a surrogate? Then the code point value is the code unit. codePoint1 = codeUnit1; } if (U16_IS_SURROGATE(codeUnit2)) { ssize_t substrUTF16Len = Unicode_UTF16Strlen(substr2UTF16); utf16Index = i; U16_NEXT(substr2UTF16, utf16Index, substrUTF16Len, codePoint2); } else { codePoint2 = codeUnit2; } if (codePoint1 < codePoint2) { result = -1; } else if (codePoint1 > codePoint2) { result = 1; } else { // If we hit the end of the string, we've already gone to 'out'. NOT_REACHED(); } out: free(substr1UTF16); free(substr2UTF16); Unicode_Free(substr1); Unicode_Free(substr2); return result; }