/* Convert at most n bytes of the NUL-terminated UTF-8 string s into a
   NUL-terminated wide string stored in out. out_chars is the capacity
   of out in wide characters, including room for the terminator.
   Returns 1 on success and 0 on failure. */
static int win_utf8ncasecmp_to_wide(const char *s, size_t n, wchar_t *out,
                                    size_t out_chars)
{
    size_t len;
    int converted;

    /* Never hand MultiByteToWideChar more bytes than the string holds;
       with an explicit length it would read past the terminator. */
    len = strlen(s);
    if (len > n)
        len = n;

    /* MultiByteToWideChar treats a zero length as an error, but an
       empty prefix is a perfectly valid (empty) string. */
    if (len == 0) {
        out[0] = L'\0';
        return 1;
    }

    /* Every wide char is produced from at most three UTF-8 bytes, so an
       input this long cannot fit. Rejecting it here also keeps the
       casts to int below from truncating. */
    if (len > 4 * (out_chars - 1))
        return 0;

    /* Reserve the last slot for the terminator written below. */
    converted = MultiByteToWideChar(CP_UTF8, 0, s, (int) len, out,
                                    (int) (out_chars - 1));
    if (converted <= 0)
        return 0;

    out[converted] = L'\0';
    return 1;
}

/* Just like strncasecmp, but compare two UTF8 strings. At most the
   first n bytes of each string are considered; n counts bytes, not
   characters. Each converted prefix is limited to 4095 wide chars.

   Returns the result of _wcsicmp on the converted prefixes. If s1 is
   invalid or cannot be converted, or s2 is invalid, -1 is returned; if
   s2 cannot be converted, 1 is returned. */
int win_utf8ncasecmp(const char *s1, const char *s2, size_t n)
{
    wchar_t ws1[4096], ws2[4096];

    /* Make sure input is valid UTF-8 */
    if (!isLegalUTF8String(s1)) {
        logmsg(LOG_CRIT, "win_utf8ncasecmp: Illegal UTF-8 string:%s", s1);
        return -1;
    }
    if (!isLegalUTF8String(s2)) {
        logmsg(LOG_CRIT, "win_utf8ncasecmp: Illegal UTF-8 string:%s", s2);
        return -1;
    }

    /* Convert both strings to wide chars.
       NOTE(review): wsizeof is assumed to yield the element count of
       the array - confirm against its definition. */
    if (!win_utf8ncasecmp_to_wide(s1, n, ws1, wsizeof(ws1))) {
        logmsg(LOG_CRIT, "win_utf8ncasecmp: MultiByteToWideChar failed");
        return -1;
    }
    if (!win_utf8ncasecmp_to_wide(s2, n, ws2, wsizeof(ws2))) {
        logmsg(LOG_CRIT, "win_utf8ncasecmp: MultiByteToWideChar failed");
        return 1;
    }

    /* compare */
    return _wcsicmp(ws1, ws2);
}
bool convertWideToUTF8(const std::wstring &Source, std::string &Result) { if (sizeof(wchar_t) == 1) { const UTF8 *Start = reinterpret_cast<const UTF8 *>(Source.data()); const UTF8 *End = reinterpret_cast<const UTF8 *>(Source.data() + Source.size()); if (!isLegalUTF8String(&Start, End)) return false; Result.resize(Source.size()); memcpy(&Result[0], Source.data(), Source.size()); return true; } else if (sizeof(wchar_t) == 2) { return convertUTF16ToUTF8String( llvm::ArrayRef<UTF16>(reinterpret_cast<const UTF16 *>(Source.data()), Source.size()), Result); } else if (sizeof(wchar_t) == 4) { const UTF32 *Start = reinterpret_cast<const UTF32 *>(Source.data()); const UTF32 *End = reinterpret_cast<const UTF32 *>(Source.data() + Source.size()); Result.resize(UNI_MAX_UTF8_BYTES_PER_CODE_POINT * Source.size()); UTF8 *ResultPtr = reinterpret_cast<UTF8 *>(&Result[0]); UTF8 *ResultEnd = reinterpret_cast<UTF8 *>(&Result[0] + Result.size()); if (ConvertUTF32toUTF8(&Start, End, &ResultPtr, ResultEnd, strictConversion) == conversionOK) { Result.resize(reinterpret_cast<char *>(ResultPtr) - &Result[0]); return true; } else { Result.clear(); return false; } } else { llvm_unreachable( "Control should never reach this point; see static_assert further up"); } }
bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source, char *&ResultPtr, const UTF8 *&ErrorPtr) { assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4); ConversionResult result = conversionOK; // Copy the character span over. if (WideCharWidth == 1) { const UTF8 *Pos = reinterpret_cast<const UTF8*>(Source.data()); if (!isLegalUTF8String(&Pos, reinterpret_cast<const UTF8*>(Source.data() + Source.length()))) { result = sourceIllegal; ErrorPtr = Pos; } else { memcpy(ResultPtr, Source.data(), Source.size()); ResultPtr += Source.size(); } } else if (WideCharWidth == 2) { const UTF8 *sourceStart = (const UTF8*)Source.data(); // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr); ConversionFlags flags = strictConversion; result = ConvertUTF8toUTF16( &sourceStart, sourceStart + Source.size(), &targetStart, targetStart + 2*Source.size(), flags); if (result == conversionOK) ResultPtr = reinterpret_cast<char*>(targetStart); else ErrorPtr = sourceStart; } else if (WideCharWidth == 4) { const UTF8 *sourceStart = (const UTF8*)Source.data(); // FIXME: Make the type of the result buffer correct instead of // using reinterpret_cast. UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr); ConversionFlags flags = strictConversion; result = ConvertUTF8toUTF32( &sourceStart, sourceStart + Source.size(), &targetStart, targetStart + 4*Source.size(), flags); if (result == conversionOK) ResultPtr = reinterpret_cast<char*>(targetStart); else ErrorPtr = sourceStart; } assert((result != targetExhausted) && "ConvertUTF8toUTFXX exhausted target buffer"); return result == conversionOK; }
/* Translate an internal representation of a path (like /c/home) to a Windows path (like c:\home) */ static wchar_t *intpath2winpath(const char *intpath) { wchar_t *winpath; int winpath_len; wchar_t *slash; const char *lastrootslash; wchar_t *lastslash; size_t intlen; /* Verify that input is valid UTF-8. We cannot use MB_ERR_INVALID_CHARS to MultiByteToWideChar, since it's only available in late versions of Windows. */ if (!isLegalUTF8String(intpath)) { logmsg(LOG_CRIT, "intpath2winpath: Illegal UTF-8 string:%s", intpath); return NULL; } /* Skip over multiple root slashes for paths like ///home/john */ lastrootslash = intpath; while (*lastrootslash == '/') lastrootslash++; if (lastrootslash != intpath) lastrootslash--; intlen = strlen(lastrootslash); /* One extra for /c -> c:\ */ winpath_len = sizeof(wchar_t) * (intlen + 2); winpath = malloc(winpath_len); if (!winpath) { logmsg(LOG_CRIT, "intpath2winpath: Unable to allocate memory"); return NULL; } if (!MultiByteToWideChar (CP_UTF8, 0, lastrootslash, -1, winpath, winpath_len)) { logmsg(LOG_CRIT, "intpath2winpath: MultiByteToWideChar failed"); return NULL; } /* If path ends with /.., chop of the last component. Eventually, we might want to eliminate all occurances of .. */ lastslash = wcsrchr(winpath, '/'); if (!wcscmp(lastslash, L"/..")) { *lastslash = '\0'; lastslash = wcsrchr(winpath, '/'); *lastslash = '\0'; } /* Translate /x -> x:/ and /x/something -> x:/something */ if ((winpath[0] == '/') && winpath[1]) { switch (winpath[2]) { case '\0': winpath[2] = '/'; winpath[3] = '\0'; /* fall through */ case '/': winpath[0] = winpath[1]; winpath[1] = ':'; break; default: break; } } while ((slash = wcschr(winpath, '/')) != NULL) { *slash = '\\'; } return winpath; }