/* Convert at most n bytes of the NUL-terminated UTF-8 string s into a
   NUL-terminated wide string stored in dst, which has room for dstlen
   wide chars (terminator included).
   Returns 1 on success, 0 if the conversion fails or does not fit. */
static int utf8n_to_wide(const char *s, size_t n, wchar_t *dst,
                         size_t dstlen)
{
    size_t len = 0;
    int converted;

    /* Never look past the terminator: MultiByteToWideChar converts exactly
       as many bytes as it is told to, whether or not a NUL is in the way. */
    while (len < n && s[len] != '\0')
        len++;

    /* A zero byte count is an error for MultiByteToWideChar, so the empty
       prefix (n == 0 or an empty string) is handled here. */
    if (len == 0) {
        dst[0] = L'\0';
        return 1;
    }

    /* UTF-8 uses at most 4 bytes per code point and every code point needs
       at least one wide char, so a prefix this long can never fit. This
       also keeps the int casts below in range. */
    if (len / 4 >= dstlen)
        return 0;

    /* Leave one slot free for the terminator written below. */
    converted = MultiByteToWideChar(CP_UTF8, 0, s, (int) len, dst,
                                    (int) (dstlen - 1));
    if (!converted)
        return 0;

    dst[converted] = L'\0';
    return 1;
}

/* Just like strncasecmp, but compare two UTF8 strings.

   At most n bytes of each string are considered, and never more than the
   string actually contains. Each converted prefix is limited to 4095 wide
   chars plus the terminator.
   NOTE(review): assumes wsizeof() yields the element count of the array --
   confirm against its definition.

   Returns the result of _wcsicmp() on the converted prefixes. On error a
   message is logged and a nonzero value is returned: -1 if either string is
   not legal UTF-8 or s1 cannot be converted, 1 if s2 cannot be converted.
   These error values cannot be told apart from a genuine comparison result. */
int win_utf8ncasecmp(const char *s1, const char *s2, size_t n)
{
    wchar_t ws1[4096], ws2[4096];

    /* Make sure input is valid UTF-8 */
    if (!isLegalUTF8String(s1)) {
        logmsg(LOG_CRIT, "win_utf8ncasecmp: Illegal UTF-8 string:%s", s1);
        return -1;
    }
    if (!isLegalUTF8String(s2)) {
        logmsg(LOG_CRIT, "win_utf8ncasecmp: Illegal UTF-8 string:%s", s2);
        return -1;
    }

    /* Convert both prefixes to wide chars */
    if (!utf8n_to_wide(s1, n, ws1, wsizeof(ws1))) {
        logmsg(LOG_CRIT, "win_utf8ncasecmp: MultiByteToWideChar failed");
        return -1;
    }
    if (!utf8n_to_wide(s2, n, ws2, wsizeof(ws2))) {
        logmsg(LOG_CRIT, "win_utf8ncasecmp: MultiByteToWideChar failed");
        return 1;
    }

    /* compare */
    return _wcsicmp(ws1, ws2);
}
/* Example #2 */
/* 0 */
bool convertWideToUTF8(const std::wstring &Source, std::string &Result) {
  if (sizeof(wchar_t) == 1) {
    const UTF8 *Start = reinterpret_cast<const UTF8 *>(Source.data());
    const UTF8 *End =
        reinterpret_cast<const UTF8 *>(Source.data() + Source.size());
    if (!isLegalUTF8String(&Start, End))
      return false;
    Result.resize(Source.size());
    memcpy(&Result[0], Source.data(), Source.size());
    return true;
  } else if (sizeof(wchar_t) == 2) {
    return convertUTF16ToUTF8String(
        llvm::ArrayRef<UTF16>(reinterpret_cast<const UTF16 *>(Source.data()),
                              Source.size()),
        Result);
  } else if (sizeof(wchar_t) == 4) {
    const UTF32 *Start = reinterpret_cast<const UTF32 *>(Source.data());
    const UTF32 *End =
        reinterpret_cast<const UTF32 *>(Source.data() + Source.size());
    Result.resize(UNI_MAX_UTF8_BYTES_PER_CODE_POINT * Source.size());
    UTF8 *ResultPtr = reinterpret_cast<UTF8 *>(&Result[0]);
    UTF8 *ResultEnd = reinterpret_cast<UTF8 *>(&Result[0] + Result.size());
    if (ConvertUTF32toUTF8(&Start, End, &ResultPtr, ResultEnd,
                           strictConversion) == conversionOK) {
      Result.resize(reinterpret_cast<char *>(ResultPtr) - &Result[0]);
      return true;
    } else {
      Result.clear();
      return false;
    }
  } else {
    llvm_unreachable(
        "Control should never reach this point; see static_assert further up");
  }
}
bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source,
					   char *&ResultPtr, const UTF8 *&ErrorPtr)
{
	assert(WideCharWidth == 1 || WideCharWidth == 2 || WideCharWidth == 4);
	ConversionResult result = conversionOK;
	// Copy the character span over.
	if (WideCharWidth == 1) {
		const UTF8 *Pos = reinterpret_cast<const UTF8*>(Source.data());
		if (!isLegalUTF8String(&Pos, reinterpret_cast<const UTF8*>(Source.data() + Source.length()))) {
			result = sourceIllegal;
			ErrorPtr = Pos;
		} else {
			memcpy(ResultPtr, Source.data(), Source.size());
			ResultPtr += Source.size();
		}
	} else if (WideCharWidth == 2) {
		const UTF8 *sourceStart = (const UTF8*)Source.data();
		// FIXME: Make the type of the result buffer correct instead of
		// using reinterpret_cast.
		UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
		ConversionFlags flags = strictConversion;
		result = ConvertUTF8toUTF16(
			&sourceStart, sourceStart + Source.size(),
			&targetStart, targetStart + 2*Source.size(), flags);
		if (result == conversionOK)
			ResultPtr = reinterpret_cast<char*>(targetStart);
		else
			ErrorPtr = sourceStart;
	} else if (WideCharWidth == 4) {
		const UTF8 *sourceStart = (const UTF8*)Source.data();
		// FIXME: Make the type of the result buffer correct instead of
		// using reinterpret_cast.
		UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
		ConversionFlags flags = strictConversion;
		result = ConvertUTF8toUTF32(
			&sourceStart, sourceStart + Source.size(),
			&targetStart, targetStart + 4*Source.size(), flags);
		if (result == conversionOK)
			ResultPtr = reinterpret_cast<char*>(targetStart);
		else
			ErrorPtr = sourceStart;
	}
	assert((result != targetExhausted)
		&& "ConvertUTF8toUTFXX exhausted target buffer");
	return result == conversionOK;
}
/* Translate an internal representation of a path (like /c/home) to
   a Windows path (like c:\home).

   Leading runs of slashes are collapsed to one, a trailing "/.." removes
   the last path component, "/x" becomes "x:\" and "/x/rest" becomes
   "x:\rest"; all remaining forward slashes are turned into backslashes.

   Returns a buffer obtained from malloc() that the caller must free(), or
   NULL (after logging) if intpath is not legal UTF-8, memory cannot be
   allocated, or the conversion to wide chars fails. */
static wchar_t *intpath2winpath(const char *intpath)
{
    wchar_t *winpath;
    int winpath_len;
    wchar_t *slash;
    const char *lastrootslash;
    wchar_t *lastslash;
    size_t intlen;

    /* Verify that input is valid UTF-8. We cannot use MB_ERR_INVALID_CHARS
       to MultiByteToWideChar, since it's only available in late versions of
       Windows. */
    if (!isLegalUTF8String(intpath)) {
        logmsg(LOG_CRIT, "intpath2winpath: Illegal UTF-8 string:%s", intpath);
        return NULL;
    }

    /* Skip over multiple root slashes for paths like ///home/john */
    lastrootslash = intpath;
    while (*lastrootslash == '/')
        lastrootslash++;
    if (lastrootslash != intpath)
        lastrootslash--;

    intlen = strlen(lastrootslash);
    /* Capacity in wide chars: at most one per input byte, one for the
       terminator and one extra for /c -> c:\ */
    winpath_len = (int) (intlen + 2);
    winpath = malloc(sizeof(wchar_t) * (intlen + 2));
    if (!winpath) {
        logmsg(LOG_CRIT, "intpath2winpath: Unable to allocate memory");
        return NULL;
    }

    /* The last argument is a count of wide chars, not bytes. */
    if (!MultiByteToWideChar
        (CP_UTF8, 0, lastrootslash, -1, winpath, winpath_len)) {
        logmsg(LOG_CRIT, "intpath2winpath: MultiByteToWideChar failed");
        free(winpath);
        return NULL;
    }

    /* If path ends with /.., chop off the last component. Eventually, we
       might want to eliminate all occurrences of .. */
    lastslash = wcsrchr(winpath, '/');
    if (lastslash && !wcscmp(lastslash, L"/..")) {
        *lastslash = '\0';
        /* For a bare "/.." nothing is left to chop, so wcsrchr finds
           no slash here. */
        lastslash = wcsrchr(winpath, '/');
        if (lastslash)
            *lastslash = '\0';
    }

    /* Translate /x -> x:/ and /x/something -> x:/something */
    if ((winpath[0] == '/') && winpath[1]) {
        switch (winpath[2]) {
            case '\0':
                /* Uses the extra wide char reserved above. */
                winpath[2] = '/';
                winpath[3] = '\0';
                /* fall through */

            case '/':
                winpath[0] = winpath[1];
                winpath[1] = ':';
                break;

            default:
                break;
        }
    }

    /* Turn every remaining forward slash into a backslash. */
    for (slash = winpath; *slash != '\0'; slash++) {
        if (*slash == '/')
            *slash = '\\';
    }

    return winpath;
}