Beispiel #1
0
/*
 * Convert the multibyte string `data` to wide characters using locale `loc`
 * and store the object returned by PyUnicode_FromWideChar() in slot `index`
 * of `seq`.
 *
 * Nothing is stored when `data` is NULL, when `data` cannot be converted in
 * `loc`, or when the temporary buffer cannot be allocated.
 */
static void _seq_set_string(PyObject* seq, locale_t loc, ssize_t index, const char* data) {
	if (!data)
		return;
	/* With a NULL destination the call only counts: the result is the number
	 * of wide characters required, not including the terminator. */
	size_t needed = mbstowcs_l(NULL, data, 0, loc);
	if (needed==(size_t)-1)
		// Should we log an error here?
		return;
	wchar_t space[30];
	wchar_t *buf = space;
	/* Capacity is counted in wchar_t elements (not bytes) and must leave
	 * room for the terminator, hence ">=" against the element count. */
	if (needed >= sizeof(space)/sizeof(space[0])) {
		/* Refuse sizes whose byte count would overflow size_t. */
		if (needed >= (size_t)-1/sizeof(wchar_t))
			return;
		buf = PyMem_Malloc((needed+1)*sizeof(wchar_t));
		if (buf==NULL)
			return;
	}
	size_t length = mbstowcs_l(buf, data, needed+1, loc);
	/* Only hand the buffer on if the second pass really converted it. */
	if (length!=(size_t)-1) {
		PyStructSequence_SET_ITEM(seq, index, PyUnicode_FromWideChar(buf, length));
	}
	if (buf!=space)
		PyMem_Free(buf);
}
Beispiel #2
0
/*
 * Convert the UTF-8 text in `utf8_data` to wide characters stored in `wstr`.
 *
 * On Windows builds the conversion is done by MultiByteToWideChar(), with
 * `len` as the input length and `len * 4` as the output capacity.  On other
 * builds a locale is created on first use, cached in m_UTF8_locale, and
 * mbstowcs_l() is called with `len` as the output limit.
 *
 * The value returned by the conversion call is written to `*wlen` when
 * `wlen` is not NULL.  The function itself always resets errno to 0 and
 * returns 0.
 *
 * NOTE(review): a failed conversion is therefore only visible through
 * `*wlen`; confirm that callers inspect it.
 */
int UTF8::ToUnicode(const char *utf8_data, int len, wchar_t *wstr, int *wlen)
{
#if defined(WIN32) || defined(_WINDOWS_)
	const int converted = MultiByteToWideChar(CP_UTF8, 0, utf8_data, len, wstr, len * 4);
#else
	/* Create the conversion locale lazily and keep it for later calls. */
	if (!m_UTF8_locale) {
		m_UTF8_locale = _create_locale(LC_ALL, "en_US.UTF-8");
	}
	const int converted = mbstowcs_l(wstr, utf8_data, len, m_UTF8_locale);
#endif

	if (wlen) {
		*wlen = converted;
	}

	errno = 0;
	return 0;
}
Beispiel #3
0
/*
 * Wide-character strftime for an explicit locale.
 *
 * The wide format is converted to a multibyte string, strftime_l() formats
 * into a temporary multibyte buffer, and the result is converted back into
 * the caller's wide buffer `wcs` (at most `maxsize` wide characters).
 *
 * Returns the value produced by the final mbstowcs_l() call on success.
 * On any failure returns 0, with errno left as it was at the point of
 * failure (the cleanup does not disturb it).
 */
size_t
wcsftime_l(wchar_t *wcs, size_t maxsize,
    const wchar_t *format, const struct tm *timeptr, locale_t loc)
{
	char *mbfmt = NULL;	/* multibyte copy of `format` */
	char *mbout = NULL;	/* multibyte output of strftime_l() */
	size_t fmtbytes, nwide;
	int saved_errno;

	/* First pass measures the multibyte format, second pass fills it. */
	fmtbytes = wcstombs_l(NULL, format, 0, loc);
	if (fmtbytes == (size_t)-1)
		goto fail;
	mbfmt = malloc(fmtbytes + 1);
	if (mbfmt == NULL)
		goto fail;
	wcstombs_l(mbfmt, format, fmtbytes + 1, loc);

	/*
	 * The scratch buffer is sized as maxsize * MB_CUR_MAX_L(loc) bytes;
	 * reject a maxsize for which that product could overflow.
	 */
	if (maxsize >= SIZE_T_MAX / MB_CUR_MAX_L(loc)) {
		errno = EINVAL;
		goto fail;
	}
	mbout = malloc(maxsize * MB_CUR_MAX_L(loc));
	if (mbout == NULL)
		goto fail;

	if (strftime_l(mbout, maxsize, mbfmt, timeptr, loc) == 0)
		goto fail;

	/* Convert the formatted text back into the caller's wide buffer. */
	nwide = mbstowcs_l(wcs, mbout, maxsize, loc);
	if (nwide == (size_t)-1 || nwide == (size_t)-2)
		goto fail;

	free(mbfmt);
	free(mbout);
	return nwide;

fail:
	/* Keep the errno that describes the failure across the frees. */
	saved_errno = errno;
	free(mbfmt);
	free(mbout);
	errno = saved_errno;
	return 0;
}
Beispiel #4
0
/**
 * Main entry point for the test program.
 * @param  argc Unused.
 * @param  argv argv[1] contains the file to hash.
 * @return      Returns negative on failure, zero on success.
 */
int main(int argc, char** argv) {
    char* mbsfilename = argv[1];

    /* Convert the filename from char* to wchar_t* to test the
     * library. It's a pain, but it's designed to be called from
     * python, not C. */
    locale_t utf8 = newlocale(LC_ALL_MASK, NULL, NULL);
    size_t size = mbstowcs_l(NULL, mbsfilename, 0, utf8);
    wchar_t* wfilename = (wchar_t*)malloc(size * sizeof(wchar_t));
    size = mbstowcs_l(wfilename, mbsfilename, size, utf8);
    if (size == -1) {
        fprintf(stderr, "Error converting string.\n");
        return -1;
    }

    /* Set up our hash request. */
    HashRequest request;
    memset(&request, 0, sizeof(HashRequest));
    request.tag = 15;
    request.filename = wfilename;
    request.options = OPTION_ED2K;

    /* Hash the file. */
    int result = HashFileWithSyncIO(&request, HashCallback);

    /* Print the results. */
    printf("\nresult: %d\n", result);
    if (result == 0) {
        print_hash("  ED2K", &request.result[0], 16);
        print_hash(" CRC32", &request.result[16], 4);
        print_hash("   MD5", &request.result[20], 16);
        print_hash("  SHA1", &request.result[36], 20);
    }

    return 0;
}
Beispiel #5
0
/*
 * char2wchar --- turn a multibyte string into wide characters
 *
 * Works much like mbstowcs_l(), with these differences: the input at *from
 * is given by explicit length (fromlen) and does not have to be
 * null-terminated, and invalid input is reported with ereport() instead of
 * a -1 return.  At most tolen wchar_t's are stored at *to; the output is
 * zero-terminated iff there is room.
 */
size_t
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
		   pg_locale_t locale)
{
	size_t		nwide;

	/* With no output space there is nothing to do */
	if (tolen == 0)
		return 0;

#ifdef WIN32
	/* See WIN32 "Unicode" comment above */
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		if (fromlen != 0)
		{
			nwide = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
			/* The API signals failure by returning zero */
			if (nwide == 0)
				nwide = (size_t) -1;
		}
		else
		{
			/* Win32 API does not work for zero-length input */
			nwide = 0;
		}

		if (nwide != (size_t) -1)
		{
			Assert(nwide < tolen);
			/* MultiByteToWideChar() does not add the trailing null wchar */
			to[nwide] = 0;
		}
	}
	else
#endif   /* WIN32 */
	{
		/* Make a null-terminated copy, as mbstowcs needs an ending '\0' */
		char	   *zstr = pnstrdup(from, fromlen);

		if (locale != (pg_locale_t) 0)
		{
#ifdef HAVE_LOCALE_T
#ifdef HAVE_MBSTOWCS_L
			/* A nondefault locale can be passed straight to mbstowcs_l */
			nwide = mbstowcs_l(to, zstr, tolen, locale);
#else							/* !HAVE_MBSTOWCS_L */
			/* Switch the current locale just for the conversion ... ugh */
			locale_t	prev_locale = uselocale(locale);

			nwide = mbstowcs(to, zstr, tolen);

			uselocale(prev_locale);
#endif   /* HAVE_MBSTOWCS_L */
#else							/* !HAVE_LOCALE_T */
			/* Can't have locale != 0 without HAVE_LOCALE_T */
			elog(ERROR, "mbstowcs_l is not available");
			nwide = 0;			/* keep compiler quiet */
#endif   /* HAVE_LOCALE_T */
		}
		else
		{
			/* The default locale is handled by plain mbstowcs */
			nwide = mbstowcs(to, zstr, tolen);
		}

		pfree(zstr);
	}

	/* Normal case: the conversion worked */
	if (nwide != (size_t) -1)
		return nwide;

	/*
	 * The conversion hit an invalid multibyte character.  Let
	 * pg_verifymbstr look at the string first so it can produce a more
	 * useful error.  If it finds nothing wrong, the string is acceptable to
	 * us but not to mbstowcs, which suggests that the LC_CTYPE locale
	 * differs from the database encoding; report a generic error then.
	 */
	pg_verifymbstr(from, fromlen, false);	/* might not return */
	/* but if it does ... */
	ereport(ERROR,
			(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
			 errmsg("invalid multibyte character for locale"),
			 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));

	return nwide;
}
Beispiel #6
0
/*
 * Locale-aware string comparison.  To cope with multibyte locales both
 * strings are converted to wide characters and compared with wcscoll_l().
 * If an allocation or a conversion fails, the result of a plain strcmp()
 * is returned instead.  Caller should check errno.
 */
int
strcoll_l(const char *s1, const char *s2, locale_t loc)
{
	const struct lc_collate *lcc = loc->collate;
	wchar_t *heap1 = NULL, *heap2 = NULL;	/* malloc'ed buffers, if any */
	wchar_t *wide1 = NULL, *wide2 = NULL;	/* buffers actually used */
	size_t n1, n2;
	int cmp;

	/* The POSIX collation needs no conversion at all. */
	if (lcc->lc_is_posix)
		return (strcmp(s1, s2));

	/*
	 * Buffer sizes rely on a simple assumption: converting to wide
	 * format never grows the string, i.e. one byte (or one multibyte
	 * character) never yields more than one wide character.
	 */
	n1 = strlen(s1) + 1;
	n2 = strlen(s2) + 1;

	/*
	 * Small strings are placed on the stack with alloca for a little
	 * extra speed; anything above ALLOCA_LIMIT goes to the heap.
	 */
	if (n1 <= ALLOCA_LIMIT) {
		wide1 = alloca(n1 * sizeof (wchar_t));
		if (wide1 == NULL)
			goto fallback;
	} else {
		heap1 = malloc(n1 * sizeof (wchar_t));
		if (heap1 == NULL)
			goto fallback;
		wide1 = heap1;
	}
	if (n2 <= ALLOCA_LIMIT) {
		wide2 = alloca(n2 * sizeof (wchar_t));
		if (wide2 == NULL)
			goto fallback;
	} else {
		heap2 = malloc(n2 * sizeof (wchar_t));
		if (heap2 == NULL)
			goto fallback;
		wide2 = heap2;
	}

	/* The second string is only converted if the first one succeeded. */
	if (mbstowcs_l(wide1, s1, n1, loc) == (size_t)-1 ||
	    mbstowcs_l(wide2, s2, n2, loc) == (size_t)-1)
		goto fallback;

	cmp = wcscoll_l(wide1, wide2, loc);
	free(heap1);
	free(heap2);

	return (cmp);

fallback:
	/* Release whatever was heap-allocated and compare bytewise. */
	free(heap1);
	free(heap2);
	return (strcmp(s1, s2));
}