/*
 * Convert the multibyte string `data` to wide characters using locale `loc`
 * and store the resulting Python unicode object in slot `index` of the
 * struct sequence `seq`.
 *
 * Nothing is stored when `data` is NULL, when the string cannot be converted
 * in `loc`, or when a temporary buffer cannot be allocated.
 */
static void
_seq_set_string(PyObject* seq, locale_t loc, ssize_t index, const char* data)
{
    wchar_t space[30];
    wchar_t *buf = space;
    size_t needed;
    size_t length;

    if (!data)
        return;

    /* First pass: number of wide characters, excluding the terminator. */
    needed = mbstowcs_l(NULL, data, 0, loc);
    if (needed == (size_t)-1)
        // Should we log an error here?
        return;

    /*
     * The stack buffer holds sizeof(space)/sizeof(space[0]) *elements*, not
     * sizeof(space) of them; anything that does not fit (terminator
     * included) goes to the heap.  PyMem_New sizes the request in wchar_t
     * units and rejects counts whose byte size would overflow.
     */
    if (needed >= sizeof(space) / sizeof(space[0])) {
        buf = PyMem_New(wchar_t, needed + 1);
        if (buf == NULL)
            return;
    }

    /* Second pass: the actual conversion, terminator included. */
    length = mbstowcs_l(buf, data, needed + 1, loc);
    if (length != (size_t)-1)
        PyStructSequence_SET_ITEM(seq, index,
                                  PyUnicode_FromWideChar(buf, (Py_ssize_t)length));

    if (buf != space)
        PyMem_Free(buf);
}
/*
 * Convert the UTF-8 string `utf8_data` into wide characters stored at `wstr`.
 *
 * On Windows builds the conversion is done by MultiByteToWideChar(), which
 * is told that `wstr` can hold `len * 4` wide characters.  Elsewhere the
 * member locale m_UTF8_locale is created on first use and mbstowcs_l() is
 * called with `len` as the output limit.
 *
 * The value returned by the conversion call is written to `*wlen` (when
 * `wlen` is not NULL) without being checked, so a failure indication from
 * the underlying call ends up there unchanged.
 *
 * Always resets errno to 0 and returns 0.
 */
int UTF8::ToUnicode(const char *utf8_data, int len, wchar_t *wstr, int *wlen)
{
#if defined(WIN32) || defined(_WINDOWS_)
    const int converted =
        MultiByteToWideChar(CP_UTF8, 0, utf8_data, len, wstr, len * 4);
#else
    /* Lazily create the conversion locale the first time it is needed. */
    if (!m_UTF8_locale)
    {
        m_UTF8_locale = _create_locale(LC_ALL, "en_US.UTF-8");
    }
    const int converted = mbstowcs_l(wstr, utf8_data, len, m_UTF8_locale);
#endif

    if (wlen != NULL)
    {
        *wlen = converted;
    }

    errno = 0;
    return errno;
}
/*
 * Wide-character strftime for an explicit locale.
 *
 * The wide format is converted to a multibyte string, handed to
 * strftime_l(), and the multibyte result is converted back into the
 * caller's wide buffer `wcs` (at most `maxsize` wide characters).
 *
 * Returns the count produced by the final conversion on success.  On any
 * failure returns 0; errno is left as it was at the point of failure
 * (EINVAL when `maxsize` is too large to size the scratch buffer).
 */
size_t
wcsftime_l(wchar_t *wcs, size_t maxsize, const wchar_t *format,
    const struct tm *timeptr, locale_t loc)
{
	char *mbformat = NULL;
	char *mbout = NULL;
	size_t fmtlen;
	size_t nwide;
	int saved_errno;

	/* Multibyte copy of the format: measure first, then convert. */
	fmtlen = wcstombs_l(NULL, format, 0, loc);
	if (fmtlen == (size_t)-1)
		goto fail;
	mbformat = malloc(fmtlen + 1);
	if (mbformat == NULL)
		goto fail;
	wcstombs_l(mbformat, format, fmtlen + 1, loc);

	/*
	 * Scratch buffer sized for the longest multibyte sequence that could
	 * correspond to `maxsize` wide characters.  Refuse sizes for which
	 * that product would overflow.
	 */
	if (maxsize >= SIZE_T_MAX / MB_CUR_MAX_L(loc)) {
		errno = EINVAL;
		goto fail;
	}
	mbout = malloc(maxsize * MB_CUR_MAX_L(loc));
	if (mbout == NULL)
		goto fail;

	/* Format into the scratch buffer, then widen into the caller's. */
	if (strftime_l(mbout, maxsize, mbformat, timeptr, loc) == 0)
		goto fail;
	nwide = mbstowcs_l(wcs, mbout, maxsize, loc);
	if (nwide == (size_t)-1 || nwide == (size_t)-2)
		goto fail;

	free(mbformat);
	free(mbout);
	return (nwide);

fail:
	/* Keep the errno describing the failure intact across free(). */
	saved_errno = errno;
	free(mbformat);
	free(mbout);
	errno = saved_errno;
	return (0);
}
/** * Main entry point for the test program. * @param argc Unused. * @param argv argv[1] contains the file to hash. * @return Returns negative on failure, zero on success. */ int main(int argc, char** argv) { char* mbsfilename = argv[1]; /* Convert the filename from char* to wchar_t* to test the * library. It's a pain, but it's designed to be called from * python, not C. */ locale_t utf8 = newlocale(LC_ALL_MASK, NULL, NULL); size_t size = mbstowcs_l(NULL, mbsfilename, 0, utf8); wchar_t* wfilename = (wchar_t*)malloc(size * sizeof(wchar_t)); size = mbstowcs_l(wfilename, mbsfilename, size, utf8); if (size == -1) { fprintf(stderr, "Error converting string.\n"); return -1; } /* Set up our hash request. */ HashRequest request; memset(&request, 0, sizeof(HashRequest)); request.tag = 15; request.filename = wfilename; request.options = OPTION_ED2K; /* Hash the file. */ int result = HashFileWithSyncIO(&request, HashCallback); /* Print the results. */ printf("\nresult: %d\n", result); if (result == 0) { print_hash(" ED2K", &request.result[0], 16); print_hash(" CRC32", &request.result[16], 4); print_hash(" MD5", &request.result[20], 16); print_hash(" SHA1", &request.result[36], 20); } return 0; }
/*
 * char2wchar --- convert multibyte characters to wide characters
 *
 * Behaves much like mbstowcs_l(), with two differences: the input at *from
 * is described by an explicit byte count (fromlen) and need not be
 * null-terminated, and invalid input encoding is reported through ereport()
 * instead of a -1 return.  At most tolen wchar_t's are stored at *to, and
 * the output is zero-terminated iff there is room.
 */
size_t
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
		   pg_locale_t locale)
{
	size_t		nconverted;

	/* With no room in the output there is nothing to do */
	if (tolen == 0)
		return 0;

#ifdef WIN32
	/* See WIN32 "Unicode" comment above */
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		if (fromlen != 0)
		{
			nconverted = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to,
											 tolen - 1);
			/* The API signals failure by returning zero */
			if (nconverted == 0)
				nconverted = (size_t) -1;
		}
		else
		{
			/* Win32 API does not work for zero-length input */
			nconverted = 0;
		}

		if (nconverted != (size_t) -1)
		{
			Assert(nconverted < tolen);
			/* MultiByteToWideChar() does not append the trailing null */
			to[nconverted] = 0;
		}
	}
	else
#endif							/* WIN32 */
	{
		/* mbstowcs needs a '\0'-terminated input, so work on a copy */
		char	   *terminated = pnstrdup(from, fromlen);

		if (locale != (pg_locale_t) 0)
		{
#ifdef HAVE_LOCALE_T
#ifdef HAVE_MBSTOWCS_L
			/* Nondefault locale: mbstowcs_l takes it directly */
			nconverted = mbstowcs_l(to, terminated, tolen, locale);
#else							/* !HAVE_MBSTOWCS_L */
			/* No mbstowcs_l: switch the current locale around the call */
			locale_t	previous = uselocale(locale);

			nconverted = mbstowcs(to, terminated, tolen);
			uselocale(previous);
#endif							/* HAVE_MBSTOWCS_L */
#else							/* !HAVE_LOCALE_T */
			/* Can't have locale != 0 without HAVE_LOCALE_T */
			elog(ERROR, "mbstowcs_l is not available");
			nconverted = 0;		/* keep compiler quiet */
#endif							/* HAVE_LOCALE_T */
		}
		else
		{
			/* Default locale: plain mbstowcs is enough */
			nconverted = mbstowcs(to, terminated, tolen);
		}

		pfree(terminated);
	}

	if (nconverted == (size_t) -1)
	{
		/*
		 * Invalid multibyte character encountered.  Let pg_verifymbstr
		 * check the string so the error message is as useful as possible.
		 * The string may be fine for us yet not for mbstowcs, which
		 * suggests that the LC_CTYPE locale is different from the database
		 * encoding; in that case fall through to a generic error.
		 */
		pg_verifymbstr(from, fromlen, false);	/* might not return */
		/* but if it does ... */
		ereport(ERROR,
				(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
				 errmsg("invalid multibyte character for locale"),
				 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
	}

	return nconverted;
}
/*
 * Locale-aware string comparison.
 *
 * To handle multibyte locales properly, both strings are converted to wide
 * characters and compared with wcscoll_l().  If an allocation or a
 * conversion fails, the result of a plain strcmp() is returned instead.
 * Caller should check errno.
 */
int
strcoll_l(const char *s1, const char *s2, locale_t loc)
{
	const struct lc_collate *collate = loc->collate;
	wchar_t *heap1 = NULL, *heap2 = NULL;	/* malloc'd buffers, if any */
	wchar_t *wide1, *wide2;
	size_t cap1, cap2;
	int cmp;

	/* POSIX collation is plain byte order. */
	if (collate->lc_is_posix)
		return (strcmp(s1, s2));

	/*
	 * Simple assumption: conversion to wide format is strictly reducing,
	 * i.e. a single byte (or multibyte character) never produces more
	 * than one wide character, so strlen + 1 elements always suffice.
	 */
	cap1 = strlen(s1) + 1;
	cap2 = strlen(s2) + 1;

	/* Small strings live on the stack; larger ones on the heap. */
	if (cap1 <= ALLOCA_LIMIT) {
		wide1 = alloca(cap1 * sizeof (wchar_t));
	} else {
		heap1 = malloc(cap1 * sizeof (wchar_t));
		wide1 = heap1;
	}
	if (wide1 == NULL)
		goto fallback;

	if (cap2 <= ALLOCA_LIMIT) {
		wide2 = alloca(cap2 * sizeof (wchar_t));
	} else {
		heap2 = malloc(cap2 * sizeof (wchar_t));
		wide2 = heap2;
	}
	if (wide2 == NULL)
		goto fallback;

	if (mbstowcs_l(wide1, s1, cap1, loc) == (size_t)-1 ||
	    mbstowcs_l(wide2, s2, cap2, loc) == (size_t)-1)
		goto fallback;

	cmp = wcscoll_l(wide1, wide2, loc);
	goto done;

fallback:
	cmp = strcmp(s1, s2);

done:
	free(heap1);
	free(heap2);
	return (cmp);
}