/* Variant of mem_cd_iconv() for callers that do not want to handle memory
   exhaustion themselves.
   All arguments are handed to mem_cd_iconv() unchanged and its return value
   is passed back, except that a negative return value combined with
   errno == ENOMEM leads to a call of xalloc_die().  */
int
xmem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
               char **resultp, size_t *lengthp)
{
  int status = mem_cd_iconv (src, srclen, cd, resultp, lengthp);

  /* Success: nothing to inspect.  */
  if (status >= 0)
    return status;

  /* Failure: only memory exhaustion is escalated; any other error is left
     for the caller to examine through errno.  */
  if (errno == ENOMEM)
    xalloc_die ();

  return status;
}
/* Test driver for mem_cd_iconv(), str_cd_iconv() and str_iconv().
   Each function is run through the same four scenarios: ISO-8859-1 to UTF-8,
   UTF-8 to ISO-8859-1, an unconvertible character (EILSEQ), and a truncated
   multibyte sequence at the end of the input.
   When HAVE_ICONV is not set, nothing is tested and 0 is returned.  */
int
main ()
{
#if HAVE_ICONV
  /* The tests rely on iconv() knowing at least ASCII, ISO-8859-1 and
     UTF-8.  */
  iconv_t cd_88591_to_utf8 = iconv_open ("UTF-8", "ISO-8859-1");
  iconv_t cd_utf8_to_88591 = iconv_open ("ISO-8859-1", "UTF-8");

  ASSERT (cd_88591_to_utf8 != (iconv_t)(-1));
  ASSERT (cd_utf8_to_88591 != (iconv_t)(-1));

  /* ======================== mem_cd_iconv() ======================== */

  /* ISO-8859-1 -> UTF-8, clean conversion.  */
  {
    static const char source[] =
      "\304rger mit b\366sen B\374bchen ohne Augenma\337";
    static const char expected[] =
      "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
    char *result = NULL;
    size_t length = 0;
    int retval;

    retval = mem_cd_iconv (source, strlen (source), cd_88591_to_utf8,
                           &result, &length);
    ASSERT (retval == 0);
    ASSERT (length == strlen (expected));
    ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
    free (result);
  }

  /* UTF-8 -> ISO-8859-1, clean conversion.  */
  {
    static const char source[] =
      "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
    static const char expected[] =
      "\304rger mit b\366sen B\374bchen ohne Augenma\337";
    char *result = NULL;
    size_t length = 0;
    int retval;

    retval = mem_cd_iconv (source, strlen (source), cd_utf8_to_88591,
                           &result, &length);
    ASSERT (retval == 0);
    ASSERT (length == strlen (expected));
    ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0);
    free (result);
  }

  /* UTF-8 -> ISO-8859-1 with a character that has no ISO-8859-1
     counterpart: must fail with EILSEQ and hand back no buffer.  */
  {
    static const char source[] = "\342\202\254"; /* EURO SIGN */
    char *result = NULL;
    size_t length = 0;
    int retval;

    retval = mem_cd_iconv (source, strlen (source), cd_utf8_to_88591,
                           &result, &length);
    ASSERT (retval == -1 && errno == EILSEQ);
    ASSERT (result == NULL);
  }

  /* UTF-8 -> ISO-8859-1 with a truncated multibyte sequence (the EINVAL
     situation of iconv()): expected to succeed with empty output.  */
  {
    static const char source[] = "\342";
    char *result = NULL;
    size_t length = 0;
    int retval;

    retval = mem_cd_iconv (source, strlen (source), cd_utf8_to_88591,
                           &result, &length);
    ASSERT (retval == 0);
    ASSERT (length == 0);
    free (result);
  }

  /* ======================== str_cd_iconv() ======================== */

  /* ISO-8859-1 -> UTF-8, clean conversion.  */
  {
    static const char source[] =
      "\304rger mit b\366sen B\374bchen ohne Augenma\337";
    static const char expected[] =
      "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
    char *result;

    result = str_cd_iconv (source, cd_88591_to_utf8);
    ASSERT (result != NULL);
    ASSERT (strcmp (result, expected) == 0);
    free (result);
  }

  /* UTF-8 -> ISO-8859-1, clean conversion.  */
  {
    static const char source[] =
      "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
    static const char expected[] =
      "\304rger mit b\366sen B\374bchen ohne Augenma\337";
    char *result;

    result = str_cd_iconv (source, cd_utf8_to_88591);
    ASSERT (result != NULL);
    ASSERT (strcmp (result, expected) == 0);
    free (result);
  }

  /* UTF-8 -> ISO-8859-1 with an unconvertible character: NULL and
     EILSEQ.  */
  {
    static const char source[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
    char *result;

    result = str_cd_iconv (source, cd_utf8_to_88591);
    ASSERT (result == NULL && errno == EILSEQ);
  }

  /* UTF-8 -> ISO-8859-1 with a truncated multibyte sequence: an empty
     string is expected.  */
  {
    static const char source[] = "\342";
    char *result;

    result = str_cd_iconv (source, cd_utf8_to_88591);
    ASSERT (result != NULL);
    ASSERT (strcmp (result, "") == 0);
    free (result);
  }

  /* The remaining tests open their own descriptors by encoding name.  */
  iconv_close (cd_88591_to_utf8);
  iconv_close (cd_utf8_to_88591);

  /* ========================== str_iconv() ========================= */

  /* ISO-8859-1 -> UTF-8, clean conversion.  */
  {
    static const char source[] =
      "\304rger mit b\366sen B\374bchen ohne Augenma\337";
    static const char expected[] =
      "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
    char *result;

    result = str_iconv (source, "ISO-8859-1", "UTF-8");
    ASSERT (result != NULL);
    ASSERT (strcmp (result, expected) == 0);
    free (result);
  }

  /* UTF-8 -> ISO-8859-1, clean conversion.  */
  {
    static const char source[] =
      "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
    static const char expected[] =
      "\304rger mit b\366sen B\374bchen ohne Augenma\337";
    char *result;

    result = str_iconv (source, "UTF-8", "ISO-8859-1");
    ASSERT (result != NULL);
    ASSERT (strcmp (result, expected) == 0);
    free (result);
  }

  /* UTF-8 -> ISO-8859-1 with an unconvertible character: NULL and
     EILSEQ.  */
  {
    static const char source[] = "Costs: 27 \342\202\254"; /* EURO SIGN */
    char *result;

    result = str_iconv (source, "UTF-8", "ISO-8859-1");
    ASSERT (result == NULL && errno == EILSEQ);
  }

  /* UTF-8 -> ISO-8859-1 with a truncated multibyte sequence: an empty
     string is expected.  */
  {
    static const char source[] = "\342";
    char *result;

    result = str_iconv (source, "UTF-8", "ISO-8859-1");
    ASSERT (result != NULL);
    ASSERT (strcmp (result, "") == 0);
    free (result);
  }

#endif

  return 0;
}
char * str_cd_iconv (const char *src, iconv_t cd) { /* For most encodings, a trailing NUL byte in the input will be converted to a trailing NUL byte in the output. But not for UTF-7. So that this function is usable for UTF-7, we have to exclude the NUL byte from the conversion and add it by hand afterwards. */ # if !defined _LIBICONV_VERSION && !defined __GLIBC__ /* Irix iconv() inserts a NUL byte if it cannot convert. NetBSD iconv() inserts a question mark if it cannot convert. Only GNU libiconv and GNU libc are known to prefer to fail rather than doing a lossy conversion. For other iconv() implementations, we have to look at the number of irreversible conversions returned; but this information is lost when iconv() returns for an E2BIG reason. Therefore we cannot use the second, faster algorithm. */ char *result = NULL; size_t length = 0; int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length); char *final_result; if (retval < 0) { if (result != NULL) abort (); return NULL; } /* Add the terminating NUL byte. */ final_result = (result != NULL ? realloc (result, length + 1) : malloc (length + 1)); if (final_result == NULL) { if (result != NULL) free (result); errno = ENOMEM; return NULL; } final_result[length] = '\0'; return final_result; # else /* This algorithm is likely faster than the one above. But it may produce iconv() returns for an E2BIG reason, when the output size guess is too small. Therefore it can only be used when we don't need the number of irreversible conversions performed. */ char *result; size_t result_size; size_t length; const char *inptr = src; size_t inbytes_remaining = strlen (src); /* Make a guess for the worst-case output size, in order to avoid a realloc. It's OK if the guess is wrong as long as it is not zero and doesn't lead to an integer overflow. 
*/ result_size = inbytes_remaining; { size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2); if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX) result_size *= MB_LEN_MAX; } result_size += 1; /* for the terminating NUL */ result = (char *) malloc (result_size); if (result == NULL) { errno = ENOMEM; return NULL; } /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ # if defined _LIBICONV_VERSION \ || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) /* Set to the initial state. */ iconv (cd, NULL, NULL, NULL, NULL); # endif /* Do the conversion. */ { char *outptr = result; size_t outbytes_remaining = result_size - 1; for (;;) { /* Here inptr + inbytes_remaining = src + strlen (src), outptr + outbytes_remaining = result + result_size - 1. */ size_t res = iconv (cd, (ICONV_CONST char **) &inptr, &inbytes_remaining, &outptr, &outbytes_remaining); if (res == (size_t)(-1)) { if (errno == EINVAL) break; else if (errno == E2BIG) { size_t used = outptr - result; size_t newsize = result_size * 2; char *newresult; if (!(newsize > result_size)) { errno = ENOMEM; goto failed; } newresult = (char *) realloc (result, newsize); if (newresult == NULL) { errno = ENOMEM; goto failed; } result = newresult; result_size = newsize; outptr = result + used; outbytes_remaining = result_size - 1 - used; } else goto failed; } else break; } /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ # if defined _LIBICONV_VERSION \ || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun) for (;;) { /* Here outptr + outbytes_remaining = result + result_size - 1. 
*/ size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining); if (res == (size_t)(-1)) { if (errno == E2BIG) { size_t used = outptr - result; size_t newsize = result_size * 2; char *newresult; if (!(newsize > result_size)) { errno = ENOMEM; goto failed; } newresult = (char *) realloc (result, newsize); if (newresult == NULL) { errno = ENOMEM; goto failed; } result = newresult; result_size = newsize; outptr = result + used; outbytes_remaining = result_size - 1 - used; } else goto failed; } else break; } # endif /* Add the terminating NUL byte. */ *outptr++ = '\0'; length = outptr - result; } /* Give away unused memory. */ if (length < result_size) { char *smaller_result = (char *) realloc (result, length); if (smaller_result != NULL) result = smaller_result; } return result; failed: { int saved_errno = errno; free (result); errno = saved_errno; return NULL; } # endif }