/** * Convert the len characters long character sequence * given in input that is in the given input charset * to a string in given output charset. * * @param input input string * @param len number of bytes in @a input * @param input_charset character set used for @a input * @param output_charset desired character set for the return value * @return the converted string (0-terminated), * if conversion fails, a copy of the orignal * string is returned. */ char * GNUNET_STRINGS_conv (const char *input, size_t len, const char *input_charset, const char *output_charset) { char *ret; uint8_t *u8_string; char *encoded_string; size_t u8_string_length; size_t encoded_string_length; u8_string = u8_conv_from_encoding (input_charset, iconveh_error, input, len, NULL, NULL, &u8_string_length); if (NULL == u8_string) { LOG_STRERROR (GNUNET_ERROR_TYPE_WARNING, "u8_conv_from_encoding"); goto fail; } if (0 == strcmp (output_charset, "UTF-8")) { ret = GNUNET_malloc (u8_string_length + 1); memcpy (ret, u8_string, u8_string_length); ret[u8_string_length] = '\0'; free (u8_string); return ret; } encoded_string = u8_conv_to_encoding (output_charset, iconveh_error, u8_string, u8_string_length, NULL, NULL, &encoded_string_length); free (u8_string); if (NULL == encoded_string) { LOG_STRERROR (GNUNET_ERROR_TYPE_WARNING, "u8_conv_to_encoding"); goto fail; } ret = GNUNET_malloc (encoded_string_length + 1); memcpy (ret, encoded_string, encoded_string_length); ret[encoded_string_length] = '\0'; free (encoded_string); return ret; fail: LOG (GNUNET_ERROR_TYPE_WARNING, _("Character sets requested were `%s'->`%s'\n"), "UTF-8", output_charset); ret = GNUNET_malloc (len + 1); memcpy (ret, input, len); ret[len] = '\0'; return ret; }
/*
 * Test driver for u8_conv_to_encoding(): converts UTF-8 input to
 * ISO-8859-1 with each of the three ilseq handlers, once without and once
 * with an offsets array, and checks result, length and offsets.
 * All tests are compiled only when HAVE_ICONV is set.
 * Returns 0 when every ASSERT passes.
 */
int
main ()
{
  static enum iconv_ilseq_handler handlers[] =
    { iconveh_error, iconveh_question_mark, iconveh_escape_sequence };
  size_t h;
  size_t o;
  size_t i;

#if HAVE_ICONV
  /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1,
     ISO-8859-2, and UTF-8.  */

  /* Test conversion from UTF-8 to ISO-8859-1 with no errors.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const uint8_t input[] =
        "\303\204rger mit b\303\266sen B\303\274bchen ohne Augenma\303\237";
      static const char expected[] =
        "\304rger mit b\366sen B\374bchen ohne Augenma\337";

      /* o == 0: no offsets requested; o == 1: offsets array requested.  */
      for (o = 0; o < 2; o++)
        {
          size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL);
          size_t length;
          char *result = u8_conv_to_encoding ("ISO-8859-1", handler,
                                              input, u8_strlen (input),
                                              offsets,
                                              NULL, &length);

          ASSERT (result != NULL);
          ASSERT (length == strlen (expected));
          ASSERT (memcmp (result, expected, length) == 0);
          if (o)
            {
              /* The second byte of each 2-byte UTF-8 sequence has no
                 offset of its own and is expected to be (size_t)(-1).  */
              for (i = 0; i < 41; i++)
                ASSERT (offsets[i] == (i < 1 ? i :
                                       i == 1 ? (size_t)(-1) :
                                       i < 13 ? i - 1 :
                                       i == 13 ? (size_t)(-1) :
                                       i < 20 ? i - 2 :
                                       i == 20 ? (size_t)(-1) :
                                       i < 40 ? i - 3 :
                                       i == 40 ? (size_t)(-1) :
                                       i - 4));
              ASSERT (offsets[41] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }

  /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      static const uint8_t input[] = "Rafa\305\202 Maszkowski"; /* Rafał Maszkowski */

      for (o = 0; o < 2; o++)
        {
          size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL);
          /* Sentinel: must stay untouched when the conversion fails.  */
          size_t length = 0xdead;
          char *result = u8_conv_to_encoding ("ISO-8859-1", handler,
                                              input, u8_strlen (input),
                                              offsets,
                                              NULL, &length);

          switch (handler)
            {
            case iconveh_error:
              ASSERT (result == NULL);
              ASSERT (errno == EILSEQ);
              ASSERT (length == 0xdead);
              /* Release the offsets array on the failure path as well;
                 free (NULL) is a no-op when none was requested.  */
              free (offsets);
              break;

            case iconveh_question_mark:
              {
                static const char expected[] = "Rafa? Maszkowski";
                static const char expected_translit[] = "Rafal Maszkowski";

                ASSERT (result != NULL);
                ASSERT (length == strlen (expected));
                /* Either a '?' replacement or a transliteration to 'l'
                   is accepted.  */
                ASSERT (memcmp (result, expected, length) == 0
                        || memcmp (result, expected_translit, length) == 0);
                if (o)
                  {
                    for (i = 0; i < 17; i++)
                      ASSERT (offsets[i] == (i < 5 ? i :
                                             i == 5 ? (size_t)(-1) :
                                             i - 1));
                    ASSERT (offsets[17] == MAGIC);
                    free (offsets);
                  }
                free (result);
              }
              break;

            case iconveh_escape_sequence:
              {
                static const char expected[] = "Rafa\\u0142 Maszkowski";

                ASSERT (result != NULL);
                ASSERT (length == strlen (expected));
                ASSERT (memcmp (result, expected, length) == 0);
                if (o)
                  {
                    for (i = 0; i < 17; i++)
                      ASSERT (offsets[i] == (i < 5 ? i :
                                             i == 5 ? (size_t)(-1) :
                                             i + 4));
                    ASSERT (offsets[17] == MAGIC);
                    free (offsets);
                  }
                free (result);
              }
              break;
            }
        }
    }

  /* Test conversion from UTF-8 to ISO-8859-1 with EINVAL.  */
  for (h = 0; h < SIZEOF (handlers); h++)
    {
      enum iconv_ilseq_handler handler = handlers[h];
      /* Lead byte of a multi-byte sequence with nothing following it.  */
      static const uint8_t input[] = "\342";

      for (o = 0; o < 2; o++)
        {
          size_t *offsets = (o ? new_offsets (u8_strlen (input)) : NULL);
          size_t length;
          char *result = u8_conv_to_encoding ("ISO-8859-1", handler,
                                              input, u8_strlen (input),
                                              offsets,
                                              NULL, &length);

          ASSERT (result != NULL);
          ASSERT (length == strlen (""));
          if (o)
            {
              ASSERT (offsets[0] == 0);
              ASSERT (offsets[1] == MAGIC);
              free (offsets);
            }
          free (result);
        }
    }
#endif

  return 0;
}