static void message_decode_body_init_charset(struct message_decoder_context *ctx, struct message_part *part) { ctx->binary_input = ctx->content_charset == NULL && (ctx->flags & MESSAGE_DECODER_FLAG_RETURN_BINARY) != 0 && (part->flags & (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_MESSAGE_RFC822)) == 0; if (ctx->binary_input) return; if (ctx->charset_trans != NULL && ctx->content_charset != NULL && strcasecmp(ctx->content_charset, ctx->charset_trans_charset) == 0) { /* already have the correct translation selected */ charset_to_utf8_reset(ctx->charset_trans); return; } if (ctx->charset_trans != NULL) charset_to_utf8_end(&ctx->charset_trans); i_free_and_null(ctx->charset_trans_charset); ctx->charset_trans_charset = i_strdup(ctx->content_charset != NULL ? ctx->content_charset : "UTF-8"); if (charset_to_utf8_begin(ctx->charset_trans_charset, ctx->normalizer, &ctx->charset_trans) < 0) ctx->charset_trans = charset_utf8_to_utf8_begin(ctx->normalizer); }
static void test_charset_iconv(void) { struct { const char *charset; const char *input; const char *output; enum charset_result result; } tests[] = { { "ISO-8859-1", "p\xE4\xE4", "p\xC3\xA4\xC3\xA4", CHARSET_RET_OK }, { "UTF-7", "+AOQA5AD2AOQA9gDkAPYA5AD2AOQA9gDkAPYA5AD2AOQA9gDkAPYA5AD2AOQA9gDkAPYA5AD2AOQA9gDkAPYA5AD2AOQA9gDk", "\xC3\xA4\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4" "\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4" "\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4" "\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4" "\xC3\xB6\xC3\xA4\xC3\xB6\xC3\xA4", CHARSET_RET_OK } }; string_t *str = t_str_new(128); struct charset_translation *trans; enum charset_result result; size_t pos, left, limit, len; unsigned int i; test_begin("charset iconv"); for (i = 0; i < N_ELEMENTS(tests); i++) { str_truncate(str, 0); test_assert_idx(charset_to_utf8_str(tests[i].charset, NULL, tests[i].input, str, &result) == 0, i); test_assert_idx(strcmp(tests[i].output, str_c(str)) == 0, i); test_assert_idx(result == tests[i].result, i); str_truncate(str, 0); test_assert_idx(charset_to_utf8_begin(tests[i].charset, NULL, &trans) == 0, i); len = strlen(tests[i].input); for (pos = 0, limit = 1; limit <= len; pos += left, limit++) { left = limit - pos; result = charset_to_utf8(trans, (const void *)(tests[i].input + pos), &left, str); if (result != CHARSET_RET_INCOMPLETE_INPUT && result != CHARSET_RET_OK) break; } test_assert_idx(strcmp(tests[i].output, str_c(str)) == 0, i); test_assert_idx(result == tests[i].result, i); charset_to_utf8_end(&trans); } /* Use //IGNORE just to force handling to be done by iconv instead of our own UTF-8 routines. */ test_charset_utf8_common("UTF-8//IGNORE"); test_end(); }
int charset_to_utf8_str(const char *charset, normalizer_func_t *normalizer, const char *input, string_t *output, enum charset_result *result_r) { struct charset_translation *t; size_t len = strlen(input); if (charset_to_utf8_begin(charset, normalizer, &t) < 0) return -1; *result_r = charset_to_utf8(t, (const unsigned char *)input, &len, output); charset_to_utf8_end(&t); return 0; }
void message_decoder_deinit(struct message_decoder_context **_ctx) { struct message_decoder_context *ctx = *_ctx; *_ctx = NULL; if (ctx->charset_trans != NULL) charset_to_utf8_end(&ctx->charset_trans); buffer_free(&ctx->encoding_buf); buffer_free(&ctx->buf); buffer_free(&ctx->buf2); i_free(ctx->charset_trans_charset); i_free(ctx->content_charset); i_free(ctx); }
static void test_charset_iconv_utf7_state(void) { struct charset_translation *trans; string_t *str = t_str_new(32); unsigned char nextbuf[5+CHARSET_MAX_PENDING_BUF_SIZE+1]; size_t size; test_begin("charset iconv utf7 state"); test_assert(charset_to_utf8_begin("UTF-7", NULL, &trans) == 0); size = 2; test_assert(charset_to_utf8(trans, (const void *)"a+", &size, str) == CHARSET_RET_INCOMPLETE_INPUT); test_assert(strcmp(str_c(str), "a") == 0); test_assert(size == 1); memset(nextbuf, '?', sizeof(nextbuf)); memcpy(nextbuf, "+AOQ-", 5); size = sizeof(nextbuf); test_assert(charset_to_utf8(trans, nextbuf, &size, str) == CHARSET_RET_OK); test_assert(strcmp(str_c(str), "a\xC3\xA4???????????") == 0); charset_to_utf8_end(&trans); test_end(); }