コード例 #1
0
ファイル: str_convert.c プロジェクト: impegoraro/tagtool
/*
 * Gives the smallest number of bytes a single character occupies in the
 * encoding `enc`.  Callers need this to locate the terminator of a
 * zero-terminated string: a UTF-8 string ends with one zero byte, a UTF-16
 * string with two, and so on.
 *
 * XXX - There is no known generic way to obtain this for every encoding,
 *       so any multibyte encoding outside the UTF family is reported as 1,
 *       which is wrong.
 */
static int bytes_per_char(int enc)
{
	int width = 1;	/* default: single-byte terminator */

	/* Only the first 6 characters of the name are compared,
	   case-insensitively, so any name starting with "UTF-16" or
	   "UTF-32" matches. */
	if (strncasecmp(encoding_name(enc), "UTF-16", 6) == 0)
		width = 2;
	else if (strncasecmp(encoding_name(enc), "UTF-32", 6) == 0)
		width = 4;

	return width;
}
コード例 #2
0
ファイル: message_test.c プロジェクト: elliefm/cyrus-imapd
    }
    else {
        dump_octets(fp, data->s, MAX_TEXT/2);
        fputs("    ...\n", fp);
        dump_octets(fp, data->s + data->len - MAX_TEXT/2, MAX_TEXT/2);
    }
#undef MAX_TEXT
}

/*
 * Callback handed to message_foreach_text_section(): prints a one-line
 * summary of a text section (part number, data length, subtype, charset
 * name and encoding name) to stdout, then dumps the section data with
 * dump_buf().
 *
 * The rock argument is unused.  Always returns 0.
 */
static int dump_one_section(int partno, charset_t charset, int encoding,
                            const char *subtype, struct buf *data,
                            void *rock __attribute__((unused)))
{
    printf("SECTION partno=%d length=%llu subtype=%s charset=%s encoding=%s\n",
            partno, (unsigned long long)data->len, subtype, charset_name(charset), encoding_name(encoding));
    dump_buf(stdout, data);
    return 0;
}

/*
 * Walks the text sections of `message` with message_foreach_text_section(),
 * passing dump_one_section as the callback and NULL as its rock, and hands
 * back whatever that call returns.
 */
static int dump_text_sections(message_t *message)
{
    int r;

    r = message_foreach_text_section(message, dump_one_section, NULL);
    return r;
}

/*-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-*/

static int dump_message(message_t *message)
{
    return dump_text_sections(message);
コード例 #3
0
ファイル: str_convert.c プロジェクト: impegoraro/tagtool
/*
 * Converts the zero-terminated string `str` from encoding `from` to
 * encoding `to`.
 *
 * Returns a newly malloc()ed buffer holding the converted bytes, which the
 * caller must free(), or NULL when no converter can be opened for the pair
 * or when memory runs out.  When both encodings have the same name the
 * input is simply copied.
 *
 * Invalid or inconvertible input characters are reported on stderr and
 * skipped instead of aborting the conversion.
 *
 * glib's g_convert() is not used because it cannot determine the size of
 * a zero-terminated string in an arbitrary encoding.  glib's iconv
 * wrappers are still worth using because they provide libiconv on systems
 * that do not have it natively.
 */
char *str_convert_encoding(int from, int to, const char *str)
{
	/* Smallest amount by which the output buffer grows after E2BIG. */
	enum { MIN_GROWTH = 16 };

	GIConv conv;
	char *result;
	char *inbuf, *outbuf;
	size_t inbpc, outbpc;
	size_t inbytes, outbytes;
	size_t inbytesleft, outbytesleft;
	size_t res;

	/* NOTE(review): strsize() presumably returns the byte size of str
	   including its terminator -- confirm against its definition. */
	inbytes = strsize(from, str);

	if (strcasecmp(encoding_name(to), encoding_name(from)) == 0) {
		result = malloc(inbytes);
		if (result == NULL)
			return NULL;
		memcpy(result, str, inbytes);
		return result;
	}

	conv = g_iconv_open(encoding_name(to), encoding_name(from));
	if (conv == (GIConv)-1) {
		fprintf(stderr, "convert_encoding: cannot convert from %s to %s\n", 
			encoding_name(from), encoding_name(to));
		return NULL;
	}

	inbpc = bytes_per_char(from);
	outbpc = bytes_per_char(to);

	/* estimate the converted size */
	outbytes = (size_t)(((double)outbpc / (double)inbpc) * inbytes);
	/* optimize common cases (tuned for western european languages) */
	if (to == UTF_8 && inbpc == 1)
		outbytes = (size_t)ceil(1.25 * inbytes);
	else if (to == UTF_16)
		outbytes += 2;	/* for the BOM */
	/* never start with less than room for one output character, so
	   that malloc(0) cannot be mistaken for an allocation failure */
	if (outbytes < outbpc)
		outbytes = outbpc;

	result = malloc(outbytes);
	if (result == NULL) {
		g_iconv_close(conv);
		return NULL;
	}

	/* g_iconv() takes a non-const input pointer; the cast only drops
	   the qualifier to satisfy that signature. */
	inbuf = (char*)str;
	inbytesleft = inbytes;
	outbuf = result;
	outbytesleft = outbytes;

	for (;;) {
		res = g_iconv(conv, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
		if (res != (size_t)-1)
			break;

		if (errno == E2BIG) {
			/* Ran out of space, alloc more.
			   Extrapolate the final size from the progress made so
			   far (plus 10%) to avoid a second realloc while still
			   keeping over-allocation to a minimum. */
			size_t bytes_written = (size_t)(outbuf - result);
			size_t consumed = inbytes - inbytesleft;
			size_t newsize = outbytes;
			char *grown;

			/* With no input consumed there is nothing to
			   extrapolate from (and the ratio would divide by
			   zero), so fall through to the minimum growth. */
			if (consumed > 0) {
				double done = (double)consumed / (double)inbytes;
				newsize = (size_t)ceil(((double)bytes_written / done) * 1.1);
			}
			/* guarantee forward progress on every retry */
			if (newsize < outbytes + MIN_GROWTH)
				newsize = outbytes + MIN_GROWTH;

			/* keep the old pointer until realloc succeeds */
			grown = realloc(result, newsize);
			if (grown == NULL) {
				free(result);
				g_iconv_close(conv);
				return NULL;
			}

			result = grown;
			outbuf = result + bytes_written;
			outbytesleft = newsize - bytes_written;
			outbytes = newsize;
			continue;
		}

		/* Invalid or inconvertible char, skip it.
		   Seems better than aborting the conversion... */
		fprintf(stderr, "convert_encoding: conversion error at offset %zu\n", 
			inbytes - inbytesleft);

		/* Nothing follows the bad character: stop here.  The sizes
		   are unsigned, so subtracting past zero would wrap. */
		if (inbytesleft <= inbpc)
			break;

		/* Only the input is advanced; moving the output pointer
		   would leave uninitialized bytes in the result. */
		inbuf += inbpc;
		inbytesleft -= inbpc;
	}

	g_iconv_close(conv);
	return result;
}