/* ----------
 * conv_proc(
 *		INTEGER,	-- source encoding id
 *		INTEGER,	-- destination encoding id
 *		CSTRING,	-- source string (null terminated C string)
 *		CSTRING,	-- destination string (null terminated C string)
 *		INTEGER		-- source string length
 * ) returns VOID;
 * ----------
 */
/*
 * shift_jis_2004_to_utf8 --- SHIFT_JIS_2004 ==> UTF8 conversion procedure.
 *
 * Takes the source string (argument 2), the destination buffer (argument 3)
 * and the source length (argument 4) and passes them to LocalToUtf()
 * together with the plain and combined SHIFT_JIS_2004 mapping tables.
 * Returns void.
 */
Datum
shift_jis_2004_to_utf8(PG_FUNCTION_ARGS)
{
	unsigned char *sjis_in;
	unsigned char *utf8_out;
	int			nbytes;

	/* Run the shared argument check for this encoding pair. */
	CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_UTF8);

	sjis_in = (unsigned char *) PG_GETARG_CSTRING(2);
	utf8_out = (unsigned char *) PG_GETARG_CSTRING(3);
	nbytes = PG_GETARG_INT32(4);

	LocalToUtf(sjis_in, nbytes, utf8_out,
			   LUmapSHIFT_JIS_2004,
			   lengthof(LUmapSHIFT_JIS_2004),
			   LUmapSHIFT_JIS_2004_combined,
			   lengthof(LUmapSHIFT_JIS_2004_combined),
			   NULL,
			   PG_SHIFT_JIS_2004);

	PG_RETURN_VOID();
}
/*
 * win1250_to_latin2 --- WIN1250 ==> LATIN2 conversion procedure.
 *
 * The conversion is done in two steps: win12502mic() writes an intermediate
 * "mic" string into a temporary buffer, and mic2latin2() turns that string
 * into the final result in the destination buffer (argument 3).
 * Returns void.
 */
Datum
win1250_to_latin2(PG_FUNCTION_ARGS)
{
	unsigned char *win_in;
	unsigned char *latin_out;
	unsigned char *mic_tmp;
	int			nbytes;

	/* Run the shared argument check for this encoding pair. */
	CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_LATIN2);

	win_in = (unsigned char *) PG_GETARG_CSTRING(2);
	latin_out = (unsigned char *) PG_GETARG_CSTRING(3);
	nbytes = PG_GETARG_INT32(4);

	/* Scratch space: source length times the growth rate, plus one byte. */
	mic_tmp = palloc(nbytes * ENCODING_GROWTH_RATE + 1);

	win12502mic(win_in, mic_tmp, nbytes);

	/* The length of the intermediate string is measured with strlen(). */
	mic2latin2(mic_tmp, latin_out, strlen((char *) mic_tmp));

	pfree(mic_tmp);

	PG_RETURN_VOID();
}
/*
 * utf8_to_johab --- UTF8 ==> JOHAB conversion procedure.
 *
 * Takes the source string (argument 2), the destination buffer (argument 3)
 * and the source length (argument 4) and passes them to UtfToLocal() along
 * with johab_from_unicode_tree.  Returns void.
 */
Datum
utf8_to_johab(PG_FUNCTION_ARGS)
{
	unsigned char *utf8_in;
	unsigned char *johab_out;
	int			nbytes;

	/* Run the shared argument check for this encoding pair. */
	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_JOHAB);

	utf8_in = (unsigned char *) PG_GETARG_CSTRING(2);
	johab_out = (unsigned char *) PG_GETARG_CSTRING(3);
	nbytes = PG_GETARG_INT32(4);

	UtfToLocal(utf8_in, nbytes, johab_out,
			   &johab_from_unicode_tree,
			   NULL,
			   0,
			   NULL,
			   PG_JOHAB);

	PG_RETURN_VOID();
}
/*
 * utf8_to_euc_jis_2004 --- UTF8 ==> EUC_JIS_2004 conversion procedure.
 *
 * Takes the source string (argument 2), the destination buffer (argument 3)
 * and the source length (argument 4) and passes them to UtfToLocal()
 * together with the plain and combined EUC_JIS_2004 mapping tables.
 * Returns void.
 */
Datum
utf8_to_euc_jis_2004(PG_FUNCTION_ARGS)
{
	unsigned char *utf8_in;
	unsigned char *eucjis_out;
	int			nbytes;

	/* Run the shared argument check for this encoding pair. */
	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JIS_2004);

	utf8_in = (unsigned char *) PG_GETARG_CSTRING(2);
	eucjis_out = (unsigned char *) PG_GETARG_CSTRING(3);
	nbytes = PG_GETARG_INT32(4);

	UtfToLocal(utf8_in, nbytes, eucjis_out,
			   ULmapEUC_JIS_2004,
			   lengthof(ULmapEUC_JIS_2004),
			   ULmapEUC_JIS_2004_combined,
			   lengthof(ULmapEUC_JIS_2004_combined),
			   NULL,
			   PG_EUC_JIS_2004);

	PG_RETURN_VOID();
}
/*
 * koi8r_to_iso --- KOI8R ==> ISO_8859_5 conversion procedure.
 *
 * The conversion is done in two steps: koi8r2mic() writes an intermediate
 * "mic" string into a temporary buffer, and mic2iso() turns that string
 * into the final result in the destination buffer (argument 3).
 * Returns void.
 */
Datum
koi8r_to_iso(PG_FUNCTION_ARGS)
{
	unsigned char *koi8r_in;
	unsigned char *iso_out;
	unsigned char *mic_tmp;
	int			nbytes;

	/* Run the shared argument check for this encoding pair. */
	CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_ISO_8859_5);

	koi8r_in = (unsigned char *) PG_GETARG_CSTRING(2);
	iso_out = (unsigned char *) PG_GETARG_CSTRING(3);
	nbytes = PG_GETARG_INT32(4);

	/* Scratch space: source length times the growth rate, plus one byte. */
	mic_tmp = palloc(nbytes * ENCODING_GROWTH_RATE + 1);

	koi8r2mic(koi8r_in, mic_tmp, nbytes);

	/* The length of the intermediate string is measured with strlen(). */
	mic2iso(mic_tmp, iso_out, strlen((char *) mic_tmp));

	pfree(mic_tmp);

	PG_RETURN_VOID();
}
/*
 * win866_to_koi8r --- WIN866 ==> KOI8R conversion procedure.
 *
 * The conversion is done in two steps: win8662mic() writes an intermediate
 * "mic" string into a temporary buffer, and mic2koi8r() turns that string
 * into the final result in the destination buffer (argument 3).
 * Returns void.
 */
Datum
win866_to_koi8r(PG_FUNCTION_ARGS)
{
	unsigned char *win866_in;
	unsigned char *koi8r_out;
	unsigned char *mic_tmp;
	int			nbytes;

	/* Run the shared argument check for this encoding pair. */
	CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_KOI8R);

	win866_in = (unsigned char *) PG_GETARG_CSTRING(2);
	koi8r_out = (unsigned char *) PG_GETARG_CSTRING(3);
	nbytes = PG_GETARG_INT32(4);

	/* Scratch space: source length times the growth rate, plus one byte. */
	mic_tmp = palloc(nbytes * ENCODING_GROWTH_RATE + 1);

	win8662mic(win866_in, mic_tmp, nbytes);

	/* The length of the intermediate string is measured with strlen(). */
	mic2koi8r(mic_tmp, koi8r_out, strlen((char *) mic_tmp));

	pfree(mic_tmp);

	PG_RETURN_VOID();
}
/*
 * win866_to_iso --- WIN866 ==> ISO_8859_5 conversion procedure.
 *
 * The conversion is done in two steps: win8662mic() writes an intermediate
 * "mic" string into a temporary buffer, and mic2iso() turns that string
 * into the final result in the destination buffer (argument 3).
 * Returns void.
 */
Datum
win866_to_iso(PG_FUNCTION_ARGS)
{
	unsigned char *win866_in;
	unsigned char *iso_out;
	unsigned char *mic_tmp;
	int			nbytes;

	/* Run the shared argument check for this encoding pair. */
	CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_ISO_8859_5);

	win866_in = (unsigned char *) PG_GETARG_CSTRING(2);
	iso_out = (unsigned char *) PG_GETARG_CSTRING(3);
	nbytes = PG_GETARG_INT32(4);

	/*
	 * mic/KOI8R serves as the intermediate form here; the comment in
	 * win866_to_win1251() discusses this choice.
	 */
	mic_tmp = palloc(nbytes * ENCODING_GROWTH_RATE + 1);

	win8662mic(win866_in, mic_tmp, nbytes);

	/* The length of the intermediate string is measured with strlen(). */
	mic2iso(mic_tmp, iso_out, strlen((char *) mic_tmp));

	pfree(mic_tmp);

	PG_RETURN_VOID();
}
/*
 * win866_to_win1251 --- WIN866 ==> WIN1251 conversion procedure.
 *
 * The conversion is done in two steps: win8662mic() writes an intermediate
 * "mic" string into a temporary buffer, and mic2win1251() turns that string
 * into the final result in the destination buffer (argument 3).
 * Returns void.
 */
Datum
win866_to_win1251(PG_FUNCTION_ARGS)
{
	unsigned char *win866_in;
	unsigned char *win1251_out;
	unsigned char *mic_tmp;
	int			nbytes;

	/* Run the shared argument check for this encoding pair. */
	CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_WIN1251);

	win866_in = (unsigned char *) PG_GETARG_CSTRING(2);
	win1251_out = (unsigned char *) PG_GETARG_CSTRING(3);
	nbytes = PG_GETARG_INT32(4);

	/*
	 * Note: the conversion is routed through MULE_INTERNAL/KOI8R.  A few
	 * characters, the "Numero" sign among them, are present in every other
	 * cyrillic encoding (win1251, ISO_8859-5 and cp866) but missing from
	 * KOI8R, so converting those characters will fail.
	 */
	mic_tmp = palloc(nbytes * ENCODING_GROWTH_RATE + 1);

	win8662mic(win866_in, mic_tmp, nbytes);

	/* The length of the intermediate string is measured with strlen(). */
	mic2win1251(mic_tmp, win1251_out, strlen((char *) mic_tmp));

	pfree(mic_tmp);

	PG_RETURN_VOID();
}
Exemple #9
0
/* ----------
 * conv_proc(
 *		INTEGER,	-- source encoding id
 *		INTEGER,	-- destination encoding id
 *		CSTRING,	-- source string (null terminated C string)
 *		CSTRING,	-- destination string (null terminated C string)
 *		INTEGER		-- source string length
 * ) returns VOID;
 * ----------
 */
Datum
sjis_eudc_to_utf8(PG_FUNCTION_ARGS)
{
	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
	unsigned char *p;
	unsigned char *fallback_character = NULL;
	int			len = PG_GETARG_INT32(4);
	int			sjis_len;
	int			clen;

	CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_UTF8);

	if (sjis_to_utf8 == NULL)
		sjis_to_utf8 = load_external_function(
			"utf8_and_sjis", "sjis_to_utf8", true, NULL);

	*dest = '\0';
	p = src;
	sjis_len = 0;
	for (; len > 0; len -= clen)
	{
		const unsigned char *c = p + sjis_len;

		if (c[0] == '\0')
			report_invalid_encoding(PG_SJIS, (const char *) p + sjis_len, len);

		if (c[0] >= 0xf0 && c[0] <= 0xf9 && len >= 2 && ISSJISTAIL(c[1]))
		{
			int	ucs;
			int	m;
			int	n;

			clen = 2;

			/* SJIS to UTF8 */
			if (sjis_len > 0)
			{
				DirectFunctionCall5(sjis_to_utf8, PG_SJIS, PG_UTF8,
									CStringGetDatum(p), CStringGetDatum(dest),
									sjis_len);
				dest = dest + strlen((char *) dest);
				p += sjis_len;
				sjis_len = 0;
			}
			p += clen;

			elog(eudc_log_level,
				"eudc character found: %02x%02x in SJIS to UTF8 conversion",
				c[0], c[1]);

			/* SJIS EUDC to UTF8 */
			if (eudc_fallback_character && eudc_fallback_character[0])
			{
				/* map to fallback character */
				int		i;

				if (fallback_character == NULL)
				{
					fallback_character = pg_do_encoding_conversion(
						(unsigned char *) eudc_fallback_character,
						strlen(eudc_fallback_character),
						GetDatabaseEncoding(),
						PG_UTF8);
				}

				for (i = 0; fallback_character[i]; i++)
					*dest++ = fallback_character[i];
			}
			else
			{
				/* linear mapping */
				n = c[0] - 0xf0;
				m = c[1] - 0x40;

				if (m >= 0x40)
					m--;

				ucs = 0xe000 + n * 188 + m;

				*dest++ = (ucs >> 12) | 0xe0;
				*dest++ = (ucs & 0x0fc0) >> 6 | 0x80;
				*dest++ = (ucs & 0x003f) | 0x80;
			}
			*dest = '\0';
		}
		else
		{