Beispiel #1
0
/*
 * Verify a single Shift-JIS character starting at s, given that len bytes
 * are available.
 *
 * Returns the character's byte length as reported by pg_sjis_mblen(), or -1
 * if fewer than that many bytes are available, or if a multibyte character's
 * first byte fails ISSJISHEAD or its second byte fails ISSJISTAIL.
 */
static int
pg_sjis_verifier(const unsigned char *s, int len)
{
	const int	char_len = pg_sjis_mblen(s);

	/* Not enough input left to hold the whole character. */
	if (char_len > len)
		return -1;

	/* Single-byte characters get no further checks here. */
	if (char_len != 1)
	{
		const unsigned char lead = s[0];
		const unsigned char trail = s[1];

		/* Both the lead byte and the trail byte must be acceptable. */
		if (!(ISSJISHEAD(lead) && ISSJISTAIL(trail)))
			return -1;
	}

	return char_len;
}
Beispiel #2
0
/* ----------
 * conv_proc(
 *		INTEGER,	-- source encoding id
 *		INTEGER,	-- destination encoding id
 *		CSTRING,	-- source string (null terminated C string)
 *		CSTRING,	-- destination string (null terminated C string)
 *		INTEGER		-- source string length
 * ) returns VOID;
 * ----------
 */
Datum
sjis_eudc_to_utf8(PG_FUNCTION_ARGS)
{
	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
	unsigned char *p;
	unsigned char *fallback_character = NULL;
	int			len = PG_GETARG_INT32(4);
	int			sjis_len;
	int			clen;

	CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_UTF8);

	if (sjis_to_utf8 == NULL)
		sjis_to_utf8 = load_external_function(
			"utf8_and_sjis", "sjis_to_utf8", true, NULL);

	*dest = '\0';
	p = src;
	sjis_len = 0;
	for (; len > 0; len -= clen)
	{
		const unsigned char *c = p + sjis_len;

		if (c[0] == '\0')
			report_invalid_encoding(PG_SJIS, (const char *) p + sjis_len, len);

		if (c[0] >= 0xf0 && c[0] <= 0xf9 && len >= 2 && ISSJISTAIL(c[1]))
		{
			int	ucs;
			int	m;
			int	n;

			clen = 2;

			/* SJIS to UTF8 */
			if (sjis_len > 0)
			{
				DirectFunctionCall5(sjis_to_utf8, PG_SJIS, PG_UTF8,
									CStringGetDatum(p), CStringGetDatum(dest),
									sjis_len);
				dest = dest + strlen((char *) dest);
				p += sjis_len;
				sjis_len = 0;
			}
			p += clen;

			elog(eudc_log_level,
				"eudc character found: %02x%02x in SJIS to UTF8 conversion",
				c[0], c[1]);

			/* SJIS EUDC to UTF8 */
			if (eudc_fallback_character && eudc_fallback_character[0])
			{
				/* map to fallback character */
				int		i;

				if (fallback_character == NULL)
				{
					fallback_character = pg_do_encoding_conversion(
						(unsigned char *) eudc_fallback_character,
						strlen(eudc_fallback_character),
						GetDatabaseEncoding(),
						PG_UTF8);
				}

				for (i = 0; fallback_character[i]; i++)
					*dest++ = fallback_character[i];
			}
			else
			{
				/* linear mapping */
				n = c[0] - 0xf0;
				m = c[1] - 0x40;

				if (m >= 0x40)
					m--;

				ucs = 0xe000 + n * 188 + m;

				*dest++ = (ucs >> 12) | 0xe0;
				*dest++ = (ucs & 0x0fc0) >> 6 | 0x80;
				*dest++ = (ucs & 0x003f) | 0x80;
			}
			*dest = '\0';
		}
		else
		{