/* ---------- * conv_proc( * INTEGER, -- source encoding id * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) * INTEGER -- source string length * ) returns VOID; * ---------- */ Datum shift_jis_2004_to_utf8(PG_FUNCTION_ARGS) { unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_UTF8); LocalToUtf(src, len, dest, LUmapSHIFT_JIS_2004, lengthof(LUmapSHIFT_JIS_2004), LUmapSHIFT_JIS_2004_combined, lengthof(LUmapSHIFT_JIS_2004_combined), NULL, PG_SHIFT_JIS_2004); PG_RETURN_VOID(); }
/*
 * win1250_to_latin2
 *
 * Conversion procedure for the WIN1250 -> LATIN2 direction.  The source
 * string is argument 2, the destination buffer is argument 3 and the
 * source length is argument 4.  The conversion runs in two stages through
 * a temporary buffer: win12502mic() fills it, then mic2latin2() reads it
 * back and writes the destination.
 */
Datum
win1250_to_latin2(PG_FUNCTION_ARGS)
{
	int			inlen = PG_GETARG_INT32(4);
	unsigned char *out = (unsigned char *) PG_GETARG_CSTRING(3);
	unsigned char *in = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *mic;
	size_t		miclen;

	CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_LATIN2);

	/*
	 * Stage 1: source -> intermediate form.  The scratch buffer holds
	 * ENCODING_GROWTH_RATE bytes per input byte plus one extra byte.
	 */
	mic = palloc(inlen * ENCODING_GROWTH_RATE + 1);
	win12502mic(in, mic, inlen);

	/*
	 * Stage 2: intermediate form -> destination.  The intermediate length
	 * is taken with strlen(), so stage 1 must leave it NUL-terminated.
	 */
	miclen = strlen((char *) mic);
	mic2latin2(mic, out, miclen);

	pfree(mic);

	PG_RETURN_VOID();
}
/*
 * utf8_to_johab
 *
 * Conversion procedure for the UTF8 -> JOHAB direction.  The source string
 * is argument 2, the destination buffer is argument 3 and the source
 * length is argument 4.  Once the encoding ids have been verified the
 * string is passed to UtfToLocal() with johab_from_unicode_tree as the
 * only mapping; no secondary table or callback is supplied.
 */
Datum
utf8_to_johab(PG_FUNCTION_ARGS)
{
	int			inlen = PG_GETARG_INT32(4);
	unsigned char *out = (unsigned char *) PG_GETARG_CSTRING(3);
	unsigned char *in = (unsigned char *) PG_GETARG_CSTRING(2);

	CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_JOHAB);

	UtfToLocal(in, inlen, out,
			   &johab_from_unicode_tree,
			   NULL, 0,
			   NULL,
			   PG_JOHAB);

	PG_RETURN_VOID();
}
Datum utf8_to_euc_jis_2004(PG_FUNCTION_ARGS) { unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JIS_2004); UtfToLocal(src, len, dest, ULmapEUC_JIS_2004, lengthof(ULmapEUC_JIS_2004), ULmapEUC_JIS_2004_combined, lengthof(ULmapEUC_JIS_2004_combined), NULL, PG_EUC_JIS_2004); PG_RETURN_VOID(); }
/*
 * koi8r_to_iso
 *
 * Conversion procedure for the KOI8R -> ISO_8859_5 direction.  The source
 * string is argument 2, the destination buffer is argument 3 and the
 * source length is argument 4.  The conversion runs in two stages through
 * a temporary buffer: koi8r2mic() fills it, then mic2iso() reads it back
 * and writes the destination.
 */
Datum
koi8r_to_iso(PG_FUNCTION_ARGS)
{
	int			inlen = PG_GETARG_INT32(4);
	unsigned char *out = (unsigned char *) PG_GETARG_CSTRING(3);
	unsigned char *in = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *mic;
	size_t		miclen;

	CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_ISO_8859_5);

	/*
	 * Stage 1: source -> intermediate form.  The scratch buffer holds
	 * ENCODING_GROWTH_RATE bytes per input byte plus one extra byte.
	 */
	mic = palloc(inlen * ENCODING_GROWTH_RATE + 1);
	koi8r2mic(in, mic, inlen);

	/*
	 * Stage 2: intermediate form -> destination.  The intermediate length
	 * is taken with strlen(), so stage 1 must leave it NUL-terminated.
	 */
	miclen = strlen((char *) mic);
	mic2iso(mic, out, miclen);

	pfree(mic);

	PG_RETURN_VOID();
}
/*
 * win866_to_koi8r
 *
 * Conversion procedure for the WIN866 -> KOI8R direction.  The source
 * string is argument 2, the destination buffer is argument 3 and the
 * source length is argument 4.  The conversion runs in two stages through
 * a temporary buffer: win8662mic() fills it, then mic2koi8r() reads it
 * back and writes the destination.
 */
Datum
win866_to_koi8r(PG_FUNCTION_ARGS)
{
	int			inlen = PG_GETARG_INT32(4);
	unsigned char *out = (unsigned char *) PG_GETARG_CSTRING(3);
	unsigned char *in = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *mic;
	size_t		miclen;

	CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_KOI8R);

	/*
	 * Stage 1: source -> intermediate form.  The scratch buffer holds
	 * ENCODING_GROWTH_RATE bytes per input byte plus one extra byte.
	 */
	mic = palloc(inlen * ENCODING_GROWTH_RATE + 1);
	win8662mic(in, mic, inlen);

	/*
	 * Stage 2: intermediate form -> destination.  The intermediate length
	 * is taken with strlen(), so stage 1 must leave it NUL-terminated.
	 */
	miclen = strlen((char *) mic);
	mic2koi8r(mic, out, miclen);

	pfree(mic);

	PG_RETURN_VOID();
}
/*
 * win866_to_iso
 *
 * Conversion procedure for the WIN866 -> ISO_8859_5 direction.  The source
 * string is argument 2, the destination buffer is argument 3 and the
 * source length is argument 4.  The conversion runs in two stages through
 * a temporary buffer: win8662mic() fills it, then mic2iso() reads it back
 * and writes the destination.
 */
Datum
win866_to_iso(PG_FUNCTION_ARGS)
{
	int			inlen = PG_GETARG_INT32(4);
	unsigned char *out = (unsigned char *) PG_GETARG_CSTRING(3);
	unsigned char *in = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *mic;
	size_t		miclen;

	CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_ISO_8859_5);

	/*
	 * Use mic/KOI8R as intermediary, see comment in win866_to_win1251().
	 *
	 * Stage 1: source -> intermediate form.  The scratch buffer holds
	 * ENCODING_GROWTH_RATE bytes per input byte plus one extra byte.
	 */
	mic = palloc(inlen * ENCODING_GROWTH_RATE + 1);
	win8662mic(in, mic, inlen);

	/*
	 * Stage 2: intermediate form -> destination.  The intermediate length
	 * is taken with strlen(), so stage 1 must leave it NUL-terminated.
	 */
	miclen = strlen((char *) mic);
	mic2iso(mic, out, miclen);

	pfree(mic);

	PG_RETURN_VOID();
}
Datum win866_to_win1251(PG_FUNCTION_ARGS) { unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); unsigned char *buf; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_WIN1251); /* * Note: There are a few characters like the "Numero" sign that exist in * all the other cyrillic encodings (win1251, ISO_8859-5 and cp866), but * not in KOI8R. As we use MULE_INTERNAL/KOI8R as an intermediary, we will * fail to convert those characters. */ buf = palloc(len * ENCODING_GROWTH_RATE + 1); win8662mic(src, buf, len); mic2win1251(buf, dest, strlen((char *) buf)); pfree(buf); PG_RETURN_VOID(); }
/* ---------- * conv_proc( * INTEGER, -- source encoding id * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) * INTEGER -- source string length * ) returns VOID; * ---------- */ Datum sjis_eudc_to_utf8(PG_FUNCTION_ARGS) { unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *p; unsigned char *fallback_character = NULL; int len = PG_GETARG_INT32(4); int sjis_len; int clen; CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_UTF8); if (sjis_to_utf8 == NULL) sjis_to_utf8 = load_external_function( "utf8_and_sjis", "sjis_to_utf8", true, NULL); *dest = '\0'; p = src; sjis_len = 0; for (; len > 0; len -= clen) { const unsigned char *c = p + sjis_len; if (c[0] == '\0') report_invalid_encoding(PG_SJIS, (const char *) p + sjis_len, len); if (c[0] >= 0xf0 && c[0] <= 0xf9 && len >= 2 && ISSJISTAIL(c[1])) { int ucs; int m; int n; clen = 2; /* SJIS to UTF8 */ if (sjis_len > 0) { DirectFunctionCall5(sjis_to_utf8, PG_SJIS, PG_UTF8, CStringGetDatum(p), CStringGetDatum(dest), sjis_len); dest = dest + strlen((char *) dest); p += sjis_len; sjis_len = 0; } p += clen; elog(eudc_log_level, "eudc character found: %02x%02x in SJIS to UTF8 conversion", c[0], c[1]); /* SJIS EUDC to UTF8 */ if (eudc_fallback_character && eudc_fallback_character[0]) { /* map to fallback character */ int i; if (fallback_character == NULL) { fallback_character = pg_do_encoding_conversion( (unsigned char *) eudc_fallback_character, strlen(eudc_fallback_character), GetDatabaseEncoding(), PG_UTF8); } for (i = 0; fallback_character[i]; i++) *dest++ = fallback_character[i]; } else { /* linear mapping */ n = c[0] - 0xf0; m = c[1] - 0x40; if (m >= 0x40) m--; ucs = 0xe000 + n * 188 + m; *dest++ = (ucs >> 12) | 0xe0; *dest++ = (ucs & 0x0fc0) >> 6 | 0x80; *dest++ = (ucs & 0x003f) | 0x80; } *dest = '\0'; } else {