/*
 * pg_sjis_verifier
 *		Check the single Shift-JIS character that starts at "s".
 *
 * s	- pointer to the first byte of the character to examine
 * len	- number of bytes available starting at "s"
 *
 * Returns the character's byte length as reported by pg_sjis_mblen(), or
 * -1 when the character would extend beyond "len" bytes or when a
 * multi-byte character fails the ISSJISHEAD/ISSJISTAIL checks on its
 * first two bytes.
 */
static int
pg_sjis_verifier(const unsigned char *s, int len)
{
	const int	char_len = pg_sjis_mblen(s);

	/* The character must fit entirely inside the remaining input. */
	if (char_len > len)
		return -1;

	/*
	 * A one-byte character needs nothing more (pg_sjis_mblen already
	 * verified it).  Otherwise both the lead and the trail byte have to
	 * pass their respective range checks.
	 */
	if (char_len != 1 &&
		!(ISSJISHEAD(s[0]) && ISSJISTAIL(s[1])))
		return -1;

	return char_len;
}
/* ---------- * conv_proc( * INTEGER, -- source encoding id * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) * INTEGER -- source string length * ) returns VOID; * ---------- */ Datum sjis_eudc_to_utf8(PG_FUNCTION_ARGS) { unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); unsigned char *p; unsigned char *fallback_character = NULL; int len = PG_GETARG_INT32(4); int sjis_len; int clen; CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_UTF8); if (sjis_to_utf8 == NULL) sjis_to_utf8 = load_external_function( "utf8_and_sjis", "sjis_to_utf8", true, NULL); *dest = '\0'; p = src; sjis_len = 0; for (; len > 0; len -= clen) { const unsigned char *c = p + sjis_len; if (c[0] == '\0') report_invalid_encoding(PG_SJIS, (const char *) p + sjis_len, len); if (c[0] >= 0xf0 && c[0] <= 0xf9 && len >= 2 && ISSJISTAIL(c[1])) { int ucs; int m; int n; clen = 2; /* SJIS to UTF8 */ if (sjis_len > 0) { DirectFunctionCall5(sjis_to_utf8, PG_SJIS, PG_UTF8, CStringGetDatum(p), CStringGetDatum(dest), sjis_len); dest = dest + strlen((char *) dest); p += sjis_len; sjis_len = 0; } p += clen; elog(eudc_log_level, "eudc character found: %02x%02x in SJIS to UTF8 conversion", c[0], c[1]); /* SJIS EUDC to UTF8 */ if (eudc_fallback_character && eudc_fallback_character[0]) { /* map to fallback character */ int i; if (fallback_character == NULL) { fallback_character = pg_do_encoding_conversion( (unsigned char *) eudc_fallback_character, strlen(eudc_fallback_character), GetDatabaseEncoding(), PG_UTF8); } for (i = 0; fallback_character[i]; i++) *dest++ = fallback_character[i]; } else { /* linear mapping */ n = c[0] - 0xf0; m = c[1] - 0x40; if (m >= 0x40) m--; ucs = 0xe000 + n * 188 + m; *dest++ = (ucs >> 12) | 0xe0; *dest++ = (ucs & 0x0fc0) >> 6 | 0x80; *dest++ = (ucs & 0x003f) | 0x80; } *dest = '\0'; } else {