Пример #1
0
/*
 * Big5 ---> MIC
 *
 * Walks the input one character at a time and writes the converted bytes
 * through p.
 *
 * big5: input bytes in Big5
 * p: output area; nothing here checks its size, so the caller must
 *	  provide enough room
 * len: number of input bytes remaining
 *
 * ASCII bytes are copied unchanged, except that a zero byte is reported as
 * invalid input.  Every other character is validated with
 * pg_encoding_verifymb(), mapped with big5_to_cns(), and emitted as a plane
 * byte followed by the two bytes of the CNS code.
 */
static void
big52mic(const unsigned char *big5, unsigned char *p, int len)
{
	unsigned short c1;
	unsigned short big5buf,
				cnsBuf;
	unsigned char lc;
	int			l;

	while (len > 0)
	{
		c1 = *big5;
		if (!IS_HIGHBIT_SET(c1))
		{
			/* ASCII: copy through, but an embedded zero byte is an error */
			if (c1 == 0)
				report_inval_enc(PG_BIG5,
										(const char *) big5, len);
			*p++ = c1;
			big5++;
			len--;
			continue;
		}
		/* l is the byte length of the character at big5, negative if invalid */
		l = pg_encoding_verifymb(PG_BIG5, (const char *) big5, len);
		if (l < 0)
			report_inval_enc(PG_BIG5,
									(const char *) big5, len);
		/* pack lead and trail byte into one 16-bit code for the lookup */
		big5buf = (c1 << 8) | big5[1];
		/* big5_to_cns() returns the CNS code and stores the plane id in lc */
		cnsBuf = big5_to_cns(big5buf, &lc);
		if (lc != 0)
		{
			/* planes 3 and 4 are written with an extra 0x9d prefix byte */
			if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
			{
				*p++ = 0x9d;	/* LCPRV2 */
			}
			*p++ = lc;			/* Plane No. */
			*p++ = (cnsBuf >> 8) & 0x00ff;
			*p++ = cnsBuf & 0x00ff;
		}
		else
			/* lc == 0 is treated as "no mapping for this character" */
			report_untranslatable_char(PG_BIG5, PG_MULE_INTERNAL,
									   (const char *) big5, len);
		big5 += l;
		len -= l;
	}
	/*
	 * NOTE(review): the excerpt ends here; the rest of the function (any
	 * output terminator and the closing brace) is not visible.  Also confirm
	 * that report_inval_enc is the intended helper name -- TODO verify.
	 */
Пример #2
0
/*
 * MIC ---> EUC_KR
 *
 * mic: input bytes in MULE internal code
 * p: output area (caller must make it large enough); the result is
 *	  terminated with a zero byte
 * len: number of input bytes
 *
 * ASCII bytes pass through unchanged (a zero byte is rejected as invalid).
 * A character whose leading byte is LC_KS5601 is emitted as its two
 * following bytes; any other multibyte character is untranslatable.
 */
static void
mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
{
	while (len > 0)
	{
		int			lead = *mic;
		int			step;

		if (IS_HIGHBIT_SET(lead))
		{
			/* multibyte: validate first, then translate */
			step = pg_encoding_verifymb(PG_MULE_INTERNAL,
										(const char *) mic, len);
			if (step < 0)
				report_invalid_encoding(PG_MULE_INTERNAL,
										(const char *) mic, len);

			if (c1_is_not_ks5601(lead))
				report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_KR,
										   (const char *) mic, len);
			else
			{
				/* drop the leading charset byte, keep the two code bytes */
				p[0] = mic[1];
				p[1] = mic[2];
				p += 2;
			}
		}
		else
		{
			/* plain ASCII; an embedded zero byte is not allowed */
			if (lead == 0)
				report_invalid_encoding(PG_MULE_INTERNAL,
										(const char *) mic, len);
			*p++ = lead;
			step = 1;
		}

		/* single place where the input cursor moves forward */
		mic += step;
		len -= step;
	}

	*p = '\0';
}
Пример #3
0
/*
 * local code ---> UTF8
 *
 * iso: input local string (need not be null-terminated).
 * utf: pointer to the output area (must be large enough!)
 * map: the conversion map.
 * cmap: the conversion map for combined characters.
 *		  (optional)
 * size1: the size of the conversion map.
 * size2: the size of the conversion map for combined characters
 *		  (optional)
 * encoding: the PG identifier for the local encoding.
 * len: length of input string.
 *
 * Each input character is looked up in map first, then in cmap if one was
 * given; a character found in neither is reported as untranslatable.
 */
void
LocalToUtf(const unsigned char *iso, unsigned char *utf,
		   const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap,
		   int size1, int size2, int encoding, int len)
{
	unsigned int iiso;
	int			l;
	pg_local_to_utf *p;
	pg_local_to_utf_combined *cp;

	if (!PG_VALID_ENCODING(encoding))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid encoding number: %d", encoding)));

	/* every "continue" below still runs the "len -= l" step */
	for (; len > 0; len -= l)
	{
		/* "break" cases all represent errors */
		if (*iso == '\0')
			break;

		if (!IS_HIGHBIT_SET(*iso))
		{
			/* ASCII case is easy */
			*utf++ = *iso++;
			l = 1;
			continue;
		}

		/* l is the byte length of the character at iso, negative if invalid */
		l = pg_encoding_verifymb(encoding, (const char *) iso, len);
		if (l < 0)
			break;

		/*
		 * Pack the l bytes of the character, first byte most significant,
		 * into iiso; this also advances iso past the character.
		 *
		 * NOTE(review): there is no else branch, so if l were outside 1..4
		 * iiso would be used uninitialized below.
		 */
		if (l == 1)
			iiso = *iso++;
		else if (l == 2)
		{
			iiso = *iso++ << 8;
			iiso |= *iso++;
		}
		else if (l == 3)
		{
			iiso = *iso++ << 16;
			iiso |= *iso++ << 8;
			iiso |= *iso++;
		}
		else if (l == 4)
		{
			iiso = *iso++ << 24;
			iiso |= *iso++ << 16;
			iiso |= *iso++ << 8;
			iiso |= *iso++;
		}

		/* look the packed code up in the ordinary map */
		p = bsearch(&iiso, map, size1,
					sizeof(pg_local_to_utf), compare2);

		if (p == NULL)
		{
			/*
			 * not found in the ordinary map. if there's a combined character
			 * map, try with it
			 */
			if (cmap)
			{
				cp = bsearch(&iiso, cmap, size2,
							 sizeof(pg_local_to_utf_combined), compare4);

				if (cp)
				{
					/*
					 * Emit utf1 then utf2, most significant byte first; a
					 * byte whose value is zero is skipped, not written.
					 */
					if (cp->utf1 & 0xff000000)
						*utf++ = cp->utf1 >> 24;
					if (cp->utf1 & 0x00ff0000)
						*utf++ = (cp->utf1 & 0x00ff0000) >> 16;
					if (cp->utf1 & 0x0000ff00)
						*utf++ = (cp->utf1 & 0x0000ff00) >> 8;
					if (cp->utf1 & 0x000000ff)
						*utf++ = cp->utf1 & 0x000000ff;

					if (cp->utf2 & 0xff000000)
						*utf++ = cp->utf2 >> 24;
					if (cp->utf2 & 0x00ff0000)
						*utf++ = (cp->utf2 & 0x00ff0000) >> 16;
					if (cp->utf2 & 0x0000ff00)
						*utf++ = (cp->utf2 & 0x0000ff00) >> 8;
					if (cp->utf2 & 0x000000ff)
						*utf++ = cp->utf2 & 0x000000ff;

					continue;
				}
			}

			/* iso was already advanced by l, so step back to the char start */
			report_untranslatable_char(encoding, PG_UTF8,
									   (const char *) (iso - l), len);

		}
		else
		{
			/* same byte-by-byte store as above, skipping zero bytes */
			if (p->utf & 0xff000000)
				*utf++ = p->utf >> 24;
			if (p->utf & 0x00ff0000)
				*utf++ = (p->utf & 0x00ff0000) >> 16;
			if (p->utf & 0x0000ff00)
				*utf++ = (p->utf & 0x0000ff00) >> 8;
			if (p->utf & 0x000000ff)
				*utf++ = p->utf & 0x000000ff;
		}
	}
	/*
	 * NOTE(review): the excerpt ends here; whatever follows the loop
	 * (handling of the early "break" cases, output terminator, closing
	 * brace) is not visible.
	 */
Пример #4
0
/*
 * local code ---> UTF8
 *
 * iso: input string in local encoding (need not be null-terminated)
 * len: length of input string (in bytes)
 * utf: pointer to the output area (must be large enough!)
 *		  (output string will be null-terminated)
 * map: conversion map for single characters
 * mapsize: number of entries in the conversion map
 * cmap: conversion map for combined characters
 *		  (optional, pass NULL if none)
 * cmapsize: number of entries in the conversion map for combined characters
 *		  (optional, pass 0 if none)
 * conv_func: algorithmic encoding conversion function
 *		  (optional, pass NULL if none)
 * encoding: PG identifier for the local encoding
 *
 * For each character, the map is consulted first; if no match, the cmap
 * (if provided) is consulted next; if still no match, the conv_func
 * (if provided) is applied.  An error is raised if no match is found.
 *
 * An embedded zero byte, or a byte sequence rejected by
 * pg_encoding_verifymb(), stops the scan and is reported as invalid input
 * for the given encoding.
 *
 * See pg_wchar.h for more details about the data structures used here.
 */
void
LocalToUtf(const unsigned char *iso, int len,
		   unsigned char *utf,
		   const pg_local_to_utf *map, int mapsize,
		   const pg_local_to_utf_combined *cmap, int cmapsize,
		   utf_local_conversion_func conv_func,
		   int encoding)
{
	uint32		iiso;
	int			l;
	const pg_local_to_utf *p;
	const pg_local_to_utf_combined *cp;

	if (!PG_VALID_ENCODING(encoding))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid encoding number: %d", encoding)));

	/* every "continue" below still runs the "len -= l" step */
	for (; len > 0; len -= l)
	{
		/* "break" cases all represent errors */
		if (*iso == '\0')
			break;

		if (!IS_HIGHBIT_SET(*iso))
		{
			/* ASCII case is easy, assume it's one-to-one conversion */
			*utf++ = *iso++;
			l = 1;
			continue;
		}

		l = pg_encoding_verifymb(encoding, (const char *) iso, len);
		if (l < 0)
			break;

		/*
		 * Collect coded char of length l, first byte most significant.
		 *
		 * Each byte is widened to uint32 before shifting.  Without the cast
		 * the unsigned char is promoted to (signed) int, and since the lead
		 * byte always has its high bit set here, "<< 24" would produce a
		 * value not representable in int, which is undefined behavior.
		 */
		if (l == 1)
			iiso = *iso++;
		else if (l == 2)
		{
			iiso = ((uint32) *iso++) << 8;
			iiso |= *iso++;
		}
		else if (l == 3)
		{
			iiso = ((uint32) *iso++) << 16;
			iiso |= ((uint32) *iso++) << 8;
			iiso |= *iso++;
		}
		else if (l == 4)
		{
			iiso = ((uint32) *iso++) << 24;
			iiso |= ((uint32) *iso++) << 16;
			iiso |= ((uint32) *iso++) << 8;
			iiso |= *iso++;
		}
		else
		{
			elog(ERROR, "unsupported character length %d", l);
			iiso = 0;			/* keep compiler quiet */
		}

		/* First check ordinary map */
		p = bsearch(&iiso, map, mapsize,
					sizeof(pg_local_to_utf), compare2);

		if (p)
		{
			utf = store_coded_char(utf, p->utf);
			continue;
		}

		/* If there's a combined character map, try that */
		if (cmap)
		{
			cp = bsearch(&iiso, cmap, cmapsize,
						 sizeof(pg_local_to_utf_combined), compare4);

			if (cp)
			{
				/* a combined entry expands to two UTF8 characters */
				utf = store_coded_char(utf, cp->utf1);
				utf = store_coded_char(utf, cp->utf2);
				continue;
			}
		}

		/* if there's a conversion function, try that */
		if (conv_func)
		{
			uint32		converted = (*conv_func) (iiso);

			/* a zero result is treated as "no conversion available" */
			if (converted)
			{
				utf = store_coded_char(utf, converted);
				continue;
			}
		}

		/*
		 * failed to translate this character; iso was already advanced by l,
		 * so step back to the start of the character for the error report
		 */
		report_untranslatable_char(encoding, PG_UTF8,
								   (const char *) (iso - l), len);
	}

	/* if we broke out of loop early, must be invalid input */
	if (len > 0)
		report_invalid_encoding(encoding, (const char *) iso, len);

	*utf = '\0';
}