Ejemplo n.º 1
0
/*
 * fetch maximum length of a char encoding
 */
int
pg_encoding_max_length(int encoding)
{
	Assert(PG_VALID_ENCODING(encoding));

	return pg_wchar_table[encoding].maxmblen;
}
Ejemplo n.º 2
0
/*
 * Verify mbstr to make sure that it is validly encoded in the specified
 * encoding.
 *
 * mbstr is not necessarily zero terminated; length of mbstr is
 * specified by len.
 *
 * If OK, return TRUE.	If a problem is found, return FALSE when noError is
 * true; when noError is false, ereport() a descriptive message.
 */
bool
pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
{
	mbverifier	mbverify;

	Assert(PG_VALID_ENCODING(encoding));

	/*
	 * In single-byte encodings, we need only reject nulls (\0).
	 */
	if (pg_encoding_max_length(encoding) <= 1)
	{
		const char *nullpos = memchr(mbstr, 0, len);

		if (nullpos == NULL)
			return true;
		if (noError)
			return false;
		report_invalid_encoding(encoding, nullpos, 1);
	}

	/* fetch function pointer just once */
	mbverify = pg_wchar_table[encoding].mbverify;

	while (len > 0)
	{
		int			l;

		/* fast path for ASCII-subset characters */
		if (!IS_HIGHBIT_SET(*mbstr))
		{
			if (*mbstr != '\0')
			{
				mbstr++;
				len--;
				continue;
			}
			if (noError)
				return false;
			report_invalid_encoding(encoding, mbstr, len);
		}

		l = (*mbverify) ((const unsigned char *) mbstr, len);

		if (l < 0)
		{
			if (noError)
				return false;
			report_invalid_encoding(encoding, mbstr, len);
		}

		mbstr += l;
		len -= l;
	}
	return true;
}
Ejemplo n.º 3
0
/*
 * Verify the first multibyte character of the given string.
 * Return its byte length if good, -1 if bad.  (See comments above for
 * full details of the mbverify API.)
 */
int
pg_encoding_verifymb(int encoding, const char *mbstr, int len)
{
	Assert(PG_VALID_ENCODING(encoding));

	return ((encoding >= 0 &&
			 encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl)) ?
		((*pg_wchar_table[encoding].mbverify) ((const unsigned char *) mbstr, len)) :
	((*pg_wchar_table[PG_SQL_ASCII].mbverify) ((const unsigned char *) mbstr, len)));
}
Ejemplo n.º 4
0
/*
 * Returns the byte length of a multibyte word.
 */
int
pg_encoding_mblen(int encoding, const unsigned char *mbstr)
{
	Assert(PG_VALID_ENCODING(encoding));

	return ((encoding >= 0 &&
			 encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl)) ?
			((*pg_wchar_table[encoding].mblen) (mbstr)) :
			((*pg_wchar_table[PG_SQL_ASCII].mblen) (mbstr)));
}
Ejemplo n.º 5
0
Archivo: ascii.c Proyecto: 50wu/gpdb
/* ----------
 * convert to ASCII - enc is set as int4
 * ----------
 */
Datum
to_ascii_enc(PG_FUNCTION_ARGS)
{
	text	   *data = PG_GETARG_TEXT_P_COPY(0);
	int			enc = PG_GETARG_INT32(1);

	if (!PG_VALID_ENCODING(enc))
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("%d is not a valid encoding code", enc)));

	PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
}
Ejemplo n.º 6
0
/*
 * check_encoding_conversion_args: check arguments of a conversion function
 *
 * "expected" arguments can be either an encoding ID or -1 to indicate that
 * the caller will check whether it accepts the ID.
 *
 * Note: the errors here are not really user-facing, so elog instead of
 * ereport seems sufficient.  Also, we trust that the "expected" encoding
 * arguments are valid encoding IDs, but we don't trust the actuals.
 */
void
check_encoding_conversion_args(int src_encoding,
							   int dest_encoding,
							   int len,
							   int expected_src_encoding,
							   int expected_dest_encoding)
{
	if (!PG_VALID_ENCODING(src_encoding))
		elog(ERROR, "invalid source encoding ID: %d", src_encoding);
	if (src_encoding != expected_src_encoding && expected_src_encoding >= 0)
		elog(ERROR, "expected source encoding \"%s\", but got \"%s\"",
			 pg_enc2name_tbl[expected_src_encoding].name,
			 pg_enc2name_tbl[src_encoding].name);
	if (!PG_VALID_ENCODING(dest_encoding))
		elog(ERROR, "invalid destination encoding ID: %d", dest_encoding);
	if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= 0)
		elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"",
			 pg_enc2name_tbl[expected_dest_encoding].name,
			 pg_enc2name_tbl[dest_encoding].name);
	if (len < 0)
		elog(ERROR, "encoding conversion length must not be negative");
}
Ejemplo n.º 7
0
/*
 * local code ---> UTF8
 *
 * iso: input local string (need not be null-terminated).
 * utf: pointer to the output area (must be large enough!)
 * map: the conversion map.
 * cmap: the conversion map for combined characters.
 *		  (optional)
 * size1: the size of the conversion map.
 * size2: the size of the conversion map for combined characters
 *		  (optional)
 * encoding: the PG identifier for the local encoding.
 * len: length of input string.
 */
void
LocalToUtf(const unsigned char *iso, unsigned char *utf,
		   const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap,
		   int size1, int size2, int encoding, int len)
{
	unsigned int iiso;
	int			l;
	pg_local_to_utf *p;
	pg_local_to_utf_combined *cp;

	if (!PG_VALID_ENCODING(encoding))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid encoding number: %d", encoding)));

	for (; len > 0; len -= l)
	{
		/* "break" cases all represent errors */
		if (*iso == '\0')
			break;

		if (!IS_HIGHBIT_SET(*iso))
		{
			/* ASCII case is easy */
			*utf++ = *iso++;
			l = 1;
			continue;
		}

		l = pg_encoding_verifymb(encoding, (const char *) iso, len);
		if (l < 0)
			break;

		if (l == 1)
			iiso = *iso++;
		else if (l == 2)
		{
			iiso = *iso++ << 8;
			iiso |= *iso++;
		}
		else if (l == 3)
		{
			iiso = *iso++ << 16;
			iiso |= *iso++ << 8;
			iiso |= *iso++;
		}
		else if (l == 4)
		{
			iiso = *iso++ << 24;
			iiso |= *iso++ << 16;
			iiso |= *iso++ << 8;
			iiso |= *iso++;
		}

		p = bsearch(&iiso, map, size1,
					sizeof(pg_local_to_utf), compare2);

		if (p == NULL)
		{
			/*
			 * not found in the ordinary map. if there's a combined character
			 * map, try with it
			 */
			if (cmap)
			{
				cp = bsearch(&iiso, cmap, size2,
							 sizeof(pg_local_to_utf_combined), compare4);

				if (cp)
				{
					if (cp->utf1 & 0xff000000)
						*utf++ = cp->utf1 >> 24;
					if (cp->utf1 & 0x00ff0000)
						*utf++ = (cp->utf1 & 0x00ff0000) >> 16;
					if (cp->utf1 & 0x0000ff00)
						*utf++ = (cp->utf1 & 0x0000ff00) >> 8;
					if (cp->utf1 & 0x000000ff)
						*utf++ = cp->utf1 & 0x000000ff;

					if (cp->utf2 & 0xff000000)
						*utf++ = cp->utf2 >> 24;
					if (cp->utf2 & 0x00ff0000)
						*utf++ = (cp->utf2 & 0x00ff0000) >> 16;
					if (cp->utf2 & 0x0000ff00)
						*utf++ = (cp->utf2 & 0x0000ff00) >> 8;
					if (cp->utf2 & 0x000000ff)
						*utf++ = cp->utf2 & 0x000000ff;

					continue;
				}
			}

			report_untranslatable_char(encoding, PG_UTF8,
									   (const char *) (iso - l), len);

		}
		else
		{
			if (p->utf & 0xff000000)
				*utf++ = p->utf >> 24;
			if (p->utf & 0x00ff0000)
				*utf++ = (p->utf & 0x00ff0000) >> 16;
			if (p->utf & 0x0000ff00)
				*utf++ = (p->utf & 0x0000ff00) >> 8;
			if (p->utf & 0x000000ff)
				*utf++ = p->utf & 0x000000ff;
		}
	}
Ejemplo n.º 8
0
/*
 * local code ---> UTF8
 *
 * iso: input string in local encoding (need not be null-terminated)
 * len: length of input string (in bytes)
 * utf: pointer to the output area (must be large enough!)
		  (output string will be null-terminated)
 * map: conversion map for single characters
 * mapsize: number of entries in the conversion map
 * cmap: conversion map for combined characters
 *		  (optional, pass NULL if none)
 * cmapsize: number of entries in the conversion map for combined characters
 *		  (optional, pass 0 if none)
 * conv_func: algorithmic encoding conversion function
 *		  (optional, pass NULL if none)
 * encoding: PG identifier for the local encoding
 *
 * For each character, the map is consulted first; if no match, the cmap
 * (if provided) is consulted next; if still no match, the conv_func
 * (if provided) is applied.  An error is raised if no match is found.
 *
 * See pg_wchar.h for more details about the data structures used here.
 */
void
LocalToUtf(const unsigned char *iso, int len,
		   unsigned char *utf,
		   const pg_local_to_utf *map, int mapsize,
		   const pg_local_to_utf_combined *cmap, int cmapsize,
		   utf_local_conversion_func conv_func,
		   int encoding)
{
	uint32		iiso;
	int			l;
	const pg_local_to_utf *p;
	const pg_local_to_utf_combined *cp;

	if (!PG_VALID_ENCODING(encoding))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid encoding number: %d", encoding)));

	for (; len > 0; len -= l)
	{
		/* "break" cases all represent errors */
		if (*iso == '\0')
			break;

		if (!IS_HIGHBIT_SET(*iso))
		{
			/* ASCII case is easy, assume it's one-to-one conversion */
			*utf++ = *iso++;
			l = 1;
			continue;
		}

		l = pg_encoding_verifymb(encoding, (const char *) iso, len);
		if (l < 0)
			break;

		/* collect coded char of length l */
		if (l == 1)
			iiso = *iso++;
		else if (l == 2)
		{
			iiso = *iso++ << 8;
			iiso |= *iso++;
		}
		else if (l == 3)
		{
			iiso = *iso++ << 16;
			iiso |= *iso++ << 8;
			iiso |= *iso++;
		}
		else if (l == 4)
		{
			iiso = *iso++ << 24;
			iiso |= *iso++ << 16;
			iiso |= *iso++ << 8;
			iiso |= *iso++;
		}
		else
		{
			elog(ERROR, "unsupported character length %d", l);
			iiso = 0;			/* keep compiler quiet */
		}

		/* First check ordinary map */
		p = bsearch(&iiso, map, mapsize,
					sizeof(pg_local_to_utf), compare2);

		if (p)
		{
			utf = store_coded_char(utf, p->utf);
			continue;
		}

		/* If there's a combined character map, try that */
		if (cmap)
		{
			cp = bsearch(&iiso, cmap, cmapsize,
						 sizeof(pg_local_to_utf_combined), compare4);

			if (cp)
			{
				utf = store_coded_char(utf, cp->utf1);
				utf = store_coded_char(utf, cp->utf2);
				continue;
			}
		}

		/* if there's a conversion function, try that */
		if (conv_func)
		{
			uint32		converted = (*conv_func) (iiso);

			if (converted)
			{
				utf = store_coded_char(utf, converted);
				continue;
			}
		}

		/* failed to translate this character */
		report_untranslatable_char(encoding, PG_UTF8,
								   (const char *) (iso - l), len);
	}

	/* if we broke out of loop early, must be invalid input */
	if (len > 0)
		report_invalid_encoding(encoding, (const char *) iso, len);

	*utf = '\0';
}
Ejemplo n.º 9
0
/*
 * UTF8 ---> local code
 *
 * utf: input string in UTF8 encoding (need not be null-terminated)
 * len: length of input string (in bytes)
 * iso: pointer to the output area (must be large enough!)
		  (output string will be null-terminated)
 * map: conversion map for single characters
 * mapsize: number of entries in the conversion map
 * cmap: conversion map for combined characters
 *		  (optional, pass NULL if none)
 * cmapsize: number of entries in the conversion map for combined characters
 *		  (optional, pass 0 if none)
 * conv_func: algorithmic encoding conversion function
 *		  (optional, pass NULL if none)
 * encoding: PG identifier for the local encoding
 *
 * For each character, the cmap (if provided) is consulted first; if no match,
 * the map is consulted next; if still no match, the conv_func (if provided)
 * is applied.  An error is raised if no match is found.
 *
 * See pg_wchar.h for more details about the data structures used here.
 */
void
UtfToLocal(const unsigned char *utf, int len,
		   unsigned char *iso,
		   const pg_utf_to_local *map, int mapsize,
		   const pg_utf_to_local_combined *cmap, int cmapsize,
		   utf_local_conversion_func conv_func,
		   int encoding)
{
	uint32		iutf;
	int			l;
	const pg_utf_to_local *p;
	const pg_utf_to_local_combined *cp;

	if (!PG_VALID_ENCODING(encoding))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid encoding number: %d", encoding)));

	for (; len > 0; len -= l)
	{
		/* "break" cases all represent errors */
		if (*utf == '\0')
			break;

		l = pg_utf_mblen(utf);
		if (len < l)
			break;

		if (!pg_utf8_islegal(utf, l))
			break;

		if (l == 1)
		{
			/* ASCII case is easy, assume it's one-to-one conversion */
			*iso++ = *utf++;
			continue;
		}

		/* collect coded char of length l */
		if (l == 2)
		{
			iutf = *utf++ << 8;
			iutf |= *utf++;
		}
		else if (l == 3)
		{
			iutf = *utf++ << 16;
			iutf |= *utf++ << 8;
			iutf |= *utf++;
		}
		else if (l == 4)
		{
			iutf = *utf++ << 24;
			iutf |= *utf++ << 16;
			iutf |= *utf++ << 8;
			iutf |= *utf++;
		}
		else
		{
			elog(ERROR, "unsupported character length %d", l);
			iutf = 0;			/* keep compiler quiet */
		}

		/* First, try with combined map if possible */
		if (cmap && len > l)
		{
			const unsigned char *utf_save = utf;
			int			len_save = len;
			int			l_save = l;

			/* collect next character, same as above */
			len -= l;

			l = pg_utf_mblen(utf);
			if (len < l)
				break;

			if (!pg_utf8_islegal(utf, l))
				break;

			/* We assume ASCII character cannot be in combined map */
			if (l > 1)
			{
				uint32		iutf2;
				uint32		cutf[2];

				if (l == 2)
				{
					iutf2 = *utf++ << 8;
					iutf2 |= *utf++;
				}
				else if (l == 3)
				{
					iutf2 = *utf++ << 16;
					iutf2 |= *utf++ << 8;
					iutf2 |= *utf++;
				}
				else if (l == 4)
				{
					iutf2 = *utf++ << 24;
					iutf2 |= *utf++ << 16;
					iutf2 |= *utf++ << 8;
					iutf2 |= *utf++;
				}
				else
				{
					elog(ERROR, "unsupported character length %d", l);
					iutf2 = 0;	/* keep compiler quiet */
				}

				cutf[0] = iutf;
				cutf[1] = iutf2;

				cp = bsearch(cutf, cmap, cmapsize,
							 sizeof(pg_utf_to_local_combined), compare3);

				if (cp)
				{
					iso = store_coded_char(iso, cp->code);
					continue;
				}
			}

			/* fail, so back up to reprocess second character next time */
			utf = utf_save;
			len = len_save;
			l = l_save;
		}

		/* Now check ordinary map */
		p = bsearch(&iutf, map, mapsize,
					sizeof(pg_utf_to_local), compare1);

		if (p)
		{
			iso = store_coded_char(iso, p->code);
			continue;
		}

		/* if there's a conversion function, try that */
		if (conv_func)
		{
			uint32		converted = (*conv_func) (iutf);

			if (converted)
			{
				iso = store_coded_char(iso, converted);
				continue;
			}
		}

		/* failed to translate this character */
		report_untranslatable_char(PG_UTF8, encoding,
								   (const char *) (utf - l), len);
	}

	/* if we broke out of loop early, must be invalid input */
	if (len > 0)
		report_invalid_encoding(PG_UTF8, (const char *) utf, len);

	*iso = '\0';
}
Ejemplo n.º 10
0
/*
 * Like GetExtTableEntry(Oid), but returns NULL instead of throwing
 * an error if no pg_exttable entry is found.
 */
ExtTableEntry*
GetExtTableEntryIfExists(Oid relid)
{
	Relation	pg_exttable_rel;
	ScanKeyData skey;
	SysScanDesc scan;
	HeapTuple	tuple;
	ExtTableEntry *extentry;
	Datum		locations,
				fmtcode, 
				fmtopts, 
				command, 
				rejectlimit, 
				rejectlimittype, 
				fmterrtbl, 
				encoding, 
				iswritable;
	bool		isNull;
	bool		locationNull = false;

	pg_exttable_rel = heap_open(ExtTableRelationId, RowExclusiveLock);

	ScanKeyInit(&skey,
				Anum_pg_exttable_reloid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(relid));

	scan = systable_beginscan(pg_exttable_rel, ExtTableReloidIndexId, true,
							  SnapshotNow, 1, &skey);
	tuple = systable_getnext(scan);

	if (!HeapTupleIsValid(tuple))
	{
		systable_endscan(scan);
		heap_close(pg_exttable_rel, RowExclusiveLock);
		return NULL;
	}

	extentry = (ExtTableEntry *) palloc0(sizeof(ExtTableEntry));

	/* get the location list */
	locations = heap_getattr(tuple, 
							 Anum_pg_exttable_location, 
							 RelationGetDescr(pg_exttable_rel), 
							 &isNull);

	if (isNull)
	{
		Insist(false); /* location list is always populated (url or ON X) */
	}
	else
	{
		Datum	   *elems;
		int			nelems;
		int			i;
		char*		loc_str = NULL;
		
		deconstruct_array(DatumGetArrayTypeP(locations),
						  TEXTOID, -1, false, 'i',
						  &elems, NULL, &nelems);

		for (i = 0; i < nelems; i++)
		{
			loc_str = DatumGetCString(DirectFunctionCall1(textout, elems[i]));

			/* append to a list of Value nodes, size nelems */
			extentry->locations = lappend(extentry->locations, makeString(pstrdup(loc_str)));
		}
		
		if(loc_str && (IS_FILE_URI(loc_str) || IS_GPFDIST_URI(loc_str) || IS_GPFDISTS_URI(loc_str)))
			extentry->isweb = false;
		else
			extentry->isweb = true;

	}
		
	/* get the execute command */
	command = heap_getattr(tuple, 
						   Anum_pg_exttable_command, 
						   RelationGetDescr(pg_exttable_rel), 
						   &isNull);
	
	if(isNull)
	{
		if(locationNull)
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("got invalid pg_exttable tuple. location and command are both NULL")));	
		
		extentry->command = NULL;
	}
	else
	{
		extentry->command = DatumGetCString(DirectFunctionCall1(textout, command));
	}
	

	/* get the format code */
	fmtcode = heap_getattr(tuple, 
						   Anum_pg_exttable_fmttype, 
						   RelationGetDescr(pg_exttable_rel), 
						   &isNull);
	
	Insist(!isNull);
	extentry->fmtcode = DatumGetChar(fmtcode);
	Insist(extentry->fmtcode == 'c' || extentry->fmtcode == 't' 
		 || extentry->fmtcode == 'b' || extentry->fmtcode == 'a'
		 || extentry->fmtcode == 'p');

	/* get the format options string */
	fmtopts = heap_getattr(tuple, 
						   Anum_pg_exttable_fmtopts, 
						   RelationGetDescr(pg_exttable_rel), 
						   &isNull);
	
	Insist(!isNull);
	extentry->fmtopts = DatumGetCString(DirectFunctionCall1(textout, fmtopts));
	


	/* get the reject limit */
	rejectlimit = heap_getattr(tuple, 
							   Anum_pg_exttable_rejectlimit, 
							   RelationGetDescr(pg_exttable_rel), 
							   &isNull);
	
	if(!isNull)
		extentry->rejectlimit = DatumGetInt32(rejectlimit);
	else
		extentry->rejectlimit = -1; /* mark that no SREH requested */

	/* get the reject limit type */
	rejectlimittype = heap_getattr(tuple, 
								   Anum_pg_exttable_rejectlimittype, 
								   RelationGetDescr(pg_exttable_rel), 
								   &isNull);
	
	extentry->rejectlimittype = DatumGetChar(rejectlimittype);
	if(!isNull)
		Insist(extentry->rejectlimittype == 'r' || extentry->rejectlimittype == 'p');
	else
		extentry->rejectlimittype = -1;
	
	/* get the error table oid */
	fmterrtbl = heap_getattr(tuple, 
							 Anum_pg_exttable_fmterrtbl, 
							 RelationGetDescr(pg_exttable_rel), 
							 &isNull);
    
	if(isNull)
		extentry->fmterrtbl = InvalidOid;
	else
		extentry->fmterrtbl = DatumGetObjectId(fmterrtbl);

	/* get the table encoding */
	encoding = heap_getattr(tuple, 
							Anum_pg_exttable_encoding, 
							RelationGetDescr(pg_exttable_rel), 
							&isNull);
	
	Insist(!isNull);
	extentry->encoding = DatumGetInt32(encoding);
	Insist(PG_VALID_ENCODING(extentry->encoding));

	/* get the table encoding */
	iswritable = heap_getattr(tuple, 
							  Anum_pg_exttable_writable, 
							  RelationGetDescr(pg_exttable_rel), 
							  &isNull);
	Insist(!isNull);
	extentry->iswritable = DatumGetBool(iswritable);

	
	/* Finish up scan and close pg_exttable catalog. */
	systable_endscan(scan);
	heap_close(pg_exttable_rel, RowExclusiveLock);

	return extentry;
}