Esempio n. 1
0
char *
CheckerConversion(Checker *checker, char *src)
{
	int	len;

	if (!checker->check_encoding)
		return src;

	len = strlen(src);

	if (checker->encoding == checker->db_encoding ||
		checker->encoding == PG_SQL_ASCII)
	{
		/*
		 * No conversion is needed, but we must still validate the data.
		 */
		pg_verify_mbstr(checker->db_encoding, src, len, false);
		return src;
	}

	if (checker->db_encoding == PG_SQL_ASCII)
	{
		/*
		 * No conversion is possible, but we must still validate the data,
		 * because the client-side code might have done string escaping using
		 * the selected client_encoding.  If the client encoding is ASCII-safe
		 * then we just do a straight validation under that encoding.  For an
		 * ASCII-unsafe encoding we have a problem: we dare not pass such data
		 * to the parser but we have no way to convert it.	We compromise by
		 * rejecting the data if it contains any non-ASCII characters.
		 */
		if (PG_VALID_BE_ENCODING(checker->encoding))
			pg_verify_mbstr(checker->encoding, src, len, false);
		else
		{
			int			i;

			for (i = 0; i < len; i++)
			{
				if (src[i] == '\0' || IS_HIGHBIT_SET(src[i]))
					ereport(ERROR,
							(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
					 errmsg("invalid byte value for encoding \"%s\": 0x%02x",
							pg_enc2name_tbl[PG_SQL_ASCII].name,
							(unsigned char) src[i])));
			}
		}
		return src;
	}

	/* Convert the input into the database encoding. */
	return (char *) pg_do_encoding_conversion((unsigned char *) src,
											  len,
											  checker->encoding,
											  checker->db_encoding);
}
Esempio n. 2
0
/*
 * pg_custom_client_to_server
 *
 * convert client encoding to server encoding, but use the passed in encodings
 * instead of the global client and server encoding variables. 
 *
 * This routine is basically a slightly modified version of pg_client_to_server.
 * Instead of creating this routine a better way may have been to just call
 * pg_do_encoding_conversion(), which takes in the necessary arguments, however
 * it does not do several necessary checks that pg_client_to_server() does, and
 * altering it to have those check may break other parts of the system. Therefore
 * until there's a better idea we resort to duplicating some code.
 *
 * The reason for creating this routine is to let external tables do data
 * conversion reliably. Since each external table has an encoding attached to
 * it we'd like to just convert from that encoding to the server encoding without
 * altering the global client_encoding variable for this local database.
 */
char *
pg_custom_to_server(const char *s, int len, int src_encoding, void *cep)
{
	FmgrInfo *custom_encoding_proc = (FmgrInfo *)cep;

	Assert(DatabaseEncoding);
	Assert(ClientEncoding);
	
	if (len <= 0)
		return (char *) s;
	
	if (src_encoding == DatabaseEncoding->encoding ||
		src_encoding == PG_SQL_ASCII)
	{
		/*
		 * No conversion is needed, but we must still validate the data.
		 */
		(void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false);
		return (char *) s;
	}
	
	if (DatabaseEncoding->encoding == PG_SQL_ASCII)
	{
		/*
		 * No conversion is possible, but we must still validate the data,
		 * because the client-side code might have done string escaping using
		 * the selected client_encoding.  If the client encoding is ASCII-safe
		 * then we just do a straight validation under that encoding.  For an
		 * ASCII-unsafe encoding we have a problem: we dare not pass such data
		 * to the parser but we have no way to convert it.	We compromise by
		 * rejecting the data if it contains any non-ASCII characters.
		 */
		if (PG_VALID_BE_ENCODING(src_encoding))
			(void) pg_verify_mbstr(src_encoding, s, len, false);
		else
		{
			int			i;
			
			for (i = 0; i < len; i++)
			{
				if (s[i] == '\0' || IS_HIGHBIT_SET(s[i]))
					ereport(ERROR,
							(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
							 errmsg("invalid byte value for encoding \"%s\": 0x%02x",
									pg_enc2name_tbl[PG_SQL_ASCII].name,
									(unsigned char) s[i])));
			}
		}
		return (char *) s;
	}
	
	return perform_default_encoding_conversion(s, len, true, src_encoding, custom_encoding_proc);
}
Esempio n. 3
0
/*
 * Read the next line from a tsearch data file (expected to be in UTF-8), and
 * convert it to database encoding if needed. The returned string is palloc'd.
 * NULL return means EOF.
 *
 * Note: direct use of this function is now deprecated.  Go through
 * tsearch_readline() to provide better error reporting.
 */
char *
t_readline(FILE *fp)
{
	int			len;
	char	   *recoded;
	char		buf[4096];		/* lines must not be longer than this */

	if (fgets(buf, sizeof(buf), fp) == NULL)
		return NULL;

	len = strlen(buf);

	/* Make sure the input is valid UTF-8 */
	(void) pg_verify_mbstr(PG_UTF8, buf, len, false);

	/* And convert */
	recoded = pg_any_to_server(buf, len, PG_UTF8);
	if (recoded == buf)
	{
		/*
		 * conversion didn't pstrdup, so we must. We can use the length of the
		 * original string, because no conversion was done.
		 */
		recoded = pnstrdup(recoded, len);
	}

	return recoded;
}
Esempio n. 4
0
/*
 * Perform output plugin write into tuplestore.
 */
static void
LogicalOutputWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
				   bool last_write)
{
	Datum		values[3];
	bool		nulls[3];
	DecodingOutputState *p;

	/* SQL Datums can only be of a limited length... */
	if (ctx->out->len > MaxAllocSize - VARHDRSZ)
		elog(ERROR, "too much output for sql interface");

	p = (DecodingOutputState *) ctx->output_writer_private;

	memset(nulls, 0, sizeof(nulls));
	values[0] = LSNGetDatum(lsn);
	values[1] = TransactionIdGetDatum(xid);

	/*
	 * Assert ctx->out is in database encoding when we're writing textual
	 * output.
	 */
	if (!p->binary_output)
		Assert(pg_verify_mbstr(GetDatabaseEncoding(),
							   ctx->out->data, ctx->out->len,
							   false));

	/* ick, but cstring_to_text_with_len works for bytea perfectly fine */
	values[2] = PointerGetDatum(
								cstring_to_text_with_len(ctx->out->data, ctx->out->len));

	tuplestore_putvalues(p->tupstore, p->tupdesc, values, nulls);
	p->returned_rows++;
}
Esempio n. 5
0
/*
 * Convert string using encoding_names.
 *
 * BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name)
 */
Datum
pg_convert(PG_FUNCTION_ARGS)
{
	bytea	   *string = PG_GETARG_BYTEA_P(0);
	char	   *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
	int			src_encoding = pg_char_to_encoding(src_encoding_name);
	char	   *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
	int			dest_encoding = pg_char_to_encoding(dest_encoding_name);
	unsigned char *result;
	bytea	   *retval;
	unsigned char *str;
	int			len;

	if (src_encoding < 0)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid source encoding name \"%s\"",
						src_encoding_name)));
	if (dest_encoding < 0)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid destination encoding name \"%s\"",
						dest_encoding_name)));

	/* make sure that source string is valid and null terminated */
	len = VARSIZE(string) - VARHDRSZ;
	pg_verify_mbstr(src_encoding, VARDATA(string), len, false);
	str = palloc(len + 1);
	memcpy(str, VARDATA(string), len);
	*(str + len) = '\0';

	result = pg_do_encoding_conversion(str, len, src_encoding, dest_encoding);

	/*
	 * build bytea data type structure.
	 */
	len = strlen((char *) result) + VARHDRSZ;
	retval = palloc(len);
	SET_VARSIZE(retval, len);
	memcpy(VARDATA(retval), result, len - VARHDRSZ);

	if (result != str)
		pfree(result);
	pfree(str);

	/* free memory if allocated by the toaster */
	PG_FREE_IF_COPY(string, 0);

	PG_RETURN_BYTEA_P(retval);
}
Esempio n. 6
0
/*
 * Verify mbstr to make sure that it is validly encoded in the current
 * database encoding.  Otherwise same as pg_verify_mbstr().
 */
bool
pg_verifymbstr(const char *mbstr, int len, bool noError)
{
	return pg_verify_mbstr(GetDatabaseEncoding(), mbstr, len, noError);
}
Esempio n. 7
0
File: file.c Progetto: 50wu/gpdb
static text *
get_line(FILE *f, int max_linesize, int encoding, bool *iseof)
{
	int c;
	char *buffer = NULL;
	char *bpt;
	int csize = 0;
	text *result = NULL;

	bool eof = true;

	buffer = palloc(max_linesize + 2);
	bpt = buffer;

	errno = 0;

	while (csize < max_linesize && (c = fgetc(f)) != EOF)
	{
		eof = false; 	/* I was able read one char */

		if (c == '\r')  /* lookin ahead \n */
		{
			c = fgetc(f);
			if (c == EOF)
				break;  /* last char */

			if (c != '\n')
				ungetc(c, f);
			/* skip \r\n */
			break;
		}
		else if (c == '\n')
			break;

		++csize;
		*bpt++ = c;
	}

	if (!eof)
	{
		char   *decoded;
		int		len;

		pg_verify_mbstr(encoding, buffer, csize, false);
		decoded = (char *) pg_do_encoding_conversion((unsigned char *) buffer,
									 csize, encoding, GetDatabaseEncoding());
		len = (decoded == buffer ? csize : strlen(decoded));
		result = palloc(len + VARHDRSZ);
		memcpy(VARDATA(result), decoded, len);
		SET_VARSIZE(result, len + VARHDRSZ);
		if (decoded != buffer)
			pfree(decoded);
		*iseof = false;
	}
	else
	{
		switch (errno)
		{
			case 0:
				break;

			case EBADF:
				CUSTOM_EXCEPTION(INVALID_OPERATION, "file descriptor isn't valid for reading");
				break;

			default:
				CUSTOM_EXCEPTION(READ_ERROR, strerror(errno));
				break;
		}

		*iseof = true;
	}

	pfree(buffer);
	return result;
}