Ejemplo n.º 1
0
/*
 * EUC_TW
 *
 */
static int
pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
{
	int			cnt = 0;

	while (len > 0 && *from)
	{
		if (*from == SS2 && len >= 4)	/* code set 2 */
		{
			from++;
			*to = (((uint32) SS2) << 24) | (*from++ << 16);
			*to |= *from++ << 8;
			*to |= *from++;
			len -= 4;
		}
		else if (*from == SS3 && len >= 3)		/* code set 3 (unused?) */
		{
			from++;
			*to = (SS3 << 16) | (*from++ << 8);
			*to |= *from++;
			len -= 3;
		}
		else if (IS_HIGHBIT_SET(*from) && len >= 2)		/* code set 2 */
		{
			*to = *from++ << 8;
			*to |= *from++;
			len -= 2;
		}
		else
		{
			*to = *from++;
			len--;
		}
		to++;
		cnt++;
	}
	*to = 0;
	return cnt;
}
Ejemplo n.º 2
0
Archivo: wchar.c Proyecto: GisKook/Gis
/*
 * EUC
 */
static int
pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
{
	int			cnt = 0;

	while (len > 0 && *from)
	{
		if (*from == SS2 && len >= 2)	/* JIS X 0201 (so called "1 byte
										 * KANA") */
		{
			from++;
			*to = (SS2 << 8) | *from++;
			len -= 2;
		}
		else if (*from == SS3 && len >= 3)		/* JIS X 0212 KANJI */
		{
			from++;
			*to = (SS3 << 16) | (*from++ << 8);
			*to |= *from++;
			len -= 3;
		}
		else if (IS_HIGHBIT_SET(*from) && len >= 2)		/* JIS X 0208 KANJI */
		{
			*to = *from++ << 8;
			*to |= *from++;
			len -= 2;
		}
		else
			/* must be ASCII */
		{
			*to = *from++;
			len--;
		}
		to++;
		cnt++;
	}
	*to = 0;
	return cnt;
}
Ejemplo n.º 3
0
static int
pg_johab_verifier(const unsigned char *s, int len)
{
	int			l,
				mbl;
	unsigned char c;

	l = mbl = pg_johab_mblen(s);

	if (len < l)
		return -1;

	if (!IS_HIGHBIT_SET(*s))
		return mbl;

	while (--l > 0)
	{
		c = *++s;
		if (!IS_EUC_RANGE_VALID(c))
			return -1;
	}
	return mbl;
}
Ejemplo n.º 4
0
/*
 * MIC ---> EUC_KR
 */
static void
mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
{
	int			c1;
	int			l;

	while (len > 0)
	{
		c1 = *mic;
		if (!IS_HIGHBIT_SET(c1))
		{
			/* ASCII */
			if (c1 == 0)
				report_invalid_encoding(PG_MULE_INTERNAL,
										(const char *) mic, len);
			*p++ = c1;
			mic++;
			len--;
			continue;
		}
		l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
		if (l < 0)
			report_invalid_encoding(PG_MULE_INTERNAL,
									(const char *) mic, len);
		if (c1 == LC_KS5601)
		{
			*p++ = mic[1];
			*p++ = mic[2];
		}
		else
			report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_KR,
									   (const char *) mic, len);
		mic += l;
		len -= l;
	}
	*p = '\0';
}
Ejemplo n.º 5
0
/*
 * downcase_truncate_identifier() --- do appropriate downcasing and
 * truncation of an unquoted identifier.  Optionally warn of truncation.
 *
 * Returns a palloc'd string containing the adjusted identifier.
 *
 * Note: in some usages the passed string is not null-terminated.
 *
 * Note: the API of this function is designed to allow for downcasing
 * transformations that increase the string length, but we don't yet
 * support that.  If you want to implement it, you'll need to fix
 * SplitIdentifierString() in utils/adt/varlena.c.
 */
char *
downcase_truncate_identifier(const char *ident, int len, bool warn)
{
	char	   *result;
	int			i;
	bool		enc_is_single_byte;

	result = palloc(len + 1);
	enc_is_single_byte = pg_database_encoding_max_length() == 1;

	/*
	 * SQL99 specifies Unicode-aware case normalization, which we don't yet
	 * have the infrastructure for.  Instead we use tolower() to provide a
	 * locale-aware translation.  However, there are some locales where this
	 * is not right either (eg, Turkish may do strange things with 'i' and
	 * 'I').  Our current compromise is to use tolower() for characters with
	 * the high bit set, as long as they aren't part of a multi-byte
	 * character, and use an ASCII-only downcasing for 7-bit characters.
	 */
	for (i = 0; i < len; i++)
	{
		unsigned char ch = (unsigned char) ident[i];

		if (ch >= 'A' && ch <= 'Z')
			ch += 'a' - 'A';
		else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
			ch = tolower(ch);
		result[i] = (char) ch;
	}
	result[i] = '\0';

	if (i >= NAMEDATALEN)
		truncate_identifier(result, i, warn);

	return result;
}
Ejemplo n.º 6
0
/*
 * Convert a string value to an SQL string literal and append it to
 * the given buffer.  We assume the specified client_encoding and
 * standard_conforming_strings settings.
 *
 * This is essentially equivalent to libpq's PQescapeStringInternal,
 * except for the output buffer structure.	We need it in situations
 * where we do not have a PGconn available.  Where we do,
 * appendStringLiteralConn is a better choice.
 */
void
appendStringLiteral(PQExpBuffer buf, const char *str,
					int encoding, bool std_strings)
{
	size_t		length = strlen(str);
	const char *source = str;
	char	   *target;

	if (!enlargePQExpBuffer(buf, 2 * length + 2))
		return;

	target = buf->data + buf->len;
	*target++ = '\'';

	while (*source != '\0')
	{
		char		c = *source;
		int			len;
		int			i;

		/* Fast path for plain ASCII */
		if (!IS_HIGHBIT_SET(c))
		{
			/* Apply quoting if needed */
			if (SQL_STR_DOUBLE(c, !std_strings))
				*target++ = c;
			/* Copy the character */
			*target++ = c;
			source++;
			continue;
		}

		/* Slow path for possible multibyte characters */
		len = PQmblen(source, encoding);

		/* Copy the character */
		for (i = 0; i < len; i++)
		{
			if (*source == '\0')
				break;
			*target++ = *source++;
		}

		/*
		 * If we hit premature end of string (ie, incomplete multibyte
		 * character), try to pad out to the correct length with spaces. We
		 * may not be able to pad completely, but we will always be able to
		 * insert at least one pad space (since we'd not have quoted a
		 * multibyte character).  This should be enough to make a string that
		 * the server will error out on.
		 */
		if (i < len)
		{
			char	   *stop = buf->data + buf->maxlen - 2;

			for (; i < len; i++)
			{
				if (target >= stop)
					break;
				*target++ = ' ';
			}
			break;
		}
	}

	/* Write the terminating quote and NUL character. */
	*target++ = '\'';
	*target = '\0';

	buf->len = target - buf->data;
}
Ejemplo n.º 7
0
/*-------------------------------------------------------------------------
 * The next token in the input stream is known to be a number; lex it.
 *
 * In JSON, a number consists of four parts:
 *
 * (1) An optional minus sign ('-').
 *
 * (2) Either a single '0', or a string of one or more digits that does not
 *     begin with a '0'.
 *
 * (3) An optional decimal part, consisting of a period ('.') followed by
 *     one or more digits.  (Note: While this part can be omitted
 *     completely, it's not OK to have only the decimal point without
 *     any digits afterwards.)
 *
 * (4) An optional exponent part, consisting of 'e' or 'E', optionally
 *     followed by '+' or '-', followed by one or more digits.  (Note:
 *     As with the decimal part, if 'e' or 'E' is present, it must be
 *     followed by at least one digit.)
 *
 * The 's' argument to this function points to the ostensible beginning
 * of part 2 - i.e. the character after any optional minus sign, and the
 * first character of the string if there is none.
 *
 *-------------------------------------------------------------------------
 */
static void
json_lex_number(JsonLexContext *lex, char *s)
{
	bool	error = false;
	char   *p;

	/* Part (1): leading sign indicator. */
	/* Caller already did this for us; so do nothing. */

	/* Part (2): parse main digit string. */
	if (*s == '0')
		++s;
	else if (*s >= '1' && *s <= '9')
	{
		do
		{
			++s;
		} while (*s >= '0' && *s <= '9');
	}
	else
		error = true;

	/* Part (3): parse optional decimal portion. */
	if (*s == '.')
	{
		++s;
		if (*s < '0' && *s > '9')
			error = true;
		else
		{
			do
			{
				++s;
			} while (*s >= '0' && *s <= '9');
		}
	}

	/* Part (4): parse optional exponent. */
	if (*s == 'e' || *s == 'E')
	{
		++s;
		if (*s == '+' || *s == '-')
			++s;
		if (*s < '0' && *s > '9')
			error = true;
		else
		{
			do
			{
				++s;
			} while (*s >= '0' && *s <= '9');
		}
	}

	/* Check for trailing garbage. */
	for (p = s; (*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')
		|| (*p >= '0' && *p <= '9') || *p == '_' || IS_HIGHBIT_SET(*p); ++p)
		;
	lex->token_terminator = p;
	if (p > s || error)
		report_invalid_token(lex);
}
Ejemplo n.º 8
0
/*
 * Lex one token from the input stream.
 */
static void
json_lex(JsonLexContext *lex)
{
	char	   *s;

	/* Skip leading whitespace. */
	s = lex->token_terminator;
	while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')
	{
		if (*s == '\n')
			++lex->line_number;
		++s;
	}
	lex->token_start = s;

	/* Determine token type. */
	if (strchr("{}[],:", s[0]))
	{
		/* strchr() doesn't return false on a NUL input. */
		if (s[0] == '\0')
		{
			/* End of string. */
			lex->token_start = NULL;
			lex->token_terminator = NULL;
		}
		else
		{
			/* Single-character token, some kind of punctuation mark. */
			lex->token_terminator = s + 1;
		}
		lex->token_type = JSON_VALUE_INVALID;
	}
	else if (*s == '"')
	{
		/* String. */
		json_lex_string(lex);
		lex->token_type = JSON_VALUE_STRING;
	}
	else if (*s == '-')
	{
		/* Negative number. */
		json_lex_number(lex, s + 1);
		lex->token_type = JSON_VALUE_NUMBER;
	}
	else if (*s >= '0' && *s <= '9')
	{
		/* Positive number. */
		json_lex_number(lex, s);
		lex->token_type = JSON_VALUE_NUMBER;
	}
	else
	{
		char   *p;

		/*
		 * We're not dealing with a string, number, legal punctuation mark,
		 * or end of string.  The only legal tokens we might find here are
		 * true, false, and null, but for error reporting purposes we scan
		 * until we see a non-alphanumeric character.  That way, we can report
		 * the whole word as an unexpected token, rather than just some
		 * unintuitive prefix thereof.
		 */
 		for (p = s; (*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z')
			|| (*p >= '0' && *p <= '9') || *p == '_' || IS_HIGHBIT_SET(*p);
			++p)
			;

		/*
		 * We got some sort of unexpected punctuation or an otherwise
		 * unexpected character, so just complain about that one character.
		 */
		if (p == s)
		{
			lex->token_terminator = s + 1;
			report_invalid_token(lex);
		}

		/*
		 * We've got a real alphanumeric token here.  If it happens to be
		 * true, false, or null, all is well.  If not, error out.
		 */
		lex->token_terminator = p;
		if (p - s == 4)
		{
			if (memcmp(s, "true", 4) == 0)
				lex->token_type = JSON_VALUE_TRUE;
			else if (memcmp(s, "null", 4) == 0)
				lex->token_type = JSON_VALUE_NULL;
			else
				report_invalid_token(lex);
		}
		else if (p - s == 5 && memcmp(s, "false", 5) == 0)
			lex->token_type = JSON_VALUE_FALSE;
		else
			report_invalid_token(lex);
	}
}
Ejemplo n.º 9
0
/**
 * @brief Performs data loading.
 *
 * Invokes pg_bulkload() user-defined function with given parameters
 * in single transaction.
 *
 * @return exitcode (always 0).
 */
static int
LoaderLoadMain(List *options)
{
	PGresult	   *res;
	const char	   *params[1];
	StringInfoData	buf;
	int				encoding;
	int				errors;
	ListCell	   *cell;

	if (options == NIL)
		ereport(ERROR,
			(errcode(EXIT_FAILURE),
			 errmsg("requires control file or command line options")));

	initStringInfo(&buf);
	reconnect(ERROR);
	encoding = PQclientEncoding(connection);

	elog(NOTICE, "BULK LOAD START");

	/* form options as text[] */
	appendStringInfoString(&buf, "{\"");
	foreach (cell, options)
	{
		const char *item = lfirst(cell);

		if (buf.len > 2)
			appendStringInfoString(&buf, "\",\"");

		/* escape " and \ */
		while (*item)
		{
			if (*item == '"' || *item == '\\')
			{
				appendStringInfoChar(&buf, '\\');
				appendStringInfoChar(&buf, *item);
				item++;
			}
			else if (!IS_HIGHBIT_SET(*item))
			{
				appendStringInfoChar(&buf, *item);
				item++;
			}
			else
			{
				int	n = PQmblen(item, encoding);
				appendBinaryStringInfo(&buf, item, n);
				item += n;
			}
		}
	}
	appendStringInfoString(&buf, "\"}");

	command("BEGIN", 0, NULL);
	params[0] = buf.data;
	res = execute("SELECT * FROM pg_bulkload($1)", 1, params);
	if (PQresultStatus(res) == PGRES_COPY_IN)
	{
		PQclear(res);
		res = RemoteLoad(connection, stdin, type_binary);
		if (PQresultStatus(res) != PGRES_TUPLES_OK)
			elog(ERROR, "copy failed: %s", PQerrorMessage(connection));
	}
	command("COMMIT", 0, NULL);

	errors = atoi(PQgetvalue(res, 0, 2)) +	/* parse errors */
			 atoi(PQgetvalue(res, 0, 3));	/* duplicate errors */

	elog(NOTICE, "BULK LOAD END\n"
				 "\t%s Rows skipped.\n"
				 "\t%s Rows successfully loaded.\n"
				 "\t%s Rows not loaded due to parse errors.\n"
				 "\t%s Rows not loaded due to duplicate errors.\n"
				 "\t%s Rows replaced with new rows.",
				 PQgetvalue(res, 0, 0), PQgetvalue(res, 0, 1),
				 PQgetvalue(res, 0, 2), PQgetvalue(res, 0, 3),
				 PQgetvalue(res, 0, 4));
	PQclear(res);

	disconnect();
	termStringInfo(&buf);

	if (errors > 0)
	{
		elog(WARNING, "some rows were not loaded due to errors.");
		return E_PG_USER;
	}
	else
		return 0;	/* succeeded without errors */
}
Ejemplo n.º 10
0
/*
 * Report a CONTEXT line for bogus JSON input.
 *
 * lex->token_terminator must be set to identify the spot where we detected
 * the error.  Note that lex->token_start might be NULL, in case we recognized
 * error at EOF.
 *
 * The return value isn't meaningful, but we make it non-void so that this
 * can be invoked inside ereport().
 */
static int
report_json_context(JsonLexContext *lex)
{
	const char *context_start;
	const char *context_end;
	const char *line_start;
	int			line_number;
	char	   *ctxt;
	int			ctxtlen;
	const char *prefix;
	const char *suffix;

	/* Choose boundaries for the part of the input we will display */
	context_start = lex->input;
	context_end = lex->token_terminator;
	line_start = context_start;
	line_number = 1;
	for (;;)
	{
		/* Always advance over newlines */
		if (context_start < context_end && *context_start == '\n')
		{
			context_start++;
			line_start = context_start;
			line_number++;
			continue;
		}
		/* Otherwise, done as soon as we are close enough to context_end */
		if (context_end - context_start < 50)
			break;
		/* Advance to next multibyte character */
		if (IS_HIGHBIT_SET(*context_start))
			context_start += pg_mblen(context_start);
		else
			context_start++;
	}

	/*
	 * We add "..." to indicate that the excerpt doesn't start at the
	 * beginning of the line ... but if we're within 3 characters of the
	 * beginning of the line, we might as well just show the whole line.
	 */
	if (context_start - line_start <= 3)
		context_start = line_start;

	/* Get a null-terminated copy of the data to present */
	ctxtlen = context_end - context_start;
	ctxt = palloc(ctxtlen + 1);
	memcpy(ctxt, context_start, ctxtlen);
	ctxt[ctxtlen] = '\0';

	/*
	 * Show the context, prefixing "..." if not starting at start of line, and
	 * suffixing "..." if not ending at end of line.
	 */
	prefix = (context_start > line_start) ? "..." : "";
	suffix = (lex->token_type != JSON_TOKEN_END && context_end - lex->input < lex->input_length && *context_end != '\n' && *context_end != '\r') ? "..." : "";

	return errcontext("JSON data, line %d: %s%s%s",
					  line_number, prefix, ctxt, suffix);
}
Ejemplo n.º 11
0
/*
 * local code ---> UTF8
 *
 * iso: input local string (need not be null-terminated).
 * utf: pointer to the output area (must be large enough!)
 * map: the conversion map.
 * cmap: the conversion map for combined characters.
 *		  (optional)
 * size1: the size of the conversion map.
 * size2: the size of the conversion map for combined characters
 *		  (optional)
 * encoding: the PG identifier for the local encoding.
 * len: length of input string.
 */
void
LocalToUtf(const unsigned char *iso, unsigned char *utf,
		   const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap,
		   int size1, int size2, int encoding, int len)
{
	unsigned int iiso;
	int			l;
	pg_local_to_utf *p;
	pg_local_to_utf_combined *cp;

	if (!PG_VALID_ENCODING(encoding))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid encoding number: %d", encoding)));

	for (; len > 0; len -= l)
	{
		/* "break" cases all represent errors */
		if (*iso == '\0')
			break;

		if (!IS_HIGHBIT_SET(*iso))
		{
			/* ASCII case is easy */
			*utf++ = *iso++;
			l = 1;
			continue;
		}

		l = pg_encoding_verifymb(encoding, (const char *) iso, len);
		if (l < 0)
			break;

		if (l == 1)
			iiso = *iso++;
		else if (l == 2)
		{
			iiso = *iso++ << 8;
			iiso |= *iso++;
		}
		else if (l == 3)
		{
			iiso = *iso++ << 16;
			iiso |= *iso++ << 8;
			iiso |= *iso++;
		}
		else if (l == 4)
		{
			iiso = *iso++ << 24;
			iiso |= *iso++ << 16;
			iiso |= *iso++ << 8;
			iiso |= *iso++;
		}

		p = bsearch(&iiso, map, size1,
					sizeof(pg_local_to_utf), compare2);

		if (p == NULL)
		{
			/*
			 * not found in the ordinary map. if there's a combined character
			 * map, try with it
			 */
			if (cmap)
			{
				cp = bsearch(&iiso, cmap, size2,
							 sizeof(pg_local_to_utf_combined), compare4);

				if (cp)
				{
					if (cp->utf1 & 0xff000000)
						*utf++ = cp->utf1 >> 24;
					if (cp->utf1 & 0x00ff0000)
						*utf++ = (cp->utf1 & 0x00ff0000) >> 16;
					if (cp->utf1 & 0x0000ff00)
						*utf++ = (cp->utf1 & 0x0000ff00) >> 8;
					if (cp->utf1 & 0x000000ff)
						*utf++ = cp->utf1 & 0x000000ff;

					if (cp->utf2 & 0xff000000)
						*utf++ = cp->utf2 >> 24;
					if (cp->utf2 & 0x00ff0000)
						*utf++ = (cp->utf2 & 0x00ff0000) >> 16;
					if (cp->utf2 & 0x0000ff00)
						*utf++ = (cp->utf2 & 0x0000ff00) >> 8;
					if (cp->utf2 & 0x000000ff)
						*utf++ = cp->utf2 & 0x000000ff;

					continue;
				}
			}

			report_untranslatable_char(encoding, PG_UTF8,
									   (const char *) (iso - l), len);

		}
		else
		{
			if (p->utf & 0xff000000)
				*utf++ = p->utf >> 24;
			if (p->utf & 0x00ff0000)
				*utf++ = (p->utf & 0x00ff0000) >> 16;
			if (p->utf & 0x0000ff00)
				*utf++ = (p->utf & 0x0000ff00) >> 8;
			if (p->utf & 0x000000ff)
				*utf++ = p->utf & 0x000000ff;
		}
	}
Ejemplo n.º 12
0
/*
 * Output an attribute to text
 * This takes portions of the code of CopyAttributeOutText
 */
static void
attribute_out_text(StringInfo buf, char *string)
{
	char	   *ptr;
	char		c;
	char	   *start;
	char		delimc = COPYOPS_DELIMITER;
	bool		need_transcoding, encoding_embeds_ascii;
	int			file_encoding = pg_get_client_encoding();

	need_transcoding = (file_encoding != GetDatabaseEncoding() ||
						pg_database_encoding_max_length() > 1);
	encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(file_encoding);

	if (need_transcoding)
		ptr = pg_server_to_any(string, strlen(string), file_encoding);
	else
		ptr = string;

	/*
	 * We have to grovel through the string searching for control characters
	 * and instances of the delimiter character.  In most cases, though, these
	 * are infrequent.	To avoid overhead from calling CopySendData once per
	 * character, we dump out all characters between escaped characters in a
	 * single call.  The loop invariant is that the data from "start" to "ptr"
	 * can be sent literally, but hasn't yet been.
	 *
	 * We can skip pg_encoding_mblen() overhead when encoding is safe, because
	 * in valid backend encodings, extra bytes of a multibyte character never
	 * look like ASCII.  This loop is sufficiently performance-critical that
	 * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
	 * of the normal safe-encoding path.
	 */
	if (encoding_embeds_ascii)
	{
		start = ptr;
		while ((c = *ptr) != '\0')
		{
			if ((unsigned char) c < (unsigned char) 0x20)
			{
				/*
				 * \r and \n must be escaped, the others are traditional. We
				 * prefer to dump these using the C-like notation, rather than
				 * a backslash and the literal character, because it makes the
				 * dump file a bit more proof against Microsoftish data
				 * mangling.
				 */
				switch (c)
				{
					case '\b':
						c = 'b';
						break;
					case '\f':
						c = 'f';
						break;
					case '\n':
						c = 'n';
						break;
					case '\r':
						c = 'r';
						break;
					case '\t':
						c = 't';
						break;
					case '\v':
						c = 'v';
						break;
					default:
						/* If it's the delimiter, must backslash it */
						if (c == delimc)
							break;
						/* All ASCII control chars are length 1 */
						ptr++;
						continue;		/* fall to end of loop */
				}

				/* if we get here, we need to convert the control char */
				DUMPSOFAR();
				appendStringInfoCharMacro(buf, '\\');
				appendStringInfoCharMacro(buf, c);
				start = ++ptr;
			}
			else if (c == '\\' || c == delimc)
			{
				DUMPSOFAR();
				appendStringInfoCharMacro(buf, '\\');
				appendStringInfoCharMacro(buf, c);
				start = ++ptr;
			}
			else if (IS_HIGHBIT_SET(c))
				ptr += pg_encoding_mblen(file_encoding, ptr);
			else
				ptr++;
		}
	}
	else
	{
		start = ptr;
		while ((c = *ptr) != '\0')
		{
			if ((unsigned char) c < (unsigned char) 0x20)
			{
				/*
				 * \r and \n must be escaped, the others are traditional. We
				 * prefer to dump these using the C-like notation, rather than
				 * a backslash and the literal character, because it makes the
				 * dump file a bit more proof against Microsoftish data
				 * mangling.
				 */
				switch (c)
				{
					case '\b':
						c = 'b';
						break;
					case '\f':
						c = 'f';
						break;
					case '\n':
						c = 'n';
						break;
					case '\r':
						c = 'r';
						break;
					case '\t':
						c = 't';
						break;
					case '\v':
						c = 'v';
						break;
					default:
						/* If it's the delimiter, must backslash it */
						if (c == delimc)
							break;
						/* All ASCII control chars are length 1 */
						ptr++;
						continue;		/* fall to end of loop */
				}
				/* if we get here, we need to convert the control char */
				DUMPSOFAR();
				appendStringInfoCharMacro(buf, '\\');
				appendStringInfoCharMacro(buf, c);
				start = ++ptr;
			}
			else if (c == '\\' || c == delimc)
			{
				DUMPSOFAR();
				appendStringInfoCharMacro(buf, '\\');
				appendStringInfoCharMacro(buf, c);
				start = ++ptr;
			}
			else
				ptr++;
		}
	}

	DUMPSOFAR();
}
Ejemplo n.º 13
0
/*
 * local code ---> UTF8
 *
 * iso: input string in local encoding (need not be null-terminated)
 * len: length of input string (in bytes)
 * utf: pointer to the output area (must be large enough!)
		  (output string will be null-terminated)
 * map: conversion map for single characters
 * mapsize: number of entries in the conversion map
 * cmap: conversion map for combined characters
 *		  (optional, pass NULL if none)
 * cmapsize: number of entries in the conversion map for combined characters
 *		  (optional, pass 0 if none)
 * conv_func: algorithmic encoding conversion function
 *		  (optional, pass NULL if none)
 * encoding: PG identifier for the local encoding
 *
 * For each character, the map is consulted first; if no match, the cmap
 * (if provided) is consulted next; if still no match, the conv_func
 * (if provided) is applied.  An error is raised if no match is found.
 *
 * See pg_wchar.h for more details about the data structures used here.
 */
void
LocalToUtf(const unsigned char *iso, int len,
		   unsigned char *utf,
		   const pg_local_to_utf *map, int mapsize,
		   const pg_local_to_utf_combined *cmap, int cmapsize,
		   utf_local_conversion_func conv_func,
		   int encoding)
{
	uint32		iiso;
	int			l;
	const pg_local_to_utf *p;
	const pg_local_to_utf_combined *cp;

	if (!PG_VALID_ENCODING(encoding))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid encoding number: %d", encoding)));

	for (; len > 0; len -= l)
	{
		/* "break" cases all represent errors */
		if (*iso == '\0')
			break;

		if (!IS_HIGHBIT_SET(*iso))
		{
			/* ASCII case is easy, assume it's one-to-one conversion */
			*utf++ = *iso++;
			l = 1;
			continue;
		}

		l = pg_encoding_verifymb(encoding, (const char *) iso, len);
		if (l < 0)
			break;

		/* collect coded char of length l */
		if (l == 1)
			iiso = *iso++;
		else if (l == 2)
		{
			iiso = *iso++ << 8;
			iiso |= *iso++;
		}
		else if (l == 3)
		{
			iiso = *iso++ << 16;
			iiso |= *iso++ << 8;
			iiso |= *iso++;
		}
		else if (l == 4)
		{
			iiso = *iso++ << 24;
			iiso |= *iso++ << 16;
			iiso |= *iso++ << 8;
			iiso |= *iso++;
		}
		else
		{
			elog(ERROR, "unsupported character length %d", l);
			iiso = 0;			/* keep compiler quiet */
		}

		/* First check ordinary map */
		p = bsearch(&iiso, map, mapsize,
					sizeof(pg_local_to_utf), compare2);

		if (p)
		{
			utf = store_coded_char(utf, p->utf);
			continue;
		}

		/* If there's a combined character map, try that */
		if (cmap)
		{
			cp = bsearch(&iiso, cmap, cmapsize,
						 sizeof(pg_local_to_utf_combined), compare4);

			if (cp)
			{
				utf = store_coded_char(utf, cp->utf1);
				utf = store_coded_char(utf, cp->utf2);
				continue;
			}
		}

		/* if there's a conversion function, try that */
		if (conv_func)
		{
			uint32		converted = (*conv_func) (iiso);

			if (converted)
			{
				utf = store_coded_char(utf, converted);
				continue;
			}
		}

		/* failed to translate this character */
		report_untranslatable_char(encoding, PG_UTF8,
								   (const char *) (iso - l), len);
	}

	/* if we broke out of loop early, must be invalid input */
	if (len > 0)
		report_invalid_encoding(encoding, (const char *) iso, len);

	*utf = '\0';
}
Ejemplo n.º 14
0
/*
 * pgfadvise_file
 */
static int
pgfadvise_loader_file(char *filename,
					  bool willneed, bool dontneed, VarBit *databit,
					  pgfloaderStruct *pgfloader)
{
	bits8	*sp;
	int		bitlen;
	bits8	x;
	int		i, k;

	/*
	 * We use the AllocateFile(2) provided by PostgreSQL.  We're going to
	 * close it ourselves even if PostgreSQL close it anyway at transaction
	 * end.
	 */
	FILE	*fp;
	int	fd;
	struct stat st;

	/*
	 * OS things : Page size
	 */
	pgfloader->pageSize = sysconf(_SC_PAGESIZE);

	/*
	 * we count the action we perform
	 * both are theorical : we don't know if the page was or not in memory
	 * when we call posix_fadvise
	 */
	pgfloader->pagesLoaded		= 0;
	pgfloader->pagesUnloaded	= 0;

	/*
	 * Fopen and fstat file
	 * fd will be provided to posix_fadvise
	 * if there is no file, just return 1, it is expected to leave the SRF
	 */
	fp = AllocateFile(filename, "rb");
	if (fp == NULL)
                return 1;

	fd = fileno(fp);
	if (fstat(fd, &st) == -1)
	{
		FreeFile(fp);
		elog(ERROR, "pgfadvise_loader: Can not stat object file: %s", filename);
		return 2;
	}

	elog(DEBUG1, "pgfadvise_loader: working on %s", filename);

	bitlen = VARBITLEN(databit);
	sp = VARBITS(databit);
	for (i = 0; i < bitlen - BITS_PER_BYTE; i += BITS_PER_BYTE, sp++)
	{
		x = *sp;
		/*  Is this bit set ? */
		for (k = 0; k < BITS_PER_BYTE; k++)
		{
			if (IS_HIGHBIT_SET(x))
			{
				if (willneed)
				{
					(void) posix_fadvise(fd,
					                     ((i+k) * pgfloader->pageSize),
					                     pgfloader->pageSize,
					                     POSIX_FADV_WILLNEED);
					pgfloader->pagesLoaded++;
				}
			}
			else if (dontneed)
			{
				(void) posix_fadvise(fd,
				                     ((i+k) * pgfloader->pageSize),
				                     pgfloader->pageSize,
				                     POSIX_FADV_DONTNEED);
				pgfloader->pagesUnloaded++;
			}

			x <<= 1;
		}
	}
	/*
	 * XXX this copy/paste of code to finnish to walk the bits is not pretty
	 */
	if (i < bitlen)
	{
		/* print the last partial byte */
		x = *sp;
		for (k = i; k < bitlen; k++)
		{
			if (IS_HIGHBIT_SET(x))
			{
				if (willneed)
				{
					(void) posix_fadvise(fd,
					                     (k * pgfloader->pageSize),
					                     pgfloader->pageSize,
					                     POSIX_FADV_WILLNEED);
					pgfloader->pagesLoaded++;
				}
			}
			else if (dontneed)
			{
				(void) posix_fadvise(fd,
				                     (k * pgfloader->pageSize),
				                     pgfloader->pageSize,
				                     POSIX_FADV_DONTNEED);
				pgfloader->pagesUnloaded++;
			}
			x <<= 1;
		}
	}
	FreeFile(fp);

	/*
	 * OS things : Pages free
	 */
	pgfloader->pagesFree = sysconf(_SC_AVPHYS_PAGES);

	return 0;
}
Ejemplo n.º 15
0
/*
 * Verify mbstr to make sure that it is validly encoded in the specified
 * encoding.
 *
 * mbstr is not necessarily zero terminated; length of mbstr is
 * specified by len.
 *
 * If OK, return length of string in the encoding.
 * If a problem is found, return -1 when noError is
 * true; when noError is false, ereport() a descriptive message.
 */
int
pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
{
	mbverifier	mbverify;
	int			mb_len;

	Assert(PG_VALID_ENCODING(encoding));

	/*
	 * In single-byte encodings, we need only reject nulls (\0).
	 */
	if (pg_encoding_max_length(encoding) <= 1)
	{
		const char *nullpos = memchr(mbstr, 0, len);

		if (nullpos == NULL)
			return len;
		if (noError)
			return -1;
		report_invalid_encoding(encoding, nullpos, 1);
	}

	/* fetch function pointer just once */
	mbverify = pg_wchar_table[encoding].mbverify;

	mb_len = 0;

	while (len > 0)
	{
		int			l;

		/* fast path for ASCII-subset characters */
		if (!IS_HIGHBIT_SET(*mbstr))
		{
			if (*mbstr != '\0')
			{
				mb_len++;
				mbstr++;
				len--;
				continue;
			}
			if (noError)
				return -1;
			report_invalid_encoding(encoding, mbstr, len);
		}

		l = (*mbverify) ((const unsigned char *) mbstr, len);

		if (l < 0)
		{
			if (noError)
				return -1;
			report_invalid_encoding(encoding, mbstr, len);
		}

		mbstr += l;
		len -= l;
		mb_len++;
	}
	return mb_len;
}