/*
 * CheckerConversion
 *
 * Validate the NUL-terminated string src and, if necessary, convert it from
 * checker->encoding into checker->db_encoding.
 *
 * When checker->check_encoding is off the string is returned untouched.
 * Otherwise src itself is returned whenever only validation was performed,
 * and the value produced by pg_do_encoding_conversion() is returned when a
 * real conversion was requested.  Invalid input is reported via
 * pg_verify_mbstr() (called with noError = false) or via ereport(ERROR).
 */
char *
CheckerConversion(Checker *checker, char *src)
{
	int			in_enc;
	int			db_enc;
	int			nbytes;

	/* Encoding checks disabled: hand the input straight back. */
	if (!checker->check_encoding)
		return src;

	in_enc = checker->encoding;
	db_enc = checker->db_encoding;
	nbytes = strlen(src);

	/*
	 * Same encoding on both sides, or SQL_ASCII input: there is nothing to
	 * convert, but the bytes still have to be valid in the database encoding.
	 */
	if (in_enc == db_enc || in_enc == PG_SQL_ASCII)
	{
		pg_verify_mbstr(db_enc, src, nbytes, false);
		return src;
	}

	/*
	 * SQL_ASCII database: conversion is impossible, yet the data must still
	 * be checked because client-side code may have escaped strings using the
	 * selected input encoding.  An ASCII-safe (backend-valid) encoding is
	 * simply verified as such.  For an ASCII-unsafe encoding we can neither
	 * pass the data on nor convert it, so as a compromise any zero byte or
	 * non-ASCII byte is rejected.
	 */
	if (db_enc == PG_SQL_ASCII)
	{
		if (!PG_VALID_BE_ENCODING(in_enc))
		{
			const char *cur = src;
			const char *stop = src + nbytes;

			while (cur < stop)
			{
				if (*cur == '\0' || IS_HIGHBIT_SET(*cur))
					ereport(ERROR,
							(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
							 errmsg("invalid byte value for encoding \"%s\": 0x%02x",
									pg_enc2name_tbl[PG_SQL_ASCII].name,
									(unsigned char) *cur)));
				cur++;
			}
		}
		else
			pg_verify_mbstr(in_enc, src, nbytes, false);

		return src;
	}

	/* General case: convert the input into the database encoding. */
	return (char *) pg_do_encoding_conversion((unsigned char *) src,
											  nbytes,
											  in_enc,
											  db_enc);
}
/* * pg_custom_client_to_server * * convert client encoding to server encoding, but use the passed in encodings * instead of the global client and server encoding variables. * * This routine is basically a slightly modified version of pg_client_to_server. * Instead of creating this routine a better way may have been to just call * pg_do_encoding_conversion(), which takes in the necessary arguments, however * it does not do several necessary checks that pg_client_to_server() does, and * altering it to have those check may break other parts of the system. Therefore * until there's a better idea we resort to duplicating some code. * * The reason for creating this routine is to let external tables do data * conversion reliably. Since each external table has an encoding attached to * it we'd like to just convert from that encoding to the server encoding without * altering the global client_encoding variable for this local database. */ char * pg_custom_to_server(const char *s, int len, int src_encoding, void *cep) { FmgrInfo *custom_encoding_proc = (FmgrInfo *)cep; Assert(DatabaseEncoding); Assert(ClientEncoding); if (len <= 0) return (char *) s; if (src_encoding == DatabaseEncoding->encoding || src_encoding == PG_SQL_ASCII) { /* * No conversion is needed, but we must still validate the data. */ (void) pg_verify_mbstr(DatabaseEncoding->encoding, s, len, false); return (char *) s; } if (DatabaseEncoding->encoding == PG_SQL_ASCII) { /* * No conversion is possible, but we must still validate the data, * because the client-side code might have done string escaping using * the selected client_encoding. If the client encoding is ASCII-safe * then we just do a straight validation under that encoding. For an * ASCII-unsafe encoding we have a problem: we dare not pass such data * to the parser but we have no way to convert it. We compromise by * rejecting the data if it contains any non-ASCII characters. 
*/ if (PG_VALID_BE_ENCODING(src_encoding)) (void) pg_verify_mbstr(src_encoding, s, len, false); else { int i; for (i = 0; i < len; i++) { if (s[i] == '\0' || IS_HIGHBIT_SET(s[i])) ereport(ERROR, (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), errmsg("invalid byte value for encoding \"%s\": 0x%02x", pg_enc2name_tbl[PG_SQL_ASCII].name, (unsigned char) s[i]))); } } return (char *) s; } return perform_default_encoding_conversion(s, len, true, src_encoding, custom_encoding_proc); }
/*
 * t_readline
 *
 * Fetch the next line of a tsearch data file, which is expected to be UTF-8,
 * and return it in the database encoding.  The result is always a separately
 * allocated (palloc'd) string; NULL means fgets() returned nothing (EOF).
 *
 * Lines are read into a fixed 4096-byte buffer, so a line must fit in it.
 *
 * Note: calling this function directly is deprecated; use tsearch_readline()
 * instead, which provides better error reporting.
 */
char *
t_readline(FILE *fp)
{
	char		line[4096];		/* lines must not be longer than this */
	char	   *converted;
	int			nbytes;

	if (fgets(line, sizeof(line), fp) == NULL)
		return NULL;

	nbytes = strlen(line);

	/* Reject anything that is not valid UTF-8 before converting. */
	(void) pg_verify_mbstr(PG_UTF8, line, nbytes, false);

	converted = pg_any_to_server(line, nbytes, PG_UTF8);

	/* A real conversion already produced a fresh string; return it as is. */
	if (converted != line)
		return converted;

	/*
	 * No conversion took place, so the result still points at our stack
	 * buffer.  Copy it; the original length is correct because the bytes are
	 * unchanged.
	 */
	return pnstrdup(line, nbytes);
}
/*
 * LogicalOutputWrite
 *
 * Output-plugin write callback: append the contents of ctx->out to the
 * tuplestore held in the DecodingOutputState as a three-column row
 * (lsn, xid, data) and bump the returned-row counter.
 *
 * Raises an error when the pending output is too large to fit in a datum.
 * The last_write argument is not used here.
 */
static void
LogicalOutputWrite(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid,
				   bool last_write)
{
	DecodingOutputState *state;
	Datum		row[3];
	bool		isnull[3] = {false, false, false};

	/* SQL Datums can only be of a limited length... */
	if (ctx->out->len > MaxAllocSize - VARHDRSZ)
		elog(ERROR, "too much output for sql interface");

	state = (DecodingOutputState *) ctx->output_writer_private;

	row[0] = LSNGetDatum(lsn);
	row[1] = TransactionIdGetDatum(xid);

	/*
	 * Textual output is expected to be in the database encoding; check that
	 * in assert-enabled builds.  Binary output is exempt.
	 */
	Assert(state->binary_output ||
		   pg_verify_mbstr(GetDatabaseEncoding(),
						   ctx->out->data, ctx->out->len,
						   false));

	/* ick, but cstring_to_text_with_len works for bytea perfectly fine */
	row[2] = PointerGetDatum(cstring_to_text_with_len(ctx->out->data,
													  ctx->out->len));

	tuplestore_putvalues(state->tupstore, state->tupdesc, row, isnull);
	state->returned_rows++;
}
/* * Convert string using encoding_names. * * BYTEA convert(BYTEA string, NAME src_encoding_name, NAME dest_encoding_name) */ Datum pg_convert(PG_FUNCTION_ARGS) { bytea *string = PG_GETARG_BYTEA_P(0); char *src_encoding_name = NameStr(*PG_GETARG_NAME(1)); int src_encoding = pg_char_to_encoding(src_encoding_name); char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2)); int dest_encoding = pg_char_to_encoding(dest_encoding_name); unsigned char *result; bytea *retval; unsigned char *str; int len; if (src_encoding < 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid source encoding name \"%s\"", src_encoding_name))); if (dest_encoding < 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid destination encoding name \"%s\"", dest_encoding_name))); /* make sure that source string is valid and null terminated */ len = VARSIZE(string) - VARHDRSZ; pg_verify_mbstr(src_encoding, VARDATA(string), len, false); str = palloc(len + 1); memcpy(str, VARDATA(string), len); *(str + len) = '\0'; result = pg_do_encoding_conversion(str, len, src_encoding, dest_encoding); /* * build bytea data type structure. */ len = strlen((char *) result) + VARHDRSZ; retval = palloc(len); SET_VARSIZE(retval, len); memcpy(VARDATA(retval), result, len - VARHDRSZ); if (result != str) pfree(result); pfree(str); /* free memory if allocated by the toaster */ PG_FREE_IF_COPY(string, 0); PG_RETURN_BYTEA_P(retval); }
/*
 * pg_verifymbstr
 *
 * Convenience form of pg_verify_mbstr() that checks mbstr (len bytes)
 * against the current database encoding.  The noError flag and the return
 * value are passed through to/from pg_verify_mbstr() unchanged.
 */
bool
pg_verifymbstr(const char *mbstr, int len, bool noError)
{
	int			db_encoding = GetDatabaseEncoding();

	return pg_verify_mbstr(db_encoding, mbstr, len, noError);
}
/*
 * get_line
 *
 * Read one line (at most max_linesize bytes) from f, validate it in the
 * given encoding, convert it to the database encoding and return it as a
 * text value.
 *
 * A line ends at "\n", "\r\n", a lone "\r", end of input, or once
 * max_linesize bytes have been collected; the terminator is not stored.
 *
 * *iseof is set to false when at least one character could be read from the
 * stream, and to true otherwise, in which case NULL is returned.  When
 * nothing could be read and errno is set, an exception is raised instead.
 */
static text *
get_line(FILE *f, int max_linesize, int encoding, bool *iseof)
{
	char	   *linebuf;
	int			nread = 0;
	bool		nothing_read = true;
	text	   *line = NULL;
	int			ch;

	linebuf = palloc(max_linesize + 2);

	/* Cleared so that a failing fgetc() can be told apart from plain EOF. */
	errno = 0;

	while (nread < max_linesize)
	{
		ch = fgetc(f);
		if (ch == EOF)
			break;

		nothing_read = false;	/* we were able to read one char */

		if (ch == '\n')
			break;

		if (ch == '\r')
		{
			/* Look ahead: swallow the '\n' of "\r\n", push back anything else. */
			int			next = fgetc(f);

			if (next != EOF && next != '\n')
				ungetc(next, f);
			break;
		}

		linebuf[nread++] = ch;
	}

	if (nothing_read)
	{
		/* Nothing was read: decide between a clean EOF and an I/O failure. */
		if (errno == EBADF)
		{
			CUSTOM_EXCEPTION(INVALID_OPERATION, "file descriptor isn't valid for reading");
		}
		else if (errno != 0)
		{
			CUSTOM_EXCEPTION(READ_ERROR, strerror(errno));
		}
	}
	else
	{
		char	   *converted;
		int			outlen;

		pg_verify_mbstr(encoding, linebuf, nread, false);
		converted = (char *) pg_do_encoding_conversion((unsigned char *) linebuf,
													   nread,
													   encoding,
													   GetDatabaseEncoding());

		/*
		 * If the conversion returned our own buffer, no NUL terminator is
		 * present, so the byte count must come from nread.
		 */
		if (converted == linebuf)
			outlen = nread;
		else
			outlen = strlen(converted);

		line = palloc(outlen + VARHDRSZ);
		memcpy(VARDATA(line), converted, outlen);
		SET_VARSIZE(line, outlen + VARHDRSZ);

		if (converted != linebuf)
			pfree(converted);
	}

	*iseof = nothing_read;

	pfree(linebuf);

	return line;
}