/*
 * Report the maximum number of bytes one character can occupy in the
 * given encoding, as recorded in pg_wchar_table.
 */
int
pg_encoding_max_length(int encoding)
{
	int			max_bytes;

	Assert(PG_VALID_ENCODING(encoding));

	max_bytes = pg_wchar_table[encoding].maxmblen;
	return max_bytes;
}
/* * Verify mbstr to make sure that it is validly encoded in the specified * encoding. * * mbstr is not necessarily zero terminated; length of mbstr is * specified by len. * * If OK, return TRUE. If a problem is found, return FALSE when noError is * true; when noError is false, ereport() a descriptive message. */ bool pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError) { mbverifier mbverify; Assert(PG_VALID_ENCODING(encoding)); /* * In single-byte encodings, we need only reject nulls (\0). */ if (pg_encoding_max_length(encoding) <= 1) { const char *nullpos = memchr(mbstr, 0, len); if (nullpos == NULL) return true; if (noError) return false; report_invalid_encoding(encoding, nullpos, 1); } /* fetch function pointer just once */ mbverify = pg_wchar_table[encoding].mbverify; while (len > 0) { int l; /* fast path for ASCII-subset characters */ if (!IS_HIGHBIT_SET(*mbstr)) { if (*mbstr != '\0') { mbstr++; len--; continue; } if (noError) return false; report_invalid_encoding(encoding, mbstr, len); } l = (*mbverify) ((const unsigned char *) mbstr, len); if (l < 0) { if (noError) return false; report_invalid_encoding(encoding, mbstr, len); } mbstr += l; len -= l; } return true; }
/*
 * Validate the first multibyte character found at mbstr.
 *
 * Returns the character's length in bytes when it is valid, or -1 when it
 * is not.  (The mbverify API is described in the comments earlier in this
 * file.)
 *
 * An encoding ID that falls outside pg_wchar_table is handled with the
 * PG_SQL_ASCII verifier.
 */
int
pg_encoding_verifymb(int encoding, const char *mbstr, int len)
{
	const unsigned char *bytes = (const unsigned char *) mbstr;

	Assert(PG_VALID_ENCODING(encoding));

	if (encoding >= 0 &&
		encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl))
		return (*pg_wchar_table[encoding].mbverify) (bytes, len);

	/* out-of-range ID: fall back to the SQL_ASCII entry */
	return (*pg_wchar_table[PG_SQL_ASCII].mbverify) (bytes, len);
}
/*
 * Return the length in bytes of the multibyte character starting at mbstr.
 *
 * An encoding ID that falls outside pg_wchar_table is handled with the
 * PG_SQL_ASCII length function.
 */
int
pg_encoding_mblen(int encoding, const unsigned char *mbstr)
{
	Assert(PG_VALID_ENCODING(encoding));

	if (encoding >= 0 &&
		encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl))
		return (*pg_wchar_table[encoding].mblen) (mbstr);

	/* out-of-range ID: fall back to the SQL_ASCII entry */
	return (*pg_wchar_table[PG_SQL_ASCII].mblen) (mbstr);
}
/* ---------- * convert to ASCII - enc is set as int4 * ---------- */ Datum to_ascii_enc(PG_FUNCTION_ARGS) { text *data = PG_GETARG_TEXT_P_COPY(0); int enc = PG_GETARG_INT32(1); if (!PG_VALID_ENCODING(enc)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("%d is not a valid encoding code", enc))); PG_RETURN_TEXT_P(encode_to_ascii(data, enc)); }
/* * check_encoding_conversion_args: check arguments of a conversion function * * "expected" arguments can be either an encoding ID or -1 to indicate that * the caller will check whether it accepts the ID. * * Note: the errors here are not really user-facing, so elog instead of * ereport seems sufficient. Also, we trust that the "expected" encoding * arguments are valid encoding IDs, but we don't trust the actuals. */ void check_encoding_conversion_args(int src_encoding, int dest_encoding, int len, int expected_src_encoding, int expected_dest_encoding) { if (!PG_VALID_ENCODING(src_encoding)) elog(ERROR, "invalid source encoding ID: %d", src_encoding); if (src_encoding != expected_src_encoding && expected_src_encoding >= 0) elog(ERROR, "expected source encoding \"%s\", but got \"%s\"", pg_enc2name_tbl[expected_src_encoding].name, pg_enc2name_tbl[src_encoding].name); if (!PG_VALID_ENCODING(dest_encoding)) elog(ERROR, "invalid destination encoding ID: %d", dest_encoding); if (dest_encoding != expected_dest_encoding && expected_dest_encoding >= 0) elog(ERROR, "expected destination encoding \"%s\", but got \"%s\"", pg_enc2name_tbl[expected_dest_encoding].name, pg_enc2name_tbl[dest_encoding].name); if (len < 0) elog(ERROR, "encoding conversion length must not be negative"); }
/*
 * local code ---> UTF8
 *
 * Converts a string in a local encoding to UTF8 by table lookup.
 *
 * iso: input local string (need not be null-terminated).
 * utf: pointer to the output area (must be large enough!)
 * map: the conversion map, searched with bsearch()/compare2.
 * cmap: the conversion map for combined characters, searched with
 *		 bsearch()/compare4 when the ordinary map has no match.
 *		 (optional; skipped when NULL)
 * size1: the number of entries in the conversion map.
 * size2: the number of entries in the conversion map for combined
 *		  characters (optional)
 * encoding: the PG identifier for the local encoding.
 * len: length of input string, in bytes.
 *
 * An ERROR is raised if encoding is not a valid encoding number.
 *
 * NOTE(review): the tail of this definition is not present in this excerpt;
 * only the conversion loop is visible.
 */
void
LocalToUtf(const unsigned char *iso, unsigned char *utf,
		   const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap,
		   int size1, int size2, int encoding, int len)
{
	unsigned int iiso;
	int			l;
	pg_local_to_utf *p;
	pg_local_to_utf_combined *cp;

	if (!PG_VALID_ENCODING(encoding))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid encoding number: %d", encoding)));

	/* each iteration consumes one input character of l bytes */
	for (; len > 0; len -= l)
	{
		/* "break" cases all represent errors */
		if (*iso == '\0')
			break;

		if (!IS_HIGHBIT_SET(*iso))
		{
			/* ASCII case is easy: copy the byte through unchanged */
			*utf++ = *iso++;
			l = 1;
			continue;
		}

		/* a negative length means the character failed verification */
		l = pg_encoding_verifymb(encoding, (const char *) iso, len);
		if (l < 0)
			break;

		/*
		 * Pack the l input bytes into iiso, first byte most significant.
		 *
		 * NOTE(review): there is no fallback branch here, so iiso would be
		 * used uninitialized if l were ever outside 1..4 -- confirm the
		 * verifier cannot return such a length.
		 */
		if (l == 1)
			iiso = *iso++;
		else if (l == 2)
		{
			iiso = *iso++ << 8;
			iiso |= *iso++;
		}
		else if (l == 3)
		{
			iiso = *iso++ << 16;
			iiso |= *iso++ << 8;
			iiso |= *iso++;
		}
		else if (l == 4)
		{
			iiso = *iso++ << 24;
			iiso |= *iso++ << 16;
			iiso |= *iso++ << 8;
			iiso |= *iso++;
		}

		p = bsearch(&iiso, map, size1,
					sizeof(pg_local_to_utf), compare2);

		if (p == NULL)
		{
			/*
			 * Not found in the ordinary map.  If there's a combined
			 * character map, try with it.
			 */
			if (cmap)
			{
				cp = bsearch(&iiso, cmap, size2,
							 sizeof(pg_local_to_utf_combined), compare4);

				if (cp)
				{
					/*
					 * Emit utf1 and then utf2, each most significant byte
					 * first; bytes that are zero are not written.
					 */
					if (cp->utf1 & 0xff000000)
						*utf++ = cp->utf1 >> 24;
					if (cp->utf1 & 0x00ff0000)
						*utf++ = (cp->utf1 & 0x00ff0000) >> 16;
					if (cp->utf1 & 0x0000ff00)
						*utf++ = (cp->utf1 & 0x0000ff00) >> 8;
					if (cp->utf1 & 0x000000ff)
						*utf++ = cp->utf1 & 0x000000ff;

					if (cp->utf2 & 0xff000000)
						*utf++ = cp->utf2 >> 24;
					if (cp->utf2 & 0x00ff0000)
						*utf++ = (cp->utf2 & 0x00ff0000) >> 16;
					if (cp->utf2 & 0x0000ff00)
						*utf++ = (cp->utf2 & 0x0000ff00) >> 8;
					if (cp->utf2 & 0x000000ff)
						*utf++ = cp->utf2 & 0x000000ff;

					continue;
				}
			}

			/* iso has already been advanced, so back up l bytes to report */
			report_untranslatable_char(encoding, PG_UTF8,
									   (const char *) (iso - l), len);
		}
		else
		{
			/* emit the mapped value, most significant byte first, skipping zero bytes */
			if (p->utf & 0xff000000)
				*utf++ = p->utf >> 24;
			if (p->utf & 0x00ff0000)
				*utf++ = (p->utf & 0x00ff0000) >> 16;
			if (p->utf & 0x0000ff00)
				*utf++ = (p->utf & 0x0000ff00) >> 8;
			if (p->utf & 0x000000ff)
				*utf++ = p->utf & 0x000000ff;
		}
	}
/*
 * local code ---> UTF8
 *
 * iso: input string in local encoding (need not be null-terminated)
 * len: length of input string (in bytes)
 * utf: pointer to the output area (must be large enough!)
 *		(output string will be null-terminated)
 * map: conversion map for single characters
 * mapsize: number of entries in the conversion map
 * cmap: conversion map for combined characters
 *		 (optional, pass NULL if none)
 * cmapsize: number of entries in the conversion map for combined characters
 *		 (optional, pass 0 if none)
 * conv_func: algorithmic encoding conversion function
 *		 (optional, pass NULL if none)
 * encoding: PG identifier for the local encoding
 *
 * For each character, the map is consulted first; if no match, the cmap
 * (if provided) is consulted next; if still no match, the conv_func
 * (if provided) is applied.  An error is raised if no match is found.
 *
 * An ERROR is also raised if encoding is not a valid encoding number, and
 * invalid input (an embedded NUL byte or a character that fails
 * verification) is reported through report_invalid_encoding().
 *
 * See pg_wchar.h for more details about the data structures used here.
 */
void
LocalToUtf(const unsigned char *iso, int len,
		   unsigned char *utf,
		   const pg_local_to_utf *map, int mapsize,
		   const pg_local_to_utf_combined *cmap, int cmapsize,
		   utf_local_conversion_func conv_func,
		   int encoding)
{
	uint32		iiso;
	int			l;
	const pg_local_to_utf *p;
	const pg_local_to_utf_combined *cp;

	if (!PG_VALID_ENCODING(encoding))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid encoding number: %d", encoding)));

	for (; len > 0; len -= l)
	{
		/* "break" cases all represent errors */
		if (*iso == '\0')
			break;

		if (!IS_HIGHBIT_SET(*iso))
		{
			/* ASCII case is easy, assume it's one-to-one conversion */
			*utf++ = *iso++;
			l = 1;
			continue;
		}

		l = pg_encoding_verifymb(encoding, (const char *) iso, len);
		if (l < 0)
			break;

		/*
		 * Collect coded char of length l, first byte most significant.
		 *
		 * Each byte is widened to uint32 before shifting.  Without the cast
		 * the unsigned char is promoted to (signed) int, and since the lead
		 * byte has its high bit set here, shifting it left by 24 would
		 * overflow int, which is undefined behavior.
		 */
		if (l == 1)
			iiso = *iso++;
		else if (l == 2)
		{
			iiso = (uint32) *iso++ << 8;
			iiso |= *iso++;
		}
		else if (l == 3)
		{
			iiso = (uint32) *iso++ << 16;
			iiso |= (uint32) *iso++ << 8;
			iiso |= *iso++;
		}
		else if (l == 4)
		{
			iiso = (uint32) *iso++ << 24;
			iiso |= (uint32) *iso++ << 16;
			iiso |= (uint32) *iso++ << 8;
			iiso |= *iso++;
		}
		else
		{
			elog(ERROR, "unsupported character length %d", l);
			iiso = 0;			/* keep compiler quiet */
		}

		/* First check ordinary map */
		p = bsearch(&iiso, map, mapsize,
					sizeof(pg_local_to_utf), compare2);

		if (p)
		{
			utf = store_coded_char(utf, p->utf);
			continue;
		}

		/* If there's a combined character map, try that */
		if (cmap)
		{
			cp = bsearch(&iiso, cmap, cmapsize,
						 sizeof(pg_local_to_utf_combined), compare4);

			if (cp)
			{
				/* a combined entry expands to two UTF8 characters */
				utf = store_coded_char(utf, cp->utf1);
				utf = store_coded_char(utf, cp->utf2);
				continue;
			}
		}

		/* if there's a conversion function, try that */
		if (conv_func)
		{
			uint32		converted = (*conv_func) (iiso);

			/* a zero result means the function had no translation */
			if (converted)
			{
				utf = store_coded_char(utf, converted);
				continue;
			}
		}

		/* failed to translate this character; iso is already past it */
		report_untranslatable_char(encoding, PG_UTF8,
								   (const char *) (iso - l), len);
	}

	/* if we broke out of loop early, must be invalid input */
	if (len > 0)
		report_invalid_encoding(encoding, (const char *) iso, len);

	*utf = '\0';
}
/*
 * UTF8 ---> local code
 *
 * utf: input string in UTF8 encoding (need not be null-terminated)
 * len: length of input string (in bytes)
 * iso: pointer to the output area (must be large enough!)
 *		(output string will be null-terminated)
 * map: conversion map for single characters
 * mapsize: number of entries in the conversion map
 * cmap: conversion map for combined characters
 *		 (optional, pass NULL if none)
 * cmapsize: number of entries in the conversion map for combined characters
 *		 (optional, pass 0 if none)
 * conv_func: algorithmic encoding conversion function
 *		 (optional, pass NULL if none)
 * encoding: PG identifier for the local encoding
 *
 * For each character, the cmap (if provided) is consulted first; if no match,
 * the map is consulted next; if still no match, the conv_func (if provided)
 * is applied.  An error is raised if no match is found.
 *
 * An ERROR is also raised if encoding is not a valid encoding number, and
 * invalid input (an embedded NUL byte, a truncated character, or an illegal
 * UTF8 sequence) is reported through report_invalid_encoding().
 *
 * See pg_wchar.h for more details about the data structures used here.
 */
void
UtfToLocal(const unsigned char *utf, int len,
		   unsigned char *iso,
		   const pg_utf_to_local *map, int mapsize,
		   const pg_utf_to_local_combined *cmap, int cmapsize,
		   utf_local_conversion_func conv_func,
		   int encoding)
{
	uint32		iutf;
	int			l;
	const pg_utf_to_local *p;
	const pg_utf_to_local_combined *cp;

	if (!PG_VALID_ENCODING(encoding))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid encoding number: %d", encoding)));

	for (; len > 0; len -= l)
	{
		/* "break" cases all represent errors */
		if (*utf == '\0')
			break;

		l = pg_utf_mblen(utf);
		if (len < l)
			break;

		if (!pg_utf8_islegal(utf, l))
			break;

		if (l == 1)
		{
			/* ASCII case is easy, assume it's one-to-one conversion */
			*iso++ = *utf++;
			continue;
		}

		/*
		 * Collect coded char of length l, first byte most significant.
		 *
		 * Each byte is widened to uint32 before shifting.  Without the cast
		 * the unsigned char is promoted to (signed) int, and shifting a
		 * lead byte with its high bit set left by 24 would overflow int,
		 * which is undefined behavior.
		 */
		if (l == 2)
		{
			iutf = (uint32) *utf++ << 8;
			iutf |= *utf++;
		}
		else if (l == 3)
		{
			iutf = (uint32) *utf++ << 16;
			iutf |= (uint32) *utf++ << 8;
			iutf |= *utf++;
		}
		else if (l == 4)
		{
			iutf = (uint32) *utf++ << 24;
			iutf |= (uint32) *utf++ << 16;
			iutf |= (uint32) *utf++ << 8;
			iutf |= *utf++;
		}
		else
		{
			elog(ERROR, "unsupported character length %d", l);
			iutf = 0;			/* keep compiler quiet */
		}

		/* First, try with combined map if possible */
		if (cmap && len > l)
		{
			/* remember where we are so a failed lookup can be undone */
			const unsigned char *utf_save = utf;
			int			len_save = len;
			int			l_save = l;

			/* collect next character, same as above */
			len -= l;

			l = pg_utf_mblen(utf);
			if (len < l)
				break;

			if (!pg_utf8_islegal(utf, l))
				break;

			/* We assume ASCII character cannot be in combined map */
			if (l > 1)
			{
				uint32		iutf2;
				uint32		cutf[2];

				if (l == 2)
				{
					iutf2 = (uint32) *utf++ << 8;
					iutf2 |= *utf++;
				}
				else if (l == 3)
				{
					iutf2 = (uint32) *utf++ << 16;
					iutf2 |= (uint32) *utf++ << 8;
					iutf2 |= *utf++;
				}
				else if (l == 4)
				{
					iutf2 = (uint32) *utf++ << 24;
					iutf2 |= (uint32) *utf++ << 16;
					iutf2 |= (uint32) *utf++ << 8;
					iutf2 |= *utf++;
				}
				else
				{
					elog(ERROR, "unsupported character length %d", l);
					iutf2 = 0;	/* keep compiler quiet */
				}

				/* the search key is the pair (first char, second char) */
				cutf[0] = iutf;
				cutf[1] = iutf2;

				cp = bsearch(cutf, cmap, cmapsize,
							 sizeof(pg_utf_to_local_combined), compare3);

				if (cp)
				{
					/*
					 * Both characters are consumed: len was already reduced
					 * for the first, and the loop step subtracts the second.
					 */
					iso = store_coded_char(iso, cp->code);
					continue;
				}
			}

			/* fail, so back up to reprocess second character next time */
			utf = utf_save;
			len = len_save;
			l = l_save;
		}

		/* Now check ordinary map */
		p = bsearch(&iutf, map, mapsize,
					sizeof(pg_utf_to_local), compare1);

		if (p)
		{
			iso = store_coded_char(iso, p->code);
			continue;
		}

		/* if there's a conversion function, try that */
		if (conv_func)
		{
			uint32		converted = (*conv_func) (iutf);

			/* a zero result means the function had no translation */
			if (converted)
			{
				iso = store_coded_char(iso, converted);
				continue;
			}
		}

		/* failed to translate this character; utf is already past it */
		report_untranslatable_char(PG_UTF8, encoding,
								   (const char *) (utf - l), len);
	}

	/* if we broke out of loop early, must be invalid input */
	if (len > 0)
		report_invalid_encoding(PG_UTF8, (const char *) utf, len);

	*iso = '\0';
}
/*
 * Like GetExtTableEntry(Oid), but returns NULL instead of throwing
 * an error if no pg_exttable entry is found.
 *
 * relid: OID of the relation whose pg_exttable row is wanted.
 *
 * On success the result is a freshly palloc0'd ExtTableEntry filled in from
 * the catalog tuple.  Attributes that must never be NULL (location, format
 * type, format options, encoding, writable) are checked with Insist().
 *
 * NOTE(review): the catalog is only read here, yet it is opened with
 * RowExclusiveLock -- confirm whether that lock level is intentional.
 */
ExtTableEntry *
GetExtTableEntryIfExists(Oid relid)
{
	Relation	pg_exttable_rel;
	ScanKeyData skey;
	SysScanDesc scan;
	HeapTuple	tuple;
	ExtTableEntry *extentry;
	Datum		locations,
				fmtcode,
				fmtopts,
				command,
				rejectlimit,
				rejectlimittype,
				fmterrtbl,
				encoding,
				iswritable;
	bool		isNull;

	pg_exttable_rel = heap_open(ExtTableRelationId, RowExclusiveLock);

	/* look up the row by reloid through the reloid index */
	ScanKeyInit(&skey,
				Anum_pg_exttable_reloid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(relid));

	scan = systable_beginscan(pg_exttable_rel, ExtTableReloidIndexId, true,
							  SnapshotNow, 1, &skey);
	tuple = systable_getnext(scan);

	if (!HeapTupleIsValid(tuple))
	{
		/* no entry: release everything and tell the caller so */
		systable_endscan(scan);
		heap_close(pg_exttable_rel, RowExclusiveLock);
		return NULL;
	}

	extentry = (ExtTableEntry *) palloc0(sizeof(ExtTableEntry));

	/* get the location list */
	locations = heap_getattr(tuple,
							 Anum_pg_exttable_location,
							 RelationGetDescr(pg_exttable_rel),
							 &isNull);

	if (isNull)
	{
		Insist(false);			/* location list is always populated (url or
								 * ON X) */
	}
	else
	{
		Datum	   *elems;
		int			nelems;
		int			i;
		char	   *loc_str = NULL;

		deconstruct_array(DatumGetArrayTypeP(locations),
						  TEXTOID, -1, false, 'i',
						  &elems, NULL, &nelems);

		for (i = 0; i < nelems; i++)
		{
			loc_str = DatumGetCString(DirectFunctionCall1(textout, elems[i]));

			/* append to a list of Value nodes, size nelems */
			extentry->locations = lappend(extentry->locations,
										  makeString(pstrdup(loc_str)));
		}

		/*
		 * After the loop loc_str holds the last location (or NULL for an
		 * empty array); the web/non-web decision is based on that one only.
		 */
		if (loc_str && (IS_FILE_URI(loc_str) ||
						IS_GPFDIST_URI(loc_str) ||
						IS_GPFDISTS_URI(loc_str)))
			extentry->isweb = false;
		else
			extentry->isweb = true;
	}

	/*
	 * Get the execute command; a NULL attribute simply means no command.
	 *
	 * The original code also tested a "locationNull" flag here, but that
	 * flag was never set, so the associated error could not be raised; a
	 * NULL location is already rejected by the Insist() above.
	 */
	command = heap_getattr(tuple,
						   Anum_pg_exttable_command,
						   RelationGetDescr(pg_exttable_rel),
						   &isNull);

	if (isNull)
		extentry->command = NULL;
	else
		extentry->command = DatumGetCString(DirectFunctionCall1(textout, command));

	/* get the format code */
	fmtcode = heap_getattr(tuple,
						   Anum_pg_exttable_fmttype,
						   RelationGetDescr(pg_exttable_rel),
						   &isNull);

	Insist(!isNull);
	extentry->fmtcode = DatumGetChar(fmtcode);
	Insist(extentry->fmtcode == 'c' ||
		   extentry->fmtcode == 't' ||
		   extentry->fmtcode == 'b' ||
		   extentry->fmtcode == 'a' ||
		   extentry->fmtcode == 'p');

	/* get the format options string */
	fmtopts = heap_getattr(tuple,
						   Anum_pg_exttable_fmtopts,
						   RelationGetDescr(pg_exttable_rel),
						   &isNull);

	Insist(!isNull);
	extentry->fmtopts = DatumGetCString(DirectFunctionCall1(textout, fmtopts));

	/* get the reject limit */
	rejectlimit = heap_getattr(tuple,
							   Anum_pg_exttable_rejectlimit,
							   RelationGetDescr(pg_exttable_rel),
							   &isNull);

	if (!isNull)
		extentry->rejectlimit = DatumGetInt32(rejectlimit);
	else
		extentry->rejectlimit = -1; /* mark that no SREH requested */

	/* get the reject limit type; only decode the datum when it is not NULL */
	rejectlimittype = heap_getattr(tuple,
								   Anum_pg_exttable_rejectlimittype,
								   RelationGetDescr(pg_exttable_rel),
								   &isNull);

	if (!isNull)
	{
		extentry->rejectlimittype = DatumGetChar(rejectlimittype);
		Insist(extentry->rejectlimittype == 'r' ||
			   extentry->rejectlimittype == 'p');
	}
	else
		extentry->rejectlimittype = -1;

	/* get the error table oid */
	fmterrtbl = heap_getattr(tuple,
							 Anum_pg_exttable_fmterrtbl,
							 RelationGetDescr(pg_exttable_rel),
							 &isNull);

	if (isNull)
		extentry->fmterrtbl = InvalidOid;
	else
		extentry->fmterrtbl = DatumGetObjectId(fmterrtbl);

	/* get the table encoding */
	encoding = heap_getattr(tuple,
							Anum_pg_exttable_encoding,
							RelationGetDescr(pg_exttable_rel),
							&isNull);

	Insist(!isNull);
	extentry->encoding = DatumGetInt32(encoding);
	Insist(PG_VALID_ENCODING(extentry->encoding));

	/* get the writable flag */
	iswritable = heap_getattr(tuple,
							  Anum_pg_exttable_writable,
							  RelationGetDescr(pg_exttable_rel),
							  &isNull);

	Insist(!isNull);
	extentry->iswritable = DatumGetBool(iswritable);

	/* Finish up scan and close pg_exttable catalog. */
	systable_endscan(scan);
	heap_close(pg_exttable_rel, RowExclusiveLock);

	return extentry;
}