/*
 * EUC_TW ---> pg_wchar
 *
 * Convert at most len bytes of EUC_TW text at *from into pg_wchar values
 * at *to.  Returns the number of wchars produced; the output array is
 * additionally zero-terminated.  Conversion stops early at a NUL byte or
 * when fewer bytes remain than the announced sequence needs (the trailing
 * fragment is then consumed byte-by-byte through the ASCII branch).
 */
static int
pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
{
	int			cnt = 0;

	while (len > 0 && *from)
	{
		if (*from == SS2 && len >= 4)	/* code set 2: SS2 + plane + 2 bytes */
		{
			from++;
			/* pack SS2 marker in the top byte, then the three data bytes */
			*to = (((uint32) SS2) << 24) | (*from++ << 16);
			*to |= *from++ << 8;
			*to |= *from++;
			len -= 4;
		}
		else if (*from == SS3 && len >= 3)	/* code set 3 (unused?) */
		{
			from++;
			*to = (SS3 << 16) | (*from++ << 8);
			*to |= *from++;
			len -= 3;
		}
		else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1: 2 bytes */
		{
			*to = *from++ << 8;
			*to |= *from++;
			len -= 2;
		}
		else					/* must be ASCII (code set 0) */
		{
			*to = *from++;
			len--;
		}
		to++;
		cnt++;
	}
	*to = 0;					/* zero-terminate output */
	return cnt;
}
/*
 * EUC (EUC_JP style) ---> pg_wchar
 *
 * Convert at most len bytes of EUC text at *from into pg_wchar values at
 * *to.  Returns the number of wchars produced; the output array is also
 * zero-terminated.  Stops early on a NUL byte or a truncated sequence
 * (which then falls through to the single-byte branch).
 */
static int
pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
{
	int			count = 0;

	for (;;)
	{
		unsigned char b;

		if (len <= 0)
			break;
		b = *from;
		if (b == 0)
			break;

		if (b == SS2 && len >= 2)
		{
			/* JIS X 0201 (so-called "1 byte KANA"): SS2 + one byte */
			*to = (SS2 << 8) | from[1];
			from += 2;
			len -= 2;
		}
		else if (b == SS3 && len >= 3)
		{
			/* JIS X 0212 KANJI: SS3 + two bytes */
			*to = (SS3 << 16) | (from[1] << 8) | from[2];
			from += 3;
			len -= 3;
		}
		else if (IS_HIGHBIT_SET(b) && len >= 2)
		{
			/* JIS X 0208 KANJI: two high-bit bytes */
			*to = (from[0] << 8) | from[1];
			from += 2;
			len -= 2;
		}
		else
		{
			/* must be ASCII */
			*to = from[0];
			from += 1;
			len -= 1;
		}
		to++;
		count++;
	}
	*to = 0;					/* zero-terminate output */
	return count;
}
/*
 * Verify one JOHAB character starting at *s (at most len bytes available).
 *
 * Returns the character's length in bytes if it is valid, or -1 if it is
 * truncated or contains a byte outside the valid EUC range.
 */
static int
pg_johab_verifier(const unsigned char *s, int len)
{
	int			mblen = pg_johab_mblen(s);
	int			i;

	/* not enough bytes left for the full character? */
	if (len < mblen)
		return -1;

	/* single-byte ASCII needs no further checking */
	if (!IS_HIGHBIT_SET(*s))
		return mblen;

	/* validate every continuation byte */
	for (i = 1; i < mblen; i++)
	{
		if (!IS_EUC_RANGE_VALID(s[i]))
			return -1;
	}
	return mblen;
}
/*
 * MIC (mule internal code) ---> EUC_KR
 *
 * Convert len bytes of MULE_INTERNAL text at *mic into EUC_KR at *p.
 * The output is NUL-terminated.  ereport()s on invalid input or on a
 * character that has no EUC_KR representation (only LC_KS5601 translates).
 */
static void
mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
{
	while (len > 0)
	{
		int			lead = *mic;

		if (!IS_HIGHBIT_SET(lead))
		{
			/* ASCII: an embedded NUL is invalid input */
			if (lead == 0)
				report_invalid_encoding(PG_MULE_INTERNAL,
										(const char *) mic, len);
			*p++ = lead;
			mic++;
			len--;
		}
		else
		{
			int			mblen;

			/* validate the multibyte sequence before using it */
			mblen = pg_encoding_verifymb(PG_MULE_INTERNAL,
										 (const char *) mic, len);
			if (mblen < 0)
				report_invalid_encoding(PG_MULE_INTERNAL,
										(const char *) mic, len);
			if (lead == LC_KS5601)
			{
				/* drop the leading charset ID byte, keep the two data bytes */
				*p++ = mic[1];
				*p++ = mic[2];
			}
			else
				report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_KR,
										   (const char *) mic, len);
			mic += mblen;
			len -= mblen;
		}
	}
	*p = '\0';
}
/*
 * downcase_truncate_identifier() --- do appropriate downcasing and
 * truncation of an unquoted identifier.  Optionally warn of truncation.
 *
 * Returns a palloc'd string containing the adjusted identifier.
 *
 * Note: in some usages the passed string is not null-terminated.
 *
 * Note: the API of this function is designed to allow for downcasing
 * transformations that increase the string length, but we don't yet
 * support that.  If you want to implement it, you'll need to fix
 * SplitIdentifierString() in utils/adt/varlena.c.
 */
char *
downcase_truncate_identifier(const char *ident, int len, bool warn)
{
	char	   *result = palloc(len + 1);
	bool		single_byte = (pg_database_encoding_max_length() == 1);
	int			pos;

	/*
	 * SQL99 specifies Unicode-aware case normalization, which we don't yet
	 * have the infrastructure for.  Instead we use tolower() to provide a
	 * locale-aware translation.  However, there are some locales where this
	 * is not right either (eg, Turkish may do strange things with 'i' and
	 * 'I').  Our current compromise is to use tolower() for characters with
	 * the high bit set, as long as they aren't part of a multi-byte
	 * character, and use an ASCII-only downcasing for 7-bit characters.
	 */
	for (pos = 0; pos < len; pos++)
	{
		unsigned char c = (unsigned char) ident[pos];

		if (c >= 'A' && c <= 'Z')
			c += 'a' - 'A';
		else if (single_byte && IS_HIGHBIT_SET(c) && isupper(c))
			c = tolower(c);
		result[pos] = (char) c;
	}
	result[len] = '\0';

	if (len >= NAMEDATALEN)
		truncate_identifier(result, len, warn);

	return result;
}
/*
 * Convert a string value to an SQL string literal and append it to
 * the given buffer.  We assume the specified client_encoding and
 * standard_conforming_strings settings.
 *
 * This is essentially equivalent to libpq's PQescapeStringInternal,
 * except for the output buffer structure.  We need it in situations
 * where we do not have a PGconn available.  Where we do,
 * appendStringLiteralConn is a better choice.
 */
void
appendStringLiteral(PQExpBuffer buf, const char *str,
					int encoding, bool std_strings)
{
	size_t		length = strlen(str);
	const char *source = str;
	char	   *target;

	/* Worst case: every char doubled, plus the two enclosing quotes */
	if (!enlargePQExpBuffer(buf, 2 * length + 2))
		return;

	target = buf->data + buf->len;
	*target++ = '\'';

	while (*source != '\0')
	{
		char		c = *source;
		int			len;
		int			i;

		/* Fast path for plain ASCII */
		if (!IS_HIGHBIT_SET(c))
		{
			/* Apply quoting if needed (doubles quote/backslash chars) */
			if (SQL_STR_DOUBLE(c, !std_strings))
				*target++ = c;
			/* Copy the character */
			*target++ = c;
			source++;
			continue;
		}

		/* Slow path for possible multibyte characters */
		len = PQmblen(source, encoding);

		/* Copy the character, stopping early at a NUL byte */
		for (i = 0; i < len; i++)
		{
			if (*source == '\0')
				break;
			*target++ = *source++;
		}

		/*
		 * If we hit premature end of string (ie, incomplete multibyte
		 * character), try to pad out to the correct length with spaces. We
		 * may not be able to pad completely, but we will always be able to
		 * insert at least one pad space (since we'd not have quoted a
		 * multibyte character).  This should be enough to make a string that
		 * the server will error out on.
		 */
		if (i < len)
		{
			char	   *stop = buf->data + buf->maxlen - 2;

			for (; i < len; i++)
			{
				if (target >= stop)
					break;
				*target++ = ' ';
			}
			break;
		}
	}

	/* Write the terminating quote and NUL character. */
	*target++ = '\'';
	*target = '\0';

	buf->len = target - buf->data;
}
/*-------------------------------------------------------------------------
 * The next token in the input stream is known to be a number; lex it.
 *
 * In JSON, a number consists of four parts:
 *
 * (1) An optional minus sign ('-').
 *
 * (2) Either a single '0', or a string of one or more digits that does not
 *	   begin with a '0'.
 *
 * (3) An optional decimal part, consisting of a period ('.') followed by
 *	   one or more digits.  (Note: While this part can be omitted
 *	   completely, it's not OK to have only the decimal point without
 *	   any digits afterwards.)
 *
 * (4) An optional exponent part, consisting of 'e' or 'E', optionally
 *	   followed by '+' or '-', followed by one or more digits.  (Note:
 *	   As with the decimal part, if 'e' or 'E' is present, it must be
 *	   followed by at least one digit.)
 *
 * The 's' argument to this function points to the ostensible beginning
 * of part 2 - i.e. the character after any optional minus sign, and the
 * first character of the string if there is none.
 *
 *-------------------------------------------------------------------------
 */
static void
json_lex_number(JsonLexContext *lex, char *s)
{
	bool		error = false;
	char	   *p;

	/* Part (1): leading sign indicator. */
	/* Caller already did this for us; so do nothing. */

	/* Part (2): parse main digit string. */
	if (*s == '0')
		++s;
	else if (*s >= '1' && *s <= '9')
	{
		do
		{
			++s;
		} while (*s >= '0' && *s <= '9');
	}
	else
		error = true;

	/* Part (3): parse optional decimal portion. */
	if (*s == '.')
	{
		++s;

		/*
		 * Bug fix: this test previously used &&, which is always false, so
		 * a '.' with no following digit was never flagged and the do-loop
		 * below unconditionally consumed one character -- for input such as
		 * "1." that stepped past the terminating NUL (out-of-bounds read)
		 * and malformed numbers like "1.x" were silently accepted.
		 */
		if (*s < '0' || *s > '9')
			error = true;
		else
		{
			do
			{
				++s;
			} while (*s >= '0' && *s <= '9');
		}
	}

	/* Part (4): parse optional exponent. */
	if (*s == 'e' || *s == 'E')
	{
		++s;
		if (*s == '+' || *s == '-')
			++s;
		/* Same && -> || fix as in part (3): require at least one digit. */
		if (*s < '0' || *s > '9')
			error = true;
		else
		{
			do
			{
				++s;
			} while (*s >= '0' && *s <= '9');
		}
	}

	/*
	 * Check for trailing garbage.  As in json_lex(), any alphanumeric stuff
	 * abutting the number is reported as a single error token, to make
	 * error messages more intelligible.
	 */
	for (p = s; (*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
		 (*p >= '0' && *p <= '9') || *p == '_' || IS_HIGHBIT_SET(*p); ++p)
		;
	lex->token_terminator = p;
	if (p > s || error)
		report_invalid_token(lex);
}
/*
 * Lex one token from the input stream.
 *
 * Sets lex->token_start/token_terminator to delimit the token, and
 * lex->token_type to classify it.  At end of input, token_start and
 * token_terminator are both set to NULL.
 */
static void
json_lex(JsonLexContext *lex)
{
	char	   *s;

	/* Skip leading whitespace. */
	s = lex->token_terminator;
	while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')
	{
		if (*s == '\n')
			++lex->line_number;
		++s;
	}
	lex->token_start = s;

	/* Determine token type. */
	if (strchr("{}[],:", s[0]))
	{
		/* strchr() doesn't return false on a NUL input. */
		if (s[0] == '\0')
		{
			/* End of string. */
			lex->token_start = NULL;
			lex->token_terminator = NULL;
		}
		else
		{
			/* Single-character token, some kind of punctuation mark. */
			lex->token_terminator = s + 1;
		}
		/* punctuation carries no value; the parser matches it by text */
		lex->token_type = JSON_VALUE_INVALID;
	}
	else if (*s == '"')
	{
		/* String. */
		json_lex_string(lex);
		lex->token_type = JSON_VALUE_STRING;
	}
	else if (*s == '-')
	{
		/* Negative number. */
		json_lex_number(lex, s + 1);
		lex->token_type = JSON_VALUE_NUMBER;
	}
	else if (*s >= '0' && *s <= '9')
	{
		/* Positive number. */
		json_lex_number(lex, s);
		lex->token_type = JSON_VALUE_NUMBER;
	}
	else
	{
		char	   *p;

		/*
		 * We're not dealing with a string, number, legal punctuation mark,
		 * or end of string.  The only legal tokens we might find here are
		 * true, false, and null, but for error reporting purposes we scan
		 * until we see a non-alphanumeric character.  That way, we can report
		 * the whole word as an unexpected token, rather than just some
		 * unintuitive prefix thereof.
		 */
		for (p = s; (*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
			 (*p >= '0' && *p <= '9') || *p == '_' || IS_HIGHBIT_SET(*p); ++p)
			;

		/*
		 * We got some sort of unexpected punctuation or an otherwise
		 * unexpected character, so just complain about that one character.
		 */
		if (p == s)
		{
			lex->token_terminator = s + 1;
			report_invalid_token(lex);
		}

		/*
		 * We've got a real alphanumeric token here.  If it happens to be
		 * true, false, or null, all is well.  If not, error out.
		 */
		lex->token_terminator = p;
		if (p - s == 4)
		{
			if (memcmp(s, "true", 4) == 0)
				lex->token_type = JSON_VALUE_TRUE;
			else if (memcmp(s, "null", 4) == 0)
				lex->token_type = JSON_VALUE_NULL;
			else
				report_invalid_token(lex);
		}
		else if (p - s == 5 && memcmp(s, "false", 5) == 0)
			lex->token_type = JSON_VALUE_FALSE;
		else
			report_invalid_token(lex);
	}
}
/**
 * @brief Performs data loading.
 *
 * Invokes pg_bulkload() user-defined function with given parameters
 * in single transaction.
 *
 * @param options  List of control-file / command-line option strings;
 *                 must not be NIL.
 * @return exitcode (0 on success, E_PG_USER if any rows were rejected).
 */
static int
LoaderLoadMain(List *options)
{
	PGresult   *res;
	const char *params[1];
	StringInfoData buf;
	int			encoding;
	int			errors;
	ListCell   *cell;

	if (options == NIL)
		ereport(ERROR,
				(errcode(EXIT_FAILURE),
				 errmsg("requires control file or command line options")));

	initStringInfo(&buf);
	reconnect(ERROR);
	encoding = PQclientEncoding(connection);

	elog(NOTICE, "BULK LOAD START");

	/* form options as text[] */
	appendStringInfoString(&buf, "{\"");
	foreach (cell, options)
	{
		const char *item = lfirst(cell);

		/* buf.len > 2 means at least one item was already appended */
		if (buf.len > 2)
			appendStringInfoString(&buf, "\",\"");

		/* escape " and \ */
		while (*item)
		{
			if (*item == '"' || *item == '\\')
			{
				appendStringInfoChar(&buf, '\\');
				appendStringInfoChar(&buf, *item);
				item++;
			}
			else if (!IS_HIGHBIT_SET(*item))
			{
				appendStringInfoChar(&buf, *item);
				item++;
			}
			else
			{
				/* copy a whole multibyte character so we never split one */
				int			n = PQmblen(item, encoding);

				appendBinaryStringInfo(&buf, item, n);
				item += n;
			}
		}
	}
	appendStringInfoString(&buf, "\"}");

	command("BEGIN", 0, NULL);
	params[0] = buf.data;
	res = execute("SELECT * FROM pg_bulkload($1)", 1, params);
	if (PQresultStatus(res) == PGRES_COPY_IN)
	{
		/* server wants the data streamed from stdin */
		PQclear(res);
		res = RemoteLoad(connection, stdin, type_binary);
		if (PQresultStatus(res) != PGRES_TUPLES_OK)
			elog(ERROR, "copy failed: %s", PQerrorMessage(connection));
	}
	command("COMMIT", 0, NULL);

	errors = atoi(PQgetvalue(res, 0, 2)) +	/* parse errors */
		atoi(PQgetvalue(res, 0, 3));		/* duplicate errors */

	elog(NOTICE, "BULK LOAD END\n"
		 "\t%s Rows skipped.\n"
		 "\t%s Rows successfully loaded.\n"
		 "\t%s Rows not loaded due to parse errors.\n"
		 "\t%s Rows not loaded due to duplicate errors.\n"
		 "\t%s Rows replaced with new rows.",
		 PQgetvalue(res, 0, 0),
		 PQgetvalue(res, 0, 1),
		 PQgetvalue(res, 0, 2),
		 PQgetvalue(res, 0, 3),
		 PQgetvalue(res, 0, 4));
	PQclear(res);
	disconnect();
	termStringInfo(&buf);

	if (errors > 0)
	{
		elog(WARNING, "some rows were not loaded due to errors.");
		return E_PG_USER;
	}
	else
		return 0;				/* succeeded without errors */
}
/*
 * Report a CONTEXT line for bogus JSON input.
 *
 * lex->token_terminator must be set to identify the spot where we detected
 * the error.  Note that lex->token_start might be NULL, in case we recognized
 * error at EOF.
 *
 * The return value isn't meaningful, but we make it non-void so that this
 * can be invoked inside ereport().
 */
static int
report_json_context(JsonLexContext *lex)
{
	const char *context_start;
	const char *context_end;
	const char *line_start;
	int			line_number;
	char	   *ctxt;
	int			ctxtlen;
	const char *prefix;
	const char *suffix;

	/* Choose boundaries for the part of the input we will display */
	context_start = lex->input;
	context_end = lex->token_terminator;
	line_start = context_start;
	line_number = 1;
	for (;;)
	{
		/* Always advance over newlines */
		if (context_start < context_end && *context_start == '\n')
		{
			context_start++;
			line_start = context_start;
			line_number++;
			continue;
		}
		/* Otherwise, done as soon as we are close enough to context_end */
		if (context_end - context_start < 50)
			break;
		/* Advance to next multibyte character (never split one) */
		if (IS_HIGHBIT_SET(*context_start))
			context_start += pg_mblen(context_start);
		else
			context_start++;
	}

	/*
	 * We add "..." to indicate that the excerpt doesn't start at the
	 * beginning of the line ... but if we're within 3 characters of the
	 * beginning of the line, we might as well just show the whole line.
	 */
	if (context_start - line_start <= 3)
		context_start = line_start;

	/* Get a null-terminated copy of the data to present */
	ctxtlen = context_end - context_start;
	ctxt = palloc(ctxtlen + 1);
	memcpy(ctxt, context_start, ctxtlen);
	ctxt[ctxtlen] = '\0';

	/*
	 * Show the context, prefixing "..." if not starting at start of line, and
	 * suffixing "..." if not ending at end of line.
	 */
	prefix = (context_start > line_start) ? "..." : "";
	suffix = (lex->token_type != JSON_TOKEN_END &&
			  context_end - lex->input < lex->input_length &&
			  *context_end != '\n' && *context_end != '\r') ? "..." : "";

	return errcontext("JSON data, line %d: %s%s%s",
					  line_number, prefix, ctxt, suffix);
}
/* * local code ---> UTF8 * * iso: input local string (need not be null-terminated). * utf: pointer to the output area (must be large enough!) * map: the conversion map. * cmap: the conversion map for combined characters. * (optional) * size1: the size of the conversion map. * size2: the size of the conversion map for combined characters * (optional) * encoding: the PG identifier for the local encoding. * len: length of input string. */ void LocalToUtf(const unsigned char *iso, unsigned char *utf, const pg_local_to_utf *map, const pg_local_to_utf_combined *cmap, int size1, int size2, int encoding, int len) { unsigned int iiso; int l; pg_local_to_utf *p; pg_local_to_utf_combined *cp; if (!PG_VALID_ENCODING(encoding)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid encoding number: %d", encoding))); for (; len > 0; len -= l) { /* "break" cases all represent errors */ if (*iso == '\0') break; if (!IS_HIGHBIT_SET(*iso)) { /* ASCII case is easy */ *utf++ = *iso++; l = 1; continue; } l = pg_encoding_verifymb(encoding, (const char *) iso, len); if (l < 0) break; if (l == 1) iiso = *iso++; else if (l == 2) { iiso = *iso++ << 8; iiso |= *iso++; } else if (l == 3) { iiso = *iso++ << 16; iiso |= *iso++ << 8; iiso |= *iso++; } else if (l == 4) { iiso = *iso++ << 24; iiso |= *iso++ << 16; iiso |= *iso++ << 8; iiso |= *iso++; } p = bsearch(&iiso, map, size1, sizeof(pg_local_to_utf), compare2); if (p == NULL) { /* * not found in the ordinary map. 
if there's a combined character * map, try with it */ if (cmap) { cp = bsearch(&iiso, cmap, size2, sizeof(pg_local_to_utf_combined), compare4); if (cp) { if (cp->utf1 & 0xff000000) *utf++ = cp->utf1 >> 24; if (cp->utf1 & 0x00ff0000) *utf++ = (cp->utf1 & 0x00ff0000) >> 16; if (cp->utf1 & 0x0000ff00) *utf++ = (cp->utf1 & 0x0000ff00) >> 8; if (cp->utf1 & 0x000000ff) *utf++ = cp->utf1 & 0x000000ff; if (cp->utf2 & 0xff000000) *utf++ = cp->utf2 >> 24; if (cp->utf2 & 0x00ff0000) *utf++ = (cp->utf2 & 0x00ff0000) >> 16; if (cp->utf2 & 0x0000ff00) *utf++ = (cp->utf2 & 0x0000ff00) >> 8; if (cp->utf2 & 0x000000ff) *utf++ = cp->utf2 & 0x000000ff; continue; } } report_untranslatable_char(encoding, PG_UTF8, (const char *) (iso - l), len); } else { if (p->utf & 0xff000000) *utf++ = p->utf >> 24; if (p->utf & 0x00ff0000) *utf++ = (p->utf & 0x00ff0000) >> 16; if (p->utf & 0x0000ff00) *utf++ = (p->utf & 0x0000ff00) >> 8; if (p->utf & 0x000000ff) *utf++ = p->utf & 0x000000ff; } }
/*
 * Output an attribute to text
 * This takes portions of the code of CopyAttributeOutText
 *
 * Appends the value of 'string' to 'buf' in COPY text format: control
 * characters and the delimiter are backslash-escaped, and the string is
 * transcoded to the client encoding first when necessary.
 */
static void
attribute_out_text(StringInfo buf, char *string)
{
	char	   *ptr;
	char		c;
	char	   *start;
	char		delimc = COPYOPS_DELIMITER;
	bool		need_transcoding,
				encoding_embeds_ascii;
	int			file_encoding = pg_get_client_encoding();

	need_transcoding = (file_encoding != GetDatabaseEncoding() ||
						pg_database_encoding_max_length() > 1);
	encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(file_encoding);

	if (need_transcoding)
		ptr = pg_server_to_any(string, strlen(string), file_encoding);
	else
		ptr = string;

	/*
	 * We have to grovel through the string searching for control characters
	 * and instances of the delimiter character.  In most cases, though, these
	 * are infrequent.  To avoid overhead from calling CopySendData once per
	 * character, we dump out all characters between escaped characters in a
	 * single call.  The loop invariant is that the data from "start" to "ptr"
	 * can be sent literally, but hasn't yet been.
	 *
	 * We can skip pg_encoding_mblen() overhead when encoding is safe, because
	 * in valid backend encodings, extra bytes of a multibyte character never
	 * look like ASCII.  This loop is sufficiently performance-critical that
	 * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
	 * of the normal safe-encoding path.
	 */
	if (encoding_embeds_ascii)
	{
		start = ptr;
		while ((c = *ptr) != '\0')
		{
			if ((unsigned char) c < (unsigned char) 0x20)
			{
				/*
				 * \r and \n must be escaped, the others are traditional. We
				 * prefer to dump these using the C-like notation, rather than
				 * a backslash and the literal character, because it makes the
				 * dump file a bit more proof against Microsoftish data
				 * mangling.
				 */
				switch (c)
				{
					case '\b':
						c = 'b';
						break;
					case '\f':
						c = 'f';
						break;
					case '\n':
						c = 'n';
						break;
					case '\r':
						c = 'r';
						break;
					case '\t':
						c = 't';
						break;
					case '\v':
						c = 'v';
						break;
					default:
						/* If it's the delimiter, must backslash it */
						if (c == delimc)
							break;
						/* All ASCII control chars are length 1 */
						ptr++;
						continue;		/* fall to end of loop */
				}
				/* if we get here, we need to convert the control char */
				DUMPSOFAR();
				appendStringInfoCharMacro(buf, '\\');
				appendStringInfoCharMacro(buf, c);
				start = ++ptr;
			}
			else if (c == '\\' || c == delimc)
			{
				DUMPSOFAR();
				appendStringInfoCharMacro(buf, '\\');
				appendStringInfoCharMacro(buf, c);
				start = ++ptr;
			}
			else if (IS_HIGHBIT_SET(c))
				ptr += pg_encoding_mblen(file_encoding, ptr);
			else
				ptr++;
		}
	}
	else
	{
		/* safe-encoding copy of the loop above, minus the multibyte test */
		start = ptr;
		while ((c = *ptr) != '\0')
		{
			if ((unsigned char) c < (unsigned char) 0x20)
			{
				/*
				 * \r and \n must be escaped, the others are traditional. We
				 * prefer to dump these using the C-like notation, rather than
				 * a backslash and the literal character, because it makes the
				 * dump file a bit more proof against Microsoftish data
				 * mangling.
				 */
				switch (c)
				{
					case '\b':
						c = 'b';
						break;
					case '\f':
						c = 'f';
						break;
					case '\n':
						c = 'n';
						break;
					case '\r':
						c = 'r';
						break;
					case '\t':
						c = 't';
						break;
					case '\v':
						c = 'v';
						break;
					default:
						/* If it's the delimiter, must backslash it */
						if (c == delimc)
							break;
						/* All ASCII control chars are length 1 */
						ptr++;
						continue;		/* fall to end of loop */
				}
				/* if we get here, we need to convert the control char */
				DUMPSOFAR();
				appendStringInfoCharMacro(buf, '\\');
				appendStringInfoCharMacro(buf, c);
				start = ++ptr;
			}
			else if (c == '\\' || c == delimc)
			{
				DUMPSOFAR();
				appendStringInfoCharMacro(buf, '\\');
				appendStringInfoCharMacro(buf, c);
				start = ++ptr;
			}
			else
				ptr++;
		}
	}

	DUMPSOFAR();
}
/*
 * local code ---> UTF8
 *
 * iso: input string in local encoding (need not be null-terminated)
 * len: length of input string (in bytes)
 * utf: pointer to the output area (must be large enough!)
		  (output string will be null-terminated)
 * map: conversion map for single characters
 * mapsize: number of entries in the conversion map
 * cmap: conversion map for combined characters
 *		  (optional, pass NULL if none)
 * cmapsize: number of entries in the conversion map for combined characters
 *		  (optional, pass 0 if none)
 * conv_func: algorithmic encoding conversion function
 *		  (optional, pass NULL if none)
 * encoding: PG identifier for the local encoding
 *
 * For each character, the map is consulted first; if no match, the cmap
 * (if provided) is consulted next; if still no match, the conv_func
 * (if provided) is applied.  An error is raised if no match is found.
 *
 * See pg_wchar.h for more details about the data structures used here.
 */
void
LocalToUtf(const unsigned char *iso, int len,
		   unsigned char *utf,
		   const pg_local_to_utf *map, int mapsize,
		   const pg_local_to_utf_combined *cmap, int cmapsize,
		   utf_local_conversion_func conv_func,
		   int encoding)
{
	uint32		iiso;
	int			l;
	const pg_local_to_utf *p;
	const pg_local_to_utf_combined *cp;

	if (!PG_VALID_ENCODING(encoding))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid encoding number: %d", encoding)));

	for (; len > 0; len -= l)
	{
		/* "break" cases all represent errors */
		if (*iso == '\0')
			break;

		if (!IS_HIGHBIT_SET(*iso))
		{
			/* ASCII case is easy, assume it's one-to-one conversion */
			*utf++ = *iso++;
			l = 1;
			continue;
		}

		l = pg_encoding_verifymb(encoding, (const char *) iso, len);
		if (l < 0)
			break;

		/* collect coded char of length l into a single uint32 key */
		if (l == 1)
			iiso = *iso++;
		else if (l == 2)
		{
			iiso = *iso++ << 8;
			iiso |= *iso++;
		}
		else if (l == 3)
		{
			iiso = *iso++ << 16;
			iiso |= *iso++ << 8;
			iiso |= *iso++;
		}
		else if (l == 4)
		{
			iiso = *iso++ << 24;
			iiso |= *iso++ << 16;
			iiso |= *iso++ << 8;
			iiso |= *iso++;
		}
		else
		{
			elog(ERROR, "unsupported character length %d", l);
			iiso = 0;			/* keep compiler quiet */
		}

		/* First check ordinary map */
		p = bsearch(&iiso, map, mapsize,
					sizeof(pg_local_to_utf), compare2);
		if (p)
		{
			utf = store_coded_char(utf, p->utf);
			continue;
		}

		/* If there's a combined character map, try that */
		if (cmap)
		{
			cp = bsearch(&iiso, cmap, cmapsize,
						 sizeof(pg_local_to_utf_combined), compare4);
			if (cp)
			{
				/* emit both halves of the combined character */
				utf = store_coded_char(utf, cp->utf1);
				utf = store_coded_char(utf, cp->utf2);
				continue;
			}
		}

		/* if there's a conversion function, try that */
		if (conv_func)
		{
			uint32		converted = (*conv_func) (iiso);

			if (converted)
			{
				utf = store_coded_char(utf, converted);
				continue;
			}
		}

		/* failed to translate this character */
		report_untranslatable_char(encoding, PG_UTF8,
								   (const char *) (iso - l), len);
	}

	/* if we broke out of loop early, must be invalid input */
	if (len > 0)
		report_invalid_encoding(encoding, (const char *) iso, len);

	*utf = '\0';
}
/*
 * pgfadvise_loader_file
 *
 * Walk the bitmap in 'databit': for each set bit, posix_fadvise(WILLNEED)
 * the corresponding page of 'filename' (when willneed); for each clear bit,
 * posix_fadvise(DONTNEED) it (when dontneed).  Counters and page-size info
 * are returned in *pgfloader.
 *
 * Returns 0 on success, 1 if the file cannot be opened (expected case that
 * lets the SRF caller skip the file).  A failed fstat raises ERROR.
 */
static int
pgfadvise_loader_file(char *filename,
					  bool willneed, bool dontneed, VarBit *databit,
					  pgfloaderStruct *pgfloader)
{
	bits8	   *sp;
	int			bitlen;
	bits8		x;
	int			i,
				k;

	/*
	 * We use the AllocateFile(2) provided by PostgreSQL.  We're going to
	 * close it ourselves even if PostgreSQL close it anyway at transaction
	 * end.
	 */
	FILE	   *fp;
	int			fd;
	struct stat st;

	/*
	 * OS things : Page size
	 */
	pgfloader->pageSize = sysconf(_SC_PAGESIZE);

	/*
	 * we count the action we perform; both are theoretical : we don't know
	 * if the page was or was not in memory when we call posix_fadvise
	 */
	pgfloader->pagesLoaded = 0;
	pgfloader->pagesUnloaded = 0;

	/*
	 * Fopen and fstat file
	 * fd will be provided to posix_fadvise
	 * if there is no file, just return 1, it is expected to leave the SRF
	 */
	fp = AllocateFile(filename, "rb");
	if (fp == NULL)
		return 1;
	fd = fileno(fp);
	if (fstat(fd, &st) == -1)
	{
		FreeFile(fp);
		elog(ERROR, "pgfadvise_loader: Can not stat object file: %s",
			 filename);
		return 2;				/* not reached: elog(ERROR) does not return */
	}

	elog(DEBUG1, "pgfadvise_loader: working on %s", filename);

	bitlen = VARBITLEN(databit);
	sp = VARBITS(databit);
	/* process whole bytes of the bitmap, high bit first within each byte */
	for (i = 0; i < bitlen - BITS_PER_BYTE; i += BITS_PER_BYTE, sp++)
	{
		x = *sp;
		/* Is this bit set ? */
		for (k = 0; k < BITS_PER_BYTE; k++)
		{
			if (IS_HIGHBIT_SET(x))
			{
				if (willneed)
				{
					(void) posix_fadvise(fd,
										 ((i + k) * pgfloader->pageSize),
										 pgfloader->pageSize,
										 POSIX_FADV_WILLNEED);
					pgfloader->pagesLoaded++;
				}
			}
			else if (dontneed)
			{
				(void) posix_fadvise(fd,
									 ((i + k) * pgfloader->pageSize),
									 pgfloader->pageSize,
									 POSIX_FADV_DONTNEED);
				pgfloader->pagesUnloaded++;
			}
			x <<= 1;
		}
	}

	/*
	 * XXX this copy/paste of code to finish to walk the bits is not pretty
	 */
	if (i < bitlen)
	{
		/* process the last (possibly partial) byte */
		x = *sp;
		for (k = i; k < bitlen; k++)
		{
			if (IS_HIGHBIT_SET(x))
			{
				if (willneed)
				{
					(void) posix_fadvise(fd,
										 (k * pgfloader->pageSize),
										 pgfloader->pageSize,
										 POSIX_FADV_WILLNEED);
					pgfloader->pagesLoaded++;
				}
			}
			else if (dontneed)
			{
				(void) posix_fadvise(fd,
									 (k * pgfloader->pageSize),
									 pgfloader->pageSize,
									 POSIX_FADV_DONTNEED);
				pgfloader->pagesUnloaded++;
			}
			x <<= 1;
		}
	}
	FreeFile(fp);

	/*
	 * OS things : Pages free
	 */
	pgfloader->pagesFree = sysconf(_SC_AVPHYS_PAGES);

	return 0;
}
/*
 * Verify mbstr to make sure that it is validly encoded in the specified
 * encoding.
 *
 * mbstr is not necessarily zero terminated; length of mbstr is
 * specified by len.
 *
 * If OK, return length of string in the encoding.
 * If a problem is found, return -1 when noError is
 * true; when noError is false, ereport() a descriptive message.
 */
int
pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
{
	mbverifier	verifier;
	int			chars = 0;

	Assert(PG_VALID_ENCODING(encoding));

	/*
	 * In single-byte encodings, we need only reject nulls (\0).
	 */
	if (pg_encoding_max_length(encoding) <= 1)
	{
		const char *nul = memchr(mbstr, 0, len);

		if (nul == NULL)
			return len;
		if (noError)
			return -1;
		report_invalid_encoding(encoding, nul, 1);
	}

	/* fetch function pointer just once */
	verifier = pg_wchar_table[encoding].mbverify;

	for (; len > 0; chars++)
	{
		int			chlen;

		if (!IS_HIGHBIT_SET(*mbstr))
		{
			/* fast path for ASCII-subset characters */
			if (*mbstr != '\0')
			{
				mbstr++;
				len--;
				continue;
			}
			/* embedded NUL is never valid */
			if (noError)
				return -1;
			report_invalid_encoding(encoding, mbstr, len);
		}

		chlen = (*verifier) ((const unsigned char *) mbstr, len);
		if (chlen < 0)
		{
			if (noError)
				return -1;
			report_invalid_encoding(encoding, mbstr, len);
		}
		mbstr += chlen;
		len -= chlen;
	}
	return chars;
}