/*
 * Main loop of walsender process.
 *
 * Repeatedly sends WAL to the attached standby until either a clean
 * shutdown is requested or a send failure occurs.  Never returns
 * normally: all exits go through proc_exit() (or exit() if the
 * postmaster dies).  The int return type exists only to silence the
 * compiler.
 */
static int
WalSndLoop(void)
{
	char	   *output_message;
	bool		caughtup = false;

	/*
	 * Allocate buffer that will be used for each output message.  We do this
	 * just once to reduce palloc overhead.  The buffer must be made large
	 * enough for maximum-sized messages.
	 */
	output_message = palloc(1 + sizeof(WalDataMessageHeader) + MAX_SEND_SIZE);

	/*
	 * Allocate buffer that will be used for processing reply messages.  As
	 * above, do this just once to reduce palloc overhead.
	 */
	initStringInfo(&reply_message);

	/* Initialize the last reply timestamp */
	last_reply_timestamp = GetCurrentTimestamp();

	/* Loop forever, unless we get an error */
	for (;;)
	{
		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 *
		 * NOTE(review): this call site passes an argument while
		 * WalSndHandshake elsewhere in this file calls PostmasterIsAlive()
		 * with none — confirm which signature this tree's postmaster.h
		 * actually declares.
		 */
		if (!PostmasterIsAlive(true))
			exit(1);

		/* Process any requests or signals received recently */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
			/* timeout/sync-rep settings may have changed */
			SyncRepInitConfig();
		}

		/* Normal exit from the walsender is here */
		if (walsender_shutdown_requested)
		{
			/* Inform the standby that XLOG streaming was done */
			pq_puttextmessage('C', "COPY 0");
			pq_flush();

			proc_exit(0);
		}

		/*
		 * If we don't have any pending data in the output buffer, try to send
		 * some more.
		 */
		if (!pq_is_send_pending())
		{
			XLogSend(output_message, &caughtup);

			/*
			 * Even if we wrote all the WAL that was available when we started
			 * sending, more might have arrived while we were sending this
			 * batch.  We had the latch set while sending, so we have not
			 * received any signals from that time.  Let's arm the latch
			 * again, and after that check that we're still up-to-date.
			 */
			if (caughtup && !pq_is_send_pending())
			{
				ResetLatch(&MyWalSnd->latch);

				XLogSend(output_message, &caughtup);
			}
		}

		/* Flush pending output to the client */
		if (pq_flush_if_writable() != 0)
			break;

		/*
		 * When SIGUSR2 arrives, we send any outstanding logs up to the
		 * shutdown checkpoint record (i.e., the latest record) and exit.
		 */
		if (walsender_ready_to_stop && !pq_is_send_pending())
		{
			XLogSend(output_message, &caughtup);
			ProcessRepliesIfAny();
			/* fully caught up and drained: arrange the clean-exit path */
			if (caughtup && !pq_is_send_pending())
				walsender_shutdown_requested = true;
		}

		/*
		 * Sleep only when there is nothing useful to do right now: either we
		 * are caught up, or the output buffer is full and we must wait for
		 * the socket to become writable.  Skip the sleep if a signal arrived
		 * meanwhile.
		 */
		if ((caughtup || pq_is_send_pending()) && !got_SIGHUP && !walsender_shutdown_requested)
		{
			TimestampTz finish_time = 0;
			long		sleeptime;

			/* Reschedule replication timeout */
			if (replication_timeout > 0)
			{
				long		secs;
				int			usecs;

				finish_time = TimestampTzPlusMilliseconds(last_reply_timestamp,
														  replication_timeout);
				TimestampDifference(GetCurrentTimestamp(),
									finish_time, &secs, &usecs);
				sleeptime = secs * 1000 + usecs / 1000;
				/* never sleep past the normal polling interval */
				if (WalSndDelay < sleeptime)
					sleeptime = WalSndDelay;
			}
			else
			{
				/*
				 * XXX: Without timeout, we don't really need the periodic
				 * wakeups anymore, WaitLatchOrSocket should reliably wake up
				 * as soon as something interesting happens.
				 */
				sleeptime = WalSndDelay;
			}

			/*
			 * Sleep until the latch is set, the socket is readable, or (only
			 * if output is pending) writable, or the timeout expires.
			 */
			WaitLatchOrSocket(&MyWalSnd->latch, MyProcPort->sock,
							  true, pq_is_send_pending(),
							  sleeptime);

			/* Check for replication timeout */
			if (replication_timeout > 0 &&
				GetCurrentTimestamp() >= finish_time)
			{
				/*
				 * Since typically expiration of replication timeout means
				 * communication problem, we don't send the error message to
				 * the standby.
				 */
				ereport(COMMERROR,
						(errmsg("terminating walsender process due to replication timeout")));
				break;
			}
		}

		/*
		 * If we're in catchup state, see if it's time to move to streaming.
		 * This is an important state change for users, since before this
		 * point data loss might occur if the primary dies and we need to
		 * failover to the standby.  The state change is also important for
		 * synchronous replication, since commits that started to wait at
		 * that point might wait for some time.
		 */
		if (MyWalSnd->state == WALSNDSTATE_CATCHUP && caughtup)
		{
			ereport(DEBUG1,
					(errmsg("standby \"%s\" has now caught up with primary",
							application_name)));
			WalSndSetState(WALSNDSTATE_STREAMING);
		}

		ProcessRepliesIfAny();
	}

	/*
	 * Get here on send failure.  Clean up and exit.
	 *
	 * Reset whereToSendOutput to prevent ereport from attempting to send any
	 * more messages to the standby.
	 */
	if (whereToSendOutput == DestRemote)
		whereToSendOutput = DestNone;

	proc_exit(0);
	return 1;					/* keep the compiler quiet */
}
/* * Validate the generic options given to a FOREIGN DATA WRAPPER, SERVER, * USER MAPPING or FOREIGN TABLE that uses file_fdw. * * Raise an ERROR if the option or its value is considered invalid. */ Datum file_fdw_validator(PG_FUNCTION_ARGS) { List *options_list = untransformRelOptions(PG_GETARG_DATUM(0)); Oid catalog = PG_GETARG_OID(1); char *filename = NULL; DefElem *force_not_null = NULL; List *other_options = NIL; ListCell *cell; /* * Only superusers are allowed to set options of a file_fdw foreign table. * This is because the filename is one of those options, and we don't want * non-superusers to be able to determine which file gets read. * * Putting this sort of permissions check in a validator is a bit of a * crock, but there doesn't seem to be any other place that can enforce * the check more cleanly. * * Note that the valid_options[] array disallows setting filename at any * options level other than foreign table --- otherwise there'd still be a * security hole. */ if (catalog == ForeignTableRelationId && !superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("only superuser can change options of a file_fdw foreign table"))); /* * Check that only options supported by file_fdw, and allowed for the * current object type, are given. */ foreach(cell, options_list) { DefElem *def = (DefElem *) lfirst(cell); if (!is_valid_option(def->defname, catalog)) { struct FileFdwOption *opt; StringInfoData buf; /* * Unknown option specified, complain about it. Provide a hint * with list of valid options for the object. */ initStringInfo(&buf); for (opt = valid_options; opt->optname; opt++) { if (catalog == opt->optcontext) appendStringInfo(&buf, "%s%s", (buf.len > 0) ? 
", " : "", opt->optname); } ereport(ERROR, (errcode(ERRCODE_FDW_INVALID_OPTION_NAME), errmsg("invalid option \"%s\"", def->defname), errhint("Valid options in this context are: %s", buf.data))); } /* * Separate out filename and force_not_null, since ProcessCopyOptions * won't accept them. (force_not_null only comes in a boolean * per-column flavor here.) */ if (strcmp(def->defname, "filename") == 0) { if (filename) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"))); filename = defGetString(def); } else if (strcmp(def->defname, "force_not_null") == 0) { if (force_not_null) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options"))); force_not_null = def; /* Don't care what the value is, as long as it's a legal boolean */ (void) defGetBoolean(def); } else other_options = lappend(other_options, def); }
/*
 * format_operator_internal - convert operator OID to "opr_name(args)"
 *
 * Workhorse behind the regoperatorout-style formatting.  If force_qualify
 * is true, the operator name and both argument types are always
 * schema-qualified; otherwise qualification is added only when
 * regoperatorin would not find the operator unqualified (i.e. it is not
 * visible in the search path).  Prefix/postfix operators print the
 * missing operand as NONE.  An OID that matches no pg_operator entry is
 * rendered numerically.  The result is a palloc'd string.
 */
static char *
format_operator_internal(Oid operator_oid, bool force_qualify)
{
	HeapTuple	opertup;
	char	   *result;

	/* XXX no support here for bootstrap mode */
	opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(operator_oid));

	if (!HeapTupleIsValid(opertup))
	{
		/*
		 * If OID doesn't match any pg_operator entry, return it numerically
		 */
		result = (char *) palloc(NAMEDATALEN);
		snprintf(result, NAMEDATALEN, "%u", operator_oid);
		return result;
	}

	{
		Form_pg_operator oprForm = (Form_pg_operator) GETSTRUCT(opertup);
		StringInfoData outbuf;

		initStringInfo(&outbuf);

		/*
		 * Qualify when the caller insists, or when the operator would not
		 * be found (given the right args) by regoperatorin as-is.
		 */
		if (force_qualify || !OperatorIsVisible(operator_oid))
		{
			char	   *schemaName = get_namespace_name(oprForm->oprnamespace);

			appendStringInfo(&outbuf, "%s.", quote_identifier(schemaName));
		}

		appendStringInfo(&outbuf, "%s(", NameStr(oprForm->oprname));

		/* left operand type, or NONE for a prefix operator */
		if (OidIsValid(oprForm->oprleft))
			appendStringInfo(&outbuf, "%s,",
							 force_qualify ?
							 format_type_be_qualified(oprForm->oprleft) :
							 format_type_be(oprForm->oprleft));
		else
			appendStringInfoString(&outbuf, "NONE,");

		/* right operand type, or NONE for a postfix operator */
		if (OidIsValid(oprForm->oprright))
			appendStringInfo(&outbuf, "%s)",
							 force_qualify ?
							 format_type_be_qualified(oprForm->oprright) :
							 format_type_be(oprForm->oprright));
		else
			appendStringInfoString(&outbuf, "NONE)");

		result = outbuf.data;
	}

	ReleaseSysCache(opertup);
	return result;
}
/*
 * record_in - input routine for any composite type.
 *
 * Parses a row literal of the form "(val1,val2,...)" into a tuple of the
 * composite type identified by the second argument.  Supports the usual
 * record-literal quoting rules: double quotes around a field, doubled
 * quotes and backslash escapes inside it, and a completely empty field
 * meaning NULL.  Errors with 22P02 (invalid text representation) on any
 * syntax problem.
 */
Datum
record_in(PG_FUNCTION_ARGS)
{
	char	   *string = PG_GETARG_CSTRING(0);
	Oid			tupType = PG_GETARG_OID(1);

#ifdef NOT_USED
	int32		typmod = PG_GETARG_INT32(2);
#endif
	HeapTupleHeader result;
	int32		tupTypmod;
	TupleDesc	tupdesc;
	HeapTuple	tuple;
	RecordIOData *my_extra;
	bool		needComma = false;
	int			ncolumns;
	int			i;
	char	   *ptr;
	Datum	   *values;
	bool	   *nulls;
	StringInfoData buf;		/* scratch space for one de-quoted field value */

	/*
	 * Use the passed type unless it's RECORD; we can't support input of
	 * anonymous types, mainly because there's no good way to figure out
	 * which anonymous type is wanted.  Note that for RECORD, what we'll
	 * probably actually get is RECORD's typelem, ie, zero.
	 */
	if (tupType == InvalidOid || tupType == RECORDOID)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("input of anonymous composite types is not implemented")));
	tupTypmod = -1;				/* for all non-anonymous types */

	/*
	 * Look up the rowtype's tuple descriptor.  The composite type's OID
	 * (pg_type.oid) ends up stored in the returned tuple's header
	 * (DatumTupleFields), which is why that OID must be preserved across
	 * binary upgrades.
	 */
	tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
	ncolumns = tupdesc->natts;

	/*
	 * We arrange to look up the needed I/O info just once per series of
	 * calls, assuming the record type doesn't change underneath us.
	 */
	my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
	if (my_extra == NULL || my_extra->ncolumns != ncolumns)
	{
		/* first call, or column count changed: (re)allocate the cache */
		fcinfo->flinfo->fn_extra =
			MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
							   sizeof(RecordIOData) - sizeof(ColumnIOData)
							   + ncolumns * sizeof(ColumnIOData));
		my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
		my_extra->record_type = InvalidOid;
		my_extra->record_typmod = 0;
	}

	if (my_extra->record_type != tupType ||
		my_extra->record_typmod != tupTypmod)
	{
		/* new record type: reset all per-column lookup info */
		MemSet(my_extra, 0,
			   sizeof(RecordIOData) - sizeof(ColumnIOData)
			   + ncolumns * sizeof(ColumnIOData));
		my_extra->record_type = tupType;
		my_extra->record_typmod = tupTypmod;
		my_extra->ncolumns = ncolumns;
	}

	values = (Datum *) palloc(ncolumns * sizeof(Datum));
	nulls = (bool *) palloc(ncolumns * sizeof(bool));

	/*
	 * Scan the string.  We use "buf" to accumulate the de-quoted data for
	 * each column, which is then fed to the appropriate input converter.
	 */
	ptr = string;
	/* Allow leading whitespace */
	while (*ptr && isspace((unsigned char) *ptr))
		ptr++;
	if (*ptr++ != '(')
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
				 errmsg("malformed record literal: \"%s\"", string),
				 errdetail("Missing left parenthesis.")));

	initStringInfo(&buf);

	for (i = 0; i < ncolumns; i++)
	{
		ColumnIOData *column_info = &my_extra->columns[i];
		Oid			column_type = tupdesc->attrs[i]->atttypid;
		char	   *column_data;

		/* Ignore dropped columns in datatype, but fill with nulls */
		if (tupdesc->attrs[i]->attisdropped)
		{
			values[i] = (Datum) 0;
			nulls[i] = true;
			continue;
		}

		if (needComma)
		{
			/* Skip comma that separates prior field from this one */
			if (*ptr == ',')
				ptr++;
			else
				/* *ptr must be ')' */
				ereport(ERROR,
						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
						 errmsg("malformed record literal: \"%s\"", string),
						 errdetail("Too few columns.")));
		}

		/* Check for null: completely empty input means null */
		if (*ptr == ',' || *ptr == ')')
		{
			column_data = NULL;
			nulls[i] = true;
		}
		else
		{
			/* Extract string for this column */
			bool		inquote = false;

			resetStringInfo(&buf);
			/* consume until an unquoted ',' or ')' terminates the field */
			while (inquote || !(*ptr == ',' || *ptr == ')'))
			{
				char		ch = *ptr++;

				if (ch == '\0')
					ereport(ERROR,
							(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
							 errmsg("malformed record literal: \"%s\"",
									string),
							 errdetail("Unexpected end of input.")));
				if (ch == '\\')
				{
					/* backslash escapes the next character verbatim */
					if (*ptr == '\0')
						ereport(ERROR,
								(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
								 errmsg("malformed record literal: \"%s\"",
										string),
								 errdetail("Unexpected end of input.")));
					appendStringInfoChar(&buf, *ptr++);
				}
				else if (ch == '\"')
				{
					if (!inquote)
						inquote = true;
					else if (*ptr == '\"')
					{
						/* doubled quote within quote sequence */
						appendStringInfoChar(&buf, *ptr++);
					}
					else
						inquote = false;
				}
				else
					appendStringInfoChar(&buf, ch);
			}

			column_data = buf.data;
			nulls[i] = false;
		}

		/*
		 * Convert the column value
		 */
		if (column_info->column_type != column_type)
		{
			/* (re)fill the cached input-function info for this column */
			getTypeInputInfo(column_type,
							 &column_info->typiofunc,
							 &column_info->typioparam);
			fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
						  fcinfo->flinfo->fn_mcxt);
			column_info->column_type = column_type;
		}

		values[i] = InputFunctionCall(&column_info->proc,
									  column_data,
									  column_info->typioparam,
									  tupdesc->attrs[i]->atttypmod);

		/*
		 * Prep for next column
		 */
		needComma = true;
	}

	if (*ptr++ != ')')
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
				 errmsg("malformed record literal: \"%s\"", string),
				 errdetail("Too many columns.")));
	/* Allow trailing whitespace */
	while (*ptr && isspace((unsigned char) *ptr))
		ptr++;
	if (*ptr)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
				 errmsg("malformed record literal: \"%s\"", string),
				 errdetail("Junk after right parenthesis.")));

	tuple = heap_form_tuple(tupdesc, values, nulls);

	/*
	 * We cannot return tuple->t_data because heap_form_tuple allocates it as
	 * part of a larger chunk, and our caller may expect to be able to pfree
	 * our result.  So must copy the info into a new palloc chunk.
	 */
	result = (HeapTupleHeader) palloc(tuple->t_len);
	memcpy(result, tuple->t_data, tuple->t_len);

	heap_freetuple(tuple);
	pfree(buf.data);
	pfree(values);
	pfree(nulls);
	ReleaseTupleDesc(tupdesc);

	PG_RETURN_HEAPTUPLEHEADER(result);
}
/*
 * format_operator - converts operator OID to "opr_name(args)"
 *
 * This exports the useful functionality of regoperatorout for use in
 * other backend modules.  The operator name is schema-qualified only if
 * it would not otherwise be visible to regoperatorin; a missing operand
 * of a prefix/postfix operator prints as NONE.  An OID with no
 * pg_operator entry is rendered numerically.  The result is a palloc'd
 * string.
 */
char *
format_operator(Oid operator_oid)
{
	char	   *result = NULL;
	HeapTuple	opertup;
	cqContext  *pcqCtx;

	pcqCtx = caql_beginscan(
			NULL,
			cql("SELECT * FROM pg_operator "
				" WHERE oid = :1 ",
				ObjectIdGetDatum(operator_oid)));

	opertup = caql_getnext(pcqCtx);

	/* XXX XXX select oprname, oprnamespace from pg_operator */

	if (!HeapTupleIsValid(opertup))
	{
		/*
		 * If OID doesn't match any pg_operator entry, return it numerically
		 */
		result = (char *) palloc(NAMEDATALEN);
		snprintf(result, NAMEDATALEN, "%u", operator_oid);
	}
	else
	{
		Form_pg_operator oprForm = (Form_pg_operator) GETSTRUCT(opertup);
		StringInfoData sbuf;

		/* XXX no support here for bootstrap mode */
		initStringInfo(&sbuf);

		/*
		 * Would this oper be found (given the right args) by regoperatorin?
		 * If not, we need to qualify it.
		 */
		if (!OperatorIsVisible(operator_oid))
		{
			char	   *schemaName = get_namespace_name(oprForm->oprnamespace);

			appendStringInfo(&sbuf, "%s.", quote_identifier(schemaName));
		}

		appendStringInfo(&sbuf, "%s(", NameStr(oprForm->oprname));

		/* left operand type, or NONE for a prefix operator */
		if (OidIsValid(oprForm->oprleft))
			appendStringInfo(&sbuf, "%s,", format_type_be(oprForm->oprleft));
		else
			appendStringInfo(&sbuf, "NONE,");

		/* right operand type, or NONE for a postfix operator */
		if (OidIsValid(oprForm->oprright))
			appendStringInfo(&sbuf, "%s)", format_type_be(oprForm->oprright));
		else
			appendStringInfo(&sbuf, "NONE)");

		result = sbuf.data;
	}

	caql_endscan(pcqCtx);

	return result;
}
/*
 * Execute commands from walreceiver, until we enter streaming mode.
 *
 * Runs the pre-streaming handshake loop: reads frontend-protocol
 * messages and dispatches 'Q' (query) messages to
 * HandleReplicationCommand until one of them starts replication.
 * Exits the process on client disconnect ('X' or EOF) or postmaster
 * death; any other message type is a FATAL protocol violation.
 */
static void
WalSndHandshake(void)
{
	StringInfoData input_message;
	bool		replication_started = false;

	initStringInfo(&input_message);

	while (!replication_started)
	{
		int			firstchar;

		WalSndSetState(WALSNDSTATE_STARTUP);
		set_ps_display("idle", false);

		/* Wait for a command to arrive */
		firstchar = pq_getbyte();

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 *
		 * NOTE(review): called with no argument here, but WalSndLoop in
		 * this file calls PostmasterIsAlive(true) — confirm which
		 * signature this tree declares.
		 */
		if (!PostmasterIsAlive())
			exit(1);

		/*
		 * Check for any other interesting events that happened while we
		 * slept.
		 */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
		}

		if (firstchar != EOF)
		{
			/*
			 * Read the message contents.  This is expected to be done
			 * without blocking because we've been able to get message type
			 * code.
			 */
			if (pq_getmessage(&input_message, 0))
				firstchar = EOF;	/* suitable message already logged */
		}

		/* Handle the very limited subset of commands expected in this phase */
		switch (firstchar)
		{
			case 'Q':			/* Query message */
				{
					const char *query_string;

					query_string = pq_getmsgstring(&input_message);
					pq_getmsgend(&input_message);

					/* a successful replication command ends the handshake */
					if (HandleReplicationCommand(query_string))
						replication_started = true;
				}
				break;

			case 'X':
				/* standby is closing the connection */
				proc_exit(0);

			case EOF:
				/* standby disconnected unexpectedly */
				ereport(COMMERROR,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("unexpected EOF on standby connection")));
				proc_exit(0);

			default:
				ereport(FATAL,
						(errcode(ERRCODE_PROTOCOL_VIOLATION),
						 errmsg("invalid standby handshake message type %d",
								firstchar)));
		}
	}
}
/*
 * pg_shard_get_tableschemadef_string returns the definition of a given table.
 * This definition includes table's schema, default column values, not null
 * and check constraints.  The definition does not include constraints that
 * trigger index creations; specifically, unique and primary key constraints
 * are excluded.
 *
 * The returned string is a complete CREATE TABLE (or CREATE FOREIGN TABLE)
 * statement, palloc'd in the current memory context.  Errors if the OID
 * names anything other than a regular or foreign table.
 */
static char *
pg_shard_get_tableschemadef_string(Oid tableRelationId)
{
	Relation	relation = NULL;
	char	   *relationName = NULL;
	char		relationKind = 0;
	TupleDesc	tupleDescriptor = NULL;
	TupleConstr *tupleConstraints = NULL;
	int			attributeIndex = 0;
	bool		firstAttributePrinted = false;
	AttrNumber	defaultValueIndex = 0;
	AttrNumber	constraintIndex = 0;
	AttrNumber	constraintCount = 0;
	StringInfoData buffer = { NULL, 0, 0, 0 };

	/*
	 * Instead of retrieving values from system catalogs as other functions in
	 * ruleutils.c do, we follow an unusual approach here: we open the
	 * relation, and fetch the relation's tuple descriptor.  We do this
	 * because the tuple descriptor already contains information harnessed
	 * from pg_attrdef, pg_attribute, pg_constraint, and pg_class; and
	 * therefore using the descriptor saves us from a lot of additional work.
	 */
	relation = relation_open(tableRelationId, AccessShareLock);
	relationName = generate_relation_name(tableRelationId);

	relationKind = relation->rd_rel->relkind;
	if (relationKind != RELKIND_RELATION && relationKind != RELKIND_FOREIGN_TABLE)
	{
		ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE),
						errmsg("%s is not a regular or foreign table", relationName)));
	}

	initStringInfo(&buffer);
	if (relationKind == RELKIND_RELATION)
	{
		appendStringInfo(&buffer, "CREATE TABLE %s (", relationName);
	}
	else
	{
		appendStringInfo(&buffer, "CREATE FOREIGN TABLE %s (", relationName);
	}

	/*
	 * Iterate over the table's columns.  If a particular column is not
	 * dropped and is not inherited from another table, print the column's
	 * name and its formatted type.
	 */
	tupleDescriptor = RelationGetDescr(relation);
	tupleConstraints = tupleDescriptor->constr;

	for (attributeIndex = 0; attributeIndex < tupleDescriptor->natts;
		 attributeIndex++)
	{
		Form_pg_attribute attributeForm = tupleDescriptor->attrs[attributeIndex];

		if (!attributeForm->attisdropped && attributeForm->attinhcount == 0)
		{
			const char *attributeName = NULL;
			const char *attributeTypeName = NULL;

			if (firstAttributePrinted)
			{
				appendStringInfoString(&buffer, ", ");
			}
			firstAttributePrinted = true;

			attributeName = NameStr(attributeForm->attname);
			appendStringInfo(&buffer, "%s ", quote_identifier(attributeName));

			attributeTypeName = format_type_with_typemod(attributeForm->atttypid,
														 attributeForm->atttypmod);
			appendStringInfoString(&buffer, attributeTypeName);

			/* if this column has a default value, append the default value */
			if (attributeForm->atthasdef)
			{
				AttrDefault *defaultValueList = NULL;
				AttrDefault *defaultValue = NULL;

				Node	   *defaultNode = NULL;
				List	   *defaultContext = NULL;
				char	   *defaultString = NULL;

				Assert(tupleConstraints != NULL);

				defaultValueList = tupleConstraints->defval;
				Assert(defaultValueList != NULL);

				/*
				 * defval entries are ordered to match the columns that have
				 * defaults, so we walk them with our own running index.
				 */
				defaultValue = &(defaultValueList[defaultValueIndex]);
				defaultValueIndex++;

				Assert(defaultValue->adnum == (attributeIndex + 1));
				Assert(defaultValueIndex <= tupleConstraints->num_defval);

				/* convert expression to node tree, and prepare deparse context */
				defaultNode = (Node *) stringToNode(defaultValue->adbin);
				defaultContext = deparse_context_for(relationName, tableRelationId);

				/* deparse default value string */
				defaultString = deparse_expression(defaultNode, defaultContext,
												   false, false);

				appendStringInfo(&buffer, " DEFAULT %s", defaultString);
			}

			/* if this column has a not null constraint, append the constraint */
			if (attributeForm->attnotnull)
			{
				appendStringInfoString(&buffer, " NOT NULL");
			}
		}
	}

	/*
	 * Now check if the table has any constraints.  If it does, set the number
	 * of check constraints here.  Then iterate over all check constraints and
	 * print them.
	 */
	if (tupleConstraints != NULL)
	{
		constraintCount = tupleConstraints->num_check;
	}

	for (constraintIndex = 0; constraintIndex < constraintCount;
		 constraintIndex++)
	{
		ConstrCheck *checkConstraintList = tupleConstraints->check;
		ConstrCheck *checkConstraint = &(checkConstraintList[constraintIndex]);

		Node	   *checkNode = NULL;
		List	   *checkContext = NULL;
		char	   *checkString = NULL;

		/* if an attribute or constraint has been printed, format properly */
		if (firstAttributePrinted || constraintIndex > 0)
		{
			appendStringInfoString(&buffer, ", ");
		}

		appendStringInfo(&buffer, "CONSTRAINT %s CHECK ",
						 quote_identifier(checkConstraint->ccname));

		/* convert expression to node tree, and prepare deparse context */
		checkNode = (Node *) stringToNode(checkConstraint->ccbin);
		checkContext = deparse_context_for(relationName, tableRelationId);

		/* deparse check constraint string */
		checkString = deparse_expression(checkNode, checkContext, false, false);

		appendStringInfoString(&buffer, checkString);
	}

	/* close create table's outer parentheses */
	appendStringInfoString(&buffer, ")");

	/*
	 * If the relation is a foreign table, append the server name and options
	 * to the create table statement.
	 */
	if (relationKind == RELKIND_FOREIGN_TABLE)
	{
		ForeignTable *foreignTable = GetForeignTable(tableRelationId);
		ForeignServer *foreignServer = GetForeignServer(foreignTable->serverid);

		char	   *serverName = foreignServer->servername;
		appendStringInfo(&buffer, " SERVER %s", quote_identifier(serverName));
		AppendOptionListToString(&buffer, foreignTable->options);
	}

	relation_close(relation, AccessShareLock);

	return (buffer.data);
}
/*
 * Format a nodeToString output for display on a terminal.
 *
 * The result is a palloc'd string.
 *
 * This version tries to indent intelligently: '{' increases the logical
 * indent level, '}' decreases it, and line breaks are forced around
 * braces, after ')' (unless another ')' follows), and before ':'.
 * Physical indentation is capped at MAXINDENT columns and each line is
 * at most LINELEN characters.
 */
char *
pretty_format_node_dump(const char *dump)
{
#define INDENTSTOP	3
#define MAXINDENT	60
#define LINELEN		78
	char		line[LINELEN + 1];	/* current output line being assembled */
	StringInfoData str;
	int			indentLev;
	int			indentDist;
	int			i;				/* read position within dump */
	int			j;				/* write position within line */

	initStringInfo(&str);
	indentLev = 0;				/* logical indent level */
	indentDist = 0;				/* physical indent distance */

	i = 0;
	for (;;)
	{
		/* start each output line with the current indentation */
		for (j = 0; j < indentDist; j++)
			line[j] = ' ';
		for (; j < LINELEN && dump[i] != '\0'; i++, j++)
		{
			line[j] = dump[i];
			switch (line[j])
			{
				case '}':
					if (j != indentDist)
					{
						/* print data before the } */
						line[j] = '\0';
						appendStringInfo(&str, "%s\n", line);
					}

					/* print the } at indentDist */
					line[indentDist] = '}';
					line[indentDist + 1] = '\0';
					appendStringInfo(&str, "%s\n", line);

					/* outdent */
					if (indentLev > 0)
					{
						indentLev--;
						indentDist = Min(indentLev * INDENTSTOP, MAXINDENT);
					}
					/* j will equal indentDist on next loop iteration */
					j = indentDist - 1;

					/* suppress whitespace just after } */
					while (dump[i + 1] == ' ')
						i++;
					break;
				case ')':
					/* force line break after ), unless another ) follows */
					if (dump[i + 1] != ')')
					{
						line[j + 1] = '\0';
						appendStringInfo(&str, "%s\n", line);
						/* restart the line at the current indent */
						j = indentDist - 1;
						while (dump[i + 1] == ' ')
							i++;
					}
					break;
				case '{':
					/* force line break before { */
					if (j != indentDist)
					{
						line[j] = '\0';
						appendStringInfo(&str, "%s\n", line);
					}
					/* indent */
					indentLev++;
					indentDist = Min(indentLev * INDENTSTOP, MAXINDENT);
					/* re-emit the { at the new indent level */
					for (j = 0; j < indentDist; j++)
						line[j] = ' ';
					line[j] = dump[i];
					break;
				case ':':
					/* force line break before : */
					if (j != indentDist)
					{
						line[j] = '\0';
						appendStringInfo(&str, "%s\n", line);
					}
					/* re-emit the : at the current indent */
					j = indentDist;
					line[j] = dump[i];
					break;
			}
		}
		line[j] = '\0';
		if (dump[i] == '\0')
			break;				/* input exhausted; flush remainder below */
		appendStringInfo(&str, "%s\n", line);
	}
	if (j > 0)
		appendStringInfo(&str, "%s\n", line);
	return str.data;
#undef INDENTSTOP
#undef MAXINDENT
#undef LINELEN
}
/*
 * Deserialize a HeapTuple's data from a byte-array.
 *
 * This code is based on the binary input handling functions in copy.c.
 *
 * Reads the serialized null-flags and attribute values from serialTup and
 * reconstructs a HeapTuple described by pSerInfo->tupdesc.  A fast path
 * decodes a fixed set of common type OIDs inline; every other type goes
 * through the attribute's binary receive function.  All scratch
 * allocations happen in s_tupSerMemCtxt, which is reset before returning;
 * the resulting tuple itself is built in the caller's memory context.
 */
HeapTuple
DeserializeTuple(SerTupInfo * pSerInfo, StringInfo serialTup)
{
	MemoryContext oldCtxt;
	TupleDesc	tupdesc;
	HeapTuple	htup;
	int			natts;
	SerAttrInfo *attrInfo;
	uint32		attr_size;

	int			i;
	StringInfoData attr_data;
	bool		fHandled;

	AssertArg(pSerInfo != NULL);
	AssertArg(serialTup != NULL);

	tupdesc = pSerInfo->tupdesc;
	natts = tupdesc->natts;

	/*
	 * Flip to our tuple-serialization memory-context, to speed up memory
	 * reclamation operations.
	 */
	AssertState(s_tupSerMemCtxt != NULL);
	oldCtxt = MemoryContextSwitchTo(s_tupSerMemCtxt);

	/* Receive nulls character-array. */
	pq_copymsgbytes(serialTup, pSerInfo->nulls, natts);
	skipPadding(serialTup);

	/* Deserialize the non-NULL attributes of this tuple */
	initStringInfo(&attr_data);
	for (i = 0; i < natts; ++i)
	{
		attrInfo = pSerInfo->myinfo + i;

		if (pSerInfo->nulls[i])	/* NULL field. */
		{
			pSerInfo->values[i] = (Datum) 0;
			continue;
		}

		/*
		 * Assume that the data's output will be handled by the special IO
		 * code, and if not then we can handle it the slow way.
		 */
		fHandled = true;
		switch (attrInfo->atttypid)
		{
			case INT4OID:
				pSerInfo->values[i] = Int32GetDatum(stringInfoGetInt32(serialTup));
				break;

			case CHAROID:
				pSerInfo->values[i] = CharGetDatum(pq_getmsgbyte(serialTup));
				skipPadding(serialTup);
				break;

			case BPCHAROID:
			case VARCHAROID:
			case INT2VECTOROID: /* postgres serialization logic broken, use our own */
			case OIDVECTOROID: /* postgres serialization logic broken, use our own */
			case ANYARRAYOID:
				{
					text	   *pText;
					int			textSize;

					textSize = stringInfoGetInt32(serialTup);

#ifdef TUPSER_SCRATCH_SPACE
					/* reuse the preallocated scratch buffer when it fits */
					if (textSize + VARHDRSZ <= attrInfo->varlen_scratch_size)
						pText = (text *) attrInfo->pv_varlen_scratch;
					else
						pText = (text *) palloc(textSize + VARHDRSZ);
#else
					pText = (text *) palloc(textSize + VARHDRSZ);
#endif

					SET_VARSIZE(pText, textSize + VARHDRSZ);
					pq_copymsgbytes(serialTup, VARDATA(pText), textSize);
					skipPadding(serialTup);
					pSerInfo->values[i] = PointerGetDatum(pText);
					break;
				}

			case DATEOID:
				{
					/*
					 * TODO: I would LIKE to do something more efficient, but
					 * DateADT is not strictly limited to 4 bytes by its
					 * definition.
					 */
					DateADT		date;

					pq_copymsgbytes(serialTup, (char *) &date, sizeof(DateADT));
					skipPadding(serialTup);
					pSerInfo->values[i] = DateADTGetDatum(date);
					break;
				}

			case NUMERICOID:
				{
					/*
					 * Treat the numeric as a varlena variable, and just push
					 * the whole shebang to the output-buffer.  We don't care
					 * about the guts of the numeric.
					 */
					Numeric		num;
					int			numSize;

					numSize = stringInfoGetInt32(serialTup);

#ifdef TUPSER_SCRATCH_SPACE
					/* reuse the preallocated scratch buffer when it fits */
					if (numSize + VARHDRSZ <= attrInfo->varlen_scratch_size)
						num = (Numeric) attrInfo->pv_varlen_scratch;
					else
						num = (Numeric) palloc(numSize + VARHDRSZ);
#else
					num = (Numeric) palloc(numSize + VARHDRSZ);
#endif

					SET_VARSIZE(num, numSize + VARHDRSZ);
					pq_copymsgbytes(serialTup, VARDATA(num), numSize);
					skipPadding(serialTup);
					pSerInfo->values[i] = NumericGetDatum(num);
					break;
				}

			case ACLITEMOID:
				{
					/*
					 * The serialized aclitem text may contain '*' characters;
					 * strip them before handing the string to aclitemin.
					 * NOTE(review): starsfree is not explicitly pfree'd here
					 * (only inputstring is) — it is reclaimed only by the
					 * MemoryContextReset at the end of this function.
					 */
					int			aclSize,
								k,
								cnt;
					char	   *inputstring,
							   *starsfree;

					aclSize = stringInfoGetInt32(serialTup);
					inputstring = (char *) palloc(aclSize + 1);
					starsfree = (char *) palloc(aclSize + 1);
					cnt = 0;

					pq_copymsgbytes(serialTup, inputstring, aclSize);
					skipPadding(serialTup);
					inputstring[aclSize] = '\0';
					for (k = 0; k < aclSize; k++)
					{
						if (inputstring[k] != '*')
						{
							starsfree[cnt] = inputstring[k];
							cnt++;
						}
					}
					starsfree[cnt] = '\0';

					pSerInfo->values[i] = DirectFunctionCall1(aclitemin,
										   CStringGetDatum(starsfree));
					pfree(inputstring);
					break;
				}

			case 210:			/* presumably the OID of the "smgr" type —
								 * TODO confirm and use a named constant */
				{
					int			strsize;
					char	   *smgrstr;

					strsize = stringInfoGetInt32(serialTup);
					smgrstr = (char *) palloc(strsize + 1);
					pq_copymsgbytes(serialTup, smgrstr, strsize);
					skipPadding(serialTup);
					smgrstr[strsize] = '\0';

					pSerInfo->values[i] = DirectFunctionCall1(smgrin,
										   CStringGetDatum(smgrstr));
					break;
				}

			default:
				fHandled = false;
		}

		if (fHandled)
			continue;

		/*
		 * Slow path: read a length-prefixed byte blob and feed it to the
		 * attribute's binary receive function.
		 */
		attr_size = stringInfoGetInt32(serialTup);

		/* reset attr_data to empty, and load raw data into it */
		attr_data.len = 0;
		attr_data.data[0] = '\0';
		attr_data.cursor = 0;

		appendBinaryStringInfo(&attr_data,
							   pq_getmsgbytes(serialTup, attr_size), attr_size);
		skipPadding(serialTup);

		/*
		 * Call the attribute type's binary input converter.
		 */
		if (attrInfo->recv_finfo.fn_nargs == 1)
			pSerInfo->values[i] = FunctionCall1(&attrInfo->recv_finfo,
												PointerGetDatum(&attr_data));
		else if (attrInfo->recv_finfo.fn_nargs == 2)
			pSerInfo->values[i] = FunctionCall2(&attrInfo->recv_finfo,
												PointerGetDatum(&attr_data),
												ObjectIdGetDatum(attrInfo->recv_typio_param));
		else if (attrInfo->recv_finfo.fn_nargs == 3)
			pSerInfo->values[i] = FunctionCall3(&attrInfo->recv_finfo,
												PointerGetDatum(&attr_data),
												ObjectIdGetDatum(attrInfo->recv_typio_param),
												Int32GetDatum(tupdesc->attrs[i]->atttypmod));
		else
		{
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
					 errmsg("Conversion function takes %d args", attrInfo->recv_finfo.fn_nargs)));
		}

		/* Trouble if it didn't eat the whole buffer */
		if (attr_data.cursor != attr_data.len)
		{
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
					 errmsg("incorrect binary data format")));
		}
	}

	/*
	 * Construct the tuple from the Datums and nulls values.  NOTE: Switch
	 * out of our temporary context before we form the tuple!
	 */
	MemoryContextSwitchTo(oldCtxt);
	htup = heap_form_tuple(tupdesc, pSerInfo->values, pSerInfo->nulls);

	/* reclaim all scratch allocations made above in one sweep */
	MemoryContextReset(s_tupSerMemCtxt);

	/* All done.  Return the result. */
	return htup;
}
/*
 * ExportSnapshot
 *		Export the snapshot to a file so that other backends can import it.
 *		Returns the token (the file name) that can be used to import this
 *		snapshot.
 *
 * The snapshot is copied into TopTransactionContext and pseudo-registered
 * so its xmin remains honored for the rest of the transaction; the text
 * serialization (rigid "fieldname:value" lines, parsed by ImportSnapshot)
 * is written to a ".tmp" file and then renamed into place so importers
 * never see a partial file.
 */
static char *
ExportSnapshot(Snapshot snapshot)
{
	TransactionId topXid;
	TransactionId *children;
	int			nchildren;
	int			addTopXid;
	StringInfoData buf;
	FILE	   *f;
	int			i;
	MemoryContext oldcxt;
	char		path[MAXPGPATH];
	char		pathtmp[MAXPGPATH];

	/*
	 * It's tempting to call RequireTransactionChain here, since it's not
	 * very useful to export a snapshot that will disappear immediately
	 * afterwards.  However, we haven't got enough information to do that,
	 * since we don't know if we're at top level or not.  For example, we
	 * could be inside a plpgsql function that is going to fire off other
	 * transactions via dblink.  Rather than disallow perfectly legitimate
	 * usages, don't make a check.
	 *
	 * Also note that we don't make any restriction on the transaction's
	 * isolation level; however, importers must check the level if they are
	 * serializable.
	 */

	/*
	 * This will assign a transaction ID if we do not yet have one.
	 */
	topXid = GetTopTransactionId();

	/*
	 * We cannot export a snapshot from a subtransaction because there's no
	 * easy way for importers to verify that the same subtransaction is still
	 * running.
	 */
	if (IsSubTransaction())
		ereport(ERROR,
				(errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
				 errmsg("cannot export a snapshot from a subtransaction")));

	/*
	 * We do however allow previous committed subtransactions to exist.
	 * Importers of the snapshot must see them as still running, so get their
	 * XIDs to add them to the snapshot.
	 */
	nchildren = xactGetCommittedChildren(&children);

	/*
	 * Copy the snapshot into TopTransactionContext, add it to the
	 * exportedSnapshots list, and mark it pseudo-registered.  We do this to
	 * ensure that the snapshot's xmin is honored for the rest of the
	 * transaction.  (Right now, because SnapshotResetXmin is so stupid, this
	 * is overkill; but later we might make that routine smarter.)
	 */
	snapshot = CopySnapshot(snapshot);

	oldcxt = MemoryContextSwitchTo(TopTransactionContext);
	exportedSnapshots = lappend(exportedSnapshots, snapshot);
	MemoryContextSwitchTo(oldcxt);

	/* pseudo-registration: bump counts by hand rather than via the API */
	snapshot->regd_count++;
	RegisteredSnapshots++;

	/*
	 * Fill buf with a text serialization of the snapshot, plus
	 * identification data about this transaction.  The format expected by
	 * ImportSnapshot is pretty rigid: each line must be fieldname:value.
	 */
	initStringInfo(&buf);

	appendStringInfo(&buf, "xid:%u\n", topXid);
	appendStringInfo(&buf, "dbid:%u\n", MyDatabaseId);
	appendStringInfo(&buf, "iso:%d\n", XactIsoLevel);
	appendStringInfo(&buf, "ro:%d\n", XactReadOnly);

	appendStringInfo(&buf, "xmin:%u\n", snapshot->xmin);
	appendStringInfo(&buf, "xmax:%u\n", snapshot->xmax);

	/*
	 * We must include our own top transaction ID in the top-xid data, since
	 * by definition we will still be running when the importing transaction
	 * adopts the snapshot, but GetSnapshotData never includes our own XID in
	 * the snapshot.  (There must, therefore, be enough room to add it.)
	 *
	 * However, it could be that our topXid is after the xmax, in which case
	 * we shouldn't include it because xip[] members are expected to be
	 * before xmax.  (We need not make the same check for subxip[] members,
	 * see snapshot.h.)
	 */
	addTopXid = TransactionIdPrecedes(topXid, snapshot->xmax) ? 1 : 0;
	appendStringInfo(&buf, "xcnt:%d\n", snapshot->xcnt + addTopXid);
	for (i = 0; i < snapshot->xcnt; i++)
		appendStringInfo(&buf, "xip:%u\n", snapshot->xip[i]);
	if (addTopXid)
		appendStringInfo(&buf, "xip:%u\n", topXid);

	/*
	 * Similarly, we add our subcommitted child XIDs to the subxid data.
	 * Here, we have to cope with possible overflow.
	 */
	if (snapshot->suboverflowed ||
		snapshot->subxcnt + nchildren > GetMaxSnapshotSubxidCount())
		appendStringInfoString(&buf, "sof:1\n");
	else
	{
		appendStringInfoString(&buf, "sof:0\n");
		appendStringInfo(&buf, "sxcnt:%d\n", snapshot->subxcnt + nchildren);
		for (i = 0; i < snapshot->subxcnt; i++)
			appendStringInfo(&buf, "sxp:%u\n", snapshot->subxip[i]);
		for (i = 0; i < nchildren; i++)
			appendStringInfo(&buf, "sxp:%u\n", children[i]);
	}
	appendStringInfo(&buf, "rec:%u\n", snapshot->takenDuringRecovery);

	/*
	 * Now write the text representation into a file.  We first write to a
	 * ".tmp" filename, and rename to final filename if no error.  This
	 * ensures that no other backend can read an incomplete file
	 * (ImportSnapshot won't allow it because of its valid-characters check).
	 */
	XactExportFilePath(pathtmp, topXid, list_length(exportedSnapshots), ".tmp");
	if (!(f = AllocateFile(pathtmp, PG_BINARY_W)))
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not create file \"%s\": %m", pathtmp)));

	if (fwrite(buf.data, buf.len, 1, f) != 1)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\": %m", pathtmp)));

	/* no fsync() since file need not survive a system crash */

	/* FreeFile flushes; a failure here is still a write failure */
	if (FreeFile(f))
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not write to file \"%s\": %m", pathtmp)));

	/*
	 * Now that we have written everything into a .tmp file, rename the file
	 * to remove the .tmp suffix.
	 */
	XactExportFilePath(path, topXid, list_length(exportedSnapshots), "");

	if (rename(pathtmp, path) < 0)
		ereport(ERROR,
				(errcode_for_file_access(),
				 errmsg("could not rename file \"%s\" to \"%s\": %m",
						pathtmp, path)));

	/*
	 * The basename of the file is what we return from pg_export_snapshot().
	 * It's already in path in a textual format and we know that the path
	 * starts with SNAPSHOT_EXPORT_DIR.  Skip over the prefix and the slash
	 * and pstrdup it so as not to return the address of a local variable.
	 */
	return pstrdup(path + strlen(SNAPSHOT_EXPORT_DIR) + 1);
}
/*
 * tuple_to_cstring
 *		Render a tuple as composite-literal text, e.g. (v1,"two words",,v3).
 *
 * Dropped attributes are skipped entirely; NULL attributes emit nothing
 * between their delimiting commas.  Values containing quotes, backslashes,
 * parentheses, commas, or whitespace (and empty strings) are double-quoted,
 * with embedded quotes and backslashes doubled.  The result is palloc'd in
 * the current memory context; caller may pfree it.
 */
char *
tuple_to_cstring(TupleDesc tupdesc, HeapTuple tuple)
{
	StringInfoData out;
	int			natts = tupdesc->natts;
	Datum	   *attvals;
	bool	   *attnulls;
	bool		first = true;
	int			att;

	attvals = (Datum *) palloc(natts * sizeof(Datum));
	attnulls = (bool *) palloc(natts * sizeof(bool));

	/* Split the tuple into per-attribute datums and null flags */
	heap_deform_tuple(tuple, tupdesc, attvals, attnulls);

	initStringInfo(&out);

	for (att = 0; att < natts; att++)
	{
		Oid			outfunc;
		bool		isvarlena;
		char	   *repr;
		const char *p;
		bool		need_quotes;

		/* Dropped columns contribute neither a value nor a comma */
		if (tupdesc->attrs[att]->attisdropped)
			continue;

		if (!first)
			appendStringInfoChar(&out, ',');
		first = false;

		/* A NULL is represented by an empty position */
		if (attnulls[att])
			continue;

		getTypeOutputInfo(tupdesc->attrs[att]->atttypid,
						  &outfunc, &isvarlena);
		repr = OidOutputFunctionCall(outfunc, attvals[att]);

		/*
		 * Decide whether this value must be quoted: always for the empty
		 * string, otherwise when it contains any delimiter-significant
		 * character.
		 */
		need_quotes = (repr[0] == '\0');
		for (p = repr; !need_quotes && *p; p++)
		{
			char		ch = *p;

			if (ch == '"' || ch == '\\' ||
				ch == '(' || ch == ')' || ch == ',' ||
				isspace((unsigned char) ch))
				need_quotes = true;
		}

		if (need_quotes)
			appendStringInfoChar(&out, '"');
		for (p = repr; *p; p++)
		{
			char		ch = *p;

			/* Double any embedded quote or backslash */
			if (ch == '"' || ch == '\\')
				appendStringInfoChar(&out, ch);
			appendStringInfoChar(&out, ch);
		}
		if (need_quotes)
			appendStringInfoChar(&out, '"');
	}

	pfree(attvals);
	pfree(attnulls);

	return out.data;
}
/* * Validate the generic options given to a FOREIGN DATA WRAPPER, SERVER, * USER MAPPING or FOREIGN TABLE that uses file_fdw. * * Raise an ERROR if the option or its value is considered invalid. */ Datum redis_fdw_validator(PG_FUNCTION_ARGS) { List *options_list = untransformRelOptions(PG_GETARG_DATUM(0)); Oid catalog = PG_GETARG_OID(1); char *svr_address = NULL; int svr_port = 0; char *svr_password = NULL; int svr_database = 0; redis_table_type tabletype = PG_REDIS_SCALAR_TABLE; char *tablekeyprefix = NULL; char *tablekeyset = NULL; ListCell *cell; #ifdef DEBUG elog(NOTICE, "redis_fdw_validator"); #endif /* * Check that only options supported by redis_fdw, and allowed for the * current object type, are given. */ foreach(cell, options_list) { DefElem *def = (DefElem *) lfirst(cell); if (!redisIsValidOption(def->defname, catalog)) { struct RedisFdwOption *opt; StringInfoData buf; /* * Unknown option specified, complain about it. Provide a hint * with list of valid options for the object. */ initStringInfo(&buf); for (opt = valid_options; opt->optname; opt++) { if (catalog == opt->optcontext) appendStringInfo(&buf, "%s%s", (buf.len > 0) ? ", " : "", opt->optname); } ereport(ERROR, (errcode(ERRCODE_FDW_INVALID_OPTION_NAME), errmsg("invalid option \"%s\"", def->defname), errhint("Valid options in this context are: %s", buf.len ? 
buf.data : "<none>") )); } if (strcmp(def->defname, "address") == 0) { if (svr_address) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options: " "address (%s)", defGetString(def)) )); svr_address = defGetString(def); } else if (strcmp(def->defname, "port") == 0) { if (svr_port) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options: port (%s)", defGetString(def)) )); svr_port = atoi(defGetString(def)); } if (strcmp(def->defname, "password") == 0) { if (svr_password) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options: password") )); svr_password = defGetString(def); } else if (strcmp(def->defname, "database") == 0) { if (svr_database) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options: database " "(%s)", defGetString(def)) )); svr_database = atoi(defGetString(def)); } else if (strcmp(def->defname, "tablekeyprefix") == 0) { if (tablekeyset) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting options: tablekeyset(%s) and " "tablekeyprefix (%s)", tablekeyset, defGetString(def)) )); if (tablekeyprefix) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options: " "tablekeyprefix (%s)", defGetString(def)) )); tablekeyprefix = defGetString(def); } else if (strcmp(def->defname, "tablekeyset") == 0) { if (tablekeyprefix) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting options: tablekeyprefix (%s) and " "tablekeyset (%s)", tablekeyprefix, defGetString(def)) )); if (tablekeyset) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options: " "tablekeyset (%s)", defGetString(def)) )); tablekeyset = defGetString(def); } else if (strcmp(def->defname, "tabletype") == 0) { char *typeval = defGetString(def); if (tabletype) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("conflicting or redundant options: tabletype " "(%s)", typeval))); if 
(strcmp(typeval,"hash") == 0) tabletype = PG_REDIS_HASH_TABLE; else if (strcmp(typeval,"list") == 0) tabletype = PG_REDIS_LIST_TABLE; else if (strcmp(typeval,"set") == 0) tabletype = PG_REDIS_SET_TABLE; else if (strcmp(typeval,"zset") == 0) tabletype = PG_REDIS_ZSET_TABLE; else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("invalid tabletype (%s) - must be hash, " "list, set or zset", typeval))); } }
/*
 * bqarr_in -- text-input function for the boolean-query type
 *
 * Parses the source string into reverse-polish (postfix) form via makepol,
 * then packs the resulting item list into a palloc'd QUERYTYPE node and
 * returns it.  Raises ERROR on an empty query.
 */
Datum
bqarr_in(PG_FUNCTION_ARGS)
{
	char	   *buf = (char *) PG_GETARG_POINTER(0);
	WORKSTATE	state;
	int4		i;
	QUERYTYPE  *query;
	int4		commonlen;
	ITEM	   *ptr;
	NODE	   *tmp;
	int4		pos = 0;

#ifdef BS_DEBUG
	StringInfoData pbuf;
#endif

	/* Initialize the parser state over the input string */
	state.buf = buf;
	state.state = WAITOPERAND;
	state.count = 0;
	state.num = 0;
	state.str = NULL;

	/* make polish notation (postfix, but in reverse order) */
	makepol(&state);
	if (!state.num)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("empty query")));

	/* Allocate the varlena result sized for state.num items */
	commonlen = COMPUTESIZE(state.num);
	query = (QUERYTYPE *) palloc(commonlen);
	SET_VARSIZE(query, commonlen);
	query->size = state.num;
	ptr = GETQUERY(query);

	/*
	 * Drain the parser's linked list into the items array, freeing list
	 * cells as we go.  The list holds the expression in reverse order,
	 * hence the backwards index.
	 */
	for (i = state.num - 1; i >= 0; i--)
	{
		ptr[i].type = state.str->type;
		ptr[i].val = state.str->val;
		tmp = state.str->next;
		pfree(state.str);
		state.str = tmp;
	}

	/*
	 * Resolve operand linkage (ptr[].left, used for operators below),
	 * starting from the last item.
	 */
	pos = query->size - 1;
	findoprnd(ptr, &pos);

#ifdef BS_DEBUG
	/* Dump the parsed representation to the log at DEBUG3 */
	initStringInfo(&pbuf);
	for (i = 0; i < query->size; i++)
	{
		if (ptr[i].type == OPR)
			appendStringInfo(&pbuf, "%c(%d) ", ptr[i].val, ptr[i].left);
		else
			appendStringInfo(&pbuf, "%d ", ptr[i].val);
	}
	elog(DEBUG3, "POR: %s", pbuf.data);
	pfree(pbuf.data);
#endif

	PG_RETURN_POINTER(query);
}
/* * Process data received through the syslogger pipe. * * This routine interprets the log pipe protocol which sends log messages as * (hopefully atomic) chunks - such chunks are detected and reassembled here. * * The protocol has a header that starts with two nul bytes, then has a 16 bit * length, the pid of the sending process, and a flag to indicate if it is * the last chunk in a message. Incomplete chunks are saved until we read some * more, and non-final chunks are accumulated until we get the final chunk. * * All of this is to avoid 2 problems: * . partial messages being written to logfiles (messes rotation), and * . messages from different backends being interleaved (messages garbled). * * Any non-protocol messages are written out directly. These should only come * from non-PostgreSQL sources, however (e.g. third party libraries writing to * stderr). * * logbuffer is the data input buffer, and *bytes_in_logbuffer is the number * of bytes present. On exit, any not-yet-eaten data is left-justified in * logbuffer, and *bytes_in_logbuffer is updated. */ static void process_pipe_input(char *logbuffer, int *bytes_in_logbuffer) { char *cursor = logbuffer; int count = *bytes_in_logbuffer; int dest = LOG_DESTINATION_STDERR; /* While we have enough for a header, process data... */ while (count >= (int) sizeof(PipeProtoHeader)) { PipeProtoHeader p; int chunklen; /* Do we have a valid header? */ memcpy(&p, cursor, sizeof(PipeProtoHeader)); if (p.nuls[0] == '\0' && p.nuls[1] == '\0' && p.len > 0 && p.len <= PIPE_MAX_PAYLOAD && p.pid != 0 && (p.is_last == 't' || p.is_last == 'f' || p.is_last == 'T' || p.is_last == 'F')) { List *buffer_list; ListCell *cell; save_buffer *existing_slot = NULL, *free_slot = NULL; StringInfo str; chunklen = PIPE_HEADER_SIZE + p.len; /* Fall out of loop if we don't have the whole chunk yet */ if (count < chunklen) break; dest = (p.is_last == 'T' || p.is_last == 'F') ? 
LOG_DESTINATION_CSVLOG : LOG_DESTINATION_STDERR; /* Locate any existing buffer for this source pid */ buffer_list = buffer_lists[p.pid % NBUFFER_LISTS]; foreach(cell, buffer_list) { save_buffer *buf = (save_buffer *) lfirst(cell); if (buf->pid == p.pid) { existing_slot = buf; break; } if (buf->pid == 0 && free_slot == NULL) free_slot = buf; } if (p.is_last == 'f' || p.is_last == 'F') { /* * Save a complete non-final chunk in a per-pid buffer */ if (existing_slot != NULL) { /* Add chunk to data from preceding chunks */ str = &(existing_slot->data); appendBinaryStringInfo(str, cursor + PIPE_HEADER_SIZE, p.len); } else { /* First chunk of message, save in a new buffer */ if (free_slot == NULL) { /* * Need a free slot, but there isn't one in the list, * so create a new one and extend the list with it. */ free_slot = palloc(sizeof(save_buffer)); buffer_list = lappend(buffer_list, free_slot); buffer_lists[p.pid % NBUFFER_LISTS] = buffer_list; } free_slot->pid = p.pid; str = &(free_slot->data); initStringInfo(str); appendBinaryStringInfo(str, cursor + PIPE_HEADER_SIZE, p.len); } } else { /* * Final chunk --- add it to anything saved for that pid, and * either way write the whole thing out. */ if (existing_slot != NULL) { str = &(existing_slot->data); appendBinaryStringInfo(str, cursor + PIPE_HEADER_SIZE, p.len); write_syslogger_file(str->data, str->len, dest); /* Mark the buffer unused, and reclaim string storage */ existing_slot->pid = 0; pfree(str->data); } else { /* The whole message was one chunk, evidently. */ write_syslogger_file(cursor + PIPE_HEADER_SIZE, p.len, dest); } } /* Finished processing this chunk */ cursor += chunklen; count -= chunklen; }
/* * Check so target can accept typoid value * */ void plpgsql_check_assign_to_target_type(PLpgSQL_checkstate *cstate, Oid target_typoid, int32 target_typmod, Oid value_typoid, bool isnull) { /* the overhead UNKONWNOID --> TEXT is low */ if (target_typoid == TEXTOID && value_typoid == UNKNOWNOID) return; #if PG_VERSION_NUM < 90500 /* any used typmod enforces IO cast - performance warning for older than 9.5*/ if (target_typmod != -1) plpgsql_check_put_error(cstate, ERRCODE_DATATYPE_MISMATCH, 0, "target type has type modificator", NULL, "Usage of type modificator enforces slower IO casting.", PLPGSQL_CHECK_WARNING_PERFORMANCE, 0, NULL, NULL); #endif if (type_is_rowtype(value_typoid)) plpgsql_check_put_error(cstate, ERRCODE_DATATYPE_MISMATCH, 0, "cannot cast composite value to a scalar type", NULL, NULL, PLPGSQL_CHECK_ERROR, 0, NULL, NULL); else if (target_typoid != value_typoid && !isnull) { StringInfoData str; initStringInfo(&str); appendStringInfo(&str, "cast \"%s\" value to \"%s\" type", format_type_be(value_typoid), format_type_be(target_typoid)); /* accent warning when cast is without supported explicit casting */ if (!can_coerce_type(1, &value_typoid, &target_typoid, COERCION_EXPLICIT)) plpgsql_check_put_error(cstate, ERRCODE_DATATYPE_MISMATCH, 0, "target type is different type than source type", str.data, "There are no possible explicit coercion between those types, possibly bug!", PLPGSQL_CHECK_WARNING_OTHERS, 0, NULL, NULL); else if (!can_coerce_type(1, &value_typoid, &target_typoid, COERCION_ASSIGNMENT)) plpgsql_check_put_error(cstate, ERRCODE_DATATYPE_MISMATCH, 0, "target type is different type than source type", str.data, "The input expression type does not have an assignment cast to the target type.", PLPGSQL_CHECK_WARNING_OTHERS, 0, NULL, NULL); else { /* highly probably only performance issue */ if (!isnull) plpgsql_check_put_error(cstate, ERRCODE_DATATYPE_MISMATCH, 0, "target type is different type than source type", str.data, "Hidden casting can be a 
performance issue.", PLPGSQL_CHECK_WARNING_PERFORMANCE, 0, NULL, NULL); } pfree(str.data); } }
/* Split JVM options. The string is split on whitespace unless the
 * whitespace is found within a string or is escaped by backslash. A
 * backslash escaped quote is not considered a string delimiter.
 *
 * Each completed option is handed to JVMOptList_add; non-option text
 * separated by whitespace is joined with single spaces onto the current
 * option.  A '-' following whitespace (with text pending) starts a new
 * option.
 *
 * Fix: isspace() must be given an unsigned char value; passing a plain
 * (possibly signed) char that is negative is undefined behavior
 * (CERT STR37-C).  The casts below go through unsigned char instead of
 * the previous (int) casts.
 */
static void
addUserJVMOptions(JVMOptList* optList)
{
	const char* cp = pljava_vmoptions;

	if(cp != NULL)
	{
		StringInfoData buf;
		char quote = 0;		/* current string delimiter, or 0 when outside */
		char c;

		initStringInfo(&buf);
		for(;;)
		{
			c = *cp++;
			switch(c)
			{
				case 0:
					/* End of input: fall through to flush any pending option */
					break;

				case '"':
				case '\'':
					/* Toggle quoting; the quote character itself is kept */
					if(quote == c)
						quote = 0;
					else
						quote = c;
					appendStringInfoChar(&buf, c);
					continue;

				case '\\':
					appendStringInfoChar(&buf, '\\');
					c = *cp++; /* Interpret next character verbatim */
					if(c == 0)
						break;
					appendStringInfoChar(&buf, c);
					continue;

				default:
					if(quote == 0 && isspace((unsigned char)c))
					{
						/* Skip the whole run of unquoted whitespace */
						while((c = *cp++) != 0)
						{
							if(!isspace((unsigned char)c))
								break;
						}

						if(c == 0)
							break;

						if(c != '-')
							appendStringInfoChar(&buf, ' ');
						else if(buf.len > 0)
						{
							/* Whitespace followed by '-' triggers new
							 * option declaration.
							 */
							JVMOptList_add(optList, buf.data, 0, true);
							buf.len = 0;
							buf.data[0] = 0;
						}
					}
					appendStringInfoChar(&buf, c);
					continue;
			}
			break;
		}
		/* Flush the final pending option, if any */
		if(buf.len > 0)
			JVMOptList_add(optList, buf.data, 0, true);
		pfree(buf.data);
	}
}
/*
 * ProcSleep -- put a process to sleep on the specified lock
 *
 * Caller must have set MyProc->heldLocks to reflect locks already held
 * on the lockable object by this process (under all XIDs).
 *
 * The lock table's partition lock must be held at entry, and will be held
 * at exit.
 *
 * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock).
 *
 * ASSUME: that no one will fiddle with the queue until after
 * we release the partition lock.
 *
 * NOTES: The process queue is now a priority queue for locking.
 */
int
ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
{
	LOCKMODE	lockmode = locallock->tag.mode;
	LOCK	   *lock = locallock->lock;
	PROCLOCK   *proclock = locallock->proclock;
	uint32		hashcode = locallock->hashcode;
	LWLock	   *partitionLock = LockHashPartitionLock(hashcode);
	PROC_QUEUE *waitQueue = &(lock->waitProcs);
	LOCKMASK	myHeldLocks = MyProc->heldLocks;
	bool		early_deadlock = false;
	bool		allow_autovacuum_cancel = true;
	int			myWaitStatus;
	PGPROC	   *proc;
	int			i;

	/*
	 * Determine where to add myself in the wait queue.
	 *
	 * Normally I should go at the end of the queue. However, if I already
	 * hold locks that conflict with the request of any previous waiter, put
	 * myself in the queue just in front of the first such waiter. This is not
	 * a necessary step, since deadlock detection would move me to before that
	 * waiter anyway; but it's relatively cheap to detect such a conflict
	 * immediately, and avoid delaying till deadlock timeout.
	 *
	 * Special case: if I find I should go in front of some waiter, check to
	 * see if I conflict with already-held locks or the requests before that
	 * waiter. If not, then just grant myself the requested lock immediately.
	 * This is the same as the test for immediate grant in LockAcquire, except
	 * we are only considering the part of the wait queue before my insertion
	 * point.
	 */
	if (myHeldLocks != 0)
	{
		LOCKMASK	aheadRequests = 0;

		proc = (PGPROC *) waitQueue->links.next;
		for (i = 0; i < waitQueue->size; i++)
		{
			/* Must he wait for me? */
			if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks)
			{
				/* Must I wait for him ? */
				if (lockMethodTable->conflictTab[lockmode] & proc->heldLocks)
				{
					/*
					 * Yes, so we have a deadlock. Easiest way to clean up
					 * correctly is to call RemoveFromWaitQueue(), but we
					 * can't do that until we are *on* the wait queue. So, set
					 * a flag to check below, and break out of loop. Also,
					 * record deadlock info for later message.
					 */
					RememberSimpleDeadLock(MyProc, lockmode, lock, proc);
					early_deadlock = true;
					break;
				}
				/* I must go before this waiter.  Check special case. */
				if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 &&
					LockCheckConflicts(lockMethodTable, lockmode, lock,
									   proclock) == STATUS_OK)
				{
					/* Skip the wait and just grant myself the lock. */
					GrantLock(lock, proclock, lockmode);
					GrantAwaitedLock();
					return STATUS_OK;
				}
				/* Break out of loop to put myself before him */
				break;
			}
			/* Nope, so advance to next waiter */
			aheadRequests |= LOCKBIT_ON(proc->waitLockMode);
			proc = (PGPROC *) proc->links.next;
		}

		/*
		 * If we fall out of loop normally, proc points to waitQueue head, so
		 * we will insert at tail of queue as desired.
		 */
	}
	else
	{
		/* I hold no locks, so I can't push in front of anyone. */
		proc = (PGPROC *) &(waitQueue->links);
	}

	/*
	 * Insert self into queue, ahead of the given proc (or at tail of queue).
	 */
	SHMQueueInsertBefore(&(proc->links), &(MyProc->links));
	waitQueue->size++;

	/* Advertise that this lock now has a waiter for lockmode */
	lock->waitMask |= LOCKBIT_ON(lockmode);

	/* Set up wait information in PGPROC object, too */
	MyProc->waitLock = lock;
	MyProc->waitProcLock = proclock;
	MyProc->waitLockMode = lockmode;

	/* Mark ourselves waiting; the wait loop below re-reads this volatilely */
	MyProc->waitStatus = STATUS_WAITING;

	/*
	 * If we detected deadlock, give up without waiting. This must agree with
	 * CheckDeadLock's recovery code, except that we shouldn't release the
	 * semaphore since we haven't tried to lock it yet.
	 */
	if (early_deadlock)
	{
		RemoveFromWaitQueue(MyProc, hashcode);
		return STATUS_ERROR;
	}

	/* mark that we are waiting for a lock */
	lockAwaited = locallock;

	/*
	 * Release the lock table's partition lock.
	 *
	 * NOTE: this may also cause us to exit critical-section state, possibly
	 * allowing a cancel/die interrupt to be accepted. This is OK because we
	 * have recorded the fact that we are waiting for a lock, and so
	 * LockErrorCleanup will clean up if cancel/die happens.
	 */
	LWLockRelease(partitionLock);

	/*
	 * Also, now that we will successfully clean up after an ereport, it's
	 * safe to check to see if there's a buffer pin deadlock against the
	 * Startup process. Of course, that's only necessary if we're doing Hot
	 * Standby and are not the Startup process ourselves.
	 */
	if (RecoveryInProgress() && !InRecovery)
		CheckRecoveryConflictDeadlock();

	/* Reset deadlock_state before enabling the timeout handler */
	deadlock_state = DS_NOT_YET_CHECKED;
	got_deadlock_timeout = false;

	/*
	 * Set timer so we can wake up after awhile and check for a deadlock. If a
	 * deadlock is detected, the handler releases the process's semaphore and
	 * sets MyProc->waitStatus = STATUS_ERROR, allowing us to know that we
	 * must report failure rather than success.
	 *
	 * By delaying the check until we've waited for a bit, we can avoid
	 * running the rather expensive deadlock-check code in most cases.
	 *
	 * If LockTimeout is set, also enable the timeout for that. We can save a
	 * few cycles by enabling both timeout sources in one call.
	 */
	if (LockTimeout > 0)
	{
		EnableTimeoutParams timeouts[2];

		timeouts[0].id = DEADLOCK_TIMEOUT;
		timeouts[0].type = TMPARAM_AFTER;
		timeouts[0].delay_ms = DeadlockTimeout;
		timeouts[1].id = LOCK_TIMEOUT;
		timeouts[1].type = TMPARAM_AFTER;
		timeouts[1].delay_ms = LockTimeout;
		enable_timeouts(timeouts, 2);
	}
	else
		enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);

	/*
	 * If somebody wakes us between LWLockRelease and WaitLatch, the latch
	 * will not wait. But a set latch does not necessarily mean that the lock
	 * is free now, as there are many other sources for latch sets than
	 * somebody releasing the lock.
	 *
	 * We process interrupts whenever the latch has been set, so cancel/die
	 * interrupts are processed quickly. This means we must not mind losing
	 * control to a cancel/die interrupt here. We don't, because we have no
	 * shared-state-change work to do after being granted the lock (the
	 * grantor did it all). We do have to worry about canceling the deadlock
	 * timeout and updating the locallock table, but if we lose control to an
	 * error, LockErrorCleanup will fix that up.
	 */
	do
	{
		WaitLatch(MyLatch, WL_LATCH_SET, 0);
		ResetLatch(MyLatch);

		/* check for deadlocks first, as that's probably log-worthy */
		if (got_deadlock_timeout)
		{
			CheckDeadLock();
			got_deadlock_timeout = false;
		}
		CHECK_FOR_INTERRUPTS();

		/*
		 * waitStatus could change from STATUS_WAITING to something else
		 * asynchronously. Read it just once per loop to prevent surprising
		 * behavior (such as missing log messages).
		 */
		myWaitStatus = *((volatile int *) &MyProc->waitStatus);

		/*
		 * If we are not deadlocked, but are waiting on an autovacuum-induced
		 * task, send a signal to interrupt it.
		 */
		if (deadlock_state == DS_BLOCKED_BY_AUTOVACUUM && allow_autovacuum_cancel)
		{
			PGPROC	   *autovac = GetBlockingAutoVacuumPgproc();
			PGXACT	   *autovac_pgxact = &ProcGlobal->allPgXact[autovac->pgprocno];

			LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);

			/*
			 * Only do it if the worker is not working to protect against Xid
			 * wraparound.
			 */
			if ((autovac_pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) &&
				!(autovac_pgxact->vacuumFlags & PROC_VACUUM_FOR_WRAPAROUND))
			{
				int			pid = autovac->pid;
				StringInfoData locktagbuf;
				StringInfoData logbuf;	/* errdetail for server log */

				initStringInfo(&locktagbuf);
				initStringInfo(&logbuf);
				DescribeLockTag(&locktagbuf, &lock->tag);
				appendStringInfo(&logbuf,
								 _("Process %d waits for %s on %s."),
								 MyProcPid,
								 GetLockmodeName(lock->tag.locktag_lockmethodid,
												 lockmode),
								 locktagbuf.data);

				/* release lock as quickly as possible */
				LWLockRelease(ProcArrayLock);

				ereport(LOG,
						(errmsg("sending cancel to blocking autovacuum PID %d",
								pid),
						 errdetail_log("%s", logbuf.data)));

				pfree(logbuf.data);
				pfree(locktagbuf.data);

				/* send the autovacuum worker Back to Old Kent Road */
				if (kill(pid, SIGINT) < 0)
				{
					/* Just a warning to allow multiple callers */
					ereport(WARNING,
							(errmsg("could not send signal to process %d: %m",
									pid)));
				}
			}
			else
				LWLockRelease(ProcArrayLock);

			/* prevent signal from being resent more than once */
			allow_autovacuum_cancel = false;
		}

		/*
		 * If awoken after the deadlock check interrupt has run, and
		 * log_lock_waits is on, then report about the wait.
		 */
		if (log_lock_waits && deadlock_state != DS_NOT_YET_CHECKED)
		{
			StringInfoData buf,
						lock_waiters_sbuf,
						lock_holders_sbuf;
			const char *modename;
			long		secs;
			int			usecs;
			long		msecs;
			SHM_QUEUE  *procLocks;
			PROCLOCK   *proclock;
			bool		first_holder = true,
						first_waiter = true;
			int			lockHoldersNum = 0;

			initStringInfo(&buf);
			initStringInfo(&lock_waiters_sbuf);
			initStringInfo(&lock_holders_sbuf);

			DescribeLockTag(&buf, &locallock->tag.lock);
			modename = GetLockmodeName(locallock->tag.lock.locktag_lockmethodid,
									   lockmode);
			TimestampDifference(get_timeout_start_time(DEADLOCK_TIMEOUT),
								GetCurrentTimestamp(),
								&secs, &usecs);
			msecs = secs * 1000 + usecs / 1000;
			usecs = usecs % 1000;

			/*
			 * we loop over the lock's procLocks to gather a list of all
			 * holders and waiters. Thus we will be able to provide more
			 * detailed information for lock debugging purposes.
			 *
			 * lock->procLocks contains all processes which hold or wait for
			 * this lock.
			 */
			LWLockAcquire(partitionLock, LW_SHARED);

			procLocks = &(lock->procLocks);
			proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
												 offsetof(PROCLOCK, lockLink));

			while (proclock)
			{
				/*
				 * we are a waiter if myProc->waitProcLock == proclock; we are
				 * a holder if it is NULL or something different
				 */
				if (proclock->tag.myProc->waitProcLock == proclock)
				{
					if (first_waiter)
					{
						appendStringInfo(&lock_waiters_sbuf, "%d",
										 proclock->tag.myProc->pid);
						first_waiter = false;
					}
					else
						appendStringInfo(&lock_waiters_sbuf, ", %d",
										 proclock->tag.myProc->pid);
				}
				else
				{
					if (first_holder)
					{
						appendStringInfo(&lock_holders_sbuf, "%d",
										 proclock->tag.myProc->pid);
						first_holder = false;
					}
					else
						appendStringInfo(&lock_holders_sbuf, ", %d",
										 proclock->tag.myProc->pid);

					lockHoldersNum++;
				}

				proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->lockLink,
													 offsetof(PROCLOCK, lockLink));
			}

			LWLockRelease(partitionLock);

			if (deadlock_state == DS_SOFT_DEADLOCK)
				ereport(LOG,
						(errmsg("process %d avoided deadlock for %s on %s by rearranging queue order after %ld.%03d ms",
								MyProcPid, modename, buf.data, msecs, usecs),
						 (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.",
											   "Processes holding the lock: %s. Wait queue: %s.",
											   lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data))));
			else if (deadlock_state == DS_HARD_DEADLOCK)
			{
				/*
				 * This message is a bit redundant with the error that will be
				 * reported subsequently, but in some cases the error report
				 * might not make it to the log (eg, if it's caught by an
				 * exception handler), and we want to ensure all long-wait
				 * events get logged.
				 */
				ereport(LOG,
						(errmsg("process %d detected deadlock while waiting for %s on %s after %ld.%03d ms",
								MyProcPid, modename, buf.data, msecs, usecs),
						 (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.",
											   "Processes holding the lock: %s. Wait queue: %s.",
											   lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data))));
			}

			if (myWaitStatus == STATUS_WAITING)
				ereport(LOG,
						(errmsg("process %d still waiting for %s on %s after %ld.%03d ms",
								MyProcPid, modename, buf.data, msecs, usecs),
						 (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.",
											   "Processes holding the lock: %s. Wait queue: %s.",
											   lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data))));
			else if (myWaitStatus == STATUS_OK)
				ereport(LOG,
						(errmsg("process %d acquired %s on %s after %ld.%03d ms",
								MyProcPid, modename, buf.data, msecs, usecs)));
			else
			{
				Assert(myWaitStatus == STATUS_ERROR);

				/*
				 * Currently, the deadlock checker always kicks its own
				 * process, which means that we'll only see STATUS_ERROR when
				 * deadlock_state == DS_HARD_DEADLOCK, and there's no need to
				 * print redundant messages. But for completeness and
				 * future-proofing, print a message if it looks like someone
				 * else kicked us off the lock.
				 */
				if (deadlock_state != DS_HARD_DEADLOCK)
					ereport(LOG,
							(errmsg("process %d failed to acquire %s on %s after %ld.%03d ms",
									MyProcPid, modename, buf.data, msecs, usecs),
							 (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.",
												   "Processes holding the lock: %s. Wait queue: %s.",
												   lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data))));
			}

			/*
			 * At this point we might still need to wait for the lock. Reset
			 * state so we don't print the above messages again.
			 */
			deadlock_state = DS_NO_DEADLOCK;

			pfree(buf.data);
			pfree(lock_holders_sbuf.data);
			pfree(lock_waiters_sbuf.data);
		}
	} while (myWaitStatus == STATUS_WAITING);

	/*
	 * Disable the timers, if they are still running. As in LockErrorCleanup,
	 * we must preserve the LOCK_TIMEOUT indicator flag: if a lock timeout has
	 * already caused QueryCancelPending to become set, we want the cancel to
	 * be reported as a lock timeout, not a user cancel.
	 */
	if (LockTimeout > 0)
	{
		DisableTimeoutParams timeouts[2];

		timeouts[0].id = DEADLOCK_TIMEOUT;
		timeouts[0].keep_indicator = false;
		timeouts[1].id = LOCK_TIMEOUT;
		timeouts[1].keep_indicator = true;
		disable_timeouts(timeouts, 2);
	}
	else
		disable_timeout(DEADLOCK_TIMEOUT, false);

	/*
	 * Re-acquire the lock table's partition lock. We have to do this to hold
	 * off cancel/die interrupts before we can mess with lockAwaited (else we
	 * might have a missed or duplicated locallock update).
	 */
	LWLockAcquire(partitionLock, LW_EXCLUSIVE);

	/*
	 * We no longer want LockErrorCleanup to do anything.
	 */
	lockAwaited = NULL;

	/*
	 * If we got the lock, be sure to remember it in the locallock table.
	 */
	if (MyProc->waitStatus == STATUS_OK)
		GrantAwaitedLock();

	/*
	 * We don't have to do anything else, because the awaker did all the
	 * necessary update of the lock table and MyProc.
	 */
	return MyProc->waitStatus;
}
/* * There are a few ways to arrive in the initsequencer. * 1. From _PG_init (called exactly once when the library is loaded for ANY * reason). * 1a. Because of the command LOAD 'libraryname'; * This case can be distinguished because _PG_init will have found the * LOAD command and saved the 'libraryname' in pljavaLoadPath. * 1b. Because of a CREATE FUNCTION naming this library. pljavaLoadPath will * be NULL. * 1c. By the first actual use of a PL/Java function, causing this library * to be loaded. pljavaLoadPath will be NULL. The called function's Oid * will be available to the call handler once we return from _PG_init, * but it isn't (easily) available here. * 2. From the call handler, if initialization isn't complete yet. That can only * mean something failed in the earlier call to _PG_init, and whatever it was * is highly likely to fail again. That may lead to the untidyness of * duplicated diagnostic messages, but for now I like the belt-and-suspenders * approach of making sure the init sequence gets as many chances as possible * to succeed. * 3. From a GUC assign hook, if the user has updated a setting that might allow * initialization to succeed. It resumes from where it left off. * * In all cases, the sequence must progress as far as starting the VM and * initializing the PL/Java classes. In all cases except 1a, that's enough, * assuming the language handlers and schema have all been set up already (or, * in case 1b, the user is intent on setting them up explicitly). * * In case 1a, we can go ahead and test for, and create, the schema, functions, * and language entries as needed, using pljavaLoadPath as the library path * if creating the language handler functions. One-stop shopping. (The presence * of pljavaLoadPath in any of the other cases, such as resumption by an assign * hook, indicates it is really a continuation of case 1a.) 
*/ static void initsequencer(enum initstage is, bool tolerant) { JVMOptList optList; Invocation ctx; jint JNIresult; char *greeting; switch (is) { case IS_FORMLESS_VOID: initstage = IS_GUCS_REGISTERED; case IS_GUCS_REGISTERED: libjvmlocation = strdup("libjvm.so"); initstage = IS_PLJAVA_ENABLED; case IS_PLJAVA_ENABLED: libjvm_handle = pg_dlopen(libjvmlocation); if ( NULL == libjvm_handle ) { ereport(ERROR, ( errmsg("Cannot load libjvm.so library, check that it is available in LD_LIBRARY_PATH"), errdetail("%s", (char *)pg_dlerror()))); goto check_tolerant; } initstage = IS_CAND_JVMOPENED; case IS_CAND_JVMOPENED: pljava_createvm = (jint (JNICALL *)(JavaVM **, void **, void *)) pg_dlsym(libjvm_handle, "JNI_CreateJavaVM"); if ( NULL == pljava_createvm ) { /* * If it hasn't got the symbol, it can't be the right * library, so close/unload it so another can be tried. * Format the dlerror string first: dlclose may clobber it. */ char *dle = MemoryContextStrdup(ErrorContext, pg_dlerror()); pg_dlclose(libjvm_handle); initstage = IS_CAND_JVMLOCATION; ereport(ERROR, ( errmsg("Cannot start Java VM"), errdetail("%s", dle), errhint("Check that libjvm.so is available in LD_LIBRARY_PATH"))); goto check_tolerant; } initstage = IS_CREATEVM_SYM_FOUND; case IS_CREATEVM_SYM_FOUND: s_javaLogLevel = INFO; checkIntTimeType(); HashMap_initialize(); /* creates things in TopMemoryContext */ #ifdef PLJAVA_DEBUG /* Hard setting for debug. Don't forget to recompile... */ pljava_debug = 1; #endif initstage = IS_MISC_ONCE_DONE; case IS_MISC_ONCE_DONE: JVMOptList_init(&optList); /* uses CurrentMemoryContext */ seenVisualVMName = false; addUserJVMOptions(&optList); if ( ! 
seenVisualVMName ) JVMOptList_addVisualVMName(&optList); JVMOptList_add(&optList, "vfprintf", (void*)my_vfprintf, true); #ifndef GCJ JVMOptList_add(&optList, "-Xrs", 0, true); #endif effectiveClassPath = getClassPath("-Djava.class.path="); if(effectiveClassPath != 0) { JVMOptList_add(&optList, effectiveClassPath, 0, true); } initstage = IS_JAVAVM_OPTLIST; case IS_JAVAVM_OPTLIST: JNIresult = initializeJavaVM(&optList); /* frees the optList */ if( JNI_OK != JNIresult ) { initstage = IS_MISC_ONCE_DONE; /* optList has been freed */ StaticAssertStmt(sizeof(jint) <= sizeof(long int), "jint wider than long int?!"); ereport(WARNING, (errmsg("failed to create Java virtual machine"), errdetail("JNI_CreateJavaVM returned an error code: %ld", (long int)JNIresult), jvmStartedAtLeastOnce ? errhint("Because an earlier attempt during this session " "did start a VM before failing, this probably means your " "Java runtime environment does not support more than one " "VM creation per session. You may need to exit this " "session and start a new one.") : 0)); goto check_tolerant; } jvmStartedAtLeastOnce = true; elog(DEBUG2, "successfully created Java virtual machine"); initstage = IS_JAVAVM_STARTED; case IS_JAVAVM_STARTED: #ifdef USE_PLJAVA_SIGHANDLERS pqsignal(SIGINT, pljavaStatementCancelHandler); pqsignal(SIGTERM, pljavaDieHandler); #endif /* Register an on_proc_exit handler that destroys the VM */ on_proc_exit(_destroyJavaVM, 0); initstage = IS_SIGHANDLERS; case IS_SIGHANDLERS: Invocation_pushBootContext(&ctx); PG_TRY(); { initPLJavaClasses(); initJavaSession(); Invocation_popBootContext(); initstage = IS_PLJAVA_FOUND; } PG_CATCH(); { MemoryContextSwitchTo(ctx.upperContext); /* leave ErrorContext */ Invocation_popBootContext(); initstage = IS_MISC_ONCE_DONE; /* We can't stay here... 
*/ if ( tolerant ) reLogWithChangedLevel(WARNING); /* so xact is not aborted */ else { EmitErrorReport(); /* no more unwinding, just log it */ /* Seeing an ERROR emitted to the log, without leaving the * transaction aborted, would violate the principle of least * astonishment. But at check_tolerant below, another ERROR will * be thrown immediately, so the transaction effect will be as * expected and this ERROR will contribute information beyond * what is in the generic one thrown down there. */ FlushErrorState(); } } PG_END_TRY(); if ( IS_PLJAVA_FOUND != initstage ) { /* JVM initialization failed for some reason. Destroy * the VM if it exists. Perhaps the user will try * fixing the pljava.classpath and make a new attempt. */ ereport(WARNING, ( errmsg("failed to load initial PL/Java classes"), errhint("The most common reason is that \"pljava_classpath\" " "needs to be set, naming the proper \"pljava.jar\" file.") )); _destroyJavaVM(0, 0); goto check_tolerant; } case IS_PLJAVA_FOUND: greeting = InstallHelper_hello(); ereport(NULL != pljavaLoadPath ? NOTICE : DEBUG1, ( errmsg("PL/Java loaded"), errdetail("versions:\n%s", greeting))); pfree(greeting); if ( NULL != pljavaLoadPath ) InstallHelper_groundwork(); /* sqlj schema, language handlers, ...*/ initstage = IS_COMPLETE; case IS_COMPLETE: pljavaLoadingAsExtension = false; if ( alteredSettingsWereNeeded ) { /* Use this StringInfoData to conditionally construct part of the * hint string suggesting ALTER DATABASE ... SET ... FROM CURRENT * provided the server is >= 9.2 where that will actually work. * In 9.3, psprintf appeared, which would make this all simpler, * but if 9.3+ were all that had to be supported, this would all * be moot anyway. Doing the initStringInfo inside the ereport * ensures the string is allocated in ErrorContext and won't leak. * Don't remove the extra parens grouping * (initStringInfo, appendStringInfo, errhint) ... 
with the parens, * that's a comma expression, which is sequenced; without them, they * are just function parameters with evaluation order unknown. */ StringInfoData buf; #if PG_VERSION_NUM >= 90200 #define MOREHINT \ appendStringInfo(&buf, \ "using ALTER DATABASE %s SET ... FROM CURRENT or ", \ pljavaDbName()), #else #define MOREHINT #endif ereport(NOTICE, ( errmsg("PL/Java successfully started after adjusting settings"), (initStringInfo(&buf), MOREHINT errhint("The settings that worked should be saved (%s" "in the \"%s\" file). For a reminder of what has been set, " "try: SELECT name, setting FROM pg_settings WHERE name LIKE" " 'pljava.%%' AND source = 'session'", buf.data, superuser() ? PG_GETCONFIGOPTION("config_file") : "postgresql.conf")))); #undef MOREHINT if ( loadAsExtensionFailed ) { ereport(NOTICE, (errmsg( "PL/Java load successful after failed CREATE EXTENSION"), errdetail( "PL/Java is now installed, but not as an extension."), errhint( "To correct that, either COMMIT or ROLLBACK, make sure " "the working settings are saved, exit this session, and " "in a new session, either: " "1. if committed, run " "\"CREATE EXTENSION pljava FROM unpackaged\", or 2. " "if rolled back, simply \"CREATE EXTENSION pljava\" again." ))); } } return; default: ereport(ERROR, ( errmsg("cannot set up PL/Java"), errdetail( "An unexpected stage was reached in the startup sequence."), errhint( "Please report the circumstances to the PL/Java maintainers.") )); } check_tolerant: if ( pljavaLoadingAsExtension ) { tolerant = false; loadAsExtensionFailed = true; pljavaLoadingAsExtension = false; } if ( !tolerant ) { ereport(ERROR, ( errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg( "cannot use PL/Java before successfully completing its setup"), errhint( "Check the log for messages closely preceding this one, " "detailing what step of setup failed and what will be needed, " "probably setting one of the \"pljava.\" configuration " "variables, to complete the setup. 
If there is not enough " "help in the log, try again with different settings for " "\"log_min_messages\" or \"log_error_verbosity\"."))); } }
/*
 * Main loop of walsender process.
 *
 * Streams WAL to the standby until shutdown is requested, the connection
 * breaks, or the replication timeout expires.  Returns only via proc_exit();
 * the int return type exists solely to satisfy the caller's signature.
 *
 * The loop's ordering is deliberate and fragile: the latch is reset FIRST,
 * before the state checks, so that any signal arriving after a check sets
 * the latch and prevents WaitLatchOrSocket from sleeping through it.
 */
static int
WalSndLoop(void)
{
	char	   *output_message;
	bool		caughtup = false;

	/*
	 * Allocate buffer that will be used for each output message.  We do this
	 * just once to reduce palloc overhead.  The buffer must be made large
	 * enough for maximum-sized messages.
	 */
	output_message = palloc(1 + sizeof(WalDataMessageHeader) + MAX_SEND_SIZE);

	/*
	 * Allocate buffer that will be used for processing reply messages.  As
	 * above, do this just once to reduce palloc overhead.
	 */
	initStringInfo(&reply_message);

	/* Initialize the last reply timestamp */
	last_reply_timestamp = GetCurrentTimestamp();

	/* Loop forever, unless we get an error */
	for (;;)
	{
		/* Clear any already-pending wakeups */
		ResetLatch(&MyWalSnd->latch);

		/*
		 * Emergency bailout if postmaster has died.  This is to avoid the
		 * necessity for manual cleanup of all postmaster children.
		 */
		if (!PostmasterIsAlive())
			exit(1);

		/* Process any requests or signals received recently */
		if (got_SIGHUP)
		{
			got_SIGHUP = false;
			ProcessConfigFile(PGC_SIGHUP);
			SyncRepInitConfig();
		}

		/* Normal exit from the walsender is here */
		if (walsender_shutdown_requested)
		{
			/* Inform the standby that XLOG streaming is done */
			pq_puttextmessage('C', "COPY 0");
			pq_flush();

			proc_exit(0);
		}

		/* Check for input from the client */
		ProcessRepliesIfAny();

		/*
		 * If we don't have any pending data in the output buffer, try to send
		 * some more.  If there is some, we don't bother to call XLogSend
		 * again until we've flushed it ... but we'd better assume we are not
		 * caught up.
		 */
		if (!pq_is_send_pending())
			XLogSend(output_message, &caughtup);
		else
			caughtup = false;

		/* Try to flush pending output to the client */
		if (pq_flush_if_writable() != 0)
			break;

		/* If nothing remains to be sent right now ... */
		if (caughtup && !pq_is_send_pending())
		{
			/*
			 * If we're in catchup state, move to streaming.  This is an
			 * important state change for users to know about, since before
			 * this point data loss might occur if the primary dies and we
			 * need to failover to the standby.  The state change is also
			 * important for synchronous replication, since commits that
			 * started to wait at that point might wait for some time.
			 */
			if (MyWalSnd->state == WALSNDSTATE_CATCHUP)
			{
				ereport(DEBUG1,
						(errmsg("standby \"%s\" has now caught up with primary",
								application_name)));
				WalSndSetState(WALSNDSTATE_STREAMING);
			}

			/*
			 * When SIGUSR2 arrives, we send any outstanding logs up to the
			 * shutdown checkpoint record (i.e., the latest record) and exit.
			 * This may be a normal termination at shutdown, or a promotion,
			 * the walsender is not sure which.
			 */
			if (walsender_ready_to_stop)
			{
				/* ... let's just be real sure we're caught up ... */
				XLogSend(output_message, &caughtup);
				if (caughtup && !pq_is_send_pending())
				{
					walsender_shutdown_requested = true;
					continue;	/* don't want to wait more */
				}
			}
		}

		/*
		 * We don't block if not caught up, unless there is unsent data
		 * pending in which case we'd better block until the socket is
		 * write-ready.  This test is only needed for the case where XLogSend
		 * loaded a subset of the available data but then pq_flush_if_writable
		 * flushed it all --- we should immediately try to send more.
		 */
		if (caughtup || pq_is_send_pending())
		{
			TimestampTz finish_time = 0;
			long		sleeptime = -1;		/* -1 means no timeout for the wait */
			int			wakeEvents;

			wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH |
				WL_SOCKET_READABLE;
			/* Only wait for write-readiness when we have unsent bytes. */
			if (pq_is_send_pending())
				wakeEvents |= WL_SOCKET_WRITEABLE;

			/* Determine time until replication timeout */
			if (replication_timeout > 0)
			{
				long		secs;
				int			usecs;

				finish_time = TimestampTzPlusMilliseconds(last_reply_timestamp,
														  replication_timeout);
				TimestampDifference(GetCurrentTimestamp(),
									finish_time, &secs, &usecs);
				sleeptime = secs * 1000 + usecs / 1000;
				/* Avoid Assert in WaitLatchOrSocket if timeout is past */
				if (sleeptime < 0)
					sleeptime = 0;
				wakeEvents |= WL_TIMEOUT;
			}

			/* Sleep until something happens or replication timeout */
			WaitLatchOrSocket(&MyWalSnd->latch, wakeEvents,
							  MyProcPort->sock, sleeptime);

			/*
			 * Check for replication timeout.  Note we ignore the corner case
			 * possibility that the client replied just as we reached the
			 * timeout ... he's supposed to reply *before* that.
			 */
			if (replication_timeout > 0 &&
				GetCurrentTimestamp() >= finish_time)
			{
				/*
				 * Since typically expiration of replication timeout means
				 * communication problem, we don't send the error message to
				 * the standby.
				 */
				ereport(COMMERROR,
						(errmsg("terminating walsender process due to replication timeout")));
				break;
			}
		}
	}

	/*
	 * Get here on send failure.  Clean up and exit.
	 *
	 * Reset whereToSendOutput to prevent ereport from attempting to send any
	 * more messages to the standby.
	 */
	if (whereToSendOutput == DestRemote)
		whereToSendOutput = DestNone;

	proc_exit(0);
	return 1;					/* keep the compiler quiet */
}
/* * Append those parts of path that has not yet been appended. The HashMap unique is * keeping track of what has been appended already. First appended part will be * prefixed with prefix. */ static void appendPathParts(const char* path, StringInfoData* bld, HashMap unique, const char* prefix) { StringInfoData buf; if(path == 0 || strlen(path) == 0) return; for (;;) { char* pathPart; size_t len; if(*path == 0) break; len = strcspn(path, ";:"); if(len == 1 && *(path+1) == ':' && isalnum(*path)) /* * Windows drive designator, leave it "as is". */ len = strcspn(path+2, ";:") + 2; else if(len == 0) { /* Ignore zero length components. */ ++path; continue; } initStringInfo(&buf); if(*path == '$') { if(len == 7 || (strcspn(path, "/\\") == 7 && strncmp(path, "$libdir", 7) == 0)) { char pathbuf[MAXPGPATH]; get_pkglib_path(my_exec_path, pathbuf); len -= 7; path += 7; appendStringInfoString(&buf, pathbuf); } else ereport(ERROR, ( errcode(ERRCODE_INVALID_NAME), errmsg("invalid macro name '%*s' in PL/Java classpath", (int)len, path))); } if(len > 0) { appendBinaryStringInfo(&buf, path, (int)len); path += len; } pathPart = buf.data; if(HashMap_getByString(unique, pathPart) == 0) { if(HashMap_size(unique) == 0) appendStringInfo(bld, "%s", prefix); else #if defined(WIN32) appendStringInfoChar(bld, ';'); #else appendStringInfoChar(bld, ':'); #endif appendStringInfo(bld, "%s", pathPart); HashMap_putByString(unique, pathPart, (void*)1); } pfree(pathPart); if(*path == 0) break; ++path; /* Skip ':' */ } }
/*
 * record_out		- output routine for any composite type.
 *
 * Renders the row as "(v1,v2,...)".  Dropped columns are skipped entirely;
 * NULL columns produce an empty position between commas.  Values containing
 * quotes, backslashes, parens, commas, or whitespace are double-quoted, with
 * embedded '"' and '\' doubled.
 */
Datum
record_out(PG_FUNCTION_ARGS)
{
	HeapTupleHeader rec = PG_GETARG_HEAPTUPLEHEADER(0);
	Oid			tupType;
	int32		tupTypmod;
	TupleDesc	tupdesc;
	HeapTupleData tuple;
	RecordIOData *my_extra;
	bool		needComma = false;
	int			ncolumns;
	int			i;
	Datum	   *values;
	bool	   *nulls;
	StringInfoData buf;

	/* Extract type info from the tuple itself */
	tupType = HeapTupleHeaderGetTypeId(rec);
	tupTypmod = HeapTupleHeaderGetTypMod(rec);
	tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
	ncolumns = tupdesc->natts;

	/* Build a temporary HeapTuple control structure */
	tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
	ItemPointerSetInvalid(&(tuple.t_self));
	tuple.t_tableOid = InvalidOid;
	tuple.t_data = rec;

	/*
	 * We arrange to look up the needed I/O info just once per series of
	 * calls, assuming the record type doesn't change underneath us.
	 */
	my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
	if (my_extra == NULL ||
		my_extra->ncolumns != ncolumns)
	{
		/* First call, or column count changed: (re)allocate the cache. */
		fcinfo->flinfo->fn_extra =
			MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
							   sizeof(RecordIOData) - sizeof(ColumnIOData)
							   + ncolumns * sizeof(ColumnIOData));
		my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
		/* Mark the cache invalid so the next test repopulates it. */
		my_extra->record_type = InvalidOid;
		my_extra->record_typmod = 0;
	}

	if (my_extra->record_type != tupType ||
		my_extra->record_typmod != tupTypmod)
	{
		/* Different row type than last call: reset all per-column state. */
		MemSet(my_extra, 0,
			   sizeof(RecordIOData) - sizeof(ColumnIOData)
			   + ncolumns * sizeof(ColumnIOData));
		my_extra->record_type = tupType;
		my_extra->record_typmod = tupTypmod;
		my_extra->ncolumns = ncolumns;
	}

	values = (Datum *) palloc(ncolumns * sizeof(Datum));
	nulls = (bool *) palloc(ncolumns * sizeof(bool));

	/* Break down the tuple into fields */
	heap_deform_tuple(&tuple, tupdesc, values, nulls);

	/* And build the result string */
	initStringInfo(&buf);

	appendStringInfoChar(&buf, '(');

	for (i = 0; i < ncolumns; i++)
	{
		ColumnIOData *column_info = &my_extra->columns[i];
		Oid			column_type = tupdesc->attrs[i]->atttypid;
		char	   *value;
		char	   *tmp;
		bool		nq;

		/* Ignore dropped columns in datatype */
		if (tupdesc->attrs[i]->attisdropped)
			continue;

		if (needComma)
			appendStringInfoChar(&buf, ',');
		needComma = true;

		if (nulls[i])
		{
			/* emit nothing... (NULL is an empty position) */
			continue;
		}

		/*
		 * Convert the column value to text, caching the output-function
		 * lookup per column type across calls.
		 */
		if (column_info->column_type != column_type)
		{
			bool		typIsVarlena;

			getTypeOutputInfo(column_type,
							  &column_info->typiofunc,
							  &typIsVarlena);
			fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
						  fcinfo->flinfo->fn_mcxt);
			column_info->column_type = column_type;
		}

		value = OutputFunctionCall(&column_info->proc, values[i]);

		/* Detect whether we need double quotes for this value */
		nq = (value[0] == '\0');	/* force quotes for empty string */
		for (tmp = value; *tmp; tmp++)
		{
			char		ch = *tmp;

			if (ch == '"' || ch == '\\' ||
				ch == '(' || ch == ')' || ch == ',' ||
				isspace((unsigned char) ch))
			{
				nq = true;
				break;
			}
		}

		/* And emit the string, doubling embedded '"' and '\\' */
		if (nq)
			appendStringInfoChar(&buf, '"');
		for (tmp = value; *tmp; tmp++)
		{
			char		ch = *tmp;

			if (ch == '"' || ch == '\\')
				appendStringInfoChar(&buf, ch);
			appendStringInfoChar(&buf, ch);
		}
		if (nq)
			appendStringInfoChar(&buf, '"');
	}

	appendStringInfoChar(&buf, ')');

	pfree(values);
	pfree(nulls);

	ReleaseTupleDesc(tupdesc);

	PG_RETURN_CSTRING(buf.data);
}
Datum hstore_to_json_loose(PG_FUNCTION_ARGS) { HStore *in = PG_GETARG_HS(0); int i; int count = HS_COUNT(in); char *base = STRPTR(in); HEntry *entries = ARRPTR(in); bool is_number; StringInfoData tmp, dst; if (count == 0) PG_RETURN_TEXT_P(cstring_to_text_with_len("{}",2)); initStringInfo(&tmp); initStringInfo(&dst); appendStringInfoChar(&dst, '{'); for (i = 0; i < count; i++) { resetStringInfo(&tmp); appendBinaryStringInfo(&tmp, HS_KEY(entries, base, i), HS_KEYLEN(entries, i)); escape_json(&dst, tmp.data); appendStringInfoString(&dst, ": "); if (HS_VALISNULL(entries, i)) appendStringInfoString(&dst, "null"); /* guess that values of 't' or 'f' are booleans */ else if (HS_VALLEN(entries, i) == 1 && *(HS_VAL(entries, base, i)) == 't') appendStringInfoString(&dst, "true"); else if (HS_VALLEN(entries, i) == 1 && *(HS_VAL(entries, base, i)) == 'f') appendStringInfoString(&dst, "false"); else { is_number = false; resetStringInfo(&tmp); appendBinaryStringInfo(&tmp, HS_VAL(entries, base, i), HS_VALLEN(entries, i)); /* * don't treat something with a leading zero followed by another * digit as numeric - could be a zip code or similar */ if (tmp.len > 0 && !(tmp.data[0] == '0' && isdigit((unsigned char) tmp.data[1])) && strspn(tmp.data, "+-0123456789Ee.") == tmp.len) { /* * might be a number. See if we can input it as a numeric * value. Ignore any actual parsed value. */ char *endptr = "junk"; long lval; lval = strtol(tmp.data, &endptr, 10); (void) lval; if (*endptr == '\0') { /* * strol man page says this means the whole string is * valid */ is_number = true; } else { /* not an int - try a double */ double dval; dval = strtod(tmp.data, &endptr); (void) dval; if (*endptr == '\0') is_number = true; } } if (is_number) appendBinaryStringInfo(&dst, tmp.data, tmp.len); else escape_json(&dst, tmp.data); } if (i + 1 != count) appendStringInfoString(&dst, ", "); } appendStringInfoChar(&dst, '}'); PG_RETURN_TEXT_P(cstring_to_text(dst.data)); }
/* * format_procedure - converts proc OID to "pro_name(args)" * * This exports the useful functionality of regprocedureout for use * in other backend modules. The result is a palloc'd string. */ char * format_procedure(Oid procedure_oid) { char *result; HeapTuple proctup; cqContext *pcqCtx; pcqCtx = caql_beginscan( NULL, cql("SELECT * FROM pg_proc " " WHERE oid = :1 ", ObjectIdGetDatum(procedure_oid))); proctup = caql_getnext(pcqCtx); /* XXX XXX select proname, pronamespace from pg_proc */ if (HeapTupleIsValid(proctup)) { Form_pg_proc procform = (Form_pg_proc) GETSTRUCT(proctup); char *proname = NameStr(procform->proname); int nargs = procform->pronargs; int i; char *nspname; StringInfoData buf; /* XXX no support here for bootstrap mode */ initStringInfo(&buf); /* * Would this proc be found (given the right args) by regprocedurein? * If not, we need to qualify it. */ if (FunctionIsVisible(procedure_oid)) nspname = NULL; else nspname = get_namespace_name(procform->pronamespace); appendStringInfo(&buf, "%s(", quote_qualified_identifier(nspname, proname)); for (i = 0; i < nargs; i++) { Oid thisargtype = procform->proargtypes.values[i]; if (i > 0) appendStringInfoChar(&buf, ','); appendStringInfoString(&buf, format_type_be(thisargtype)); } appendStringInfoChar(&buf, ')'); result = buf.data; } else { /* If OID doesn't match any pg_proc entry, return it numerically */ result = (char *) palloc(NAMEDATALEN); snprintf(result, NAMEDATALEN, "%u", procedure_oid); } caql_endscan(pcqCtx); return result; }
/*
 * kafka_consume_main
 *
 * Main function for Kafka consumers running as background workers.
 * Loads the saved consumer state identified by `arg`, connects to the
 * brokers, starts consuming this worker's share of the topic partitions
 * (those where partition %% parallelism == our partition group), and COPYs
 * batches of messages into the target relation until SIGTERM arrives.
 */
void
kafka_consume_main(Datum arg)
{
	char err_msg[512];
	rd_kafka_topic_conf_t *topic_conf;
	rd_kafka_t *kafka;
	rd_kafka_topic_t *topic;
	rd_kafka_message_t **messages;
	const struct rd_kafka_metadata *meta;
	struct rd_kafka_metadata_topic topic_meta;
	rd_kafka_resp_err_t err;
	bool found;
	Oid id = (Oid) arg;
	ListCell *lc;
	KafkaConsumerProc *proc = hash_search(consumer_procs, &id, HASH_FIND, &found);
	KafkaConsumer consumer;
	CopyStmt *copy;
	int valid_brokers = 0;
	int i;
	int my_partitions = 0;

	if (!found)
		/* BUG FIX: Oid is unsigned; %d was the wrong format specifier. */
		elog(ERROR, "kafka consumer %u not found", id);

	pqsignal(SIGTERM, kafka_consume_main_sigterm);
#define BACKTRACE_SEGFAULTS
#ifdef BACKTRACE_SEGFAULTS
	pqsignal(SIGSEGV, debug_segfault);
#endif

	/* we're now ready to receive signals */
	BackgroundWorkerUnblockSignals();

	/* give this proc access to the database */
	BackgroundWorkerInitializeConnection(NameStr(proc->dbname), NULL);

	/* load saved consumer state */
	StartTransactionCommand();
	load_consumer_state(proc->consumer_id, &consumer);
	copy = get_copy_statement(&consumer);

	topic_conf = rd_kafka_topic_conf_new();
	kafka = rd_kafka_new(RD_KAFKA_CONSUMER, NULL, err_msg, sizeof(err_msg));
	rd_kafka_set_logger(kafka, logger);

	/*
	 * Add all brokers currently in pipeline_kafka_brokers
	 */
	if (consumer.brokers == NIL)
		elog(ERROR, "no valid brokers were found");

	foreach(lc, consumer.brokers)
		valid_brokers += rd_kafka_brokers_add(kafka, lfirst(lc));

	if (!valid_brokers)
		elog(ERROR, "no valid brokers were found");

	/*
	 * Set up our topic to read from
	 */
	topic = rd_kafka_topic_new(kafka, consumer.topic, topic_conf);
	err = rd_kafka_metadata(kafka, false, topic, &meta, CONSUMER_TIMEOUT);

	if (err != RD_KAFKA_RESP_ERR_NO_ERROR)
		elog(ERROR, "failed to acquire metadata: %s", rd_kafka_err2str(err));

	Assert(meta->topic_cnt == 1);
	topic_meta = meta->topics[0];

	load_consumer_offsets(&consumer, &topic_meta, proc->offset);
	CommitTransactionCommand();

	/*
	 * Begin consuming all partitions that this process is responsible for
	 */
	for (i = 0; i < topic_meta.partition_cnt; i++)
	{
		int partition = topic_meta.partitions[i].id;

		Assert(partition <= consumer.num_partitions);
		if (partition % consumer.parallelism != proc->partition_group)
			continue;

		elog(LOG, "[kafka consumer] %s <- %s consuming partition %d from offset %ld",
			 consumer.rel->relname, consumer.topic, partition, consumer.offsets[partition]);

		if (rd_kafka_consume_start(topic, partition, consumer.offsets[partition]) == -1)
			elog(ERROR, "failed to start consuming: %s", rd_kafka_err2str(rd_kafka_errno2err(errno)));

		my_partitions++;
	}

	/*
	 * No point doing anything if we don't have any partitions assigned to us
	 */
	if (my_partitions == 0)
	{
		elog(LOG, "[kafka consumer] %s <- %s consumer %d doesn't have any partitions to read from",
			 consumer.rel->relname, consumer.topic, MyProcPid);
		goto done;
	}

	/*
	 * BUG FIX: `messages` is an array of message POINTERS, so each slot is
	 * sizeof(rd_kafka_message_t *).  The original sized the allocation by
	 * sizeof(rd_kafka_message_t), over-allocating by the struct/pointer
	 * size ratio for every batch slot.
	 */
	messages = palloc0(sizeof(rd_kafka_message_t *) * consumer.batch_size);

	/*
	 * Consume messages until we are terminated
	 */
	while (!got_sigterm)
	{
		ssize_t num_consumed;
		int i;
		int messages_buffered = 0;
		int partition;
		StringInfoData buf;
		bool xact = false;

		for (partition = 0; partition < consumer.num_partitions; partition++)
		{
			if (partition % consumer.parallelism != proc->partition_group)
				continue;

			num_consumed = rd_kafka_consume_batch(topic, partition,
												  CONSUMER_TIMEOUT, messages, consumer.batch_size);

			if (num_consumed <= 0)
				continue;

			/* Open a transaction lazily, on the first non-empty batch. */
			if (!xact)
			{
				StartTransactionCommand();
				xact = true;
			}

			initStringInfo(&buf);
			for (i = 0; i < num_consumed; i++)
			{
				if (messages[i]->payload != NULL)
				{
					appendBinaryStringInfo(&buf, messages[i]->payload, messages[i]->len);
					/* Ensure each message is newline-terminated for COPY. */
					if (buf.len > 0 && buf.data[buf.len - 1] != '\n')
						appendStringInfoChar(&buf, '\n');
					messages_buffered++;
				}
				consumer.offsets[partition] = messages[i]->offset;
				rd_kafka_message_destroy(messages[i]);
			}
		}

		if (!xact)
		{
			pg_usleep(1 * 1000);
			continue;
		}

		/* we don't want to die in the event of any errors */
		PG_TRY();
		{
			if (messages_buffered)
				execute_copy(copy, &buf);
		}
		PG_CATCH();
		{
			/*
			 * NOTE(review): num_consumed here is only the count from the
			 * LAST partition polled above, not the whole batch — the
			 * reported drop count may understate; verify against callers'
			 * expectations before relying on it.
			 */
			elog(LOG, "[kafka consumer] %s <- %s failed to process batch, dropped %d message%s:",
				 consumer.rel->relname, consumer.topic,
				 (int) num_consumed, (num_consumed == 1 ? "" : "s"));
			EmitErrorReport();
			FlushErrorState();

			AbortCurrentTransaction();
			xact = false;
		}
		PG_END_TRY();

		if (!xact)
			StartTransactionCommand();

		if (messages_buffered)
			save_consumer_state(&consumer, proc->partition_group);

		CommitTransactionCommand();
	}

done:

	hash_search(consumer_procs, &id, HASH_REMOVE, NULL);

	rd_kafka_topic_destroy(topic);
	rd_kafka_destroy(kafka);
	rd_kafka_wait_destroyed(CONSUMER_TIMEOUT);
}
/**
 * Read one Thrift-encoded ColumnChunkMetaData struct from `prot` into
 * `colChunk`.  Returns the number of bytes transferred.  Raises an ERROR if
 * any required field is missing from the stream.
 */
int
readColumnMetadata(CompactProtocol *prot,
				   struct ColumnChunkMetadata_4C *colChunk)
{
	uint32_t xfer = 0;
	TType ftype;
	int16_t fid;

	readStructBegin(prot);

	/* Presence flags for the fields Thrift marks as required. */
	bool isset_type = false;
	bool isset_encodings = false;
	bool isset_path_in_schema = false;
	bool isset_codec = false;
	bool isset_num_values = false;
	bool isset_total_uncompressed_size = false;
	bool isset_total_compressed_size = false;
	bool isset_data_page_offset = false;

	while (true)
	{
		xfer += readFieldBegin(prot, &ftype, &fid);
		if (ftype == T_STOP)
		{
			break;
		}
		switch (fid)
		{
			case 1:
				/* required i32: physical (primitive) type */
				if (ftype == T_I32)
				{
					int32_t type;
					xfer += readI32(prot, &type);
					colChunk->type = (PrimitiveTypeName) type;
					isset_type = true;
				}
				break;
			case 2:
				/* required list<i32>: encodings used for this chunk */
				if (ftype == T_LIST)
				{
					uint32_t encodingCount;
					TType etype;
					xfer += readListBegin(prot, &etype, &encodingCount);
					colChunk->EncodingCount = encodingCount;
					colChunk->pEncodings =
						(enum Encoding *) palloc0(sizeof(enum Encoding) * encodingCount);
					for (uint32_t i = 0; i < encodingCount; i++)
					{
						int32_t encoding;
						xfer += readI32(prot, &encoding);
						colChunk->pEncodings[i] = (enum Encoding) encoding;
					}
					isset_encodings = true;
				}
				break;
			case 3:
				/* required list<string>: dotted path of this column */
				if (ftype == T_LIST)
				{
					{
						/*
						 * Process path in schema, setting colChunk->depth and
						 * colChunk->pathInSchema ("a:b:leaf"); colName is the
						 * final component.
						 */
						TType etype;
						uint32_t lsize;
						StringInfoData colNameBuf;
						char *path_in_schema;

						xfer += readListBegin(prot, &etype, &lsize);
						colChunk->depth = lsize;

						/*
						 * BUG FIX: lsize is unsigned, so the original bound
						 * "i < lsize - 1" wrapped to UINT32_MAX for an empty
						 * list, looping (and reading bogus strings) almost
						 * forever; the trailing readString below also assumed
						 * at least one element.  Reject empty lists and use
						 * an underflow-safe bound.
						 */
						if (lsize == 0)
							ereport(ERROR,
									(errcode(ERRCODE_GP_INTERNAL_ERROR),
									 errmsg("file metadata: row group column chunk path_in_schema is empty")));

						initStringInfo(&colNameBuf);
						for (uint32_t i = 0; i + 1 < lsize; i++)
						{
							xfer += readString(prot, &path_in_schema);
							appendStringInfo(&colNameBuf, "%s:", path_in_schema);
							pfree(path_in_schema);
						}
						xfer += readString(prot, &path_in_schema);
						appendStringInfo(&colNameBuf, "%s", path_in_schema);
						colChunk->pathInSchema = colNameBuf.data;
						colChunk->colName = path_in_schema;
					}
					isset_path_in_schema = true;
				}
				break;
			case 4:
				/* required i32: compression codec */
				if (ftype == T_I32)
				{
					int32_t compresscode;
					xfer += readI32(prot, &compresscode);
					colChunk->codec = (enum CompressionCodecName) compresscode;
					isset_codec = true;
				}
				break;
			case 5:
				/* required i64: number of values in this chunk */
				if (ftype == T_I64)
				{
					int64_t valCnt;
					xfer += readI64(prot, &valCnt);
					colChunk->valueCount = valCnt;
					isset_num_values = true;
				}
				break;
			case 6:
				if (ftype == T_I64)
				{
					xfer += readI64(prot, &(colChunk->totalUncompressedSize));
					isset_total_uncompressed_size = true;
				}
				break;
			case 7:
				if (ftype == T_I64)
				{
					xfer += readI64(prot, &(colChunk->totalSize));
					isset_total_compressed_size = true;
				}
				break;
			case 8:
				/* key_value_metadata: not needed, skip it */
				if (ftype == T_LIST)
				{
					xfer += skipType(prot, ftype);
				}
				break;
			case 9:
				if (ftype == T_I64)
				{
					xfer += readI64(prot, &(colChunk->firstDataPage));
					isset_data_page_offset = true;
				}
				break;
			case 10:
				/* index_page_offset: not needed, skip it */
				if (ftype == T_I64)
				{
					xfer += skipType(prot, ftype);
				}
				break;
			case 11:
				/* dictionary_page_offset: not needed, skip it */
				if (ftype == T_I64)
				{
					xfer += skipType(prot, ftype);
				}
				break;
			default:
				/*
				 * BUG FIX: unknown field ids were previously ignored WITHOUT
				 * consuming their value, which desynchronized the protocol
				 * stream (the next readFieldBegin would land inside the
				 * unread value).  Skip the value, as standard Thrift readers
				 * do, so newer writers with extra fields remain readable.
				 */
				xfer += skipType(prot, ftype);
				break;
		}
	}

	readStructEnd(prot);

	/* Validate that all required fields were present. */
	if (!isset_type)
		ereport(ERROR,
				(errcode(ERRCODE_GP_INTERNAL_ERROR),
				 errmsg("file metadata: row group column chunk type not set")));
	if (!isset_encodings)
		ereport(ERROR,
				(errcode(ERRCODE_GP_INTERNAL_ERROR),
				 errmsg("file metadata: row group column chunk encoding not set")));
	if (!isset_path_in_schema)
		ereport(ERROR,
				(errcode(ERRCODE_GP_INTERNAL_ERROR),
				 errmsg("file metadata: row group column chunk path_in_schema not set")));
	if (!isset_codec)
		ereport(ERROR,
				(errcode(ERRCODE_GP_INTERNAL_ERROR),
				 errmsg("file metadata: row group column chunk compression code not set")));
	if (!isset_num_values)
		ereport(ERROR,
				(errcode(ERRCODE_GP_INTERNAL_ERROR),
				 errmsg("file metadata: row group column chunk value number not set")));
	if (!isset_total_uncompressed_size)
		ereport(ERROR,
				(errcode(ERRCODE_GP_INTERNAL_ERROR),
				 errmsg("file metadata: row group column chunk total uncompressed size not set")));
	if (!isset_total_compressed_size)
		ereport(ERROR,
				(errcode(ERRCODE_GP_INTERNAL_ERROR),
				 errmsg("file metadata: row group column chunk total compressed size not set")));
	if (!isset_data_page_offset)
		ereport(ERROR,
				(errcode(ERRCODE_GP_INTERNAL_ERROR),
				 errmsg("file metadata: row group column chunk first data page not set")));

	return xfer;
}
/*
 * Background worker main entry point.
 *
 * Each worker owns the table schema<index>.counted: once per naptime it
 * collapses all 'delta' rows into the single 'total' row inside one
 * transaction, reporting activity via pgstat.  Runs until SIGTERM.
 *
 * The per-iteration call sequence (SetCurrentStatementStartTimestamp →
 * StartTransactionCommand → SPI_connect → PushActiveSnapshot → ... →
 * SPI_finish → PopActiveSnapshot → CommitTransactionCommand) is
 * order-sensitive; do not rearrange.
 */
void
worker_spi_main(Datum main_arg)
{
	int			index = DatumGetInt32(main_arg);
	worktable  *table;
	StringInfoData buf;
	char		name[20];	/* "schema" + int32 digits + NUL fits in 20 */

	table = palloc(sizeof(worktable));
	sprintf(name, "schema%d", index);
	table->schema = pstrdup(name);
	table->name = pstrdup("counted");

	/* Establish signal handlers before unblocking signals. */
	pqsignal(SIGHUP, worker_spi_sighup);
	pqsignal(SIGTERM, worker_spi_sigterm);

	/* We're now ready to receive signals */
	BackgroundWorkerUnblockSignals();

	/* Connect to our database */
	BackgroundWorkerInitializeConnection("postgres", NULL);

	elog(LOG, "%s initialized with %s.%s",
		 MyBgworkerEntry->bgw_name, table->schema, table->name);
	initialize_worker_spi(table);

	/*
	 * Quote identifiers passed to us.  Note that this must be done after
	 * initialize_worker_spi, because that routine assumes the names are not
	 * quoted.
	 *
	 * Note some memory might be leaked here.
	 */
	table->schema = quote_identifier(table->schema);
	table->name = quote_identifier(table->name);

	/* Build the delta-folding query once; it is reused every iteration. */
	initStringInfo(&buf);
	appendStringInfo(&buf,
					 "WITH deleted AS (DELETE "
					 "FROM %s.%s "
					 "WHERE type = 'delta' RETURNING value), "
					 "total AS (SELECT coalesce(sum(value), 0) as sum "
					 "FROM deleted) "
					 "UPDATE %s.%s "
					 "SET value = %s.value + total.sum "
					 "FROM total WHERE type = 'total' "
					 "RETURNING %s.value",
					 table->schema, table->name,
					 table->schema, table->name,
					 table->name,
					 table->name);

	/*
	 * Main loop: do this until the SIGTERM handler tells us to terminate
	 */
	while (!got_sigterm)
	{
		int			ret;
		int			rc;

		/*
		 * Background workers mustn't call usleep() or any direct equivalent:
		 * instead, they may wait on their process latch, which sleeps as
		 * necessary, but is awakened if postmaster dies.  That way the
		 * background process goes away immediately in an emergency.
		 */
		rc = WaitLatch(&MyProc->procLatch,
					   WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
					   worker_spi_naptime * 1000L);
		ResetLatch(&MyProc->procLatch);

		/* emergency bailout if postmaster has died */
		if (rc & WL_POSTMASTER_DEATH)
			proc_exit(1);

		/*
		 * In case of a SIGHUP, just reload the configuration.
		 */
		if (got_sighup)
		{
			got_sighup = false;
			ProcessConfigFile(PGC_SIGHUP);
		}

		/*
		 * Start a transaction on which we can run queries.  Note that each
		 * StartTransactionCommand() call should be preceded by a
		 * SetCurrentStatementStartTimestamp() call, which sets both the time
		 * for the statement we're about the run, and also the transaction
		 * start time.  Also, each other query sent to SPI should probably be
		 * preceded by SetCurrentStatementStartTimestamp(), so that statement
		 * start time is always up to date.
		 *
		 * The SPI_connect() call lets us run queries through the SPI manager,
		 * and the PushActiveSnapshot() call creates an "active" snapshot
		 * which is necessary for queries to have MVCC data to work on.
		 *
		 * The pgstat_report_activity() call makes our activity visible
		 * through the pgstat views.
		 */
		SetCurrentStatementStartTimestamp();
		StartTransactionCommand();
		SPI_connect();
		PushActiveSnapshot(GetTransactionSnapshot());
		pgstat_report_activity(STATE_RUNNING, buf.data);

		/* We can now execute queries via SPI */
		ret = SPI_execute(buf.data, false, 0);

		if (ret != SPI_OK_UPDATE_RETURNING)
			elog(FATAL, "cannot select from table %s.%s: error code %d",
				 table->schema, table->name, ret);

		if (SPI_processed > 0)
		{
			bool		isnull;
			int32		val;

			/* Column 1 of the RETURNING list is the new running total. */
			val = DatumGetInt32(SPI_getbinval(SPI_tuptable->vals[0],
											  SPI_tuptable->tupdesc,
											  1, &isnull));
			if (!isnull)
				elog(LOG, "%s: count in %s.%s is now %d",
					 MyBgworkerEntry->bgw_name,
					 table->schema, table->name, val);
		}

		/*
		 * And finish our transaction.
		 */
		SPI_finish();
		PopActiveSnapshot();
		CommitTransactionCommand();
		pgstat_report_activity(STATE_IDLE, NULL);
	}

	proc_exit(1);
}
static Tuplestorestate * build_tuplestore_recursively(char *key_fld, char *parent_key_fld, char *relname, char *orderby_fld, char *branch_delim, char *start_with, char *branch, int level, int *serial, int max_depth, bool show_branch, bool show_serial, MemoryContext per_query_ctx, AttInMetadata *attinmeta, Tuplestorestate *tupstore) { TupleDesc tupdesc = attinmeta->tupdesc; int ret; int proc; int serial_column; StringInfoData sql; char **values; char *current_key; char *current_key_parent; char current_level[INT32_STRLEN]; char serial_str[INT32_STRLEN]; char *current_branch; HeapTuple tuple; if (max_depth > 0 && level > max_depth) return tupstore; initStringInfo(&sql); /* Build initial sql statement */ if (!show_serial) { appendStringInfo(&sql, "SELECT %s, %s FROM %s WHERE %s = %s AND %s IS NOT NULL AND %s <> %s", key_fld, parent_key_fld, relname, parent_key_fld, quote_literal_cstr(start_with), key_fld, key_fld, parent_key_fld); serial_column = 0; } else { appendStringInfo(&sql, "SELECT %s, %s FROM %s WHERE %s = %s AND %s IS NOT NULL AND %s <> %s ORDER BY %s", key_fld, parent_key_fld, relname, parent_key_fld, quote_literal_cstr(start_with), key_fld, key_fld, parent_key_fld, orderby_fld); serial_column = 1; } if (show_branch) values = (char **) palloc((CONNECTBY_NCOLS + serial_column) * sizeof(char *)); else values = (char **) palloc((CONNECTBY_NCOLS_NOBRANCH + serial_column) * sizeof(char *)); /* First time through, do a little setup */ if (level == 0) { /* root value is the one we initially start with */ values[0] = start_with; /* root value has no parent */ values[1] = NULL; /* root level is 0 */ sprintf(current_level, "%d", level); values[2] = current_level; /* root branch is just starting root value */ if (show_branch) values[3] = start_with; /* root starts the serial with 1 */ if (show_serial) { sprintf(serial_str, "%d", (*serial)++); if (show_branch) values[4] = serial_str; else values[3] = serial_str; } /* construct the tuple */ tuple = 
BuildTupleFromCStrings(attinmeta, values); /* now store it */ tuplestore_puttuple(tupstore, tuple); /* increment level */ level++; } /* Retrieve the desired rows */ ret = SPI_execute(sql.data, true, 0); proc = SPI_processed; /* Check for qualifying tuples */ if ((ret == SPI_OK_SELECT) && (proc > 0)) { HeapTuple spi_tuple; SPITupleTable *tuptable = SPI_tuptable; TupleDesc spi_tupdesc = tuptable->tupdesc; int i; StringInfoData branchstr; StringInfoData chk_branchstr; StringInfoData chk_current_key; /* First time through, do a little more setup */ if (level == 0) { /* * Check that return tupdesc is compatible with the one we got * from the query, but only at level 0 -- no need to check more * than once */ if (!compatConnectbyTupleDescs(tupdesc, spi_tupdesc)) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("invalid return type"), errdetail("Return and SQL tuple descriptions are " \ "incompatible."))); } initStringInfo(&branchstr); initStringInfo(&chk_branchstr); initStringInfo(&chk_current_key); for (i = 0; i < proc; i++) { /* initialize branch for this pass */ appendStringInfo(&branchstr, "%s", branch); appendStringInfo(&chk_branchstr, "%s%s%s", branch_delim, branch, branch_delim); /* get the next sql result tuple */ spi_tuple = tuptable->vals[i]; /* get the current key and parent */ current_key = SPI_getvalue(spi_tuple, spi_tupdesc, 1); appendStringInfo(&chk_current_key, "%s%s%s", branch_delim, current_key, branch_delim); current_key_parent = pstrdup(SPI_getvalue(spi_tuple, spi_tupdesc, 2)); /* get the current level */ sprintf(current_level, "%d", level); /* check to see if this key is also an ancestor */ if (strstr(chk_branchstr.data, chk_current_key.data)) elog(ERROR, "infinite recursion detected"); /* OK, extend the branch */ appendStringInfo(&branchstr, "%s%s", branch_delim, current_key); current_branch = branchstr.data; /* build a tuple */ values[0] = pstrdup(current_key); values[1] = current_key_parent; values[2] = current_level; if (show_branch) 
values[3] = current_branch; if (show_serial) { sprintf(serial_str, "%d", (*serial)++); if (show_branch) values[4] = serial_str; else values[3] = serial_str; } tuple = BuildTupleFromCStrings(attinmeta, values); xpfree(current_key); xpfree(current_key_parent); /* store the tuple for later use */ tuplestore_puttuple(tupstore, tuple); heap_freetuple(tuple); /* recurse using current_key_parent as the new start_with */ tupstore = build_tuplestore_recursively(key_fld, parent_key_fld, relname, orderby_fld, branch_delim, values[0], current_branch, level + 1, serial, max_depth, show_branch, show_serial, per_query_ctx, attinmeta, tupstore); /* reset branch for next pass */ resetStringInfo(&branchstr); resetStringInfo(&chk_branchstr); resetStringInfo(&chk_current_key); } xpfree(branchstr.data); xpfree(chk_branchstr.data); xpfree(chk_current_key.data); } return tupstore; }
/*
 * checkSharedDependencies
 *
 * Check whether there are shared dependency entries for a given shared
 * object; return true if so.
 *
 * In addition, return a string containing a newline-separated list of object
 * descriptions that depend on the shared object, or NULL if none is found.
 * We actually return two such strings; the "detail" result is suitable for
 * returning to the client as an errdetail() string, and is limited in size.
 * The "detail_log" string is potentially much longer, and should be emitted
 * to the server log only.
 *
 * We can find three different kinds of dependencies: dependencies on objects
 * of the current database; dependencies on shared objects; and dependencies
 * on objects local to other databases.  We can (and do) provide descriptions
 * of the two former kinds of objects, but we can't do that for "remote"
 * objects, so we just provide a count of them.
 *
 * If we find a SHARED_DEPENDENCY_PIN entry, we can error out early.
 */
bool
checkSharedDependencies(Oid classId, Oid objectId,
                        char **detail_msg, char **detail_log_msg)
{
    Relation    sdepRel;
    ScanKeyData key[2];
    SysScanDesc scan;
    HeapTuple   tup;
    int         numReportedDeps = 0;
    int         numNotReportedDeps = 0;
    int         numNotReportedDbs = 0;    /* counts remote-database deps; summarized after the scan */
    List       *remDeps = NIL;            /* list of remoteDep, one per other database */
    ListCell   *cell;
    ObjectAddress object;
    StringInfoData descs;                 /* size-limited client-facing detail */
    StringInfoData alldescs;              /* unlimited server-log detail */

    /*
     * We limit the number of dependencies reported to the client to
     * MAX_REPORTED_DEPS, since client software may not deal well with
     * enormous error strings.  The server log always gets a full report.
     */
#define MAX_REPORTED_DEPS 100

    initStringInfo(&descs);
    initStringInfo(&alldescs);

    sdepRel = heap_open(SharedDependRelationId, AccessShareLock);

    /* Scan pg_shdepend for rows referencing (classId, objectId) */
    ScanKeyInit(&key[0],
                Anum_pg_shdepend_refclassid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(classId));
    ScanKeyInit(&key[1],
                Anum_pg_shdepend_refobjid,
                BTEqualStrategyNumber, F_OIDEQ,
                ObjectIdGetDatum(objectId));

    scan = systable_beginscan(sdepRel, SharedDependReferenceIndexId, true,
                              NULL, 2, key);

    while (HeapTupleIsValid(tup = systable_getnext(scan)))
    {
        Form_pg_shdepend sdepForm = (Form_pg_shdepend) GETSTRUCT(tup);

        /* This case can be dispatched quickly */
        if (sdepForm->deptype == SHARED_DEPENDENCY_PIN)
        {
            /* Pinned objects may never be dropped; error out immediately */
            object.classId = classId;
            object.objectId = objectId;
            object.objectSubId = 0;
            ereport(ERROR,
                    (errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST),
                     errmsg("cannot drop %s because it is required by the database system",
                            getObjectDescription(&object))));
        }

        object.classId = sdepForm->classid;
        object.objectId = sdepForm->objid;
        object.objectSubId = sdepForm->objsubid;

        /*
         * If it's a dependency local to this database or it's a shared
         * object, describe it.
         *
         * If it's a remote dependency, keep track of it so we can report the
         * number of them later.
         */
        if (sdepForm->dbid == MyDatabaseId)
        {
            /* Local object: describe for the client up to the cap */
            if (numReportedDeps < MAX_REPORTED_DEPS)
            {
                numReportedDeps++;
                storeObjectDescription(&descs, LOCAL_OBJECT, &object,
                                       sdepForm->deptype, 0);
            }
            else
                numNotReportedDeps++;
            /* ... but the server log always gets the full description */
            storeObjectDescription(&alldescs, LOCAL_OBJECT, &object,
                                   sdepForm->deptype, 0);
        }
        else if (sdepForm->dbid == InvalidOid)
        {
            /* Shared object (dbid = 0): same treatment, different label */
            if (numReportedDeps < MAX_REPORTED_DEPS)
            {
                numReportedDeps++;
                storeObjectDescription(&descs, SHARED_OBJECT, &object,
                                       sdepForm->deptype, 0);
            }
            else
                numNotReportedDeps++;
            storeObjectDescription(&alldescs, SHARED_OBJECT, &object,
                                   sdepForm->deptype, 0);
        }
        else
        {
            /* It's not local nor shared, so it must be remote. */
            remoteDep  *dep;
            bool        stored = false;

            /*
             * XXX this info is kept on a simple List.  Maybe it's not good
             * for performance, but using a hash table seems needlessly
             * complex.  The expected number of databases is not high anyway,
             * I suppose.
             */
            foreach(cell, remDeps)
            {
                dep = lfirst(cell);
                if (dep->dbOid == sdepForm->dbid)
                {
                    /* already seen this database: just bump its count */
                    dep->count++;
                    stored = true;
                    break;
                }
            }
            if (!stored)
            {
                /* first dependency seen for this database */
                dep = (remoteDep *) palloc(sizeof(remoteDep));
                dep->dbOid = sdepForm->dbid;
                dep->count = 1;
                remDeps = lappend(remDeps, dep);
            }
        }
    }
/*
 * Extract all item values from a BRIN index page
 *
 * Usage: SELECT * FROM brin_page_items(get_raw_page('idx', 1), 'idx'::regclass);
 *
 * Returns one row per (range tuple, indexed attribute) pair found on the
 * given regular BRIN page: item offset, block number, attribute number,
 * allnulls/hasnulls/placeholder flags, and a textual rendering of the
 * stored summary values.  Unused line pointers produce a row with only the
 * offset populated.  Superuser-only, materialized set-returning function.
 */
Datum
brin_page_items(PG_FUNCTION_ARGS)
{
    bytea      *raw_page = PG_GETARG_BYTEA_P(0);
    Oid         indexRelid = PG_GETARG_OID(1);
    ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
    TupleDesc   tupdesc;
    MemoryContext oldcontext;
    Tuplestorestate *tupstore;
    Relation    indexRel;
    brin_column_state **columns;    /* per-attribute output-function state */
    BrinDesc   *bdesc;
    BrinMemTuple *dtup;             /* currently-decoded range tuple, or NULL */
    Page        page;
    OffsetNumber offset;
    AttrNumber  attno;
    bool        unusedItem;

    if (!superuser())
        ereport(ERROR,
                (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
                 (errmsg("must be superuser to use raw page functions"))));

    /* check to see if caller supports us returning a tuplestore */
    if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("set-valued function called in context that cannot accept a set")));
    if (!(rsinfo->allowedModes & SFRM_Materialize) ||
        rsinfo->expectedDesc == NULL)
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("materialize mode required, but it is not allowed in this context")));

    /* Build a tuple descriptor for our result type */
    if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
        elog(ERROR, "return type must be a row type");

    /* Build tuplestore to hold the result rows */
    oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);

    tupstore = tuplestore_begin_heap(true, false, work_mem);
    rsinfo->returnMode = SFRM_Materialize;
    rsinfo->setResult = tupstore;
    rsinfo->setDesc = tupdesc;

    MemoryContextSwitchTo(oldcontext);

    indexRel = index_open(indexRelid, AccessShareLock);
    bdesc = brin_build_desc(indexRel);

    /* minimally verify the page we got */
    page = verify_brin_page(raw_page, BRIN_PAGETYPE_REGULAR, "regular");

    /*
     * Initialize output functions for all indexed datatypes; simplifies
     * calling them later.
     */
    columns = palloc(sizeof(brin_column_state *) * RelationGetDescr(indexRel)->natts);
    for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++)
    {
        Oid         output;
        bool        isVarlena;
        BrinOpcInfo *opcinfo;
        int         i;
        brin_column_state *column;

        opcinfo = bdesc->bd_info[attno - 1];
        /* one FmgrInfo per stored datum of this attribute's opclass */
        column = palloc(offsetof(brin_column_state, outputFn) +
                        sizeof(FmgrInfo) * opcinfo->oi_nstored);

        column->nstored = opcinfo->oi_nstored;
        for (i = 0; i < opcinfo->oi_nstored; i++)
        {
            getTypeOutputInfo(opcinfo->oi_typcache[i]->type_id, &output, &isVarlena);
            fmgr_info(output, &column->outputFn[i]);
        }

        columns[attno - 1] = column;
    }

    offset = FirstOffsetNumber;
    unusedItem = false;
    dtup = NULL;
    for (;;)
    {
        Datum       values[7];
        bool        nulls[7];

        /*
         * This loop is called once for every attribute of every tuple in the
         * page.  At the start of a tuple, we get a NULL dtup; that's our
         * signal for obtaining and decoding the next one.  If that's not the
         * case, we output the next attribute.
         *
         * NOTE(review): the end-of-page check sits at the *bottom* of the
         * loop, so the first offset is examined before any bounds check;
         * presumably verify_brin_page guarantees at least one line pointer
         * on a regular page -- confirm before reusing this pattern.
         */
        if (dtup == NULL)
        {
            ItemId      itemId;

            /* verify item status: if there's no data, we can't decode */
            itemId = PageGetItemId(page, offset);
            if (ItemIdIsUsed(itemId))
            {
                dtup = brin_deform_tuple(bdesc,
                                         (BrinTuple *) PageGetItem(page, itemId));
                attno = 1;
                unusedItem = false;
            }
            else
                unusedItem = true;
        }
        else
            attno++;

        MemSet(nulls, 0, sizeof(nulls));

        if (unusedItem)
        {
            /* unused line pointer: emit a row with only the offset */
            values[0] = UInt16GetDatum(offset);
            nulls[1] = true;
            nulls[2] = true;
            nulls[3] = true;
            nulls[4] = true;
            nulls[5] = true;
            nulls[6] = true;
        }
        else
        {
            int         att = attno - 1;

            values[0] = UInt16GetDatum(offset);
            values[1] = UInt32GetDatum(dtup->bt_blkno);
            values[2] = UInt16GetDatum(attno);
            values[3] = BoolGetDatum(dtup->bt_columns[att].bv_allnulls);
            values[4] = BoolGetDatum(dtup->bt_columns[att].bv_hasnulls);
            values[5] = BoolGetDatum(dtup->bt_placeholder);
            if (!dtup->bt_columns[att].bv_allnulls)
            {
                BrinValues *bvalues = &dtup->bt_columns[att];
                StringInfoData s;
                bool        first;
                int         i;

                /* render the stored summary datums as "{a .. b}" text */
                initStringInfo(&s);
                appendStringInfoChar(&s, '{');

                first = true;
                for (i = 0; i < columns[att]->nstored; i++)
                {
                    char       *val;

                    if (!first)
                        appendStringInfoString(&s, " .. ");
                    first = false;
                    val = OutputFunctionCall(&columns[att]->outputFn[i],
                                             bvalues->bv_values[i]);
                    appendStringInfoString(&s, val);
                    pfree(val);
                }
                appendStringInfoChar(&s, '}');

                values[6] = CStringGetTextDatum(s.data);
                pfree(s.data);
            }
            else
            {
                nulls[6] = true;
            }
        }

        tuplestore_putvalues(tupstore, tupdesc, values, nulls);

        /*
         * If the item was unused, jump straight to the next one; otherwise,
         * the only cleanup needed here is to set our signal to go to the next
         * tuple in the following iteration, by freeing the current one.
         */
        if (unusedItem)
            offset = OffsetNumberNext(offset);
        else if (attno >= bdesc->bd_tupdesc->natts)
        {
            /* emitted the last attribute of this tuple; advance to next item */
            pfree(dtup);
            dtup = NULL;
            offset = OffsetNumberNext(offset);
        }

        /*
         * If we're beyond the end of the page, we're done.
         */
        if (offset > PageGetMaxOffsetNumber(page))
            break;
    }

    /* clean up and return the tuplestore */
    brin_free_desc(bdesc);
    tuplestore_donestoring(tupstore);
    index_close(indexRel, AccessShareLock);

    return (Datum) 0;
}
/**
 * @brief Read the next tuple from parser.
 *
 * Repeatedly asks the parser for a record, running it through the checker,
 * until a valid tuple is produced or end-of-input is reached.  Parse errors
 * in a specific field (parsing_field >= 0) are absorbed: the error is
 * logged, the offending record is dumped to the "parse bad" file, and
 * reading continues -- unless max_parse_errors is exceeded, which stops the
 * load.  Errors with parsing_field < 0, and query aborts (admin shutdown,
 * cancel), are re-thrown to the caller.
 *
 * @param rd [in/out] reader
 * @return next valid tuple, or NULL at end of input
 */
HeapTuple
ReaderNext(Reader *rd)
{
    HeapTuple   tuple;
    MemoryContext ccxt;     /* caller's context; error recovery returns here */
    bool        eof;
    Parser     *parser = rd->parser;

    ccxt = CurrentMemoryContext;

    eof = false;
    do
    {
        tuple = NULL;
        parser->parsing_field = -1;

        PG_TRY();
        {
            tuple = ParserRead(parser, &rd->checker);
            if (tuple == NULL)
                eof = true;
            else
            {
                tuple = CheckerTuple(&rd->checker, tuple, &parser->parsing_field);
                CheckerConstraints(&rd->checker, tuple, &parser->parsing_field);
            }
        }
        PG_CATCH();
        {
            ErrorData  *errdata;
            MemoryContext ecxt;
            char       *message;
            StringInfoData buf;

            /* Only field-level parse errors are absorbable */
            if (parser->parsing_field < 0)
                PG_RE_THROW();  /* should not ignore */

            /* switch back to the caller's context before copying error data */
            ecxt = MemoryContextSwitchTo(ccxt);
            errdata = CopyErrorData();

            /* We cannot ignore query aborts. */
            switch (errdata->sqlerrcode)
            {
                case ERRCODE_ADMIN_SHUTDOWN:
                case ERRCODE_QUERY_CANCELED:
                    MemoryContextSwitchTo(ecxt);
                    PG_RE_THROW();
                    break;
            }

            /* Absorb parse errors. */
            rd->parse_errors++;
            if (errdata->message)
                message = pstrdup(errdata->message);
            else
                message = "<no error message>";
            FlushErrorState();
            FreeErrorData(errdata);

            initStringInfo(&buf);
            appendStringInfo(&buf, "Parse error Record " int64_FMT
                             ": Input Record " int64_FMT ": Rejected",
                             rd->parse_errors, parser->count);
            if (parser->parsing_field > 0)
                appendStringInfo(&buf, " - column %d", parser->parsing_field);
            appendStringInfo(&buf, ". %s\n", message);
            /*
             * NOTE(review): buf.data is passed as the format argument; if the
             * absorbed error message can contain '%', this is a format-string
             * hazard -- confirm LoggerLog's contract or log it as "%s".
             */
            LoggerLog(WARNING, buf.data);

            /* Terminate if PARSE_ERRORS has been reached. */
            if (rd->parse_errors > rd->max_parse_errors)
            {
                eof = true;
                LoggerLog(WARNING,
                          "Maximum parse error count exceeded - " int64_FMT
                          " error(s) found in input file\n",
                          rd->parse_errors);
            }

            /* output parse bad file.  Opened lazily on the first bad record. */
            if (rd->parse_fp == NULL)
                if ((rd->parse_fp = AllocateFile(rd->parse_badfile, "w")) == NULL)
                    ereport(ERROR,
                            (errcode_for_file_access(),
                             errmsg("could not open parse bad file \"%s\": %m",
                                    rd->parse_badfile)));

            ParserDumpRecord(parser, rd->parse_fp, rd->parse_badfile);

            MemoryContextReset(ccxt);

            /*
             * Without the line below, the regression tests show a different
             * result in debug-build mode: the half-built tuple must not leak
             * out of the error path.
             */
            tuple = NULL;
        }
        PG_END_TRY();
    } while (!eof && !tuple);

    BULKLOAD_PROFILE(&prof_reader_parser);
    return tuple;
}