/*
 * EndCommand
 *
 * Finish off the destination once a command has completed.  Only the
 * remote destinations produce output: they emit a completion message
 * carrying commandTag.  All other destinations need no work.
 */
void
EndCommand(const char *commandTag, CommandDest dest)
{
	StringInfoData buf;

	/*
	 * On the dispatcher, give the DTM a chance to do its phase 2 retry work
	 * just before the successful reply goes out.
	 */
	if (Gp_role == GP_ROLE_DISPATCH)
		doDtxPhase2Retry();

	switch (dest)
	{
		case DestRemote:
		case DestRemoteExecute:
			/* Anything that is not an executor sends a plain 'C' message. */
			if (Gp_role != GP_ROLE_EXECUTE)
			{
				pq_puttextmessage('C', commandTag);
				break;
			}

			/* Executors always lead with their QE details message. */
			sendQEDetails();

			if (Gp_is_writer)
			{
				/*
				 * Writers reply with 'g': the tag plus extra information
				 * indicating whether the transaction made updates.
				 */
				pq_beginmessage(&buf, 'g');
				pq_sendstring(&buf, commandTag);
				AddQEWriterTransactionInfo(&buf);
			}
			else
			{
				pq_beginmessage(&buf, 'C');
				pq_sendstring(&buf, commandTag);
			}
			pq_endmessage(&buf);
			break;

		case DestNone:
		case DestDebug:
		case DestSPI:
		case DestTuplestore:
		case DestIntoRel:
		case DestCopyOut:
			/* nothing to clean up */
			break;
	}
}
/* * IDENTIFY_SYSTEM */ static void IdentifySystem(void) { StringInfoData buf; char sysid[32]; char tli[11]; /* * Reply with a result set with one row, two columns. First col is system * ID, and second is timeline ID */ snprintf(sysid, sizeof(sysid), UINT64_FORMAT, GetSystemIdentifier()); snprintf(tli, sizeof(tli), "%u", ThisTimeLineID); /* Send a RowDescription message */ pq_beginmessage(&buf, 'T'); pq_sendint(&buf, 2, 2); /* 2 fields */ /* first field */ pq_sendstring(&buf, "systemid"); /* col name */ pq_sendint(&buf, 0, 4); /* table oid */ pq_sendint(&buf, 0, 2); /* attnum */ pq_sendint(&buf, TEXTOID, 4); /* type oid */ pq_sendint(&buf, -1, 2); /* typlen */ pq_sendint(&buf, 0, 4); /* typmod */ pq_sendint(&buf, 0, 2); /* format code */ /* second field */ pq_sendstring(&buf, "timeline"); /* col name */ pq_sendint(&buf, 0, 4); /* table oid */ pq_sendint(&buf, 0, 2); /* attnum */ pq_sendint(&buf, INT4OID, 4); /* type oid */ pq_sendint(&buf, 4, 2); /* typlen */ pq_sendint(&buf, 0, 4); /* typmod */ pq_sendint(&buf, 0, 2); /* format code */ pq_endmessage(&buf); /* Send a DataRow message */ pq_beginmessage(&buf, 'D'); pq_sendint(&buf, 2, 2); /* # of columns */ pq_sendint(&buf, strlen(sysid), 4); /* col1 len */ pq_sendbytes(&buf, (char *) &sysid, strlen(sysid)); pq_sendint(&buf, strlen(tli), 4); /* col2 len */ pq_sendbytes(&buf, (char *) tli, strlen(tli)); pq_endmessage(&buf); /* Send CommandComplete and ReadyForQuery messages */ EndCommand("SELECT", DestRemote); ReadyForQuery(DestRemote); /* ReadyForQuery did pq_flush for us */ }
/*
 * EndCommand
 *
 * Finish off the destination once a command has completed.  Remote
 * destinations get a 'C' message carrying commandTag; the remaining
 * destinations need no work.
 */
void
EndCommand(const char *commandTag, CommandDest dest)
{
	StringInfoData buf;

	switch (dest)
	{
		case DestRemote:
		case DestRemoteExecute:
			if (Gp_role != GP_ROLE_EXECUTE)
			{
				pq_puttextmessage('C', commandTag);
				break;
			}

			/* Executors send their QE details ahead of the completion tag. */
			sendQEDetails();
			pq_beginmessage(&buf, 'C');
			pq_sendstring(&buf, commandTag);
			pq_endmessage(&buf);
			break;

		case DestNone:
		case DestDebug:
		case DestSPI:
		case DestTuplestore:
		case DestIntoRel:
		case DestCopyOut:
			/* nothing to clean up */
			break;
	}
}
/*
 * SendResultDescriptionMessage
 *
 * Emit a 'T' (tuple descriptor) message describing natts columns taken
 * from the attrs array.  Which per-column fields are included depends on
 * the major version of the frontend protocol.
 */
static void
SendResultDescriptionMessage(AttributeDefinition *attrs, int natts)
{
	StringInfoData buf;
	int			proto = PG_PROTOCOL_MAJOR(FrontendProtocol);
	int			i;

	pq_beginmessage(&buf, 'T');
	pq_sendint(&buf, natts, 2);		/* number of attributes */

	for (i = 0; i < natts; ++i)
	{
		AttributeDefinition *att = &attrs[i];

		pq_sendstring(&buf, att->name);

		/* Protocol 3.0+: column ID info, always sent as zeroes here */
		if (proto >= 3)
		{
			pq_sendint(&buf, 0, 4);
			pq_sendint(&buf, 0, 2);
		}

		pq_sendint(&buf, att->typid, sizeof(Oid));
		pq_sendint(&buf, att->typlen, sizeof(int16));

		/* Protocol 2.0+: type modifier */
		if (proto >= 2)
			pq_sendint(&buf, att->typmod, sizeof(int32));

		/* Protocol 3.0+: format code, always zero here */
		if (proto >= 3)
			pq_sendint(&buf, 0, 2);
	}

	pq_endmessage(&buf);
}
/*
 * CreateRemoteSource
 *
 * Allocate a RemoteSource and announce the start of a copy to the frontend,
 * using the message form appropriate to the frontend protocol version.
 * The path argument is not used by this function.  The output is flushed
 * before returning so the frontend knows it may start sending.
 */
static Source *
CreateRemoteSource(const char *path, TupleDesc desc)
{
	RemoteSource *self = (RemoteSource *) palloc0(sizeof(RemoteSource));
	int			proto = PG_PROTOCOL_MAJOR(FrontendProtocol);

	self->base.close = (SourceCloseProc) RemoteSourceClose;

	if (proto >= 3)
	{
		/* Protocol 3.0+: 'G' message with overall and per-column formats */
		StringInfoData buf;
		int16		format;
		int			nattrs = 0;
		int			i;

		self->base.read = (SourceReadProc) RemoteSourceRead;

		/* Only attributes that are not dropped are counted. */
		for (i = 0; i < desc->natts; i++)
		{
			if (!desc->attrs[i]->attisdropped)
				nattrs++;
		}

		format = (IsBinaryCopy() ? 1 : 0);

		pq_beginmessage(&buf, 'G');
		pq_sendbyte(&buf, format);		/* overall format */
		pq_sendint(&buf, nattrs, 2);
		for (i = 0; i < nattrs; i++)
			pq_sendint(&buf, format, 2);	/* per-column format */
		pq_endmessage(&buf);

		self->buffer = makeStringInfo();
	}
	else
	{
		/*
		 * Older protocols share everything except the message type: 'G' for
		 * protocol 2, 'D' for anything earlier.  Binary copy is rejected.
		 */
		self->base.read = (SourceReadProc) RemoteSourceReadOld;

		if (IsBinaryCopy())
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("COPY BINARY is not supported to stdout or from stdin")));

		pq_putemptymessage(proto >= 2 ? 'G' : 'D');
	}

	/* The flush is mandatory: the frontend must learn that it can send. */
	pq_flush();

	return (Source *) self;
}
/*
 * NotifyMyFrontEnd
 *
 * Deliver a NOTIFY for relname to our own frontend.  When output is not
 * going to a remote client, the notification is reported through elog
 * instead.
 */
static void
NotifyMyFrontEnd(char *relname, int32 listenerPID)
{
	StringInfoData buf;

	if (whereToSendOutput != DestRemote)
	{
		elog(INFO, "NOTIFY for %s", relname);
		return;
	}

	pq_beginmessage(&buf, 'A');
	pq_sendint(&buf, listenerPID, sizeof(int32));
	pq_sendstring(&buf, relname);

	/* Protocol 3.0+ has a parameter string; it is sent empty for now. */
	if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
		pq_sendstring(&buf, "");

	pq_endmessage(&buf);

	/*
	 * Deliberately no pq_flush() here.  A self-notify gets flushed at the end
	 * of the transaction, and for incoming notifies ProcessIncomingNotify
	 * flushes once it has found all of them.
	 */
}
/*
 * ReadyForQuery
 *
 * Tell the destination that we are ready for a new query.
 *
 * The 'Z' message exists in protocol 2.0 and later so the frontend can tell
 * when a query string has been fully processed; from 3.0 on it also carries
 * a transaction state indicator.
 *
 * Flushing the output here lets most other places skip the flush, which
 * reduces the number of separate packets sent.
 */
void
ReadyForQuery(CommandDest dest)
{
	switch (dest)
	{
		case DestRemote:
		case DestRemoteExecute:
			{
				int			proto = PG_PROTOCOL_MAJOR(FrontendProtocol);

				if (proto >= 3)
				{
					StringInfoData buf;

					pq_beginmessage(&buf, 'Z');
					pq_sendbyte(&buf, TransactionBlockStatusCode());
					pq_endmessage(&buf);
				}
				else if (proto >= 2)
					pq_putemptymessage('Z');

				/* End of cycle: flush no matter which protocol is in use. */
				pq_flush();
			}
			break;

		case DestNone:
		case DestDebug:
		case DestSPI:
		case DestTuplestore:
		case DestIntoRel:
		case DestCopyOut:
			/* nothing to send */
			break;
	}
}
/* * Execute the CREATE BARRIER command. Write a BARRIER WAL record and flush the * WAL buffers to disk before returning to the caller. Writing the WAL record * does not guarantee successful completion of the barrier command. */ void ProcessCreateBarrierExecute(const char *id) { StringInfoData buf; if (!IsConnFromCoord()) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("The CREATE BARRIER EXECUTE message is expected to " "arrive from a Coordinator"))); { XLogRecData rdata[1]; XLogRecPtr recptr; rdata[0].data = (char *) id; rdata[0].len = strlen(id) + 1; rdata[0].buffer = InvalidBuffer; rdata[0].next = NULL; recptr = XLogInsert(RM_BARRIER_ID, XLOG_BARRIER_CREATE, rdata); XLogFlush(recptr); } pq_beginmessage(&buf, 'b'); pq_sendstring(&buf, id); pq_endmessage(&buf); pq_flush(); }
/*
 * SendRowDescriptionMessage
 *
 * Send a RowDescription ('T') message to the frontend.
 *
 * typeinfo has typically been built by ExecTypeFromTL() or something
 * similar and does not carry a full set of fields.  targetlist is NIL when
 * a utility function without a plan is being executed; otherwise it is a
 * Query node's targetlist, and resjunk entries in it must be skipped here.
 * formats may be NULL (Describe on a prepared statement), in which case
 * zero is sent for every format code.
 */
void
SendRowDescriptionMessage(TupleDesc typeinfo, List *targetlist, int16 *formats)
{
	Form_pg_attribute *attrs = typeinfo->attrs;
	int			natts = typeinfo->natts;
	int			proto = PG_PROTOCOL_MAJOR(FrontendProtocol);
	ListCell   *tlist_item = list_head(targetlist);
	StringInfoData buf;
	int			i;

	pq_beginmessage(&buf, 'T');
	pq_sendint(&buf, natts, 2);		/* number of attributes */

	for (i = 0; i < natts; ++i)
	{
		Form_pg_attribute att = attrs[i];
		Oid			atttypid = att->atttypid;
		int32		atttypmod = att->atttypmod;

		pq_sendstring(&buf, NameStr(att->attname));

		/* Protocol 3.0+: originating table and column of this attribute */
		if (proto >= 3)
		{
			TargetEntry *tle = NULL;

			/* Walk forward to the next targetlist entry that is not resjunk. */
			while (tlist_item != NULL)
			{
				TargetEntry *candidate = (TargetEntry *) lfirst(tlist_item);

				if (!candidate->resjunk)
				{
					tle = candidate;
					break;
				}
				tlist_item = lnext(tlist_item);
			}

			if (tle != NULL)
			{
				pq_sendint(&buf, tle->resorigtbl, 4);
				pq_sendint(&buf, tle->resorigcol, 2);
				/* this entry is consumed; the next column starts after it */
				tlist_item = lnext(tlist_item);
			}
			else
			{
				/* nothing known about the origin: send zeroes */
				pq_sendint(&buf, 0, 4);
				pq_sendint(&buf, 0, 2);
			}
		}

		/* For a domain, report the base type and its typmod instead. */
		atttypid = getBaseTypeAndTypmod(atttypid, &atttypmod);
		pq_sendint(&buf, (int) atttypid, sizeof(atttypid));
		pq_sendint(&buf, att->attlen, sizeof(att->attlen));

		/* Protocol 2.0+: type modifier */
		if (proto >= 2)
			pq_sendint(&buf, atttypmod, sizeof(atttypmod));

		/* Protocol 3.0+: format code, zero when no formats array is given */
		if (proto >= 3)
			pq_sendint(&buf, formats ? formats[i] : 0, 2);
	}

	pq_endmessage(&buf);
}
/*
 * SendNumRowsRejected
 *
 * Used by a QE to report back to the QD how many rows were rejected during
 * the last data load in SREH mode.  The count travels in a 'j' message as a
 * 4-byte integer.
 *
 * Calling this outside of the execute role is a FATAL error.
 */
void
SendNumRowsRejected(int numrejected)
{
	StringInfoData buf;

	if (Gp_role != GP_ROLE_EXECUTE)
		elog(FATAL, "SendNumRowsRejected: called outside of execute context.");

	/* 'j' is the message code for rejected records */
	pq_beginmessage(&buf, 'j');
	pq_sendint(&buf, numrejected, 4);
	pq_endmessage(&buf);
}
/*
 * putEndLocationReply
 *
 * Send an 's' message holding the two halves of *endLocation (xlogid, then
 * xrecoff), each as a 4-byte integer, and flush it out right away.
 */
static void
putEndLocationReply(XLogRecPtr *endLocation)
{
	StringInfoData buf;

	pq_beginmessage(&buf, 's');
	pq_sendint(&buf, endLocation->xlogid, 4);
	pq_sendint(&buf, endLocation->xrecoff, 4);
	pq_endmessage(&buf);

	pq_flush();
}
/*
 * sendQEDetails
 *
 * Send a gpdb libpq 'w' message made of the interconnect listener port,
 * the size of PG_VERSION_STR and then the version string bytes themselves.
 * The size is taken with sizeof, so it includes the terminating NUL.
 */
void
sendQEDetails(void)
{
	StringInfoData buf;

	pq_beginmessage(&buf, 'w');

	/* listener port */
	pq_sendint(&buf, (int32) ICListenerPort, sizeof(int32));

	/* length-prefixed version string */
	pq_sendint(&buf, sizeof(PG_VERSION_STR), sizeof(int32));
	pq_sendbytes(&buf, PG_VERSION_STR, sizeof(PG_VERSION_STR));

	pq_endmessage(&buf);
}
/*
 * sendQEDetails
 *
 * Send a gpdb libpq 'w' message made of the listener port, the value
 * returned by VmemTracker_GetMaxReservedVmemBytes() as a 64-bit integer,
 * the size of PG_VERSION_STR and then the version string bytes themselves.
 * The size is taken with sizeof, so it includes the terminating NUL.
 */
void
sendQEDetails(void)
{
	StringInfoData buf;

	pq_beginmessage(&buf, 'w');

	/* listener port */
	pq_sendint(&buf, (int32) Gp_listener_port, sizeof(int32));

	/* maximum reserved vmem, in bytes */
	pq_sendint64(&buf, VmemTracker_GetMaxReservedVmemBytes());

	/* length-prefixed version string */
	pq_sendint(&buf, sizeof(PG_VERSION_STR), sizeof(int32));
	pq_sendbytes(&buf, PG_VERSION_STR, sizeof(PG_VERSION_STR));

	pq_endmessage(&buf);
}
/*
 * START_REPLICATION
 *
 * Switch this backend into WAL streaming: register as a WAL sender, verify
 * wal_level, enter the catchup state, send CopyBothResponse and record the
 * requested start point.
 */
static void
StartReplication(StartReplicationCmd *cmd)
{
	StringInfoData buf;

	/*
	 * Tell the postmaster that we are streaming.  After we have declared
	 * ourselves a WAL sender, the postmaster lets us outlive the bgwriter and
	 * kills us last during shutdown, which gives us the chance to stream all
	 * remaining WAL, shutdown checkpoint included.  This cannot be undone,
	 * and no WAL records may be written by us from here on.
	 */
	MarkPostmasterChildWalSender();
	SendPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE);

	/*
	 * Make sure the WAL carries enough information for log shipping.
	 *
	 * Only the current wal_level is examined.  pg_xlog may still contain WAL
	 * written under 'minimal' even if the setting has since changed, so the
	 * check is not bulletproof; its purpose is merely a friendly error
	 * message hinting at the correct configuration.
	 */
	if (wal_level == WAL_LEVEL_MINIMAL)
		ereport(FATAL,
				(errcode(ERRCODE_CANNOT_CONNECT_NOW),
				 errmsg("standby connections not allowed because wal_level=minimal")));

	/*
	 * A standby starts out behind the primary.  Some applications, such as
	 * synchronous replication, need a distinct state for this initial catchup
	 * phase so that actions can be triggered when the streaming state changes
	 * later.  We may remain in this state for a long time, which is exactly
	 * why it has to be possible to monitor it.
	 */
	WalSndSetState(WALSNDSTATE_CATCHUP);

	/* CopyBothResponse: a zero byte and a zero 2-byte count, then flush */
	pq_beginmessage(&buf, 'W');
	pq_sendbyte(&buf, 0);
	pq_sendint(&buf, 0, 2);
	pq_endmessage(&buf);
	pq_flush();

	/* xlog records will be shipped starting from the received position. */
	sentPtr = cmd->startpoint;
}
/*
 * START_REPLICATION
 *
 * Switch this backend into WAL streaming: register as a WAL sender, verify
 * wal_level, send CopyBothResponse and record the requested start point.
 */
static void
StartReplication(StartReplicationCmd * cmd)
{
	StringInfoData buf;

	/*
	 * Tell the postmaster that we are streaming.  After we have declared
	 * ourselves a WAL sender, the postmaster lets us outlive the bgwriter and
	 * kills us last during shutdown, which gives us the chance to stream all
	 * remaining WAL, shutdown checkpoint included.  This cannot be undone,
	 * and no WAL records may be written by us from here on.
	 */
	MarkPostmasterChildWalSender();

	/*
	 * Make sure the WAL carries enough information for log shipping.
	 *
	 * Only the current wal_level is examined.  pg_xlog may still contain WAL
	 * written under 'minimal' even if the setting has since changed, so the
	 * check is not bulletproof; its purpose is merely a friendly error
	 * message hinting at the correct configuration.
	 */
	if (wal_level == WAL_LEVEL_MINIMAL)
		ereport(FATAL,
				(errcode(ERRCODE_CANNOT_CONNECT_NOW),
				 errmsg("standby connections not allowed because wal_level=minimal")));

	/* CopyBothResponse: a zero byte and a zero 2-byte count, then flush */
	pq_beginmessage(&buf, 'W');
	pq_sendbyte(&buf, 0);
	pq_sendint(&buf, 0, 2);
	pq_endmessage(&buf);
	pq_flush();

	/* xlog records will be shipped starting from the received position. */
	sentPtr = cmd->startpoint;
}
/*
 * EndCommand
 *
 * Finish off the destination once a command has completed.  Remote
 * destinations get a 'C' message carrying commandTag; the remaining
 * destinations need no work.
 */
void
EndCommand(const char *commandTag, CommandDest dest)
{
	StringInfoData buf;

	/*
	 * On the dispatcher, give the DTM a chance to do its phase 2 retry work
	 * just before the successful reply goes out.
	 */
	if (Gp_role == GP_ROLE_DISPATCH)
		doDtxPhase2Retry();

	switch (dest)
	{
		case DestRemote:
		case DestRemoteExecute:

			/*
			 * commandTag is assumed to be plain ASCII, so it is sent without
			 * any encoding conversion.
			 */
			if (Gp_role != GP_ROLE_EXECUTE)
			{
				/* raw message: the tag including its terminating NUL */
				pq_putmessage('C', commandTag, strlen(commandTag) + 1);
				break;
			}

			/* Executors send their QE details ahead of the completion tag. */
			sendQEDetails();
			pq_beginmessage(&buf, 'C');
			pq_send_ascii_string(&buf, commandTag);
			pq_endmessage(&buf);
			break;

		case DestNone:
		case DestDebug:
		case DestSPI:
		case DestTuplestore:
		case DestIntoRel:
		case DestCopyOut:
			/* nothing to clean up */
			break;
	}
}
/*
 * sendAuthRequest
 *
 * Send an authentication request ('R') packet to the frontend.  The packet
 * always starts with the request code; some request types append extra
 * data taken from the Port.
 */
static void
sendAuthRequest(Port *port, AuthRequest areq)
{
	StringInfoData buf;

	pq_beginmessage(&buf, 'R');
	pq_sendint(&buf, (int32) areq, sizeof(int32));

	/* Password requests carry a salt: 4 bytes for MD5, 2 for crypt. */
	if (areq == AUTH_REQ_MD5)
	{
		pq_sendbytes(&buf, port->md5Salt, 4);
	}
	else if (areq == AUTH_REQ_CRYPT)
	{
		pq_sendbytes(&buf, port->cryptSalt, 2);
	}
#if defined(ENABLE_GSS) || defined(ENABLE_SSPI)
	else if (areq == AUTH_REQ_GSS_CONT)
	{
		/*
		 * Continuation of a GSSAPI or SSPI negotiation: append the output
		 * token for the next step, provided there is one.
		 */
		if (port->gss->outbuf.length > 0)
		{
			elog(DEBUG4, "sending GSS token of length %u",
				 (unsigned int) port->gss->outbuf.length);

			pq_sendbytes(&buf, port->gss->outbuf.value,
						 port->gss->outbuf.length);
		}
	}
#endif

	pq_endmessage(&buf);

	/*
	 * AUTH_REQ_OK need not go out until we are ready for queries.  Every
	 * other request is flushed so that the client sees it.
	 */
	if (areq == AUTH_REQ_OK)
		return;

	pq_flush();
}
/*
 * ProcessGTMBeginBackup
 *
 * Handle a "begin backup" request.  The thr_lock of every GTM thread other
 * than the calling one is acquired in write mode, the calling thread is
 * marked GTM_THREAD_BACKUP, and an 'S' message carrying BEGIN_BACKUP_RESULT
 * is sent on myport and flushed.
 *
 * The locks taken here are still held on return; ProcessGTMEndBackup is the
 * function in this file that releases the same set.
 *
 * myport:  connection the reply is written to
 * message: incoming request; it must contain no further data
 */
void
ProcessGTMBeginBackup(Port *myport, StringInfo message)
{
	int			ii;
	GTM_ThreadInfo *my_threadinfo;
	StringInfoData buf;

	pq_getmsgend(message);

	my_threadinfo = GetMyThreadInfo;

	/* Lock every populated thread slot except our own. */
	for (ii = 0; ii < GTMThreads->gt_array_size; ii++)
	{
		if (GTMThreads->gt_threads[ii] &&
			GTMThreads->gt_threads[ii] != my_threadinfo)
			GTM_RWLockAcquire(&GTMThreads->gt_threads[ii]->thr_lock,
							  GTM_LOCKMODE_WRITE);
	}

	my_threadinfo->thr_status = GTM_THREAD_BACKUP;

	/* Acknowledge and push the reply out immediately. */
	pq_beginmessage(&buf, 'S');
	pq_sendint(&buf, BEGIN_BACKUP_RESULT, 4);
	pq_endmessage(myport, &buf);
	pq_flush(myport);
}
/*
 * ProcessGTMEndBackup
 *
 * Handle an "end backup" request.  The thr_lock of every GTM thread other
 * than the calling one is released, the calling thread is marked
 * GTM_THREAD_RUNNING again, and an 'S' message carrying END_BACKUP_RESULT
 * is sent on myport and flushed.
 *
 * This releases the same set of locks that ProcessGTMBeginBackup acquires.
 *
 * myport:  connection the reply is written to
 * message: incoming request; it must contain no further data
 */
void
ProcessGTMEndBackup(Port *myport, StringInfo message)
{
	int			ii;
	GTM_ThreadInfo *my_threadinfo;
	StringInfoData buf;

	pq_getmsgend(message);

	my_threadinfo = GetMyThreadInfo;

	/* Unlock every populated thread slot except our own. */
	for (ii = 0; ii < GTMThreads->gt_array_size; ii++)
	{
		if (GTMThreads->gt_threads[ii] &&
			GTMThreads->gt_threads[ii] != my_threadinfo)
			GTM_RWLockRelease(&GTMThreads->gt_threads[ii]->thr_lock);
	}

	my_threadinfo->thr_status = GTM_THREAD_RUNNING;

	/* Acknowledge and push the reply out immediately. */
	pq_beginmessage(&buf, 'S');
	pq_sendint(&buf, END_BACKUP_RESULT, 4);
	pq_endmessage(myport, &buf);
	pq_flush(myport);
}
static void send_buffer() { if (buffer_len > 0) { StringInfoData msgbuf; char *cursor = buffer; while (--buffer_len > 0) { if (*cursor == '\0') *cursor = '\n'; cursor++; } if (*cursor != '\0') ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("internal error"), errdetail("Wrong message format detected"))); pq_beginmessage(&msgbuf, 'N'); if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3) { pq_sendbyte(&msgbuf, PG_DIAG_MESSAGE_PRIMARY); pq_sendstring(&msgbuf, buffer); pq_sendbyte(&msgbuf, '\0'); } else { *cursor++ = '\n'; *cursor = '\0'; pq_sendstring(&msgbuf, buffer); } pq_endmessage(&msgbuf); pq_flush(); } }
/* * Mark the completion of an on-going barrier. We must have remembered the * barrier ID when we received the CREATE BARRIER PREPARE command */ void ProcessCreateBarrierEnd(const char *id) { StringInfoData buf; if (!IS_PGXC_COORDINATOR || !IsConnFromCoord()) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("The CREATE BARRIER END message is expected to " "arrive at a Coordinator from another Coordinator"))); LWLockRelease(BarrierLock); pq_beginmessage(&buf, 'b'); pq_sendstring(&buf, id); pq_endmessage(&buf); pq_flush(); /* * TODO Stop the timer */ }
/* * Prepare ourselves for an incoming BARRIER. We must disable all new 2PC * commits and let the ongoing commits to finish. We then remember the * barrier id (so that it can be matched with the final END message) and * tell the driving Coordinator to proceed with the next step. * * A simple way to implement this is to grab a lock in an exclusive mode * while all other backend starting a 2PC will grab the lock in shared * mode. So as long as we hold the exclusive lock, no other backend start a * new 2PC and there can not be any 2PC in-progress. This technique would * rely on assumption that an exclusive lock requester is not starved by * share lock requesters. * * Note: To ensure that the 2PC are not blocked for a long time, we should * set a timeout. The lock should be release after the timeout and the * barrier should be canceled. */ void ProcessCreateBarrierPrepare(const char *id) { StringInfoData buf; if (!IS_PGXC_COORDINATOR || !IsConnFromCoord()) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("The CREATE BARRIER PREPARE message is expected to " "arrive at a Coordinator from another Coordinator"))); LWLockAcquire(BarrierLock, LW_EXCLUSIVE); pq_beginmessage(&buf, 'b'); pq_sendstring(&buf, id); pq_endmessage(&buf); pq_flush(); /* * TODO Start a timer to terminate the pending barrier after a specified * timeout */ }
/*
 * sendAuthRequest
 *
 * Send an authentication request ('R') packet to the frontend.  The packet
 * always starts with the request code; password requests additionally
 * carry a salt taken from the Port.
 */
static void
sendAuthRequest(Port *port, AuthRequest areq)
{
	StringInfoData buf;

	pq_beginmessage(&buf, 'R');
	pq_sendint(&buf, (int32) areq, sizeof(int32));

	/* Salt for encrypted passwords: 4 bytes for MD5, 2 for crypt. */
	if (areq == AUTH_REQ_MD5)
	{
		pq_sendbytes(&buf, port->md5Salt, 4);
	}
	else if (areq == AUTH_REQ_CRYPT)
	{
		pq_sendbytes(&buf, port->cryptSalt, 2);
	}

	pq_endmessage(&buf);

	/*
	 * AUTH_REQ_OK need not go out until we are ready for queries.  Every
	 * other request is flushed so that the client sees it.
	 */
	if (areq == AUTH_REQ_OK)
		return;

	pq_flush();
}
/* * Actually do a base backup for the specified tablespaces. * * This is split out mainly to avoid complaints about "variable might be * clobbered by longjmp" from stupider versions of gcc. */ static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir) { XLogRecPtr startptr; TimeLineID starttli; XLogRecPtr endptr; TimeLineID endtli; char *labelfile; backup_started_in_recovery = RecoveryInProgress(); startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli, &labelfile); SendXlogRecPtrResult(startptr, starttli); PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0); { List *tablespaces = NIL; ListCell *lc; struct dirent *de; tablespaceinfo *ti; /* Collect information about all tablespaces */ while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL) { char fullpath[MAXPGPATH]; char linkpath[MAXPGPATH]; int rllen; /* Skip special stuff */ if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue; snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name); #if defined(HAVE_READLINK) || defined(WIN32) rllen = readlink(fullpath, linkpath, sizeof(linkpath)); if (rllen < 0) { ereport(WARNING, (errmsg("could not read symbolic link \"%s\": %m", fullpath))); continue; } else if (rllen >= sizeof(linkpath)) { ereport(WARNING, (errmsg("symbolic link \"%s\" target is too long", fullpath))); continue; } linkpath[rllen] = '\0'; ti = palloc(sizeof(tablespaceinfo)); ti->oid = pstrdup(de->d_name); ti->path = pstrdup(linkpath); ti->size = opt->progress ? sendDir(linkpath, strlen(linkpath), true) : -1; tablespaces = lappend(tablespaces, ti); #else /* * If the platform does not have symbolic links, it should not be * possible to have tablespaces - clearly somebody else created * them. Warn about it and ignore. 
*/ ereport(WARNING, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("tablespaces are not supported on this platform"))); #endif } /* Add a node for the base directory at the end */ ti = palloc0(sizeof(tablespaceinfo)); ti->size = opt->progress ? sendDir(".", 1, true) : -1; tablespaces = lappend(tablespaces, ti); /* Send tablespace header */ SendBackupHeader(tablespaces); /* Send off our tablespaces one by one */ foreach(lc, tablespaces) { tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc); StringInfoData buf; /* Send CopyOutResponse message */ pq_beginmessage(&buf, 'H'); pq_sendbyte(&buf, 0); /* overall format */ pq_sendint(&buf, 0, 2); /* natts */ pq_endmessage(&buf); /* In the main tar, include the backup_label first. */ if (ti->path == NULL) sendFileWithContent(BACKUP_LABEL_FILE, labelfile); sendDir(ti->path == NULL ? "." : ti->path, ti->path == NULL ? 1 : strlen(ti->path), false); /* In the main tar, include pg_control last. */ if (ti->path == NULL) { struct stat statbuf; if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat control file \"%s\": %m", XLOG_CONTROL_FILE))); } sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false); } /* * If we're including WAL, and this is the main data directory we * don't terminate the tar stream here. Instead, we will append * the xlog files below and terminate it then. This is safe since * the main data directory is always sent *last*. */ if (opt->includewal && ti->path == NULL) { Assert(lnext(lc) == NULL); } else pq_putemptymessage('c'); /* CopyDone */ } }
/*
 * printtup_internal_20
 *
 * Print a binary tuple in protocol 2.0.
 *
 * The message type is 'B' rather than 'D', which marks a tuple in internal
 * (binary) form.  Apart from using binary formatting this is largely the
 * same as printtup_20.
 *
 * Always returns true.
 */
static bool
printtup_internal_20(TupleTableSlot *slot, DestReceiver *self)
{
	TupleDesc	typeinfo = slot->tts_tupleDescriptor;
	DR_printtup *myState = (DR_printtup *) self;
	int			natts = typeinfo->natts;
	MemoryContext oldcontext;
	StringInfoData buf;
	int			bits;
	int			mask;
	int			i;

	/* (Re)build the derived attribute info when the descriptor changed */
	if (myState->attrinfo != typeinfo || myState->nattrs != natts)
		printtup_prepare_info(myState, typeinfo, natts);

	/* All attributes must be available in tts_values / tts_isnull */
	slot_getallattrs(slot);

	/* Work in the per-row context so its memory can be reclaimed below */
	oldcontext = MemoryContextSwitchTo(myState->tmpcontext);

	/* Announce a new tuple in binary style */
	pq_beginmessage(&buf, 'B');

	/*
	 * Bitmap of non-null attributes, most significant bit first; one byte
	 * is sent for every eight attributes.
	 */
	bits = 0;
	mask = 1 << 7;
	for (i = 0; i < natts; ++i)
	{
		if (!slot->tts_isnull[i])
			bits |= mask;

		mask >>= 1;
		if (mask == 0)
		{
			/* byte is complete: send it and start over */
			pq_sendint(&buf, bits, 1);
			bits = 0;
			mask = 1 << 7;
		}
	}
	/* A partially filled last byte still has to go out */
	if (mask != (1 << 7))
		pq_sendint(&buf, bits, 1);

	/* Now the attribute values; nulls are covered by the bitmap alone */
	for (i = 0; i < natts; ++i)
	{
		PrinttupAttrInfo *thisState = myState->myinfo + i;
		bytea	   *outputbytes;
		int			datalen;

		if (slot->tts_isnull[i])
			continue;

		Assert(thisState->format == 1);

		outputbytes = SendFunctionCall(&thisState->finfo, slot->tts_values[i]);
		datalen = VARSIZE(outputbytes) - VARHDRSZ;

		pq_sendint(&buf, datalen, 4);
		pq_sendbytes(&buf, VARDATA(outputbytes), datalen);
	}

	pq_endmessage(&buf);

	/* Back to the caller's context; drop this row's temporary memory */
	MemoryContextSwitchTo(oldcontext);
	MemoryContextReset(myState->tmpcontext);

	return true;
}
/*
 * printtup
 *
 * Print a tuple in protocol 3.0 as a DataRow ('D') message.  Each attribute
 * is sent in text or binary form according to its prepared format; a null
 * is sent as a length of -1 with no data.
 *
 * Always returns true.
 */
static bool
printtup(TupleTableSlot *slot, DestReceiver *self)
{
	TupleDesc	typeinfo = slot->tts_tupleDescriptor;
	DR_printtup *myState = (DR_printtup *) self;
	int			natts = typeinfo->natts;
	MemoryContext oldcontext;
	StringInfoData buf;
	int			i;

	/* (Re)build the derived attribute info when the descriptor changed */
	if (myState->attrinfo != typeinfo || myState->nattrs != natts)
		printtup_prepare_info(myState, typeinfo, natts);

	/* All attributes must be available in tts_values / tts_isnull */
	slot_getallattrs(slot);

	/* Work in the per-row context so its memory can be reclaimed below */
	oldcontext = MemoryContextSwitchTo(myState->tmpcontext);

	/* The message buffer itself lives in the per-row context */
	pq_beginmessage(&buf, 'D');
	pq_sendint(&buf, natts, 2);

	for (i = 0; i < natts; ++i)
	{
		PrinttupAttrInfo *thisState = myState->myinfo + i;
		Datum		attr = slot->tts_values[i];

		if (slot->tts_isnull[i])
		{
			pq_sendint(&buf, -1, 4);
			continue;
		}

		/*
		 * Catch undefined bytes in datums that reach the client without
		 * ever hitting disk; see the comments at the related check in
		 * PageAddItem().  The test is most useful for uncompressed,
		 * non-external datums, which are quite likely to show up here when
		 * new C functions are being tested.
		 */
		if (thisState->typisvarlena)
			VALGRIND_CHECK_MEM_IS_DEFINED(DatumGetPointer(attr),
										  VARSIZE_ANY(attr));

		if (thisState->format != 0)
		{
			/* binary: 4-byte length, then the payload without its header */
			bytea	   *outputbytes = SendFunctionCall(&thisState->finfo, attr);
			int			datalen = VARSIZE(outputbytes) - VARHDRSZ;

			pq_sendint(&buf, datalen, 4);
			pq_sendbytes(&buf, VARDATA(outputbytes), datalen);
		}
		else
		{
			/* text: counted string produced by the output function */
			char	   *outputstr = OutputFunctionCall(&thisState->finfo, attr);

			pq_sendcountedtext(&buf, outputstr, strlen(outputstr), false);
		}
	}

	pq_endmessage(&buf);

	/* Back to the caller's context; drop this row's temporary memory */
	MemoryContextSwitchTo(oldcontext);
	MemoryContextReset(myState->tmpcontext);

	return true;
}
/*
 * START_REPLICATION
 *
 * Switch this backend into WAL streaming: register as a WAL sender, handle
 * the promoted-cascading-standby corner case, enter the catchup state, send
 * CopyBothResponse and record the requested start point.
 */
static void
StartReplication(StartReplicationCmd *cmd)
{
	StringInfoData buf;

	/*
	 * Tell the postmaster that we are streaming.  After we have declared
	 * ourselves a WAL sender, the postmaster lets us outlive the bgwriter and
	 * kills us last during shutdown, which gives us the chance to stream all
	 * remaining WAL, shutdown checkpoint included.  This cannot be undone,
	 * and no WAL records may be written by us from here on.
	 */
	MarkPostmasterChildWalSender();
	SendPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE);

	/*
	 * On promotion of a cascading standby the postmaster sends SIGUSR2 to
	 * the cascading walsenders to kill them, but only to processes already
	 * marked as WAL senders.  A walsender that marks itself after the signal
	 * was sent would therefore miss it and unexpectedly survive the
	 * promotion.
	 *
	 * To close that gap: if we are a cascading walsender and recovery is no
	 * longer in progress, do what the SIGUSR2 handler would have done and set
	 * walsender_ready_to_stop, which makes the walsender end later.
	 *
	 * Normally the postmaster logs the termination of cascading walsenders.
	 * That is racy as well: if no other walsender existed before we got
	 * here, the postmaster believes there is none and suppresses the log
	 * message.  So the message is always emitted here.  It can end up
	 * duplicated, but that is unlikely enough not to be worth extra code.
	 */
	if (am_cascading_walsender && !RecoveryInProgress())
	{
		ereport(LOG,
				(errmsg("terminating walsender process to force cascaded standby "
						"to update timeline and reconnect")));
		walsender_ready_to_stop = true;
	}

	/*
	 * No wal_level check here: we assume the WAL carries enough information
	 * for log shipping because PostmasterMain() checks that.
	 *
	 * wal_level can only change at shutdown, so in most cases it is difficult
	 * for WAL written at wal_level='minimal' to still be visible to us.
	 */

	/*
	 * A standby starts out behind the primary.  Some applications, such as
	 * synchronous replication, need a distinct state for this initial catchup
	 * phase so that actions can be triggered when the streaming state changes
	 * later.  We may remain in this state for a long time, which is exactly
	 * why it has to be possible to monitor it.
	 */
	WalSndSetState(WALSNDSTATE_CATCHUP);

	/* CopyBothResponse: a zero byte and a zero 2-byte count, then flush */
	pq_beginmessage(&buf, 'W');
	pq_sendbyte(&buf, 0);
	pq_sendint(&buf, 0, 2);
	pq_endmessage(&buf);
	pq_flush();

	/* xlog records will be shipped starting from the received position. */
	sentPtr = cmd->startpoint;
}
/* * IDENTIFY_SYSTEM */ static void IdentifySystem(void) { StringInfoData buf; char sysid[32]; char tli[11]; char xpos[MAXFNAMELEN]; XLogRecPtr logptr; /* * Reply with a result set with one row, three columns. First col is * system ID, second is timeline ID, and third is current xlog location. */ snprintf(sysid, sizeof(sysid), UINT64_FORMAT, GetSystemIdentifier()); snprintf(tli, sizeof(tli), "%u", ThisTimeLineID); logptr = am_cascading_walsender ? GetStandbyFlushRecPtr() : GetInsertRecPtr(); snprintf(xpos, sizeof(xpos), "%X/%X", logptr.xlogid, logptr.xrecoff); /* Send a RowDescription message */ pq_beginmessage(&buf, 'T'); pq_sendint(&buf, 3, 2); /* 3 fields */ /* first field */ pq_sendstring(&buf, "systemid"); /* col name */ pq_sendint(&buf, 0, 4); /* table oid */ pq_sendint(&buf, 0, 2); /* attnum */ pq_sendint(&buf, TEXTOID, 4); /* type oid */ pq_sendint(&buf, -1, 2); /* typlen */ pq_sendint(&buf, 0, 4); /* typmod */ pq_sendint(&buf, 0, 2); /* format code */ /* second field */ pq_sendstring(&buf, "timeline"); /* col name */ pq_sendint(&buf, 0, 4); /* table oid */ pq_sendint(&buf, 0, 2); /* attnum */ pq_sendint(&buf, INT4OID, 4); /* type oid */ pq_sendint(&buf, 4, 2); /* typlen */ pq_sendint(&buf, 0, 4); /* typmod */ pq_sendint(&buf, 0, 2); /* format code */ /* third field */ pq_sendstring(&buf, "xlogpos"); pq_sendint(&buf, 0, 4); pq_sendint(&buf, 0, 2); pq_sendint(&buf, TEXTOID, 4); pq_sendint(&buf, -1, 2); pq_sendint(&buf, 0, 4); pq_sendint(&buf, 0, 2); pq_endmessage(&buf); /* Send a DataRow message */ pq_beginmessage(&buf, 'D'); pq_sendint(&buf, 3, 2); /* # of columns */ pq_sendint(&buf, strlen(sysid), 4); /* col1 len */ pq_sendbytes(&buf, (char *) &sysid, strlen(sysid)); pq_sendint(&buf, strlen(tli), 4); /* col2 len */ pq_sendbytes(&buf, (char *) tli, strlen(tli)); pq_sendint(&buf, strlen(xpos), 4); /* col3 len */ pq_sendbytes(&buf, (char *) xpos, strlen(xpos)); pq_endmessage(&buf); /* Send CommandComplete and ReadyForQuery messages */ EndCommand("SELECT", 
DestRemote); ReadyForQuery(DestRemote); /* ReadyForQuery did pq_flush for us */ }
/* * Actually do a base backup for the specified tablespaces. * * This is split out mainly to avoid complaints about "variable might be * clobbered by longjmp" from stupider versions of gcc. */ static void perform_base_backup(basebackup_options *opt, DIR *tblspcdir) { XLogRecPtr startptr; XLogRecPtr endptr; char *labelfile; startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &labelfile); Assert(!XLogRecPtrIsInvalid(startptr)); elogif(!debug_basebackup, LOG, "basebackup perform -- " "Basebackup start xlog location = %X/%X", startptr.xlogid, startptr.xrecoff); /* * Set xlogCleanUpTo so that checkpoint process knows * which old xlog files should not be cleaned */ WalSndSetXLogCleanUpTo(startptr); SIMPLE_FAULT_INJECTOR(BaseBackupPostCreateCheckpoint); SendXlogRecPtrResult(startptr); PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0); { List *filespaces = NIL; ListCell *lc; /* Collect information about all filespaces, including pg_system */ filespaces = get_filespaces_to_send(opt); /* Send filespace header */ SendBackupHeader(filespaces); /* Send off our filespaces one by one */ foreach(lc, filespaces) { filespaceinfo *fi = (filespaceinfo *) lfirst(lc); StringInfoData buf; /* Send CopyOutResponse message */ pq_beginmessage(&buf, 'H'); pq_sendbyte(&buf, 0); /* overall format */ pq_sendint(&buf, 0, 2); /* natts */ pq_endmessage(&buf); /* In the main tar, include the backup_label first. */ if (fi->primary_path == NULL) sendFileWithContent(BACKUP_LABEL_FILE, labelfile); sendDir(fi->primary_path == NULL ? "." : fi->primary_path, fi->primary_path == NULL ? 1 : strlen(fi->primary_path), opt->exclude, false); /* In the main tar, include pg_control last. 
*/ if (fi->primary_path == NULL) { struct stat statbuf; if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat control file \"%s\": %m", XLOG_CONTROL_FILE))); } sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf); elogif(debug_basebackup, LOG, "basebackup perform -- Sent file %s." , XLOG_CONTROL_FILE); } /* * If we're including WAL, and this is the main data directory we * don't terminate the tar stream here. Instead, we will append * the xlog files below and terminate it then. This is safe since * the main data directory is always sent *last*. */ if (opt->includewal && fi->xlogdir) { Assert(lnext(lc) == NULL); } else pq_putemptymessage('c'); /* CopyDone */ } }
/* ----------------
 *		printtup_internal_20 --- print a binary tuple in protocol 2.0
 *
 * The message is tagged 'B' rather than 'D' so the frontend knows the
 * tuple is in internal (binary) form.
 *
 * Message layout, as built below:
 *	  1. a bitmap with one bit per attribute, most significant bit first,
 *		 where a set bit means "not null"; a trailing partial byte is sent
 *		 only if the attribute count is not a multiple of eight;
 *	  2. for every non-null attribute, a 4-byte length followed by the
 *		 output of the type's send function (without the varlena header).
 *
 * This is largely the same as printtup_20, except binary formatting is used.
 * ----------------
 */
static void
printtup_internal_20(TupleTableSlot *slot, DestReceiver *self)
{
	DR_printtup *receiver = (DR_printtup *) self;
	TupleDesc	tupdesc = slot->tts_tupleDescriptor;
	int			natts = tupdesc->natts;
	StringInfoData buf;
	int			attno;
	int			nonnull_bits;
	int			bitmask;

	/* Set or update my derived attribute info, if needed */
	if (receiver->attrinfo != tupdesc || receiver->nattrs != natts)
		printtup_prepare_info(receiver, tupdesc, natts);

	/* Make sure the tuple is fully deconstructed */
	slot_getallattrs(slot);

	/*
	 * tell the frontend to expect new tuple data (in binary style)
	 */
	pq_beginmessage(&buf, 'B');

	/*
	 * send a bitmap of which attributes are not null
	 */
	nonnull_bits = 0;
	bitmask = 0x80;
	for (attno = 0; attno < natts; attno++)
	{
		if (!slot->tts_isnull[attno])
			nonnull_bits |= bitmask;	/* set bit if not null */

		bitmask >>= 1;
		if (bitmask != 0)
			continue;			/* byte not full yet */

		/* end of byte: emit it and start a fresh one */
		pq_sendint(&buf, nonnull_bits, 1);
		nonnull_bits = 0;
		bitmask = 0x80;
	}
	if (bitmask != 0x80)		/* flush last partial byte */
		pq_sendint(&buf, nonnull_bits, 1);

	/*
	 * send the attributes of this tuple
	 */
	for (attno = 0; attno < natts; attno++)
	{
		PrinttupAttrInfo *attstate = &receiver->myinfo[attno];
		Datum		rawval = slot->tts_values[attno];
		Datum		sendval;
		bytea	   *sendbytes;
		int			payload_len;

		/* nulls are represented by the bitmap only */
		if (slot->tts_isnull[attno])
			continue;

		Assert(attstate->format == 1);

		/*
		 * If we have a toasted datum, forcibly detoast it here to avoid
		 * memory leakage inside the type's output routine.
		 */
		sendval = attstate->typisvarlena
			? PointerGetDatum(PG_DETOAST_DATUM(rawval))
			: rawval;

		sendbytes = SendFunctionCall(&attstate->finfo, sendval);

		/* We assume the result will not have been toasted */
		payload_len = VARSIZE(sendbytes) - VARHDRSZ;
		pq_sendint(&buf, payload_len, 4);
		pq_sendbytes(&buf, VARDATA(sendbytes), payload_len);
		pfree(sendbytes);

		/* Clean up detoasted copy, if any */
		if (DatumGetPointer(sendval) != DatumGetPointer(rawval))
			pfree(DatumGetPointer(sendval));
	}

	pq_endmessage(&buf);
}