Exemple #1
0
/* ----------------
 *		EndCommand - clean up the destination at end of command
 * ----------------
 */
void
EndCommand(const char *commandTag, CommandDest dest)
{
	StringInfoData buf;

	if (Gp_role == GP_ROLE_DISPATCH)
	{
		/*
		 * Just before a successful reply, let's see if the DTM has
		 * phase 2 retry work.
		 */
		doDtxPhase2Retry();
	}
	
	switch (dest)
	{
		case DestRemote:
		case DestRemoteExecute:
			if (Gp_role == GP_ROLE_EXECUTE && Gp_is_writer)
			{
				/*
				 * Extra information that indicates if the transaction made
				 * updates.
				 */
				sendQEDetails();

				pq_beginmessage(&buf, 'g');
				pq_sendstring(&buf, commandTag);

				AddQEWriterTransactionInfo(&buf);

				pq_endmessage(&buf);
			}
			else if (Gp_role == GP_ROLE_EXECUTE)
			{
				sendQEDetails();

				pq_beginmessage(&buf, 'C');
				pq_sendstring(&buf, commandTag);
				pq_endmessage(&buf);
			}
			else
				pq_puttextmessage('C', commandTag);
			break;
		case DestNone:
		case DestDebug:
		case DestSPI:
		case DestTuplestore:
		case DestIntoRel:
		case DestCopyOut:
			break;
	}
}
Exemple #2
0
/*
 * IDENTIFY_SYSTEM
 */
static void
IdentifySystem(void)
{
	StringInfoData buf;
	char		sysid[32];
	char		tli[11];

	/*
	 * Reply with a result set with one row, two columns. First col is system
	 * ID, and second is timeline ID
	 */

	snprintf(sysid, sizeof(sysid), UINT64_FORMAT,
			 GetSystemIdentifier());
	snprintf(tli, sizeof(tli), "%u", ThisTimeLineID);

	/* Send a RowDescription message */
	pq_beginmessage(&buf, 'T');
	pq_sendint(&buf, 2, 2);		/* 2 fields */

	/* first field */
	pq_sendstring(&buf, "systemid");	/* col name */
	pq_sendint(&buf, 0, 4);		/* table oid */
	pq_sendint(&buf, 0, 2);		/* attnum */
	pq_sendint(&buf, TEXTOID, 4);		/* type oid */
	pq_sendint(&buf, -1, 2);	/* typlen */
	pq_sendint(&buf, 0, 4);		/* typmod */
	pq_sendint(&buf, 0, 2);		/* format code */

	/* second field */
	pq_sendstring(&buf, "timeline");	/* col name */
	pq_sendint(&buf, 0, 4);		/* table oid */
	pq_sendint(&buf, 0, 2);		/* attnum */
	pq_sendint(&buf, INT4OID, 4);		/* type oid */
	pq_sendint(&buf, 4, 2);		/* typlen */
	pq_sendint(&buf, 0, 4);		/* typmod */
	pq_sendint(&buf, 0, 2);		/* format code */
	pq_endmessage(&buf);

	/* Send a DataRow message */
	pq_beginmessage(&buf, 'D');
	pq_sendint(&buf, 2, 2);		/* # of columns */
	pq_sendint(&buf, strlen(sysid), 4); /* col1 len */
	pq_sendbytes(&buf, (char *) &sysid, strlen(sysid));
	pq_sendint(&buf, strlen(tli), 4);	/* col2 len */
	pq_sendbytes(&buf, (char *) tli, strlen(tli));
	pq_endmessage(&buf);

	/* Send CommandComplete and ReadyForQuery messages */
	EndCommand("SELECT", DestRemote);
	ReadyForQuery(DestRemote);
	/* ReadyForQuery did pq_flush for us */
}
Exemple #3
0
/* ----------------
 *		EndCommand - clean up the destination at end of command
 * ----------------
 */
void
EndCommand(const char *commandTag, CommandDest dest)
{
    StringInfoData buf;

    switch (dest)
    {
    case DestRemote:
    case DestRemoteExecute:
        if (Gp_role == GP_ROLE_EXECUTE)
        {
            sendQEDetails();

            pq_beginmessage(&buf, 'C');
            pq_sendstring(&buf, commandTag);
            pq_endmessage(&buf);
        }
        else
            pq_puttextmessage('C', commandTag);
        break;
    case DestNone:
    case DestDebug:
    case DestSPI:
    case DestTuplestore:
    case DestIntoRel:
    case DestCopyOut:
        break;
    }
}
static void
SendResultDescriptionMessage(AttributeDefinition *attrs, int natts)
{
	int			proto = PG_PROTOCOL_MAJOR(FrontendProtocol);
	int			i;
	StringInfoData buf;

	pq_beginmessage(&buf, 'T'); /* tuple descriptor message type */
	pq_sendint(&buf, natts, 2);	/* # of attrs in tuples */

	for (i = 0; i < natts; ++i)
	{
		pq_sendstring(&buf, attrs[i].name);
		/* column ID info appears in protocol 3.0 and up */
		if (proto >= 3)
		{
			pq_sendint(&buf, 0, 4);
			pq_sendint(&buf, 0, 2);
		}
		/* If column is a domain, send the base type and typmod instead */
		pq_sendint(&buf, attrs[i].typid, sizeof(Oid));
		pq_sendint(&buf, attrs[i].typlen, sizeof(int16));
		/* typmod appears in protocol 2.0 and up */
		if (proto >= 2)
			pq_sendint(&buf, attrs[i].typmod, sizeof(int32));
		/* format info appears in protocol 3.0 and up */
		if (proto >= 3)
			pq_sendint(&buf, 0, 2);
	}

	pq_endmessage(&buf);
}
static Source *
CreateRemoteSource(const char *path, TupleDesc desc)
{
	RemoteSource *self = (RemoteSource *) palloc0(sizeof(RemoteSource));
	self->base.close = (SourceCloseProc) RemoteSourceClose;

	if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
	{
		/* new way */
		StringInfoData	buf;
		int16			format;
		int				nattrs;
		int				i;

		self->base.read = (SourceReadProc) RemoteSourceRead;

		/* count valid fields */
		for (nattrs = 0, i = 0; i < desc->natts; i++)
		{
			if (desc->attrs[i]->attisdropped)
				continue;
			nattrs++;
		}

		format = (IsBinaryCopy() ? 1 : 0);
		pq_beginmessage(&buf, 'G');
		pq_sendbyte(&buf, format);		/* overall format */
		pq_sendint(&buf, nattrs, 2);
		for (i = 0; i < nattrs; i++)
			pq_sendint(&buf, format, 2);		/* per-column formats */
		pq_endmessage(&buf);
		self->buffer = makeStringInfo();
	}
	else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
	{
		self->base.read = (SourceReadProc) RemoteSourceReadOld;

		/* old way */
		if (IsBinaryCopy())
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			errmsg("COPY BINARY is not supported to stdout or from stdin")));
		pq_putemptymessage('G');
	}
	else
	{
		self->base.read = (SourceReadProc) RemoteSourceReadOld;

		/* very old way */
		if (IsBinaryCopy())
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			errmsg("COPY BINARY is not supported to stdout or from stdin")));
		pq_putemptymessage('D');
	}
	/* We *must* flush here to ensure FE knows it can send. */
	pq_flush();

	return (Source *) self;
}
Exemple #6
0
/*
 * Send NOTIFY message to my front end.
 */
static void
NotifyMyFrontEnd(char *relname, int32 listenerPID)
{
	if (whereToSendOutput == DestRemote)
	{
		StringInfoData buf;

		pq_beginmessage(&buf, 'A');
		pq_sendint(&buf, listenerPID, sizeof(int32));
		pq_sendstring(&buf, relname);
		if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
		{
			/* XXX Add parameter string here later */
			pq_sendstring(&buf, "");
		}
		pq_endmessage(&buf);

		/*
		 * NOTE: we do not do pq_flush() here.	For a self-notify, it will
		 * happen at the end of the transaction, and for incoming notifies
		 * ProcessIncomingNotify will do it after finding all the notifies.
		 */
	}
	else
		elog(INFO, "NOTIFY for %s", relname);
}
Exemple #7
0
/* ----------------
 *		ReadyForQuery - tell dest that we are ready for a new query
 *
 *		The ReadyForQuery message is sent in protocol versions 2.0 and up
 *		so that the FE can tell when we are done processing a query string.
 *		In versions 3.0 and up, it also carries a transaction state indicator.
 *
 *		Note that by flushing the stdio buffer here, we can avoid doing it
 *		most other places and thus reduce the number of separate packets sent.
 * ----------------
 */
void
ReadyForQuery(CommandDest dest)
{
	switch (dest)
	{
		case DestRemote:
		case DestRemoteExecute:
			if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
			{
				StringInfoData buf;

				pq_beginmessage(&buf, 'Z');
				pq_sendbyte(&buf, TransactionBlockStatusCode());
				pq_endmessage(&buf);
			}
			else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
				pq_putemptymessage('Z');
			/* Flush output at end of cycle in any case. */
			pq_flush();
			break;

		case DestNone:
		case DestDebug:
		case DestSPI:
		case DestTuplestore:
		case DestIntoRel:
		case DestCopyOut:
			break;
	}
}
Exemple #8
0
/*
 * Execute the CREATE BARRIER command. Write a BARRIER WAL record and flush the
 * WAL buffers to disk before returning to the caller. Writing the WAL record
 * does not guarantee successful completion of the barrier command.
 */
void
ProcessCreateBarrierExecute(const char *id)
{
	StringInfoData buf;

	if (!IsConnFromCoord())
		ereport(ERROR,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg("The CREATE BARRIER EXECUTE message is expected to "
						"arrive from a Coordinator")));
	{
		XLogRecData rdata[1];
		XLogRecPtr recptr;

		rdata[0].data = (char *) id;
		rdata[0].len = strlen(id) + 1;
		rdata[0].buffer = InvalidBuffer;
		rdata[0].next = NULL;

		recptr = XLogInsert(RM_BARRIER_ID, XLOG_BARRIER_CREATE, rdata);
		XLogFlush(recptr);
	}

	pq_beginmessage(&buf, 'b');
	pq_sendstring(&buf, id);
	pq_endmessage(&buf);
	pq_flush();
}
Exemple #9
0
/*
 * SendRowDescriptionMessage --- send a RowDescription message to the frontend
 *
 * Notes: the TupleDesc has typically been manufactured by ExecTypeFromTL()
 * or some similar function; it does not contain a full set of fields.
 * The targetlist will be NIL when executing a utility function that does
 * not have a plan.  If the targetlist isn't NIL then it is a Query node's
 * targetlist; it is up to us to ignore resjunk columns in it.  The formats[]
 * array pointer might be NULL (if we are doing Describe on a prepared stmt);
 * send zeroes for the format codes in that case.
 */
void
SendRowDescriptionMessage(TupleDesc typeinfo, List *targetlist, int16 *formats)
{
	Form_pg_attribute *attrs = typeinfo->attrs;
	int			natts = typeinfo->natts;
	int			proto = PG_PROTOCOL_MAJOR(FrontendProtocol);
	int			i;
	StringInfoData buf;
	ListCell   *tlist_item = list_head(targetlist);

	pq_beginmessage(&buf, 'T'); /* tuple descriptor message type */
	pq_sendint(&buf, natts, 2); /* # of attrs in tuples */

	for (i = 0; i < natts; ++i)
	{
		Oid			atttypid = attrs[i]->atttypid;
		int32		atttypmod = attrs[i]->atttypmod;

		pq_sendstring(&buf, NameStr(attrs[i]->attname));
		/* column ID info appears in protocol 3.0 and up */
		if (proto >= 3)
		{
			/* Do we have a non-resjunk tlist item? */
			while (tlist_item &&
				   ((TargetEntry *) lfirst(tlist_item))->resjunk)
				tlist_item = lnext(tlist_item);
			if (tlist_item)
			{
				TargetEntry *tle = (TargetEntry *) lfirst(tlist_item);

				pq_sendint(&buf, tle->resorigtbl, 4);
				pq_sendint(&buf, tle->resorigcol, 2);
				tlist_item = lnext(tlist_item);
			}
			else
			{
				/* No info available, so send zeroes */
				pq_sendint(&buf, 0, 4);
				pq_sendint(&buf, 0, 2);
			}
		}
		/* If column is a domain, send the base type and typmod instead */
		atttypid = getBaseTypeAndTypmod(atttypid, &atttypmod);
		pq_sendint(&buf, (int) atttypid, sizeof(atttypid));
		pq_sendint(&buf, attrs[i]->attlen, sizeof(attrs[i]->attlen));
		/* typmod appears in protocol 2.0 and up */
		if (proto >= 2)
			pq_sendint(&buf, atttypmod, sizeof(atttypmod));
		/* format info appears in protocol 3.0 and up */
		if (proto >= 3)
		{
			if (formats)
				pq_sendint(&buf, formats[i], 2);
			else
				pq_sendint(&buf, 0, 2);
		}
	}
	pq_endmessage(&buf);
}
Exemple #10
0
/*
 * SendNumRowsRejected
 *
 * Using this function the QE sends back to the client QD the number 
 * of rows that were rejected in this last data load in SREH mode.
 */
void SendNumRowsRejected(int numrejected)
{
	StringInfoData buf;
	
	if (Gp_role != GP_ROLE_EXECUTE)
		elog(FATAL, "SendNumRowsRejected: called outside of execute context.");

	pq_beginmessage(&buf, 'j'); /* 'j' is the msg code for rejected records */
	pq_sendint(&buf, numrejected, 4);
	pq_endmessage(&buf);	
}
static void
putEndLocationReply(XLogRecPtr *endLocation)
{
	StringInfoData buf;
	
	pq_beginmessage(&buf, 's');
	pq_sendint(&buf, endLocation->xlogid, 4);
	pq_sendint(&buf, endLocation->xrecoff, 4);
	pq_endmessage(&buf);
	pq_flush();
}
Exemple #12
0
Fichier : dest.c Projet : 50wu/gpdb
/*
 * Send a gpdb libpq message.
 */
void
sendQEDetails(void)
{
	StringInfoData buf;

	pq_beginmessage(&buf, 'w');
	pq_sendint(&buf, (int32) ICListenerPort, sizeof(int32));
	pq_sendint(&buf, sizeof(PG_VERSION_STR), sizeof(int32));
	pq_sendbytes(&buf, PG_VERSION_STR, sizeof(PG_VERSION_STR));
	pq_endmessage(&buf);
}
Exemple #13
0
/*
 * Send a gpdb libpq message.
 */
void
sendQEDetails(void)
{
	StringInfoData buf;

	pq_beginmessage(&buf, 'w');
	pq_sendint(&buf, (int32) Gp_listener_port, sizeof(int32));			
	pq_sendint64(&buf, VmemTracker_GetMaxReservedVmemBytes());
	pq_sendint(&buf, sizeof(PG_VERSION_STR), sizeof(int32));
	pq_sendbytes(&buf, PG_VERSION_STR, sizeof(PG_VERSION_STR));
	pq_endmessage(&buf);
}
Exemple #14
0
/*
 * START_REPLICATION
 */
static void
StartReplication(StartReplicationCmd *cmd)
{
    StringInfoData buf;

    /*
     * Let postmaster know that we're streaming. Once we've declared us as a
     * WAL sender process, postmaster will let us outlive the bgwriter and
     * kill us last in the shutdown sequence, so we get a chance to stream all
     * remaining WAL at shutdown, including the shutdown checkpoint. Note that
     * there's no going back, and we mustn't write any WAL records after this.
     */
    MarkPostmasterChildWalSender();
    SendPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE);

    /*
     * Check that we're logging enough information in the WAL for
     * log-shipping.
     *
     * NOTE: This only checks the current value of wal_level. Even if the
     * current setting is not 'minimal', there can be old WAL in the pg_xlog
     * directory that was created with 'minimal'. So this is not bulletproof,
     * the purpose is just to give a user-friendly error message that hints
     * how to configure the system correctly.
     */
    if (wal_level == WAL_LEVEL_MINIMAL)
        ereport(FATAL,
                (errcode(ERRCODE_CANNOT_CONNECT_NOW),
                 errmsg("standby connections not allowed because wal_level=minimal")));

    /*
     * When we first start replication the standby will be behind the primary.
     * For some applications, for example, synchronous replication, it is
     * important to have a clear state for this initial catchup mode, so we
     * can trigger actions when we change streaming state later. We may stay
     * in this state for a long time, which is exactly why we want to be able
     * to monitor whether or not we are still here.
     */
    WalSndSetState(WALSNDSTATE_CATCHUP);

    /* Send a CopyBothResponse message, and start streaming */
    pq_beginmessage(&buf, 'W');
    pq_sendbyte(&buf, 0);
    pq_sendint(&buf, 0, 2);
    pq_endmessage(&buf);
    pq_flush();

    /*
     * Initialize position to the received one, then the xlog records begin to
     * be shipped from that position
     */
    sentPtr = cmd->startpoint;
}
Exemple #15
0
/*
 * START_REPLICATION
 */
static void
StartReplication(StartReplicationCmd * cmd)
{
	StringInfoData buf;

	/*
	 * Let postmaster know that we're streaming. Once we've declared us as
	 * a WAL sender process, postmaster will let us outlive the bgwriter and
	 * kill us last in the shutdown sequence, so we get a chance to stream
	 * all remaining WAL at shutdown, including the shutdown checkpoint.
	 * Note that there's no going back, and we mustn't write any WAL records
	 * after this.
	 */
	MarkPostmasterChildWalSender();

	/*
	 * Check that we're logging enough information in the WAL for
	 * log-shipping.
	 *
	 * NOTE: This only checks the current value of wal_level. Even if the
	 * current setting is not 'minimal', there can be old WAL in the pg_xlog
	 * directory that was created with 'minimal'. So this is not bulletproof,
	 * the purpose is just to give a user-friendly error message that hints
	 * how to configure the system correctly.
	 */
	if (wal_level == WAL_LEVEL_MINIMAL)
		ereport(FATAL,
				(errcode(ERRCODE_CANNOT_CONNECT_NOW),
		errmsg("standby connections not allowed because wal_level=minimal")));

	/* Send a CopyBothResponse message, and start streaming */
	pq_beginmessage(&buf, 'W');
	pq_sendbyte(&buf, 0);
	pq_sendint(&buf, 0, 2);
	pq_endmessage(&buf);
	pq_flush();

	/*
	 * Initialize position to the received one, then the xlog records begin to
	 * be shipped from that position
	 */
	sentPtr = cmd->startpoint;
}
Exemple #16
0
/* ----------------
 *		EndCommand - clean up the destination at end of command
 * ----------------
 */
void
EndCommand(const char *commandTag, CommandDest dest)
{
	StringInfoData buf;

	if (Gp_role == GP_ROLE_DISPATCH)
	{
		/*
		 * Just before a successful reply, let's see if the DTM has
		 * phase 2 retry work.
		 */
		doDtxPhase2Retry();
	}
	
	switch (dest)
	{
		case DestRemote:
		case DestRemoteExecute:
			/*
			 * We assume the commandTag is plain ASCII and therefore
			 * requires no encoding conversion.
			 */
			if (Gp_role == GP_ROLE_EXECUTE)
			{
				sendQEDetails();

				pq_beginmessage(&buf, 'C');
				pq_send_ascii_string(&buf, commandTag);
				pq_endmessage(&buf);
			}
			else
				pq_putmessage('C', commandTag, strlen(commandTag) + 1);
			break;
		case DestNone:
		case DestDebug:
		case DestSPI:
		case DestTuplestore:
		case DestIntoRel:
		case DestCopyOut:
			break;
	}
}
/*
 * Send an authentication request packet to the frontend.
 */
static void
sendAuthRequest(Port *port, AuthRequest areq)
{
	StringInfoData buf;

	pq_beginmessage(&buf, 'R');
	pq_sendint(&buf, (int32) areq, sizeof(int32));

	/* Add the salt for encrypted passwords. */
	if (areq == AUTH_REQ_MD5)
		pq_sendbytes(&buf, port->md5Salt, 4);
	else if (areq == AUTH_REQ_CRYPT)
		pq_sendbytes(&buf, port->cryptSalt, 2);

#if defined(ENABLE_GSS) || defined(ENABLE_SSPI)

	/*
	 * Add the authentication data for the next step of the GSSAPI or SSPI
	 * negotiation.
	 */
	else if (areq == AUTH_REQ_GSS_CONT)
	{
		if (port->gss->outbuf.length > 0)
		{
			elog(DEBUG4, "sending GSS token of length %u",
				 (unsigned int) port->gss->outbuf.length);

			pq_sendbytes(&buf, port->gss->outbuf.value, port->gss->outbuf.length);
		}
	}
#endif

	pq_endmessage(&buf);

	/*
	 * Flush message so client will see it, except for AUTH_REQ_OK, which need
	 * not be sent until we are ready for queries.
	 */
	if (areq != AUTH_REQ_OK)
		pq_flush();
}
Exemple #18
0
void
ProcessGTMBeginBackup(Port *myport, StringInfo message)
{
	int ii;
	GTM_ThreadInfo *my_threadinfo;
	StringInfoData buf;

	pq_getmsgend(message);
	my_threadinfo = GetMyThreadInfo;

	for (ii = 0; ii < GTMThreads->gt_array_size; ii++)
	{
		if (GTMThreads->gt_threads[ii] && GTMThreads->gt_threads[ii] != my_threadinfo)
			GTM_RWLockAcquire(&GTMThreads->gt_threads[ii]->thr_lock, GTM_LOCKMODE_WRITE);
	}
	my_threadinfo->thr_status = GTM_THREAD_BACKUP;
	pq_beginmessage(&buf, 'S');
	pq_sendint(&buf, BEGIN_BACKUP_RESULT, 4);
	pq_endmessage(myport, &buf);
	pq_flush(myport);
}
Exemple #19
0
void
ProcessGTMEndBackup(Port *myport, StringInfo message)
{
	int ii;
	GTM_ThreadInfo *my_threadinfo;
	StringInfoData buf;

	pq_getmsgend(message);
	my_threadinfo = GetMyThreadInfo;

	for (ii = 0; ii < GTMThreads->gt_array_size; ii++)
	{
		if (GTMThreads->gt_threads[ii] && GTMThreads->gt_threads[ii] != my_threadinfo)
			GTM_RWLockRelease(&GTMThreads->gt_threads[ii]->thr_lock);
	}
	my_threadinfo->thr_status = GTM_THREAD_RUNNING;
	pq_beginmessage(&buf, 'S');
	pq_sendint(&buf, END_BACKUP_RESULT, 4);
	pq_endmessage(myport, &buf);
	pq_flush(myport);
}
Exemple #20
0
static void
send_buffer()
{
    if (buffer_len > 0)
    {
	StringInfoData msgbuf;
	char *cursor = buffer;

	while (--buffer_len > 0)
	{
	    if (*cursor == '\0')
		*cursor = '\n';
	    cursor++;
	}

	if (*cursor != '\0')
	        ereport(ERROR,
		        (errcode(ERRCODE_INTERNAL_ERROR),
			 errmsg("internal error"),
		         errdetail("Wrong message format detected")));

	pq_beginmessage(&msgbuf, 'N');

	if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
	{
		pq_sendbyte(&msgbuf, PG_DIAG_MESSAGE_PRIMARY);
		pq_sendstring(&msgbuf, buffer);
		pq_sendbyte(&msgbuf, '\0');
	}
	else
	{
		*cursor++ = '\n';
		*cursor = '\0';
		pq_sendstring(&msgbuf, buffer);
	}

	pq_endmessage(&msgbuf);
	pq_flush();
    }
}
Exemple #21
0
/*
 * Mark the completion of an on-going barrier. We must have remembered the
 * barrier ID when we received the CREATE BARRIER PREPARE command
 */
void
ProcessCreateBarrierEnd(const char *id)
{
	StringInfoData buf;

	if (!IS_PGXC_COORDINATOR || !IsConnFromCoord())
		ereport(ERROR,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg("The CREATE BARRIER END message is expected to "
						"arrive at a Coordinator from another Coordinator")));

	LWLockRelease(BarrierLock);

	pq_beginmessage(&buf, 'b');
	pq_sendstring(&buf, id);
	pq_endmessage(&buf);
	pq_flush();

	/*
	 * TODO Stop the timer
	 */
}
Exemple #22
0
/*
 * Prepare ourselves for an incoming BARRIER. We must disable all new 2PC
 * commits and let the ongoing commits to finish. We then remember the
 * barrier id (so that it can be matched with the final END message) and
 * tell the driving Coordinator to proceed with the next step.
 *
 * A simple way to implement this is to grab a lock in an exclusive mode
 * while all other backend starting a 2PC will grab the lock in shared
 * mode. So as long as we hold the exclusive lock, no other backend start a
 * new 2PC and there can not be any 2PC in-progress. This technique would
 * rely on assumption that an exclusive lock requester is not starved by
 * share lock requesters.
 *
 * Note: To ensure that the 2PC are not blocked for a long time, we should
 * set a timeout. The lock should be release after the timeout and the
 * barrier should be canceled.
 */
void
ProcessCreateBarrierPrepare(const char *id)
{
	StringInfoData buf;

	if (!IS_PGXC_COORDINATOR || !IsConnFromCoord())
		ereport(ERROR,
				(errcode(ERRCODE_INTERNAL_ERROR),
				 errmsg("The CREATE BARRIER PREPARE message is expected to "
						"arrive at a Coordinator from another Coordinator")));

	LWLockAcquire(BarrierLock, LW_EXCLUSIVE);

	pq_beginmessage(&buf, 'b');
	pq_sendstring(&buf, id);
	pq_endmessage(&buf);
	pq_flush();

	/*
	 * TODO Start a timer to terminate the pending barrier after a specified
	 * timeout
	 */
}
Exemple #23
0
/*
 * Send an authentication request packet to the frontend.
 */
static void
sendAuthRequest(Port *port, AuthRequest areq)
{
	StringInfoData buf;

	pq_beginmessage(&buf, 'R');
	pq_sendint(&buf, (int32) areq, sizeof(int32));

	/* Add the salt for encrypted passwords. */
	if (areq == AUTH_REQ_MD5)
		pq_sendbytes(&buf, port->md5Salt, 4);
	else if (areq == AUTH_REQ_CRYPT)
		pq_sendbytes(&buf, port->cryptSalt, 2);

	pq_endmessage(&buf);

	/*
	 * Flush message so client will see it, except for AUTH_REQ_OK, which
	 * need not be sent until we are ready for queries.
	 */
	if (areq != AUTH_REQ_OK)
		pq_flush();
}
Exemple #24
0
/*
 * Actually do a base backup for the specified tablespaces.
 *
 * This is split out mainly to avoid complaints about "variable might be
 * clobbered by longjmp" from stupider versions of gcc.
 */
static void
perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
{
	XLogRecPtr	startptr;
	TimeLineID	starttli;
	XLogRecPtr	endptr;
	TimeLineID	endtli;
	char	   *labelfile;

	backup_started_in_recovery = RecoveryInProgress();

	startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli,
								  &labelfile);
	SendXlogRecPtrResult(startptr, starttli);

	PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
	{
		List	   *tablespaces = NIL;
		ListCell   *lc;
		struct dirent *de;
		tablespaceinfo *ti;

		/* Collect information about all tablespaces */
		while ((de = ReadDir(tblspcdir, "pg_tblspc")) != NULL)
		{
			char		fullpath[MAXPGPATH];
			char		linkpath[MAXPGPATH];
			int			rllen;

			/* Skip special stuff */
			if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
				continue;

			snprintf(fullpath, sizeof(fullpath), "pg_tblspc/%s", de->d_name);

#if defined(HAVE_READLINK) || defined(WIN32)
			rllen = readlink(fullpath, linkpath, sizeof(linkpath));
			if (rllen < 0)
			{
				ereport(WARNING,
						(errmsg("could not read symbolic link \"%s\": %m",
								fullpath)));
				continue;
			}
			else if (rllen >= sizeof(linkpath))
			{
				ereport(WARNING,
						(errmsg("symbolic link \"%s\" target is too long",
								fullpath)));
				continue;
			}
			linkpath[rllen] = '\0';

			ti = palloc(sizeof(tablespaceinfo));
			ti->oid = pstrdup(de->d_name);
			ti->path = pstrdup(linkpath);
			ti->size = opt->progress ? sendDir(linkpath, strlen(linkpath), true) : -1;
			tablespaces = lappend(tablespaces, ti);
#else

			/*
			 * If the platform does not have symbolic links, it should not be
			 * possible to have tablespaces - clearly somebody else created
			 * them. Warn about it and ignore.
			 */
			ereport(WARNING,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				  errmsg("tablespaces are not supported on this platform")));
#endif
		}

		/* Add a node for the base directory at the end */
		ti = palloc0(sizeof(tablespaceinfo));
		ti->size = opt->progress ? sendDir(".", 1, true) : -1;
		tablespaces = lappend(tablespaces, ti);

		/* Send tablespace header */
		SendBackupHeader(tablespaces);

		/* Send off our tablespaces one by one */
		foreach(lc, tablespaces)
		{
			tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc);
			StringInfoData buf;

			/* Send CopyOutResponse message */
			pq_beginmessage(&buf, 'H');
			pq_sendbyte(&buf, 0);		/* overall format */
			pq_sendint(&buf, 0, 2);		/* natts */
			pq_endmessage(&buf);

			/* In the main tar, include the backup_label first. */
			if (ti->path == NULL)
				sendFileWithContent(BACKUP_LABEL_FILE, labelfile);

			sendDir(ti->path == NULL ? "." : ti->path,
					ti->path == NULL ? 1 : strlen(ti->path),
					false);

			/* In the main tar, include pg_control last. */
			if (ti->path == NULL)
			{
				struct stat statbuf;

				if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
				{
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not stat control file \"%s\": %m",
									XLOG_CONTROL_FILE)));
				}

				sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf, false);
			}

			/*
			 * If we're including WAL, and this is the main data directory we
			 * don't terminate the tar stream here. Instead, we will append
			 * the xlog files below and terminate it then. This is safe since
			 * the main data directory is always sent *last*.
			 */
			if (opt->includewal && ti->path == NULL)
			{
				Assert(lnext(lc) == NULL);
			}
			else
				pq_putemptymessage('c');		/* CopyDone */
		}
	}
Exemple #25
0
/* ----------------
 *		printtup_internal_20 --- print a binary tuple in protocol 2.0
 *
 * We use a different message type, i.e. 'B' instead of 'D' to
 * indicate a tuple in internal (binary) form.
 *
 * This is largely same as printtup_20, except we use binary formatting.
 * ----------------
 */
static bool
printtup_internal_20(TupleTableSlot *slot, DestReceiver *self)
{
	TupleDesc	typeinfo = slot->tts_tupleDescriptor;
	DR_printtup *myState = (DR_printtup *) self;
	MemoryContext oldcontext;
	StringInfoData buf;
	int			natts = typeinfo->natts;
	int			i,
				j,
				k;

	/* Set or update my derived attribute info, if needed */
	if (myState->attrinfo != typeinfo || myState->nattrs != natts)
		printtup_prepare_info(myState, typeinfo, natts);

	/* Make sure the tuple is fully deconstructed */
	slot_getallattrs(slot);

	/* Switch into per-row context so we can recover memory below */
	oldcontext = MemoryContextSwitchTo(myState->tmpcontext);

	/*
	 * tell the frontend to expect new tuple data (in binary style)
	 */
	pq_beginmessage(&buf, 'B');

	/*
	 * send a bitmap of which attributes are not null
	 */
	j = 0;
	k = 1 << 7;
	for (i = 0; i < natts; ++i)
	{
		if (!slot->tts_isnull[i])
			j |= k;				/* set bit if not null */
		k >>= 1;
		if (k == 0)				/* end of byte? */
		{
			pq_sendint(&buf, j, 1);
			j = 0;
			k = 1 << 7;
		}
	}
	if (k != (1 << 7))			/* flush last partial byte */
		pq_sendint(&buf, j, 1);

	/*
	 * send the attributes of this tuple
	 */
	for (i = 0; i < natts; ++i)
	{
		PrinttupAttrInfo *thisState = myState->myinfo + i;
		Datum		attr = slot->tts_values[i];
		bytea	   *outputbytes;

		if (slot->tts_isnull[i])
			continue;

		Assert(thisState->format == 1);

		outputbytes = SendFunctionCall(&thisState->finfo, attr);
		pq_sendint(&buf, VARSIZE(outputbytes) - VARHDRSZ, 4);
		pq_sendbytes(&buf, VARDATA(outputbytes),
					 VARSIZE(outputbytes) - VARHDRSZ);
	}

	pq_endmessage(&buf);

	/* Return to caller's context, and flush row's temporary memory */
	MemoryContextSwitchTo(oldcontext);
	MemoryContextReset(myState->tmpcontext);

	return true;
}
Exemple #26
0
/* ----------------
 *		printtup --- print a tuple in protocol 3.0
 * ----------------
 */
static bool
printtup(TupleTableSlot *slot, DestReceiver *self)
{
	TupleDesc	typeinfo = slot->tts_tupleDescriptor;
	DR_printtup *myState = (DR_printtup *) self;
	MemoryContext oldcontext;
	StringInfoData buf;
	int			natts = typeinfo->natts;
	int			i;

	/* Set or update my derived attribute info, if needed */
	if (myState->attrinfo != typeinfo || myState->nattrs != natts)
		printtup_prepare_info(myState, typeinfo, natts);

	/* Make sure the tuple is fully deconstructed */
	slot_getallattrs(slot);

	/* Switch into per-row context so we can recover memory below */
	oldcontext = MemoryContextSwitchTo(myState->tmpcontext);

	/*
	 * Prepare a DataRow message (note buffer is in per-row context)
	 */
	pq_beginmessage(&buf, 'D');

	pq_sendint(&buf, natts, 2);

	/*
	 * send the attributes of this tuple
	 */
	for (i = 0; i < natts; ++i)
	{
		PrinttupAttrInfo *thisState = myState->myinfo + i;
		Datum		attr = slot->tts_values[i];

		if (slot->tts_isnull[i])
		{
			pq_sendint(&buf, -1, 4);
			continue;
		}

		/*
		 * Here we catch undefined bytes in datums that are returned to the
		 * client without hitting disk; see comments at the related check in
		 * PageAddItem().  This test is most useful for uncompressed,
		 * non-external datums, but we're quite likely to see such here when
		 * testing new C functions.
		 */
		if (thisState->typisvarlena)
			VALGRIND_CHECK_MEM_IS_DEFINED(DatumGetPointer(attr),
										  VARSIZE_ANY(attr));

		if (thisState->format == 0)
		{
			/* Text output */
			char	   *outputstr;

			outputstr = OutputFunctionCall(&thisState->finfo, attr);
			pq_sendcountedtext(&buf, outputstr, strlen(outputstr), false);
		}
		else
		{
			/* Binary output */
			bytea	   *outputbytes;

			outputbytes = SendFunctionCall(&thisState->finfo, attr);
			pq_sendint(&buf, VARSIZE(outputbytes) - VARHDRSZ, 4);
			pq_sendbytes(&buf, VARDATA(outputbytes),
						 VARSIZE(outputbytes) - VARHDRSZ);
		}
	}

	pq_endmessage(&buf);

	/* Return to caller's context, and flush row's temporary memory */
	MemoryContextSwitchTo(oldcontext);
	MemoryContextReset(myState->tmpcontext);

	return true;
}
Exemple #27
0
/*
 * START_REPLICATION
 */
static void
StartReplication(StartReplicationCmd *cmd)
{
	StringInfoData buf;

	/*
	 * Let postmaster know that we're streaming. Once we've declared us as a
	 * WAL sender process, postmaster will let us outlive the bgwriter and
	 * kill us last in the shutdown sequence, so we get a chance to stream all
	 * remaining WAL at shutdown, including the shutdown checkpoint. Note that
	 * there's no going back, and we mustn't write any WAL records after this.
	 */
	MarkPostmasterChildWalSender();
	SendPostmasterSignal(PMSIGNAL_ADVANCE_STATE_MACHINE);

	/*
	 * When promoting a cascading standby, postmaster sends SIGUSR2 to
	 * any cascading walsenders to kill them. But there is a corner-case where
	 * such walsender fails to receive SIGUSR2 and survives a standby promotion
	 * unexpectedly. This happens when postmaster sends SIGUSR2 before
	 * the walsender marks itself as a WAL sender, because postmaster sends
	 * SIGUSR2 to only the processes marked as a WAL sender.
	 *
	 * To avoid this corner-case, if recovery is NOT in progress even though
	 * the walsender is cascading one, we do the same thing as SIGUSR2 signal
	 * handler does, i.e., set walsender_ready_to_stop to true. Which causes
	 * the walsender to end later.
	 *
	 * When terminating cascading walsenders, usually postmaster writes
	 * the log message announcing the terminations. But there is a race condition
	 * here. If there is no walsender except this process before reaching here,
	 * postmaster thinks that there is no walsender and suppresses that
	 * log message. To handle this case, we always emit that log message here.
	 * This might cause duplicate log messages, but which is less likely to happen,
	 * so it's not worth writing some code to suppress them.
	 */
	if (am_cascading_walsender && !RecoveryInProgress())
	{
		ereport(LOG,
				(errmsg("terminating walsender process to force cascaded standby "
						"to update timeline and reconnect")));
		walsender_ready_to_stop = true;
	}

	/*
	 * We assume here that we're logging enough information in the WAL for
	 * log-shipping, since this is checked in PostmasterMain().
	 *
	 * NOTE: wal_level can only change at shutdown, so in most cases it is
	 * difficult for there to be WAL data that we can still see that was written
	 * at wal_level='minimal'.
	 */

	/*
	 * When we first start replication the standby will be behind the primary.
	 * For some applications, for example, synchronous replication, it is
	 * important to have a clear state for this initial catchup mode, so we
	 * can trigger actions when we change streaming state later. We may stay
	 * in this state for a long time, which is exactly why we want to be able
	 * to monitor whether or not we are still here.
	 */
	WalSndSetState(WALSNDSTATE_CATCHUP);

	/* Send a CopyBothResponse message, and start streaming */
	pq_beginmessage(&buf, 'W');
	pq_sendbyte(&buf, 0);
	pq_sendint(&buf, 0, 2);
	pq_endmessage(&buf);
	pq_flush();

	/*
	 * Initialize position to the received one, then the xlog records begin to
	 * be shipped from that position
	 */
	sentPtr = cmd->startpoint;
}
Exemple #28
0
/*
 * IDENTIFY_SYSTEM
 */
static void
IdentifySystem(void)
{
	StringInfoData buf;
	char		sysid[32];
	char		tli[11];
	char		xpos[MAXFNAMELEN];
	XLogRecPtr	logptr;

	/*
	 * Reply with a result set with one row, three columns. First col is
	 * system ID, second is timeline ID, and third is current xlog location.
	 */

	snprintf(sysid, sizeof(sysid), UINT64_FORMAT,
			 GetSystemIdentifier());
	snprintf(tli, sizeof(tli), "%u", ThisTimeLineID);

	logptr = am_cascading_walsender ? GetStandbyFlushRecPtr() : GetInsertRecPtr();

	snprintf(xpos, sizeof(xpos), "%X/%X",
			 logptr.xlogid, logptr.xrecoff);

	/* Send a RowDescription message */
	pq_beginmessage(&buf, 'T');
	pq_sendint(&buf, 3, 2);		/* 3 fields */

	/* first field */
	pq_sendstring(&buf, "systemid");	/* col name */
	pq_sendint(&buf, 0, 4);		/* table oid */
	pq_sendint(&buf, 0, 2);		/* attnum */
	pq_sendint(&buf, TEXTOID, 4);		/* type oid */
	pq_sendint(&buf, -1, 2);	/* typlen */
	pq_sendint(&buf, 0, 4);		/* typmod */
	pq_sendint(&buf, 0, 2);		/* format code */

	/* second field */
	pq_sendstring(&buf, "timeline");	/* col name */
	pq_sendint(&buf, 0, 4);		/* table oid */
	pq_sendint(&buf, 0, 2);		/* attnum */
	pq_sendint(&buf, INT4OID, 4);		/* type oid */
	pq_sendint(&buf, 4, 2);		/* typlen */
	pq_sendint(&buf, 0, 4);		/* typmod */
	pq_sendint(&buf, 0, 2);		/* format code */

	/* third field */
	pq_sendstring(&buf, "xlogpos");
	pq_sendint(&buf, 0, 4);
	pq_sendint(&buf, 0, 2);
	pq_sendint(&buf, TEXTOID, 4);
	pq_sendint(&buf, -1, 2);
	pq_sendint(&buf, 0, 4);
	pq_sendint(&buf, 0, 2);
	pq_endmessage(&buf);

	/* Send a DataRow message */
	pq_beginmessage(&buf, 'D');
	pq_sendint(&buf, 3, 2);		/* # of columns */
	pq_sendint(&buf, strlen(sysid), 4); /* col1 len */
	pq_sendbytes(&buf, (char *) &sysid, strlen(sysid));
	pq_sendint(&buf, strlen(tli), 4);	/* col2 len */
	pq_sendbytes(&buf, (char *) tli, strlen(tli));
	pq_sendint(&buf, strlen(xpos), 4);	/* col3 len */
	pq_sendbytes(&buf, (char *) xpos, strlen(xpos));

	pq_endmessage(&buf);

	/* Send CommandComplete and ReadyForQuery messages */
	EndCommand("SELECT", DestRemote);
	ReadyForQuery(DestRemote);
	/* ReadyForQuery did pq_flush for us */
}
Exemple #29
0
/*
 * Actually do a base backup for the specified tablespaces.
 *
 * This is split out mainly to avoid complaints about "variable might be
 * clobbered by longjmp" from stupider versions of gcc.
 */
static void
perform_base_backup(basebackup_options *opt, DIR *tblspcdir)
{
	XLogRecPtr	startptr;
	XLogRecPtr	endptr;
	char	   *labelfile;

	startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &labelfile);
	Assert(!XLogRecPtrIsInvalid(startptr));

	elogif(!debug_basebackup, LOG,
		   "basebackup perform -- "
		   "Basebackup start xlog location = %X/%X",
		   startptr.xlogid, startptr.xrecoff);

	/*
	 * Set xlogCleanUpTo so that checkpoint process knows
	 * which old xlog files should not be cleaned
	 */
	WalSndSetXLogCleanUpTo(startptr);

	SIMPLE_FAULT_INJECTOR(BaseBackupPostCreateCheckpoint);

	SendXlogRecPtrResult(startptr);

	PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0);
	{
		List	   *filespaces = NIL;
		ListCell   *lc;

		/* Collect information about all filespaces, including pg_system */
		filespaces = get_filespaces_to_send(opt);

		/* Send filespace header */
		SendBackupHeader(filespaces);

		/* Send off our filespaces one by one */
		foreach(lc, filespaces)
		{
			filespaceinfo *fi = (filespaceinfo *) lfirst(lc);
			StringInfoData buf;

			/* Send CopyOutResponse message */
			pq_beginmessage(&buf, 'H');
			pq_sendbyte(&buf, 0);		/* overall format */
			pq_sendint(&buf, 0, 2);		/* natts */
			pq_endmessage(&buf);

			/* In the main tar, include the backup_label first. */
			if (fi->primary_path == NULL)
				sendFileWithContent(BACKUP_LABEL_FILE, labelfile);

			sendDir(fi->primary_path == NULL ? "." : fi->primary_path,
					fi->primary_path == NULL ? 1 : strlen(fi->primary_path),
					opt->exclude, false);

			/* In the main tar, include pg_control last. */
			if (fi->primary_path == NULL)
			{
				struct stat statbuf;

				if (lstat(XLOG_CONTROL_FILE, &statbuf) != 0)
				{
					ereport(ERROR,
							(errcode_for_file_access(),
							 errmsg("could not stat control file \"%s\": %m",
									XLOG_CONTROL_FILE)));
				}

				sendFile(XLOG_CONTROL_FILE, XLOG_CONTROL_FILE, &statbuf);

				elogif(debug_basebackup, LOG,
					   "basebackup perform -- Sent file %s." , XLOG_CONTROL_FILE);
			}

			/*
			 * If we're including WAL, and this is the main data directory we
			 * don't terminate the tar stream here. Instead, we will append
			 * the xlog files below and terminate it then. This is safe since
			 * the main data directory is always sent *last*.
			 */
			if (opt->includewal && fi->xlogdir)
			{
				Assert(lnext(lc) == NULL);
			}
			else
				pq_putemptymessage('c');		/* CopyDone */
		}
	}
Exemple #30
0
/* ----------------
 *		printtup_internal_20 --- print a binary tuple in protocol 2.0
 *
 * We use a different message type, i.e. 'B' instead of 'D' to
 * indicate a tuple in internal (binary) form.
 *
 * This is largely same as printtup_20, except we use binary formatting.
 * ----------------
 */
static void
printtup_internal_20(TupleTableSlot *slot, DestReceiver *self)
{
	TupleDesc	typeinfo = slot->tts_tupleDescriptor;
	DR_printtup *myState = (DR_printtup *) self;
	StringInfoData buf;
	int			natts = typeinfo->natts;
	int			i,
				j,
				k;

	/* Set or update my derived attribute info, if needed */
	if (myState->attrinfo != typeinfo || myState->nattrs != natts)
		printtup_prepare_info(myState, typeinfo, natts);

	/* Make sure the tuple is fully deconstructed */
	slot_getallattrs(slot);

	/*
	 * tell the frontend to expect new tuple data (in binary style)
	 */
	pq_beginmessage(&buf, 'B');

	/*
	 * send a bitmap of which attributes are not null
	 */
	j = 0;
	k = 1 << 7;
	for (i = 0; i < natts; ++i)
	{
		if (!slot->tts_isnull[i])
			j |= k;				/* set bit if not null */
		k >>= 1;
		if (k == 0)				/* end of byte? */
		{
			pq_sendint(&buf, j, 1);
			j = 0;
			k = 1 << 7;
		}
	}
	if (k != (1 << 7))			/* flush last partial byte */
		pq_sendint(&buf, j, 1);

	/*
	 * send the attributes of this tuple
	 */
	for (i = 0; i < natts; ++i)
	{
		PrinttupAttrInfo *thisState = myState->myinfo + i;
		Datum		origattr = slot->tts_values[i],
					attr;
		bytea	   *outputbytes;

		if (slot->tts_isnull[i])
			continue;

		Assert(thisState->format == 1);

		/*
		 * If we have a toasted datum, forcibly detoast it here to avoid
		 * memory leakage inside the type's output routine.
		 */
		if (thisState->typisvarlena)
			attr = PointerGetDatum(PG_DETOAST_DATUM(origattr));
		else
			attr = origattr;

		outputbytes = SendFunctionCall(&thisState->finfo, attr);
		/* We assume the result will not have been toasted */
		pq_sendint(&buf, VARSIZE(outputbytes) - VARHDRSZ, 4);
		pq_sendbytes(&buf, VARDATA(outputbytes),
					 VARSIZE(outputbytes) - VARHDRSZ);
		pfree(outputbytes);

		/* Clean up detoasted copy, if any */
		if (DatumGetPointer(attr) != DatumGetPointer(origattr))
			pfree(DatumGetPointer(attr));
	}

	pq_endmessage(&buf);
}