/*
 * Send msgBody to the primary/mirror peer described by addrList and split
 * the reply into its two payload lines.
 *
 * The reply is read from gResultDataBuf and is expected to have the form
 *
 *     Success:\nLineToKeep1\nLineToKeep2
 *
 * The first line is skipped; LineToKeep1 is copied into messageFromPeerOut
 * and LineToKeep2 into resetNumberFromPeerOut, each NUL-terminated.
 *
 * If the send does not report TRANS_ERRCODE_SUCCESS, or the reply does not
 * contain exactly two newlines, a WARNING is logged and proc_exit() is
 * called with EXIT_CODE_SHOULD_ENTER_FAULT.
 */
static void
sendMessageToPeerAndExitIfProblem( struct addrinfo *addrList, char *msgBody,
		char messageFromPeerOut[MESSAGE_FROM_PEER_BUF_SIZE],
		char resetNumberFromPeerOut[MESSAGE_FROM_PEER_BUF_SIZE])
{
	PrimaryMirrorTransitionClientInfo client;
	int			sendResult;
	int			messageLen = 0;
	int			resetNumberLen = 0;
	int			newlinesSeen = 0;
	char	   *cursor;

	elog(DEBUG1, "peer reset: sending message to primary/mirror peer: %s", msgBody);

	/* clear the receipt buffers; the callbacks registered below fill them in */
	gResponseWasTooLarge = false;
	gErrorLogBuf[0] = '\0';
	gResultDataBuf[0] = '\0';

	/* wire up the callbacks and perform the exchange */
	client.receivedDataCallbackFn = resetPeer_receivedDataCallbackFunction;
	client.errorLogFn = resetPeer_errorLogFunction;
	client.checkForNeedToExitFn = resetPeer_checkForNeedToExitFunction;

	sendResult = sendTransitionMessage(&client, addrList, msgBody, strlen(msgBody),
									   10 /* numRetries */, 3600 /* transition_timeout */);
	if (sendResult != TRANS_ERRCODE_SUCCESS)
	{
		elog(WARNING, "during reset, unable to contact primary/mirror peer to coordinate reset; "
						"will transition to fault state.  Error code %d and message '%s'",
						sendResult, gErrorLogBuf);
		proc_exit(EXIT_CODE_SHOULD_ENTER_FAULT);
	}

	/*
	 * Walk the reply one character at a time.  newlinesSeen tells us which
	 * line the current character belongs to: 0 is the leading line (dropped),
	 * 1 feeds messageFromPeerOut and 2 feeds resetNumberFromPeerOut.
	 *
	 * The Insist() calls run after each store, so the index used for the next
	 * store (and for the terminating NUL) is always known to be in range.
	 */
	for (cursor = gResultDataBuf; *cursor != '\0'; cursor++)
	{
		if (*cursor == '\n')
		{
			newlinesSeen++;
			if (newlinesSeen == 3)
			{
				/* a third newline means more lines than the protocol allows */
				elog(WARNING, "during reset, invalid message contacting primary/mirror peer to coordinate reset; "
						"will transition to fault state.  Message received: %s",
						gResultDataBuf);
				proc_exit(EXIT_CODE_SHOULD_ENTER_FAULT);
			}
			continue;
		}

		switch (newlinesSeen)
		{
			case 1:
				messageFromPeerOut[messageLen] = *cursor;
				messageLen++;
				Insist(messageLen < MESSAGE_FROM_PEER_BUF_SIZE);
				break;

			case 2:
				resetNumberFromPeerOut[resetNumberLen] = *cursor;
				resetNumberLen++;
				Insist(resetNumberLen < MESSAGE_FROM_PEER_BUF_SIZE);
				break;

			default:
				/* characters on the leading line are ignored */
				break;
		}
	}

	messageFromPeerOut[messageLen] = '\0';
	resetNumberFromPeerOut[resetNumberLen] = '\0';

	/* fewer than two newlines means one of the expected lines is missing */
	if (newlinesSeen != 2)
	{
		elog(WARNING, "during reset, invalid message contacting primary/mirror peer to coordinate reset; "
				"will transition to fault state.  Message received: %s",
				gResultDataBuf);
		proc_exit(EXIT_CODE_SHOULD_ENTER_FAULT);
	}
}
Example #2
0
/*
 * gp_inject_fault
 *
 * SQL-callable function that requests a fault injection on the segment
 * identified by dbid.
 *
 * Arguments (by position):
 *   0 faultName         text
 *   1 type              text
 *   2 ddlStatement      text
 *   3 databaseName      text
 *   4 tableName         text
 *   5 numOccurrences    int4
 *   6 sleepTimeSeconds  int4
 *   7 dbid              int4
 *
 * When dbid equals GpIdentity.dbid the request is handed directly to
 * processTransitionRequest_faultInject().  Otherwise the target's hostname
 * and port are read from gp_segment_configuration and a "faultInject"
 * transition message is sent to it with sendTransitionMessage().
 *
 * Returns a boolean true Datum on success; every failure is reported with
 * elog/ereport at ERROR level.
 */
Datum
gp_inject_fault(PG_FUNCTION_ARGS)
{
	char	   *faultName = TextDatumGetCString(PG_GETARG_DATUM(0));
	char	   *type = TextDatumGetCString(PG_GETARG_DATUM(1));
	char	   *ddlStatement = TextDatumGetCString(PG_GETARG_DATUM(2));
	char	   *databaseName = TextDatumGetCString(PG_GETARG_DATUM(3));
	char	   *tableName = TextDatumGetCString(PG_GETARG_DATUM(4));
	int			numOccurrences = PG_GETARG_INT32(5);
	int			sleepTimeSeconds = PG_GETARG_INT32(6);
	int         dbid = PG_GETARG_INT32(7);
	StringInfo  faultmsg = makeStringInfo();

	/* Fast path if injecting fault in our postmaster. */
	if (GpIdentity.dbid == dbid)
	{
		appendStringInfo(faultmsg, "%s\n%s\n%s\n%s\n%s\n%d\n%d\n",
						 faultName, type, ddlStatement, databaseName,
						 tableName, numOccurrences, sleepTimeSeconds);
		int offset = 0;
		char *response =
			processTransitionRequest_faultInject(
				faultmsg->data, &offset, faultmsg->len);
		if (!response)
			elog(ERROR, "failed to inject fault locally (dbid %d)", dbid);
		if (strncmp(response, "Success:",  strlen("Success:")) != 0)
			elog(ERROR, "%s", response);

		elog(NOTICE, "%s", response);
		/* build the result the same way as the remote path below */
		PG_RETURN_DATUM(BoolGetDatum(true));
	}

	/*
	 * The scan key below is built from (int16) dbid.  A value that does not
	 * survive that cast would be truncated and could match an unrelated
	 * segment, so treat it as "not found" up front.
	 */
	if ((int) ((int16) dbid) != dbid)
		elog(ERROR, "cannot find dbid %d", dbid);

	/* Obtain host and port of the requested dbid */
	HeapTuple tuple;
	Relation rel = heap_open(GpSegmentConfigRelationId, AccessShareLock);
	ScanKeyData scankey;
	SysScanDesc sscan;
	ScanKeyInit(&scankey,
				Anum_gp_segment_configuration_dbid,
				BTEqualStrategyNumber, F_INT2EQ,
				Int16GetDatum((int16) dbid));
	sscan = systable_beginscan(rel, GpSegmentConfigDbidIndexId, true,
							   GetTransactionSnapshot(), 1, &scankey);
	tuple = systable_getnext(sscan);

	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cannot find dbid %d", dbid);

	bool isnull;
	Datum datum = heap_getattr(tuple, Anum_gp_segment_configuration_hostname,
							   RelationGetDescr(rel), &isnull);
	if (isnull)
		elog(ERROR, "hostname is null for dbid %d", dbid);
	/* textout yields a separate C string, used after the scan is ended */
	char *hostname = DatumGetCString(DirectFunctionCall1(textout, datum));

	datum = heap_getattr(tuple, Anum_gp_segment_configuration_port,
						 RelationGetDescr(rel), &isnull);
	if (isnull)
		elog(ERROR, "port is null for dbid %d", dbid);
	int port = DatumGetInt32(datum);

	systable_endscan(sscan);
	heap_close(rel, NoLock);

	struct addrinfo *addrList = NULL;
	struct addrinfo hint;
	int			ret;

	/* Initialize hint structure */
	MemSet(&hint, 0, sizeof(hint));
	hint.ai_socktype = SOCK_STREAM;
	hint.ai_family = AF_UNSPEC;

	char portStr[100];
	int portLen = snprintf(portStr, sizeof(portStr), "%d", port);
	/* negative means an output error, >= size means truncation */
	if (portLen < 0 || (size_t) portLen >= sizeof(portStr))
		elog(ERROR, "port number too long for dbid %d", dbid);

	/* Use pg_getaddrinfo_all() to resolve the address */
	ret = pg_getaddrinfo_all(hostname, portStr, &hint, &addrList);
	if (ret || !addrList)
	{
		if (addrList)
			pg_freeaddrinfo_all(hint.ai_family, addrList);
		elog(ERROR, "could not translate host name \"%s\" to address: %s\n",
			 hostname, gai_strerror(ret));
	}

	PrimaryMirrorTransitionClientInfo client;
	client.receivedDataCallbackFn = transitionReceivedDataFn;
	client.errorLogFn = transitionErrorLogFn;
	client.checkForNeedToExitFn = checkForNeedToExitFn;
	/* fresh buffer whose contents are reported via errdetail on failure */
	transitionMsgErrors = makeStringInfo();

	/* unlike the local path, the remote message starts with the request name */
	appendStringInfo(faultmsg, "%s\n%s\n%s\n%s\n%s\n%s\n%d\n%d\n",
					 "faultInject",	faultName, type, ddlStatement,
					 databaseName, tableName, numOccurrences,
					 sleepTimeSeconds);

	if (sendTransitionMessage(&client, addrList, faultmsg->data, faultmsg->len,
							  1 /* retries */, 60 /* timeout */) !=
		TRANS_ERRCODE_SUCCESS)
	{
		/* release the address list before ereport(ERROR) */
		pg_freeaddrinfo_all(hint.ai_family, addrList);
		ereport(ERROR, (errmsg("failed to inject %s fault in dbid %d",
							   faultName, dbid),
						errdetail("%s", transitionMsgErrors->data)));
	}

	pg_freeaddrinfo_all(hint.ai_family, addrList);
	PG_RETURN_DATUM(BoolGetDatum(true));
}
Example #3
0
/*
 * Parse str as a base-10 integer that must be non-zero and representable
 * as an int.  The whole string must be consumed.
 *
 * Returns 1 and stores the value in *result on success; returns 0 and
 * leaves *result untouched otherwise.
 */
static int
parseNonZeroIntArg(const char *str, int *result)
{
	char	   *end = NULL;
	long		parsed;

	/* strtol only sets errno on failure, so clear any stale value first */
	errno = 0;
	parsed = strtol(str, &end, 10);

	if (errno == ERANGE || end == str || *end != '\0')
		return 0;

	/* zero is not accepted; also reject values that do not fit in an int */
	if (parsed == 0 || (long) ((int) parsed) != parsed)
		return 0;

	*result = (int) parsed;
	return 1;
}

/*
 * Command-line entry point: build or load a transition message and send it
 * with sendTransitionMessage().
 *
 * Two modes, selected by whether -H (segment host) is given:
 *
 *   without -H: -h/-p name the target; the message body is read from the
 *               file given with -i, or from stdin when -i is absent.
 *   with -H:    the message is assembled from -m, -s, -H, -R, -h, -r and -p,
 *               and sent to the segment at -H/-P.
 *
 * -n and -t override the retry count (default 20) and the transition
 * timeout in seconds (default 3600).
 *
 * Returns the result of sendTransitionMessage(), or one of the
 * TRANS_ERRCODE_ERROR_* codes when argument validation, host lookup or
 * input reading fails.
 */
int
main(int argc, char **argv)
{
	struct addrinfo *addrList = NULL;

	char *host = NULL, *port = NULL, *inputFile = NULL;

	char *mode = NULL;
	char *status = NULL;
	char *seg_addr = NULL;
	char *seg_pm_port = NULL;
	char *seg_rep_port = NULL;
	char *peer_addr = NULL;
	char *peer_pm_port = NULL;
	char *peer_rep_port = NULL;

	char *num_retries_str = NULL;
	char *transition_timeout_str = NULL;

	int num_retries = 20;
	int transition_timeout = 3600;  /* 1 hour */

	/*
	 * getopt() returns int.  Storing it in a plain char breaks the
	 * comparison with -1 on platforms where char is unsigned.
	 */
	int opt;

	char msgBuffer[SEGMENT_MSG_BUF_SIZE];
	char *msg = NULL;
	int msgLen = 0;

	while ((opt = getopt(argc, argv, "m:s:H:P:R:h:p:r:i:n:t:")) != -1)
	{
		switch (opt)
		{
			case 'i':
				inputFile = optarg;
				break;
			case 'm':
				mode = optarg;
				break;
			case 's':
				status = optarg;
				break;
			case 'H':
				seg_addr = optarg;
				break;
			case 'P':
				seg_pm_port = optarg;
				break;
			case 'R':
				seg_rep_port = optarg;
				break;
			case 'h':
				/* -h doubles as the target host and the peer address */
				host = peer_addr = optarg;
				break;
			case 'p':
				/* -p doubles as the target port and the peer postmaster port */
				port = peer_pm_port = optarg;
				break;
			case 'r':
				peer_rep_port = optarg;
				break;
			case 'n':
				num_retries_str = optarg;
				break;
			case 't':
				transition_timeout_str = optarg;
				break;
			case '?':
				/* reported but not fatal, matching previous behavior */
				fprintf(stderr, "Unrecognized option: -%c\n", optopt);
				break;
			default:
				break;
		}
	}

	if (num_retries_str != NULL &&
		!parseNonZeroIntArg(num_retries_str, &num_retries))
	{
		fprintf(stderr, "Invalid num_retries (-n) argument\n");
		return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT;
	}

	if (transition_timeout_str != NULL &&
		!parseNonZeroIntArg(transition_timeout_str, &transition_timeout))
	{
		fprintf(stderr, "Invalid transition_timeout (-t) argument\n");
		return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT;
	}

	/* no segment host given: the message comes from a file or stdin */
	if (seg_addr == NULL)
	{
		if ( host == NULL)
		{
			fprintf(stderr, "Missing host (-h) argument\n");
			return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT;
		}
		if ( port == NULL )
		{
			fprintf(stderr, "Missing port (-p) argument\n");
			return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT;
		}

		/* find the target machine */
		if ( ! determineTargetHost(&addrList, host, port))
		{
			return TRANS_ERRCODE_ERROR_HOST_LOOKUP_FAILED;
		}

		/* load the input message into memory */
		if ( inputFile == NULL)
		{
			msg = readFully(stdin, &msgLen);
		}
		else
		{
			FILE *f = fopen(inputFile, "r");
			if ( f == NULL)
			{
				fprintf(stderr, "Unable to open file %s\n", inputFile);
				return TRANS_ERRCODE_ERROR_READING_INPUT;
			}
			msg = readFully(f, &msgLen);
			fclose(f);
		}
	}
	else
	{
		/* build message from passed parameters */

		if (mode == NULL)
		{
			fprintf(stderr, "Missing mode (-m) argument\n");
			return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT;
		}
		if (status == NULL)
		{
			fprintf(stderr, "Missing status (-s) argument\n");
			return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT;
		}
		if (seg_pm_port == NULL)
		{
			fprintf(stderr, "Missing segment postmaster port (-P) argument\n");
			return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT;
		}
		if (seg_rep_port == NULL)
		{
			fprintf(stderr, "Missing segment replication port (-R) argument\n");
			return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT;
		}
		if (peer_addr == NULL)
		{
			fprintf(stderr, "Missing peer host (-h) argument\n");
			return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT;
		}
		if (peer_pm_port == NULL)
		{
			fprintf(stderr, "Missing peer postmaster port (-p) argument\n");
			return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT;
		}
		if (peer_rep_port == NULL)
		{
			fprintf(stderr, "Missing peer replication port (-r) argument\n");
			return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT;
		}

		/* build message */
		msgLen = snprintf(
			msgBuffer, sizeof(msgBuffer),
			"%s\n%s\n%s\n%s\n%s\n%s\n%s\n",
			mode,
			status,
			seg_addr,
			seg_rep_port,
			peer_addr,
			peer_rep_port,
			peer_pm_port
			);

		/*
		 * snprintf returns the length the full output would have had.  If
		 * that does not fit, the buffer holds a truncated message and msgLen
		 * would point past its end, so refuse to send it.
		 */
		if (msgLen < 0 || (size_t) msgLen >= sizeof(msgBuffer))
		{
			fprintf(stderr, "Transition message built from arguments is too long\n");
			return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT;
		}

		msg = msgBuffer;

		/* find the target machine */
		if (!determineTargetHost(&addrList, seg_addr, seg_pm_port))
		{
			return TRANS_ERRCODE_ERROR_HOST_LOOKUP_FAILED;
		}
	}

	 /* check for errors while building the message */
	if ( msg == NULL )
	{
		return TRANS_ERRCODE_ERROR_READING_INPUT;
	}

	/* send the message */
	PrimaryMirrorTransitionClientInfo client;
	client.receivedDataCallbackFn = gpMirrorReceivedDataCallbackFunction;
	client.errorLogFn = gpMirrorErrorLogFunction;
	client.checkForNeedToExitFn = gpCheckForNeedToExitFn;
	return sendTransitionMessage(&client, addrList, msg, msgLen, num_retries, transition_timeout);
}