static void sendMessageToPeerAndExitIfProblem( struct addrinfo *addrList, char *msgBody, char messageFromPeerOut[MESSAGE_FROM_PEER_BUF_SIZE], char resetNumberFromPeerOut[MESSAGE_FROM_PEER_BUF_SIZE]) { elog(DEBUG1, "peer reset: sending message to primary/mirror peer: %s", msgBody); /* set up receipt buffers (populated by the callback functions) */ gResponseWasTooLarge = false; gErrorLogBuf[0] = '\0'; gResultDataBuf[0] = '\0'; /* make the call and check results */ PrimaryMirrorTransitionClientInfo client; client.receivedDataCallbackFn = resetPeer_receivedDataCallbackFunction; client.errorLogFn = resetPeer_errorLogFunction; client.checkForNeedToExitFn = resetPeer_checkForNeedToExitFunction; int resultCode = sendTransitionMessage(&client, addrList, msgBody, strlen(msgBody), 10 /* numRetries */, 3600 /* transition_timeout */); if (resultCode != TRANS_ERRCODE_SUCCESS) { elog(WARNING, "during reset, unable to contact primary/mirror peer to coordinate reset; " "will transition to fault state. Error code %d and message '%s'", resultCode, gErrorLogBuf); proc_exit(EXIT_CODE_SHOULD_ENTER_FAULT); } /* extract the two fields into messageFromPeerOut and resetNumberFromPeerOut, skipping the first Success: line * * is there a way to make this simple string parser easier? * * The result will look like Success:\nLineToKeep1\nLineToKeep2 * This pulls LineToKeep1 and LineToKeep2 out into messageFromPeerOut and resetNumberFromPeerIndex * * Note that because gResultDataBuf is limited to MESSAGE_FROM_PEER_BUF_SIZE, we don't technically need * to check overflow here. */ int resetNumberFromPeerIndex = 0, messageFromPeerIndex = 0, whichLine = 0; char *buf = gResultDataBuf; while (*buf) { if ( *buf == '\n') { whichLine++; if ( whichLine == 3) { elog(WARNING, "during reset, invalid message contacting primary/mirror peer to coordinate reset; " "will transition to fault state. Message received: %s", gResultDataBuf); proc_exit(EXIT_CODE_SHOULD_ENTER_FAULT); } } else { if (whichLine == 1) { messageFromPeerOut[messageFromPeerIndex] = *buf; messageFromPeerIndex++; /* see comments above about why this is not strictly needed */ Insist(messageFromPeerIndex < MESSAGE_FROM_PEER_BUF_SIZE); } else if (whichLine == 2) { resetNumberFromPeerOut[resetNumberFromPeerIndex] = *buf; resetNumberFromPeerIndex++; /* see comments above about why this is not strictly needed */ Insist(resetNumberFromPeerIndex < MESSAGE_FROM_PEER_BUF_SIZE); } } buf++; } messageFromPeerOut[messageFromPeerIndex] = '\0'; resetNumberFromPeerOut[resetNumberFromPeerIndex] = '\0'; if ( whichLine != 2 ) { elog(WARNING, "during reset, invalid message contacting primary/mirror peer to coordinate reset; " "will transition to fault state. Message received: %s", gResultDataBuf); proc_exit(EXIT_CODE_SHOULD_ENTER_FAULT); } }
Datum gp_inject_fault(PG_FUNCTION_ARGS) { char *faultName = TextDatumGetCString(PG_GETARG_DATUM(0)); char *type = TextDatumGetCString(PG_GETARG_DATUM(1)); char *ddlStatement = TextDatumGetCString(PG_GETARG_DATUM(2)); char *databaseName = TextDatumGetCString(PG_GETARG_DATUM(3)); char *tableName = TextDatumGetCString(PG_GETARG_DATUM(4)); int numOccurrences = PG_GETARG_INT32(5); int sleepTimeSeconds = PG_GETARG_INT32(6); int dbid = PG_GETARG_INT32(7); StringInfo faultmsg = makeStringInfo(); /* Fast path if injecting fault in our postmaster. */ if (GpIdentity.dbid == dbid) { appendStringInfo(faultmsg, "%s\n%s\n%s\n%s\n%s\n%d\n%d\n", faultName, type, ddlStatement, databaseName, tableName, numOccurrences, sleepTimeSeconds); int offset = 0; char *response = processTransitionRequest_faultInject( faultmsg->data, &offset, faultmsg->len); if (!response) elog(ERROR, "failed to inject fault locally (dbid %d)", dbid); if (strncmp(response, "Success:", strlen("Success:")) != 0) elog(ERROR, "%s", response); elog(NOTICE, "%s", response); PG_RETURN_DATUM(true); } /* Obtain host and port of the requested dbid */ HeapTuple tuple; Relation rel = heap_open(GpSegmentConfigRelationId, AccessShareLock); ScanKeyData scankey; SysScanDesc sscan; ScanKeyInit(&scankey, Anum_gp_segment_configuration_dbid, BTEqualStrategyNumber, F_INT2EQ, Int16GetDatum((int16) dbid)); sscan = systable_beginscan(rel, GpSegmentConfigDbidIndexId, true, GetTransactionSnapshot(), 1, &scankey); tuple = systable_getnext(sscan); if (!HeapTupleIsValid(tuple)) elog(ERROR, "cannot find dbid %d", dbid); bool isnull; Datum datum = heap_getattr(tuple, Anum_gp_segment_configuration_hostname, RelationGetDescr(rel), &isnull); char *hostname; if (!isnull) hostname = DatumGetCString(DirectFunctionCall1(textout, datum)); else elog(ERROR, "hostname is null for dbid %d", dbid); int port = DatumGetInt32(heap_getattr(tuple, Anum_gp_segment_configuration_port, RelationGetDescr(rel), &isnull)); systable_endscan(sscan); heap_close(rel, NoLock); struct addrinfo *addrList = NULL; struct addrinfo hint; int ret; /* Initialize hint structure */ MemSet(&hint, 0, sizeof(hint)); hint.ai_socktype = SOCK_STREAM; hint.ai_family = AF_UNSPEC; char portStr[100]; if (snprintf(portStr, sizeof(portStr), "%d", port) >= sizeof(portStr)) elog(ERROR, "port number too long for dbid %d", dbid); /* Use pg_getaddrinfo_all() to resolve the address */ ret = pg_getaddrinfo_all(hostname, portStr, &hint, &addrList); if (ret || !addrList) { if (addrList) pg_freeaddrinfo_all(hint.ai_family, addrList); elog(ERROR, "could not translate host name \"%s\" to address: %s\n", hostname, gai_strerror(ret)); } PrimaryMirrorTransitionClientInfo client; client.receivedDataCallbackFn = transitionReceivedDataFn; client.errorLogFn = transitionErrorLogFn; client.checkForNeedToExitFn = checkForNeedToExitFn; transitionMsgErrors = makeStringInfo(); appendStringInfo(faultmsg, "%s\n%s\n%s\n%s\n%s\n%s\n%d\n%d\n", "faultInject", faultName, type, ddlStatement, databaseName, tableName, numOccurrences, sleepTimeSeconds); if (sendTransitionMessage(&client, addrList, faultmsg->data, faultmsg->len, 1 /* retries */, 60 /* timeout */) != TRANS_ERRCODE_SUCCESS) { pg_freeaddrinfo_all(hint.ai_family, addrList); ereport(ERROR, (errmsg("failed to inject %s fault in dbid %d", faultName, dbid), errdetail("%s", transitionMsgErrors->data))); } pg_freeaddrinfo_all(hint.ai_family, addrList); PG_RETURN_DATUM(BoolGetDatum(true)); }
int main(int argc, char **argv) { struct addrinfo *addrList = NULL; char *host = NULL, *port = NULL, *inputFile = NULL; char *mode = NULL; char *status = NULL; char *seg_addr = NULL; char *seg_pm_port = NULL; char *seg_rep_port = NULL; char *peer_addr = NULL; char *peer_pm_port = NULL; char *peer_rep_port = NULL; char *num_retries_str = NULL; char *transition_timeout_str = NULL; int num_retries = 20; int transition_timeout = 3600; /* 1 hour */ char opt; char msgBuffer[SEGMENT_MSG_BUF_SIZE]; char *msg = NULL; int msgLen = 0; while ((opt = getopt(argc, argv, "m:s:H:P:R:h:p:r:i:n:t:")) != -1) { switch (opt) { case 'i': inputFile = optarg; break; case 'm': mode = optarg; break; case 's': status = optarg; break; case 'H': seg_addr = optarg; break; case 'P': seg_pm_port = optarg; break; case 'R': seg_rep_port = optarg; break; case 'h': host = peer_addr = optarg; break; case 'p': port = peer_pm_port = optarg; break; case 'r': peer_rep_port = optarg; break; case 'n': num_retries_str = optarg; break; case 't': transition_timeout_str = optarg; break; case '?': fprintf(stderr, "Unrecognized option: -%c\n", optopt); } } if (num_retries_str != NULL) { num_retries = (int) strtol(num_retries_str, NULL, 10); if (num_retries == 0 || errno == ERANGE) { fprintf(stderr, "Invalid num_retries (-n) argument\n"); return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT; } } if (transition_timeout_str != NULL) { transition_timeout = (int) strtol (transition_timeout_str, NULL, 10); if (transition_timeout == 0 || errno == ERANGE) { fprintf(stderr, "Invalid transition_timeout (-t) argument\n"); return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT; } } /* check if input file parameter is passed */ if (seg_addr == NULL) { if ( host == NULL) { fprintf(stderr, "Missing host (-h) argument\n"); return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT; } if ( port == NULL ) { fprintf(stderr, "Missing port (-p) argument\n"); return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT; } /* find the target machine */ if ( ! determineTargetHost(&addrList, host, port)) { return TRANS_ERRCODE_ERROR_HOST_LOOKUP_FAILED; } /* load the input message into memory */ if ( inputFile == NULL) { msg = readFully(stdin, &msgLen); } else { FILE *f = fopen(inputFile, "r"); if ( f == NULL) { fprintf(stderr, "Unable to open file %s\n", inputFile); return TRANS_ERRCODE_ERROR_READING_INPUT; } msg = readFully(f, &msgLen); fclose(f); } } else { /* build message from passed parameters */ if (mode == NULL) { fprintf(stderr, "Missing mode (-m) argument\n"); return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT; } if (status == NULL) { fprintf(stderr, "Missing status (-s) argument\n"); return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT; } if (seg_addr == NULL) { fprintf(stderr, "Missing segment host (-H) argument\n"); return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT; } if (seg_pm_port == NULL) { fprintf(stderr, "Missing segment postmaster port (-P) argument\n"); return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT; } if (seg_rep_port == NULL) { fprintf(stderr, "Missing segment replication port (-R) argument\n"); return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT; } if (peer_addr == NULL) { fprintf(stderr, "Missing peer host (-h) argument\n"); return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT; } if (peer_pm_port == NULL) { fprintf(stderr, "Missing peer postmaster port (-p) argument\n"); return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT; } if (peer_rep_port == NULL) { fprintf(stderr, "Missing peer replication port (-r) argument\n"); return TRANS_ERRCODE_ERROR_INVALID_ARGUMENT; } /* build message */ msgLen = snprintf( msgBuffer, sizeof(msgBuffer), "%s\n%s\n%s\n%s\n%s\n%s\n%s\n", mode, status, seg_addr, seg_rep_port, peer_addr, peer_rep_port, peer_pm_port ); msg = msgBuffer; /* find the target machine */ if (!determineTargetHost(&addrList, seg_addr, seg_pm_port)) { return TRANS_ERRCODE_ERROR_HOST_LOOKUP_FAILED; } } /* check for errors while building the message */ if ( msg == NULL ) { return TRANS_ERRCODE_ERROR_READING_INPUT; } /* send the message */ PrimaryMirrorTransitionClientInfo client; client.receivedDataCallbackFn = gpMirrorReceivedDataCallbackFunction; client.errorLogFn = gpMirrorErrorLogFunction; client.checkForNeedToExitFn = gpCheckForNeedToExitFn; return sendTransitionMessage(&client, addrList, msg, msgLen, num_retries, transition_timeout); }