/*
 * rest_request
 *
 * Issue a REST call through call_rest(). If that call raises an error and
 * the URI carries HA namenode information (hadoop_uri->ha_nodes is set),
 * the error is dismissed and the request is handed to ha_failover().
 * Without HA information the original error is re-thrown unchanged.
 *
 * hadoop_uri must already have both host and port populated.
 */
static void
rest_request(GPHDUri *hadoop_uri, ClientContext *client_context, char *rest_msg)
{
	Assert(hadoop_uri->host != NULL && hadoop_uri->port != NULL);

	/* first attempt against the currently selected namenode */
	PG_TRY();
	{
		call_rest(hadoop_uri, client_context, rest_msg);
	}
	PG_CATCH();
	{
		char	   *first_error;

		/* Not an HA setup: there is nothing else to try, so propagate. */
		if (!hadoop_uri->ha_nodes)
			PG_RE_THROW();

		/* HA setup: note why the first namenode failed before moving on. */
		first_error = elog_message();
		elog(DEBUG2, "rest_request: calling first HA namenode failed, trying second (%s)",
			 first_error ? first_error : "Unknown error");

		/*
		 * We are done with the first error, so it has to be cleared off the
		 * error stack before retrying. If it cannot be dismissed, give up
		 * and propagate it (not expected to happen). A failure inside
		 * ha_failover() will raise an error of its own.
		 */
		if (!elog_dismiss(DEBUG5))
			PG_RE_THROW();

		ha_failover(hadoop_uri, client_context, rest_msg);
	}
	PG_END_TRY();
}
/*
 * Used by clients to send a request to a service.
 *
 * serviceClient - client handle; its serviceConfig must be set (i.e. the
 *                 client must be connected), otherwise the call fails.
 * request       - buffer holding the request to write.
 * requestLen    - length of the request; must equal the requestLen
 *                 configured for the service.
 *
 * Returns the result of ServiceClientWrite() when no error is raised.
 * If an error is raised along the way it is caught here rather than
 * propagated: the error is demoted to WARNING, its text is copied into
 * ClientErrorString (or an empty string if there is no text or it does
 * not fit), the report is emitted, the error state is flushed, and false
 * is returned. Only if the error cannot be demoted is it re-thrown.
 */
bool
ServiceClientSendRequest(ServiceClient *serviceClient, void *request, int requestLen)
{
	ServiceConfig *serviceConfig;
	char	   *message;
	bool		result = false;
	DECLARE_SAVE_SUPPRESS_PANIC();

	Assert(serviceClient != NULL);
	Assert(request != NULL);

	PG_TRY();
	{
		SUPPRESS_PANIC();

		serviceConfig = serviceClient->serviceConfig;
		if (serviceConfig == NULL)
		{
			/*
			 * There is no config to take a service title from here, so the
			 * message must not touch serviceConfig (dereferencing it would
			 * crash instead of reporting the problem).
			 */
			ereport(ERROR,
					(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
					 errmsg("Not connected to service")));
		}

		if (requestLen != serviceConfig->requestLen)
		{
			ereport(ERROR,
					(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
					 errmsg("Expecting request length %d and actual length is %d for '%s'",
							serviceConfig->requestLen,
							requestLen,
							serviceConfig->title)));
		}

		result = ServiceClientWrite(serviceClient, request, requestLen);

		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();

		/* Report the error to the server log */
		if (!elog_demote(WARNING))
		{
			elog(LOG,"unable to demote error");
			PG_RE_THROW();
		}

		/* Keep a copy of the message for the client, if it fits. */
		message = elog_message();
		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;
}
/* * cdbdisp_handleError * * When caller catches an error, the PG_CATCH handler can use this * function instead of cdbdisp_finishCommand to wait for all QEs * to finish, clean up, and report QE errors if appropriate. * This function should be called only from PG_CATCH handlers. * * This function destroys and frees the given CdbDispatchResults objects. * It is a no-op if both CdbDispatchResults ptrs are NULL. * * On return, the caller is expected to finish its own cleanup and * exit via PG_RE_THROW(). */ void cdbdisp_handleError(struct CdbDispatcherState *ds) { int qderrcode; bool useQeError = false; qderrcode = elog_geterrcode(); /* * If cdbdisp_dispatchToGang() wasn't called, don't wait. */ if (!ds || !ds->primaryResults) return; /* * Request any remaining commands executing on qExecs to stop. * We need to wait for the threads to finish. This allows for proper * cleanup of the results from the async command executions. * Cancel any QEs still running. */ CdbCheckDispatchResult(ds, DISPATCH_WAIT_CANCEL); /* * When a QE stops executing a command due to an error, as a * consequence there can be a cascade of interconnect errors * (usually "sender closed connection prematurely") thrown in * downstream processes (QEs and QD). So if we are handling * an interconnect error, and a QE hit a more interesting error, * we'll let the QE's error report take precedence. */ if (qderrcode == ERRCODE_GP_INTERCONNECTION_ERROR) { bool qd_lost_flag = false; char *qderrtext = elog_message(); if (qderrtext && strcmp(qderrtext, CDB_MOTION_LOST_CONTACT_STRING) == 0) qd_lost_flag = true; if (ds->primaryResults && ds->primaryResults->errcode) { if (qd_lost_flag && ds->primaryResults->errcode == ERRCODE_GP_INTERCONNECTION_ERROR) useQeError = true; else if (ds->primaryResults->errcode != ERRCODE_GP_INTERCONNECTION_ERROR) useQeError = true; } } if (useQeError) { /* * Throw the QE's error, catch it, and fall thru to return * normally so caller can finish cleaning up. 
Afterwards * caller must exit via PG_RE_THROW(). */ PG_TRY(); { cdbdisp_finishCommand(ds, NULL, NULL); } PG_CATCH(); { } /* nop; fall thru */ PG_END_TRY(); } else { /* * Discard any remaining results from QEs; don't confuse matters by * throwing a new error. Any results of interest presumably should * have been examined before raising the error that the caller is * currently handling. */ cdbdisp_destroyDispatcherState(ds); } }
/*
 * ServiceClientPollRead
 *
 * Try once to read a complete response of responseLen bytes from the
 * client's socket without waiting for data.
 *
 * On a successful call (return value true), *pollResponseReceived tells
 * whether a response was actually read: it is set to false when the read
 * reported EWOULDBLOCK, and to true when exactly responseLen bytes were
 * read. Reads interrupted by EINTR are retried.
 *
 * A closed connection, any other read error, or a read of a different
 * length than responseLen raises an error. Such errors are caught here
 * rather than propagated: the error is demoted to WARNING, its text is
 * copied into ClientErrorString (or an empty string if there is no text
 * or it does not fit), the report is emitted, the error state is flushed,
 * and false is returned. *pollResponseReceived is left untouched in that
 * case. Only if the error cannot be demoted is it re-thrown.
 */
static bool
ServiceClientPollRead(ServiceClient *serviceClient, void *response, int responseLen, bool *pollResponseReceived)
{
	ServiceConfig *serviceConfig;
	int			n;
	int			saved_err;
	char	   *message;
	bool		result = false;
	DECLARE_SAVE_SUPPRESS_PANIC();

	Assert(serviceClient != NULL);
	serviceConfig = serviceClient->serviceConfig;
	Assert(serviceConfig != NULL);
	Assert(response != NULL);

	PG_TRY();
	{
		SUPPRESS_PANIC();

		/*
		 * Attempt to read the response
		 */
		while (true)
		{
			n = read(serviceClient->sockfd, ((char *) response), responseLen);
			/* grab errno right away, before anything else can change it */
			saved_err = errno;

			if (n == 0)
			{
				ereport(ERROR,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("Connection to '%s' is closed",
								serviceConfig->title)));
			}
			else if (n < 0)
			{
				if (saved_err == EWOULDBLOCK)
				{
					/* nothing available yet; not an error */
					*pollResponseReceived = false;
					break;
				}

				if (saved_err == EINTR)
					continue;

				ereport(ERROR,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("Read error from '%s': %s",
								serviceConfig->title,
								strerror(saved_err))));
			}

			if (n != responseLen)
			{
				/*
				 * ereport(ERROR) transfers control to PG_CATCH below, so
				 * no return statement belongs here: leaving a PG_TRY block
				 * with "return" would skip restoring the exception stack.
				 */
				ereport(ERROR,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("Expecting message length %d and actual read length was %d from '%s'",
								responseLen,
								n,
								serviceConfig->title)));
			}

			*pollResponseReceived = true;
			break;
		}

		result = true;
		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();

		/* Report the error to the server log */
		if (!elog_demote(WARNING))
		{
			elog(LOG,"unable to demote error");
			PG_RE_THROW();
		}

		/* Keep a copy of the message for the client, if it fits. */
		message = elog_message();
		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;
}
/*
 * ServiceClientRead
 *
 * Read exactly responseLen bytes from the client's socket into response.
 *
 * Reads are repeated until the whole response has arrived. A read that is
 * interrupted (EINTR) is simply retried. A read that would block
 * (EWOULDBLOCK) waits in select() until the socket becomes readable; when
 * timeout is non-NULL that wait is bounded, and running out of time raises
 * an error. With a NULL timeout select() is given no time limit.
 *
 * A closed connection, a read error, a select error or a timeout raises
 * an error. Such errors are caught here rather than propagated: the error
 * is demoted to WARNING, its text is copied into ClientErrorString (or an
 * empty string if there is no text or it does not fit), the report is
 * emitted, the error state is flushed, and false is returned. Only if the
 * error cannot be demoted is it re-thrown.
 *
 * Returns true once all responseLen bytes have been read.
 */
static bool
ServiceClientRead(ServiceClient *serviceClient, void *response, int responseLen, struct timeval *timeout)
{
	ServiceConfig *serviceConfig;
	int			n;
	int			bytesRead = 0;
	int			saved_err;
	char	   *message;
	bool		result = false;
	mpp_fd_set	rset;

	/*
	 * select() is handed a private copy of the timeout, because it may
	 * overwrite the value with the time remaining.
	 */
	struct timeval rundownTimeout = {0,0};
	DECLARE_SAVE_SUPPRESS_PANIC();

	Assert(serviceClient != NULL);
	serviceConfig = serviceClient->serviceConfig;
	Assert(serviceConfig != NULL);
	Assert(response != NULL);

	if (timeout != NULL)
		rundownTimeout = *timeout;

	PG_TRY();
	{
		SUPPRESS_PANIC();

		/* keep reading until the full response is in the buffer */
		while (bytesRead < responseLen)
		{
			n = read(serviceClient->sockfd,
					 ((char *) response) + bytesRead,
					 responseLen - bytesRead);
			saved_err = errno;

			/* got some data: account for it and go around again */
			if (n > 0)
			{
				bytesRead += n;
				continue;
			}

			if (n == 0)
			{
				ereport(ERROR,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("Connection to '%s' is closed (%d)",
								serviceConfig->title, serviceClient->sockfd)));
			}

			/* from here on n < 0 */
			if (saved_err == EINTR)
				continue;

			if (saved_err != EWOULDBLOCK)
			{
				ereport(ERROR,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("Read error from '%s': %s (%d)",
								serviceConfig->title,
								strerror(saved_err),
								serviceClient->sockfd)));
			}

			/*
			 * The read would block. Messages are small so this is not
			 * really expected, but once part of a message has been read
			 * the rest has to be waited for (or an error has to occur).
			 */
			for (;;)
			{
				MPP_FD_ZERO(&rset);
				MPP_FD_SET(serviceClient->sockfd, &rset);
				n = select(serviceClient->sockfd + 1, (fd_set *)&rset, NULL, NULL,
						   (timeout == NULL ? NULL : &rundownTimeout));

				/* socket is readable: go back to read() */
				if (n >= 1)
					break;

				if (n == 0)
				{
					if (timeout != NULL)
					{
						ereport(ERROR,
								(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
								 errmsg("Read from '%s' timed out after %d.%03d seconds",
										serviceConfig->title,
										(int)timeout->tv_sec,
										(int)timeout->tv_usec / 1000)));
					}
					continue;
				}

				/* n < 0: an interrupted select is retried, anything else is fatal */
				if (errno == EINTR)
					continue;

				saved_err = errno;
				ereport(ERROR,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("Read error from '%s': %s (%d)",
								serviceConfig->title,
								strerror(saved_err),
								serviceClient->sockfd)));
			}
		}

		result = true;
		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();

		/* Report the error to the server log */
		if (!elog_demote(WARNING))
		{
			elog(LOG,"unable to demote error");
			PG_RE_THROW();
		}

		/* Hand the message text to the client when it fits in the buffer. */
		message = elog_message();
		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;
}
/*
 * ServiceDoConnect
 *
 * Open a TCP connection to the service listening on listenerPort on the
 * loopback address, store the socket in serviceClient->sockfd, and switch
 * the socket to non-blocking mode.
 *
 * serviceConfig - used for the service title in error messages.
 * listenerPort  - port to connect to (host byte order).
 * serviceClient - receives the connected socket in sockfd; on failure
 *                 sockfd is left at -1.
 * complain      - when true, a failure is emitted as an error report;
 *                 when false, the failure is recorded silently.
 *
 * A connect() interrupted by EINTR is retried with a fresh socket.
 *
 * Failures raise an error that is caught here rather than propagated:
 * the error is demoted to WARNING, its text is copied into
 * ClientErrorString (or an empty string if there is no text or it does
 * not fit), the report is emitted only if complain is set, the error
 * state is flushed, and false is returned. Only if the error cannot be
 * demoted is it re-thrown.
 *
 * Returns true when the socket is connected and non-blocking.
 */
static bool
ServiceDoConnect(ServiceConfig *serviceConfig, int listenerPort, ServiceClient *serviceClient, bool complain)
{
	struct sockaddr_in addr;
	int			saved_err;
	char	   *message;
	bool		result = false;
	DECLARE_SAVE_SUPPRESS_PANIC();

	PG_TRY();
	{
		SUPPRESS_PANIC();

		for (;;)
		{
			/*
			 * Open a connection to the service.
			 */
			serviceClient->sockfd = socket(AF_INET, SOCK_STREAM, 0);
			if (serviceClient->sockfd < 0)
			{
				/*
				 * Report the real cause here; otherwise connect() would
				 * fail on the invalid descriptor with a misleading error.
				 */
				saved_err = errno;
				ereport(ERROR,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("Could not create socket for '%s': %s",
								serviceConfig->title,
								strerror(saved_err))));
			}

			/* start from a fully zeroed address, padding included */
			memset(&addr, 0, sizeof(addr));
			addr.sin_family = AF_INET;
			addr.sin_port = htons(listenerPort);
			addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

			if (connect(serviceClient->sockfd, (struct sockaddr *) &addr, sizeof(addr)) < 0)
			{
				/* save errno first: close() below may overwrite it */
				saved_err = errno;

				close(serviceClient->sockfd);
				serviceClient->sockfd = -1;

				/* interrupted: start over with a new socket */
				if (saved_err == EINTR)
					continue;

				ereport(ERROR,
						(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						 errmsg("Could not connect to '%s': %s",
								serviceConfig->title,
								strerror(saved_err))));
			}

			/* success. we're done here! */
			break;
		}

		/* now that we are connected, make the socket non-blocking */
		if (!pg_set_noblock(serviceClient->sockfd))
		{
			saved_err = errno;

			close(serviceClient->sockfd);
			serviceClient->sockfd = -1;

			ereport(ERROR,
					(errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
					 errmsg("Could not set '%s' socket to non-blocking mode: %s",
							serviceConfig->title,
							strerror(saved_err))));
		}

		result = true;
		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();

		/* Report the error to the server log */
		if (!elog_demote(WARNING))
		{
			elog(LOG,"unable to demote error");
			PG_RE_THROW();
		}

		/* Keep a copy of the message for the client, if it fits. */
		message = elog_message();
		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		/* the caller may have asked for failures to stay quiet */
		if (complain)
			EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;
}