Exemplo n.º 1
0
/*
 * Issue a REST request, retrying through the HA failover path when the
 * first attempt fails.
 *
 * hadoop_uri      - target URI; host and port must be set (asserted).
 * client_context  - passed through unchanged to call_rest()/ha_failover().
 * rest_msg        - passed through unchanged to call_rest()/ha_failover().
 *
 * The request is first attempted with call_rest(). If that raises an error
 * and hadoop_uri->ha_nodes is set, the failure is logged at DEBUG2, the
 * error is dismissed, and ha_failover() is called with the same arguments.
 * If ha_nodes is not set, the original error is re-thrown as is.
 */
static void
rest_request(GPHDUri *hadoop_uri, ClientContext* client_context, char *rest_msg)
{
	Assert(hadoop_uri->host != NULL && hadoop_uri->port != NULL);

	PG_TRY();
	{
		/* first attempt against the currently selected node */
		call_rest(hadoop_uri, client_context, rest_msg);
	}
	PG_CATCH();
	{
		char	   *failure_text;

		/* Not an HA setup: there is nothing to fail over to, so propagate. */
		if (!hadoop_uri->ha_nodes)
			PG_RE_THROW();

		failure_text = elog_message();
		if (failure_text == NULL)
			failure_text = "Unknown error";

		elog(DEBUG2, "rest_request: calling first HA namenode failed, trying second (%s)", failure_text);

		/*
		 * We are done handling this error, so the error state has to be
		 * released before continuing. ha_failover() may fail as well, but
		 * in that case it reports its own error.
		 */
		if (!elog_dismiss(DEBUG5))
			PG_RE_THROW();		/* not expected to happen */

		ha_failover(hadoop_uri, client_context, rest_msg);
	}
	PG_END_TRY();
}
Exemplo n.º 2
0
/*
 * Used by clients to send a request to a service.
 *
 * serviceClient - client handle; must not be NULL (asserted).
 * request       - buffer holding the request; must not be NULL (asserted).
 * requestLen    - length of the request; must equal the requestLen stored
 *                 in the client's service configuration.
 *
 * Returns the result of ServiceClientWrite() on the normal path. If an
 * error is raised along the way (no service configuration attached,
 * unexpected request length, or an error raised further down), the error
 * is demoted to WARNING, its message is copied into ClientErrorString when
 * it fits (otherwise ClientErrorString is set to the empty string), the
 * report is emitted, the error state is flushed, and false is returned.
 * If the error cannot be demoted it is re-thrown to the caller.
 */
bool
ServiceClientSendRequest(ServiceClient *serviceClient, void* request, int requestLen)
{
	ServiceConfig *serviceConfig;
	char        *message;
	bool		result = false;
	DECLARE_SAVE_SUPPRESS_PANIC();

	Assert(serviceClient != NULL);
	Assert(request != NULL);

	PG_TRY();
	{
		SUPPRESS_PANIC();

		serviceConfig = serviceClient->serviceConfig;
		if (serviceConfig == NULL)
		{
			/*
			 * There is no configuration, hence no title to print. The
			 * message must not touch serviceConfig, which is NULL here.
			 */
			ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
						    errmsg("Not connected to service")));
		}
		if (requestLen != serviceConfig->requestLen)
		{
			ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
							 errmsg("Expecting request length %d and actual length is %d for '%s'",
				 					serviceConfig->requestLen, requestLen,
				 					serviceConfig->title)));
		}

		result = ServiceClientWrite(serviceClient, request, requestLen);
		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();

		/* Report the error to the server log */
		if (!elog_demote(WARNING))
		{
			elog(LOG,"unable to demote error");
			PG_RE_THROW();
		}

		/* Keep the message for the caller, but only if it fits the buffer. */
		message = elog_message();
		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;
}
Exemplo n.º 3
0
/*
 * cdbdisp_handleError
 *
 * Intended to be called from a PG_CATCH handler, in place of
 * cdbdisp_finishCommand, while the caller is handling an error.
 *
 * Returns immediately if ds is NULL or ds->primaryResults is NULL.
 * Otherwise it calls CdbCheckDispatchResult(ds, DISPATCH_WAIT_CANCEL) and
 * then either
 *   - calls cdbdisp_finishCommand() and swallows any error it raises, when
 *     the QE error should take precedence over the caller's error, or
 *   - calls cdbdisp_destroyDispatcherState() to discard remaining results.
 *
 * The function always returns normally; the caller is expected to finish
 * its own cleanup and exit via PG_RE_THROW().
 */
void
cdbdisp_handleError(struct CdbDispatcherState *ds)
{
	/* Error code of the error the caller is currently handling. */
	int			callerErrcode = elog_geterrcode();
	bool		preferQeError = false;

	/* Nothing was dispatched (or no state at all): nothing to wait for. */
	if (ds == NULL || ds->primaryResults == NULL)
		return;

	/*
	 * Ask commands still running on the QEs to stop and wait for them, so
	 * that the results of the async executions can be cleaned up properly.
	 */
	CdbCheckDispatchResult(ds, DISPATCH_WAIT_CANCEL);

	/*
	 * A QE that stops on an error can trigger a cascade of interconnect
	 * errors (usually "sender closed connection prematurely") in downstream
	 * processes. So when the caller's error is an interconnect error and a
	 * QE reported something more interesting, the QE's report wins.
	 */
	if (callerErrcode == ERRCODE_GP_INTERCONNECTION_ERROR)
	{
		char	   *callerErrtext = elog_message();
		bool		lostContact = (callerErrtext != NULL &&
								   strcmp(callerErrtext, CDB_MOTION_LOST_CONTACT_STRING) == 0);

		/*
		 * With a QE error code present, prefer it when it is not itself an
		 * interconnect error, or when the caller's error is the
		 * "lost contact" message.
		 */
		if (ds->primaryResults && ds->primaryResults->errcode)
			preferQeError = lostContact ||
				ds->primaryResults->errcode != ERRCODE_GP_INTERCONNECTION_ERROR;
	}

	if (!preferQeError)
	{
		/*
		 * Drop whatever results remain without raising a new error. Results
		 * of interest should have been examined before the error the caller
		 * is handling was raised.
		 */
		cdbdisp_destroyDispatcherState(ds);
		return;
	}

	/*
	 * Let cdbdisp_finishCommand() raise the QE's error, catch it here, and
	 * return normally so the caller can finish cleaning up before it exits
	 * via PG_RE_THROW().
	 */
	PG_TRY();
	{
		cdbdisp_finishCommand(ds, NULL, NULL);
	}
	PG_CATCH();
	{
		/* intentionally empty; fall through */
	}
	PG_END_TRY();
}
Exemplo n.º 4
0
/*
 * Make a single non-waiting attempt to read a complete response.
 *
 * serviceClient        - client handle; must not be NULL and must have a
 *                        service configuration attached (asserted).
 * response             - destination buffer; must not be NULL (asserted).
 * responseLen          - exact number of bytes a response must have.
 * pollResponseReceived - out: set to true when a full response was read,
 *                        false when the read would have blocked. It is not
 *                        written on the error path.
 *
 * Returns true when the poll completed (with or without data). Returns
 * false when an error was raised: connection closed, read failure, or a
 * read of a different length than responseLen. In that case the error is
 * demoted to WARNING, its message is copied into ClientErrorString when it
 * fits (otherwise ClientErrorString is emptied), the report is emitted and
 * the error state is flushed. An error that cannot be demoted is re-thrown.
 */
static bool
ServiceClientPollRead(ServiceClient *serviceClient, void* response, int responseLen, bool *pollResponseReceived)
{
	ServiceConfig 	*serviceConfig;
	int				n;
	int				saved_err;
	char            *message;
	bool		    result = false;
	DECLARE_SAVE_SUPPRESS_PANIC();

	Assert(serviceClient != NULL);
	serviceConfig = serviceClient->serviceConfig;
	Assert(serviceConfig != NULL);
	Assert(response != NULL);

	PG_TRY();
	{
		SUPPRESS_PANIC();

		/*
		 * Attempt to read the response
		 */
		while (true)
		{
			n = read(serviceClient->sockfd,
				     ((char *)response),
				     responseLen);
			/* capture errno before anything else can overwrite it */
			saved_err = errno;

			if (n == 0)
			{
				ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
								 errmsg("Connection to '%s' is closed",
								         serviceConfig->title)));
			}
			else if (n < 0)
			{
				/*
				 * POSIX allows either EAGAIN or EWOULDBLOCK to signal that
				 * no data is available, and they may be distinct values.
				 */
				if (saved_err == EWOULDBLOCK || saved_err == EAGAIN)
				{
					*pollResponseReceived = false;
					break;
				}

				/* interrupted by a signal: just try again */
				if (saved_err == EINTR)
					continue;

				ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
								 errmsg("Read error from '%s': %s",
								        serviceConfig->title,
								        strerror(saved_err))));
			}

			/*
			 * A short read is treated as an error. ereport(ERROR) transfers
			 * control to PG_CATCH, so nothing may follow it here; in
			 * particular there must be no "return" inside PG_TRY.
			 */
			if (n != responseLen)
			{
				ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
								 errmsg("Expecting message length %d and actual read length was %d from '%s'",
					 			        responseLen, n,
					 			        serviceConfig->title)));
			}

			*pollResponseReceived = true;
			break;
		}

		result = true;
		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();

		/* Report the error to the server log */
		if (!elog_demote(WARNING))
		{
			elog(LOG,"unable to demote error");
			PG_RE_THROW();
		}

		/* Keep the message for the caller, but only if it fits the buffer. */
		message = elog_message();
		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;
}
Exemplo n.º 5
0
/*
 * Read exactly responseLen bytes of response from the service socket.
 *
 * serviceClient - client handle; must not be NULL and must have a service
 *                 configuration attached (asserted).
 * response      - destination buffer; must not be NULL (asserted).
 * responseLen   - number of bytes to read.
 * timeout       - maximum time to wait in select() when the socket has no
 *                 data; NULL means wait without a time limit.
 *
 * Returns true once responseLen bytes have been read. Returns false when an
 * error was raised: connection closed, read/select failure, or timeout. In
 * that case the error is demoted to WARNING, its message is copied into
 * ClientErrorString when it fits (otherwise ClientErrorString is emptied),
 * the report is emitted and the error state is flushed. An error that
 * cannot be demoted is re-thrown.
 */
static bool
ServiceClientRead(ServiceClient *serviceClient, void* response, int responseLen, struct timeval *timeout)
{
	ServiceConfig *serviceConfig;
	int		n;
	int	   bytesRead = 0;
	int			saved_err;
	char        *message;
	bool		result = false;
	mpp_fd_set	rset;
	/*
	 * Use a local copy of the timeout since select may modify
	 * the timeout parameter with the remaining time.
	 */
	struct timeval rundownTimeout = {0,0};
	DECLARE_SAVE_SUPPRESS_PANIC();

	Assert(serviceClient != NULL);
	serviceConfig = serviceClient->serviceConfig;
	Assert(serviceConfig != NULL);
	Assert(response != NULL);
	if (timeout != NULL)
		rundownTimeout = *timeout;

	PG_TRY();
	{
		SUPPRESS_PANIC();

		/*
		 * read the response
		 */
		while (bytesRead < responseLen)
		{
			n = read(serviceClient->sockfd,
				     ((char *)response) + bytesRead,
				     responseLen - bytesRead);
			/* capture errno before anything else can overwrite it */
			saved_err = errno;

			if (n == 0)
			{
				ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
								errmsg("Connection to '%s' is closed (%d)",
									   serviceConfig->title, serviceClient->sockfd)));
			}

			if (n < 0)
			{
				/*
				 * POSIX allows either EAGAIN or EWOULDBLOCK to signal that
				 * no data is available, and they may be distinct values.
				 */
				bool	wouldBlock = (saved_err == EWOULDBLOCK || saved_err == EAGAIN);

				if (saved_err != EINTR && !wouldBlock)
				{
					ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
									errmsg("Read error from '%s': %s (%d)",
										   serviceConfig->title,
										   strerror(saved_err), serviceClient->sockfd)));
				}

				if (wouldBlock)
				{
					/* we shouldn't really get here since we are dealing with
					 * small messages, but once we've read a bit of data we
					 * need to finish out reading till we get the message (or error)
					 */
					do
					{
						MPP_FD_ZERO(&rset);
						MPP_FD_SET(serviceClient->sockfd, &rset);
						n = select(serviceClient->sockfd + 1, (fd_set *)&rset, NULL, NULL, (timeout == NULL ? NULL : &rundownTimeout));
						if (n == 0)
						{
							/* select returned without a ready descriptor */
							if (timeout != NULL)
							{
								ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
												errmsg("Read from '%s' timed out after %d.%03d seconds",
													   serviceConfig->title,
													   (int)timeout->tv_sec,
													   (int)timeout->tv_usec / 1000)));
							}
						}
						else if (n < 0 && errno == EINTR)
							continue;	/* re-evaluates the loop condition, n < 1 holds */
						else if (n < 0)
						{
							saved_err = errno;

							ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
											errmsg("Read error from '%s': %s (%d)",
												   serviceConfig->title,
												   strerror(saved_err), serviceClient->sockfd)));
						}
					}
					while (n < 1);
				}
				/* else saved_err == EINTR */

				/* socket is readable again (or we were interrupted): retry read */
				continue;
			}
			else
				bytesRead += n;
		}

		result = true;
		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();

		/* Report the error to the server log */
		if (!elog_demote(WARNING))
		{
			elog(LOG,"unable to demote error");
			PG_RE_THROW();
		}

		/* Keep the message for the caller, but only if it fits the buffer. */
		message = elog_message();
		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;
}
Exemplo n.º 6
0
/*
 * Open a TCP connection to a service listening on the loopback interface
 * and switch the resulting socket to non-blocking mode.
 *
 * serviceConfig - configuration of the service; its title is used in
 *                 error messages.
 * listenerPort  - port the service listens on (host byte order).
 * serviceClient - on success sockfd holds the connected socket; on a
 *                 failure after the socket was created it is closed and
 *                 sockfd is set to -1.
 * complain      - when true, a failure is emitted as an error report
 *                 (demoted to WARNING); when false it is flushed silently.
 *
 * Returns true on success, false on failure. On failure the error message
 * is copied into ClientErrorString when it fits, otherwise
 * ClientErrorString is emptied. An error that cannot be demoted is
 * re-thrown.
 */
static bool
ServiceDoConnect(ServiceConfig *serviceConfig, int listenerPort, ServiceClient *serviceClient, bool complain)
{
	int  n;
	struct sockaddr_in addr;
	int saved_err;
	char        *message;
	bool		result = false;
	DECLARE_SAVE_SUPPRESS_PANIC();

	PG_TRY();
	{
		SUPPRESS_PANIC();

		for (;;)
		{
			/*
			 * Open a connection to the service.
			 */
			serviceClient->sockfd = socket(AF_INET, SOCK_STREAM, 0);
			if (serviceClient->sockfd < 0)
			{
				saved_err = errno;

				serviceClient->sockfd = -1;
				ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
								errmsg("Could not create socket for '%s': %s",
									   serviceConfig->title,
									   strerror(saved_err))));
			}

			/* zero the whole structure so padding (sin_zero) is defined */
			memset(&addr, 0, sizeof(addr));
			addr.sin_family = AF_INET;
			addr.sin_port = htons(listenerPort);
			addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

			if ((n = connect(serviceClient->sockfd, (struct sockaddr *)&addr, sizeof(addr))) < 0)
			{
				/* capture errno now: close() below may overwrite it */
				saved_err = errno;

				close(serviceClient->sockfd);
				serviceClient->sockfd = -1;

				/* interrupted by a signal: start over with a fresh socket */
				if (saved_err == EINTR)
					continue;

				ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
								errmsg("Could not connect to '%s': %s",
									   serviceConfig->title,
									   strerror(saved_err))));
			}
			else
			{
				//success. we're done here!
				break;
			}
		}

		/* the connection is established; now make the socket non-blocking */
		if (!pg_set_noblock(serviceClient->sockfd))
		{
			saved_err = errno;

			close(serviceClient->sockfd);
			serviceClient->sockfd = -1;
			ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR),
							errmsg("Could not set '%s' socket to non-blocking mode: %s",
								   serviceConfig->title,
								   strerror(saved_err))));
		}

		result = true;
		RESTORE_PANIC();
	}
	PG_CATCH();
	{
		RESTORE_PANIC();

		/* Report the error to the server log */
		if (!elog_demote(WARNING))
		{
			elog(LOG,"unable to demote error");
			PG_RE_THROW();
		}

		/* Keep the message for the caller, but only if it fits the buffer. */
		message = elog_message();
		if (message != NULL && strlen(message) + 1 < sizeof(ClientErrorString))
			strcpy(ClientErrorString, message);
		else
			strcpy(ClientErrorString, "");

		/* only emit the report when the caller asked for it */
		if (complain)
			EmitErrorReport();
		FlushErrorState();

		result = false;
	}
	PG_END_TRY();

	return result;
}