Example #1
/* notice backend connection error using SIGUSR1 */
void degenerate_backend_set(int *node_id_set, int count)
{
	pid_t parent = getppid();
	int i;

	if (pool_config->parallel_mode)
	{
		return;
	}

	pool_semaphore_lock(REQUEST_INFO_SEM);
	Req_info->kind = NODE_DOWN_REQUEST;
	for (i = 0; i < count; i++)
	{
		if (node_id_set[i] < 0 || node_id_set[i] >= MAX_NUM_BACKENDS ||
			!VALID_BACKEND(node_id_set[i]))
		{
			pool_log("notice_backend_error: node %d is not a valid backend.", node_id_set[i]);
			continue;
		}

		pool_log("notice_backend_error: %d fail over request from pid %d", node_id_set[i], getpid());
		Req_info->node_id[i] = node_id_set[i];
	}
	kill(parent, SIGUSR1);
	pool_semaphore_unlock(REQUEST_INFO_SEM);
}
Example #2
POOL_STATUS NotificationResponse(POOL_CONNECTION *frontend,
								 POOL_CONNECTION_POOL *backend)
{
	int pid, pid1;
	char *condition, *condition1 = NULL;
	int len, len1 = 0;
	int i;
	POOL_STATUS status;

	pool_write(frontend, "A", 1);

	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (VALID_BACKEND(i))
		{
			if (pool_read(CONNECTION(backend, i), &pid, sizeof(pid)) < 0)
				return POOL_ERROR;
			condition = pool_read_string(CONNECTION(backend, i), &len, 0);
			if (condition == NULL)
				return POOL_END;

			if (IS_MASTER_NODE_ID(i))
			{
				pid1 = pid;
				len1 = len;
				condition1 = strdup(condition);
			}
		}
	}

	pool_write(frontend, &pid1, sizeof(pid1));
	status = pool_write_and_flush(frontend, condition1, len1);
	free(condition1);
	return status;
}
Example #3
/*
 * Establish persistent connection to backend
 */
static void establish_persistent_connection(void)
{
	int i;
	BackendInfo *bkinfo;
	POOL_CONNECTION_POOL_SLOT *s;

	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (!VALID_BACKEND(i))
			continue;

		if (slots[i] == NULL)
		{
			bkinfo = pool_get_node_info(i);
			s = make_persistent_db_connection(bkinfo->backend_hostname, 
											  bkinfo->backend_port,
											  "postgres",
											  pool_config->sr_check_user,
											  pool_config->sr_check_password, true);
			slots[i] = s;
		}
	}
}
Example #4
/*
 * disconnect and release a connection to the database
 */
void pool_discard_cp(char *user, char *database, int protoMajor)
{
	POOL_CONNECTION_POOL *p = pool_get_cp(user, database, protoMajor, 0);
	ConnectionInfo *info;
	int i, freed = 0;

	if (p == NULL)
	{
		pool_error("pool_discard_cp: cannot get connection pool for user %s database %s", user, database);
		return;
	}

	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (!VALID_BACKEND(i))
			continue;

		if (!freed)
		{
			pool_free_startup_packet(CONNECTION_SLOT(p, i)->sp);
			freed = 1;
		}
		pool_close(CONNECTION(p, i));
		free(CONNECTION_SLOT(p, i));
	}

	info = p->info;
	memset(p, 0, sizeof(POOL_CONNECTION_POOL));
	p->info = info;
	memset(p->info, 0, sizeof(ConnectionInfo) * MAX_NUM_BACKENDS);
}
Example #5
POOL_STATUS NoticeResponse(POOL_CONNECTION *frontend,
						   POOL_CONNECTION_POOL *backend)
{
	char *string = NULL;
	int len;
	int i;

	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (VALID_BACKEND(i))
		{
			/* read notice message */
			string = pool_read_string(CONNECTION(backend, i), &len, 0);
			if (string == NULL)
				return POOL_END;
		}
	}

	/* forward to the frontend */
	pool_write(frontend, "N", 1);
	if (pool_write_and_flush(frontend, string, len) < 0)
	{
		return POOL_END;
	}
	return POOL_CONTINUE;
}
Example #6
POOL_STATUS CompletedResponse(POOL_CONNECTION *frontend,
							  POOL_CONNECTION_POOL *backend)
{
	int i;
	char *string = NULL;
	char *string1 = NULL;
	int len, len1 = 0;

	/* read command tag */
	string = pool_read_string(MASTER(backend), &len, 0);
	if (string == NULL)
		return POOL_END;
	else if (!strncmp(string, "BEGIN", 5))
		TSTATE(backend, MASTER_NODE_ID) = 'T';
	else if (!strncmp(string, "COMMIT", 6) || !strncmp(string, "ROLLBACK", 8))
		TSTATE(backend, MASTER_NODE_ID) = 'I';

	len1 = len;
	string1 = strdup(string);

	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (!VALID_BACKEND(i) || IS_MASTER_NODE_ID(i))
			continue;

		/* read command tag */
		string = pool_read_string(CONNECTION(backend, i), &len, 0);
		if (string == NULL)
		{
			free(string1);
			return POOL_END;
		}
		else if (!strncmp(string, "BEGIN", 5))
			TSTATE(backend, i) = 'T';
		else if (!strncmp(string, "COMMIT", 6) || !strncmp(string, "ROLLBACK", 8))
			TSTATE(backend, i) = 'I';
 
		if (len != len1)
		{
			pool_debug("CompletedResponse: message length does not match between master(%d \"%s\",) and %d th server (%d \"%s\",)",
					   len1, string1, i, len, string);

			/* INSERT is exempted, because the INSERT response tag carries an OID */
			if (strncmp(string1, "INSERT", 6))
			{
				free(string1);
				return POOL_END;
			}
		}
	}
	/* forward to the frontend */
	pool_write(frontend, "C", 1);
	pool_debug("CompletedResponse: string: \"%s\"", string1);
	if (pool_write(frontend, string1, len1) < 0)
	{
		free(string1);
		return POOL_END;
	}

	free(string1);
	return pool_flush(frontend);
}
Example #7
POOL_STATUS ErrorResponse(POOL_CONNECTION *frontend,
						  POOL_CONNECTION_POOL *backend)
{
	char *string = NULL;
	int len;
	int i;
	POOL_STATUS ret = POOL_CONTINUE;

	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (VALID_BACKEND(i))
		{
			/* read error message */
			string = pool_read_string(CONNECTION(backend, i), &len, 0);
			if (string == NULL)
				return POOL_END;
		}
	}

	/* forward to the frontend */
	pool_write(frontend, "E", 1);
	if (pool_write_and_flush(frontend, string, len) < 0)
		return POOL_END;

	/* 
	 * check session context, because this function is called 
	 * by pool_do_auth too.
	 */
	if (pool_get_session_context())
		ret = raise_intentional_error_if_need(backend);

	/* change transaction state */
	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (VALID_BACKEND(i))
		{
			if (TSTATE(backend, i) == 'T')
				TSTATE(backend, i) = 'E';
		}
	}

	return ret;
}
Example #8
static POOL_CONNECTION_POOL *connect_backend(StartupPacket *sp, POOL_CONNECTION *frontend)
{
	POOL_CONNECTION_POOL *backend;
	int i;

	/* connect to the backend */
	backend = pool_create_cp();
	if (backend == NULL)
	{
		pool_send_error_message(frontend, sp->major, "XX000", "connection cache is full", "",
								"increase max_pool", __FILE__, __LINE__);
		pool_close(frontend);
		pool_free_startup_packet(sp);
		return NULL;
	}

	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (VALID_BACKEND(i))
		{
			/* set DB node id */
			CONNECTION(backend, i)->db_node_id = i;

			/* mark this is a backend connection */
			CONNECTION(backend, i)->isbackend = 1;
			pool_ssl_negotiate_clientserver(CONNECTION(backend, i));

			/*
			 * save startup packet info
			 */
			CONNECTION_SLOT(backend, i)->sp = sp;

			/* send startup packet */
			if (send_startup_packet(CONNECTION_SLOT(backend, i)) < 0)
			{
				pool_error("connect_backend: failed to send startup packet to the %d th backend", i);
				pool_discard_cp(sp->user, sp->database, sp->major);
				pool_close(frontend);
				return NULL;
			}
		}
	}

	/*
	 * do authentication stuff
	 */
	if (pool_do_auth(frontend, backend))
	{
		pool_close(frontend);
		pool_discard_cp(sp->user, sp->database, sp->major);
		return NULL;
	}

	return backend;
}
Example #9
/*
 * Select load balancing node
 */
int select_load_balancing_node(void)
{
	int selected_slot;
	double total_weight,r;
	int i;

	/* choose a backend in random manner with weight */
	selected_slot = MASTER_NODE_ID;
	total_weight = 0.0;

	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (VALID_BACKEND(i))
		{
			total_weight += BACKEND_INFO(i).backend_weight;
		}
	}

#if defined(sun) || defined(__sun)
	r = (((double)rand())/RAND_MAX) * total_weight;
#else
	r = (((double)random())/RAND_MAX) * total_weight;
#endif

	total_weight = 0.0;
	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (VALID_BACKEND(i) && BACKEND_INFO(i).backend_weight > 0.0)
		{
			if(r >= total_weight)
				selected_slot = i;
			else
				break;
			total_weight += BACKEND_INFO(i).backend_weight;
		}
	}

	pool_debug("select_load_balancing_node: selected backend id is %d", selected_slot);
	return selected_slot;
}
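For reference, the weighted pick above reads more easily in isolation: sum the weights of the live nodes, scale a uniform random value by that total, then walk the nodes until the drawn value falls inside a node's weight interval. A minimal stand-alone sketch of the same logic; the weight/valid arrays are hypothetical stand-ins for BACKEND_INFO(i).backend_weight and VALID_BACKEND(i):

#include <stdlib.h>

/*
 * Minimal sketch of the weighted selection used by
 * select_load_balancing_node(), without the pgpool macros.
 */
static int pick_weighted(const double *weight, const int *valid, int n)
{
	double total = 0.0, acc = 0.0, r;
	int i, selected = 0;

	for (i = 0; i < n; i++)
		if (valid[i])
			total += weight[i];

	/* scale a uniform random value by the total weight, as in the original */
	r = ((double)random() / RAND_MAX) * total;

	for (i = 0; i < n; i++)
	{
		if (!valid[i] || weight[i] <= 0.0)
			continue;
		if (r >= acc)
			selected = i;	/* r has not been passed yet: keep this node */
		else
			break;			/* r fell inside the previous node's interval */
		acc += weight[i];
	}
	return selected;
}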
Example #10
POOL_STATUS CursorResponse(POOL_CONNECTION *frontend,
						   POOL_CONNECTION_POOL *backend)
{
	char *string = NULL;
	char *string1 = NULL;
	int len, len1 = 0;
	int i;

	/* read cursor name */
	string = pool_read_string(MASTER(backend), &len, 0);
	if (string == NULL)
		return POOL_END;
	len1 = len;
	string1 = strdup(string);

	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (VALID_BACKEND(i) && !IS_MASTER_NODE_ID(i))
		{
			/* read cursor name */
			string = pool_read_string(CONNECTION(backend, i), &len, 0);
			if (string == NULL)
				return POOL_END;
			if (len != len1)
			{
				pool_error("CursorResponse: length does not match between master(%d) and %d th backend(%d)",
						   len1, i, len);
				pool_error("CursorResponse: master(%s) %d th backend(%s)", string1, i, string);
				free(string1);
				return POOL_END;
			}
		}
	}

	/* forward to the frontend */
	pool_write(frontend, "P", 1);
	if (pool_write(frontend, string1, len1) < 0)
	{
		free(string1);
		return POOL_END;
	}
	free(string1);

	if (pool_flush(frontend))
		return POOL_END;

	return POOL_CONTINUE;
}
Example #11
/* send failback request using SIGUSR1 */
void send_failback_request(int node_id)
{
	pid_t parent = getppid();

	pool_log("send_failback_request: fail back %d th node request from pid %d", node_id, getpid());
	Req_info->kind = NODE_UP_REQUEST;
	Req_info->node_id[0] = node_id;

	if (node_id < 0 || node_id >= MAX_NUM_BACKENDS || VALID_BACKEND(node_id))
	{
		pool_error("send_failback_request: node %d is alive or is not a valid backend id.", node_id);
		return;
	}

	kill(parent, SIGUSR1);
}
Example #12
POOL_STATUS EmptyQueryResponse(POOL_CONNECTION *frontend,
							   POOL_CONNECTION_POOL *backend)
{
	char c;
	int i;

	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (VALID_BACKEND(i))
		{
			if (pool_read(CONNECTION(backend, i), &c, sizeof(c)) < 0)
				return POOL_END;
		}
	}

	pool_write(frontend, "I", 1);
	return pool_write_and_flush(frontend, "", 1);
}
Example #13
/*
 * Unset frontend connected flag
 */
void pool_coninfo_unset_frontend_connected(int proc_id, int pool_index)
{
	ConnectionInfo *con;
	int i;

	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (!VALID_BACKEND(i))
			continue;

		con = pool_coninfo(proc_id, pool_index, i);

		if (con == NULL)
		{
			elog(WARNING, "failed to get connection info while marking the frontend as not connected for the pool");
			return;
		}
		con->connected = false;
	}
}
Example #14
/*
 * signal handler for SIGUSR1
 * close all idle connections
 */
static RETSIGTYPE close_idle_connection(int sig)
{
	int i, j;
	POOL_CONNECTION_POOL *p = pool_connection_pool;
	ConnectionInfo *info;

	pool_debug("child receives close connection request");

	for (j=0;j<pool_config->max_pool;j++, p++)
	{
		if (!MASTER_CONNECTION(p))
			continue;
 		if (!MASTER_CONNECTION(p)->sp)
			continue;
		if (MASTER_CONNECTION(p)->sp->user == NULL)
			continue;

		if (MASTER_CONNECTION(p)->closetime > 0)		/* idle connection? */
		{
			pool_debug("close_idle_connection: close idle connection: user %s database %s", MASTER_CONNECTION(p)->sp->user, MASTER_CONNECTION(p)->sp->database);
			pool_send_frontend_exits(p);

			for (i=0;i<NUM_BACKENDS;i++)
			{
				if (!VALID_BACKEND(i))
					continue;

				if (i == 0)
				{
					/* only first backend allocated the memory for the start up packet */
					pool_free_startup_packet(CONNECTION_SLOT(p, i)->sp);
				}
				pool_close(CONNECTION(p, i));
			}
			info = p->info;
			memset(p, 0, sizeof(POOL_CONNECTION_POOL));
			p->info = info;
			memset(p->info, 0, sizeof(ConnectionInfo) * MAX_NUM_BACKENDS);
		}
	}
}
Example #15
/*
 * calculate next master node id
 */
static int get_next_master_node(void)
{
	int i;
	for (i=0;i<pool_config->backend_desc->num_backends;i++)
	{
		/*
		 * Do not use the VALID_BACKEND macro in raw mode.
		 * VALID_BACKEND returns true only if the argument is the
		 * master node id; in other words, it is false for standby
		 * nodes.  So the backend status has to be checked without
		 * VALID_BACKEND.
		 */
		if (RAW_MODE)
		{
			if (BACKEND_INFO(i).backend_status == CON_CONNECT_WAIT)
				break;
		}
		else if (VALID_BACKEND(i))
			break;
	}
	return i;
}
Example #16
/*
 * Send extended query and wait for response
 * send_type:
 *  -1: do not send this node_id
 *   0: send to all nodes
 *  >0: send to this node_id
 */
POOL_STATUS pool_extended_send_and_wait(POOL_QUERY_CONTEXT *query_context,
										char *kind, int len, char *contents,
										int send_type, int node_id)
{
	POOL_SESSION_CONTEXT *session_context;
	POOL_CONNECTION *frontend;
	POOL_CONNECTION_POOL *backend;
	bool is_commit;
	bool is_begin_read_write;
	int i;
	int str_len;
	int rewritten_len;
	char *str;
	char *rewritten_begin;

	session_context = pool_get_session_context();
	frontend = session_context->frontend;
	backend = session_context->backend;
	is_commit = is_commit_or_rollback_query(query_context->parse_tree);
	is_begin_read_write = false;
	str_len = 0;
	rewritten_len = 0;
	str = NULL;
	rewritten_begin = NULL;

	/*
	 * If the query is BEGIN READ WRITE or
	 * BEGIN ... SERIALIZABLE in master/slave mode,
	 * we send BEGIN to slaves/standbys instead.
	 * original_query which is BEGIN READ WRITE is sent to primary.
	 * rewritten_query which is BEGIN is sent to standbys.
	 */
	if (pool_need_to_treat_as_if_default_transaction(query_context))
	{
		is_begin_read_write = true;

		if (*kind == 'P')
		{
			rewritten_begin = remove_read_write(len, contents, &rewritten_len);
			if (rewritten_begin == NULL)
				return POOL_END;
		}
	}
	
	if (!rewritten_begin)
	{	
		str_len = len;
		str = contents;
	}

	/* Send query */
	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (!VALID_BACKEND(i))
			continue;
		else if (send_type < 0 && i == node_id)
			continue;
		else if (send_type > 0 && i != node_id)
			continue;

		/*
		 * If in reset context, we send COMMIT/ABORT only to nodes
		 * that are not in I(idle) state.  This ensures that
		 * transactions are closed.
		 */
		if (is_commit && session_context->reset_context && TSTATE(backend, i) == 'I')
		{
			pool_unset_node_to_be_sent(query_context, i);
			continue;
		}

		if (rewritten_begin)
		{
			if (REAL_PRIMARY_NODE_ID == i)
			{
				str = contents;
				str_len = len;
			}
			else
			{
				str = rewritten_begin;
				str_len = rewritten_len;
			}
		}

		if (pool_config->log_per_node_statement)
		{
			char msgbuf[QUERY_STRING_BUFFER_LEN];
			char *stmt;

			if (*kind == 'P' || *kind == 'E')
			{
				if (query_context->rewritten_query)
				{
					if (is_begin_read_write)
					{
						if (REAL_PRIMARY_NODE_ID == i)
							stmt = query_context->original_query;
						else
							stmt = query_context->rewritten_query;
					}
					else
					{
						stmt = query_context->rewritten_query;
					}
				}
				else
				{
					stmt = query_context->original_query;
				}

				if (*kind == 'P')
					snprintf(msgbuf, sizeof(msgbuf), "Parse: %s", stmt);
				else
					snprintf(msgbuf, sizeof(msgbuf), "Execute: %s", stmt);
			}
			else
			{
				snprintf(msgbuf, sizeof(msgbuf), "%c message", *kind);
			}

			per_node_statement_log(backend, i, msgbuf);
		}

		if (send_extended_protocol_message(backend, i, kind, str_len, str) != POOL_CONTINUE)
		{
			free(rewritten_begin);
			return POOL_END;
		}
	}

	if (!is_begin_read_write)
	{
		if (query_context->rewritten_query)
			str = query_context->rewritten_query;
		else
			str = query_context->original_query;
	}

	/* Wait for response */
	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (!VALID_BACKEND(i))
			continue;
		else if (send_type < 0 && i == node_id)
			continue;
		else if (send_type > 0 && i != node_id)
			continue;

		/*
		 * If in master/slave mode, we do not send COMMIT/ABORT to
		 * slaves/standbys if it's in I(idle) state.
		 */
		if (is_commit && MASTER_SLAVE && !IS_MASTER_NODE_ID(i) && TSTATE(backend, i) == 'I')
		{
			continue;
		}

		if (is_begin_read_write)
		{
			if (REAL_PRIMARY_NODE_ID == i)
				str = query_context->original_query;
			else
				str = query_context->rewritten_query;
		}

		if (wait_for_query_response(frontend, CONNECTION(backend, i), MAJOR(backend)) != POOL_CONTINUE)
		{
			/* Cancel current transaction */
			CancelPacket cancel_packet;

			cancel_packet.protoVersion = htonl(PROTO_CANCEL);
			cancel_packet.pid = MASTER_CONNECTION(backend)->pid;
			cancel_packet.key= MASTER_CONNECTION(backend)->key;
			cancel_request(&cancel_packet);

			free(rewritten_begin);
			return POOL_END;
		}

		/*
		 * Check if an error was detected.  If so, emit a log
		 * message.  This is useful when an invalid encoding error
		 * occurs: in that case PostgreSQL does not report which
		 * statement caused the error, which confuses users.
		 */
		per_node_error_log(backend, i, str, "pool_extended_send_and_wait: Error or notice message from backend: ", true);
	}

	free(rewritten_begin);
	return POOL_CONTINUE;
}
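The send_type/node_id convention documented above (and reused by pool_send_and_wait in the next example) boils down to one yes/no decision per backend. Here is a hedged sketch of that filter as a stand-alone helper; the helper name is hypothetical and the valid flag stands in for VALID_BACKEND(i):

/*
 * Hypothetical helper mirroring the per-node filter used above:
 *   send_type == -1: send to every valid node except node_id
 *   send_type ==  0: send to all valid nodes
 *   send_type  >  0: send only to node_id
 */
static int node_is_target(int i, int valid, int send_type, int node_id)
{
	if (!valid)
		return 0;
	if (send_type < 0 && i == node_id)
		return 0;
	if (send_type > 0 && i != node_id)
		return 0;
	return 1;
}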
Example #17
/*
 * Send simple query and wait for response
 * send_type:
 *  -1: do not send this node_id
 *   0: send to all nodes
 *  >0: send to this node_id
 */
POOL_STATUS pool_send_and_wait(POOL_QUERY_CONTEXT *query_context,
							   int send_type, int node_id)
{
	POOL_SESSION_CONTEXT *session_context;
	POOL_CONNECTION *frontend;
	POOL_CONNECTION_POOL *backend;
	bool is_commit;
	bool is_begin_read_write;
	int i;
	int len;
	char *string;

	session_context = pool_get_session_context();
	frontend = session_context->frontend;
	backend = session_context->backend;
	is_commit = is_commit_or_rollback_query(query_context->parse_tree);
	is_begin_read_write = false;
	len = 0;
	string = NULL;

	/*
	 * If the query is BEGIN READ WRITE or
	 * BEGIN ... SERIALIZABLE in master/slave mode,
	 * we send BEGIN to slaves/standbys instead.
	 * original_query which is BEGIN READ WRITE is sent to primary.
	 * rewritten_query which is BEGIN is sent to standbys.
	 */
	if (pool_need_to_treat_as_if_default_transaction(query_context))
	{
		is_begin_read_write = true;
	}
	else
	{
		if (query_context->rewritten_query)
		{
			len = query_context->rewritten_length;
			string = query_context->rewritten_query;
		}
		else
		{
			len = query_context->original_length;
			string = query_context->original_query;
		}
	}

	/* Send query */
	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (!VALID_BACKEND(i))
			continue;
		else if (send_type < 0 && i == node_id)
			continue;
		else if (send_type > 0 && i != node_id)
			continue;

		/*
		 * If in master/slave mode, we do not send COMMIT/ABORT to
		 * slaves/standbys if it's in I(idle) state.
		 */
		if (is_commit && MASTER_SLAVE && !IS_MASTER_NODE_ID(i) && TSTATE(backend, i) == 'I')
		{
			pool_unset_node_to_be_sent(query_context, i);
			continue;
		}

		/*
		 * If in reset context, we send COMMIT/ABORT only to nodes
		 * that are not in I(idle) state.  This ensures that
		 * transactions are closed.
		 */
		if (is_commit && session_context->reset_context && TSTATE(backend, i) == 'I')
		{
			pool_unset_node_to_be_sent(query_context, i);
			continue;
		}

		if (is_begin_read_write)
		{
			if (REAL_PRIMARY_NODE_ID == i)
			{
				len = query_context->original_length;
				string = query_context->original_query;
			}
			else
			{
				len = query_context->rewritten_length;
				string = query_context->rewritten_query;
			}
		}

		per_node_statement_log(backend, i, string);

		if (send_simplequery_message(CONNECTION(backend, i), len, string, MAJOR(backend)) != POOL_CONTINUE)
		{
			return POOL_END;
		}
	}

	/* Wait for response */
	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (!VALID_BACKEND(i))
			continue;
		else if (send_type < 0 && i == node_id)
			continue;
		else if (send_type > 0 && i != node_id)
			continue;

#ifdef NOT_USED
		/*
		 * If in master/slave mode, we do not send COMMIT/ABORT to
		 * slaves/standbys if it's in I(idle) state.
		 */
		if (is_commit && MASTER_SLAVE && !IS_MASTER_NODE_ID(i) && TSTATE(backend, i) == 'I')
		{
			continue;
		}
#endif

		if (is_begin_read_write)
		{
			if(REAL_PRIMARY_NODE_ID == i)
				string = query_context->original_query;
			else
				string = query_context->rewritten_query;
		}

		if (wait_for_query_response(frontend, CONNECTION(backend, i), MAJOR(backend)) != POOL_CONTINUE)
		{
			/* Cancel current transaction */
			CancelPacket cancel_packet;

			cancel_packet.protoVersion = htonl(PROTO_CANCEL);
			cancel_packet.pid = MASTER_CONNECTION(backend)->pid;
			cancel_packet.key= MASTER_CONNECTION(backend)->key;
			cancel_request(&cancel_packet);

			return POOL_END;
		}

		/*
		 * Check if an error was detected.  If so, emit a log
		 * message.  This is useful when an invalid encoding error
		 * occurs: in that case PostgreSQL does not report which
		 * statement caused the error, which confuses users.
		 */
		per_node_error_log(backend, i, string, "pool_send_and_wait: Error or notice message from backend: ", true);
	}

	return POOL_CONTINUE;
}
Example #18
/*
 * Start online recovery.
 * "recovery_node" is the node to be recovered.
 * Master or primary node is chosen in this function.
 */
void start_recovery(int recovery_node)
{
	int node_id;
	BackendInfo *backend;
	BackendInfo *recovery_backend;
	PGconn *conn;
	int failback_wait_count;
#define FAILBACK_WAIT_MAX_RETRY 5		/* 5 seconds should be enough for failback operation */

	ereport(LOG,
		(errmsg("starting recovering node %d", recovery_node)));

	if ( (recovery_node < 0) || (recovery_node >= pool_config->backend_desc->num_backends) )
		ereport(ERROR,
				(errmsg("node recovery failed, node id: %d is not valid", recovery_node)));

	if (VALID_BACKEND(recovery_node))
		ereport(ERROR,
				(errmsg("node recovery failed, node id: %d is alive", recovery_node)));

	/* select master/primary node */
	node_id = MASTER_SLAVE ? PRIMARY_NODE_ID : REAL_MASTER_NODE_ID;
	backend = &pool_config->backend_desc->backend_info[node_id];

	/* get node info to be recovered */
	recovery_backend = &pool_config->backend_desc->backend_info[recovery_node];

	conn = connect_backend_libpq(backend);
	if (conn == NULL)
		ereport(ERROR,
				(errmsg("node recovery failed, unable to connect to master node: %d ", node_id)));

	PG_TRY();
	{
		/* 1st stage */
		if (REPLICATION)
		{
			exec_checkpoint(conn);
			ereport(LOG,
				(errmsg("node recovery, CHECKPOINT in the 1st stage done")));
		}

		exec_recovery(conn, backend, recovery_backend, FIRST_STAGE);

		ereport(LOG,
			(errmsg("node recovery, 1st stage is done")));

		if (REPLICATION)
		{
			ereport(LOG,
				(errmsg("node recovery, starting 2nd stage")));

			/* 2nd stage */
			*InRecovery = RECOVERY_ONLINE;
			if (pool_config->use_watchdog)
			{
				/* announce start recovery */
				if (WD_OK != wd_start_recovery())
					ereport(ERROR,
							(errmsg("node recovery failed, failed to send start recovery packet")));
			}

			if (wait_connection_closed() != 0)
				ereport(ERROR,
						(errmsg("node recovery failed, waiting connection closed in the other pgpools timeout")));

			ereport(LOG,
				(errmsg("node recovery, all connections from clients have been closed")));

			exec_checkpoint(conn);

			ereport(LOG,
				(errmsg("node recovery"),
					 errdetail("CHECKPOINT in the 2nd stage done")));

			exec_recovery(conn, backend, recovery_backend, SECOND_STAGE);
		}

		exec_remote_start(conn, recovery_backend);

		check_postmaster_started(recovery_backend);

		ereport(LOG,
			(errmsg("node recovery, node: %d restarted", recovery_node)));

		/*
		 * reset failover completion flag.  this is necessary since
		 * previous failover/failback will set the flag to 1.
		 */
		pcp_wakeup_request = 0;

		/* send failback request to pgpool parent */
		send_failback_request(recovery_node);

		/* wait for failback */
		failback_wait_count = 0;
		while (!pcp_wakeup_request)
		{
			struct timeval t = {1, 0};
			/* polling SIGUSR2 signal every 1 sec */
			select(0, NULL, NULL, NULL, &t);
			failback_wait_count++;
			if (failback_wait_count >= FAILBACK_WAIT_MAX_RETRY)
			{
				ereport(LOG,
					(errmsg("node recovery"),
						errdetail("waiting for wake up request is timeout(%d seconds)",
							   FAILBACK_WAIT_MAX_RETRY)));

				break;
			}
		}
		pcp_wakeup_request = 0;
	}
	PG_CATCH();
	{
		PQfinish(conn);
		PG_RE_THROW();
	}
	PG_END_TRY();

	PQfinish(conn);

	ereport(LOG,
			(errmsg("recovery done")));
}
Example #19
/*
 * perform accept() and return new fd
 */
static POOL_CONNECTION *do_accept(int unix_fd, int inet_fd, struct timeval *timeout)
{
    fd_set	readmask;
    int fds;
	int save_errno;

	SockAddr saddr;
	int fd = 0;
	int afd;
	int inet = 0;
	POOL_CONNECTION *cp;
#ifdef ACCEPT_PERFORMANCE
	struct timeval now1, now2;
	static long atime;
	static int cnt;
#endif
	struct timeval *timeoutval;
	struct timeval tv1, tv2, tmback = {0, 0};

	set_ps_display("wait for connection request", false);

	/* Destroy session context for just in case... */
	pool_session_context_destroy();

	FD_ZERO(&readmask);
	FD_SET(unix_fd, &readmask);
	if (inet_fd)
		FD_SET(inet_fd, &readmask);

	if (timeout->tv_sec == 0 && timeout->tv_usec == 0)
		timeoutval = NULL;
	else
	{
		timeoutval = timeout;
		tmback.tv_sec = timeout->tv_sec;
		tmback.tv_usec = timeout->tv_usec;
		gettimeofday(&tv1, NULL);

#ifdef DEBUG
		pool_log("before select = {%d, %d}", timeoutval->tv_sec, timeoutval->tv_usec);
		pool_log("g:before select = {%d, %d}", tv1.tv_sec, tv1.tv_usec);
#endif
	}

	fds = select(Max(unix_fd, inet_fd)+1, &readmask, NULL, NULL, timeoutval);

	save_errno = errno;
	/* check backend timer is expired */
	if (backend_timer_expired)
	{
		pool_backend_timer();
		backend_timer_expired = 0;
	}

	/*
	 * The following code fragment computes the remaining timeout value
	 * in a portable way.  Linux updates the timeout automatically, but
	 * other platforms do not.
	 */
	if (timeoutval)
	{
		gettimeofday(&tv2, NULL);

		tmback.tv_usec -= tv2.tv_usec - tv1.tv_usec;
		tmback.tv_sec -= tv2.tv_sec - tv1.tv_sec;

		if (tmback.tv_usec < 0)
		{
			tmback.tv_sec--;
			if (tmback.tv_sec < 0)
			{
				timeout->tv_sec = 0;
				timeout->tv_usec = 0;
			}
			else
			{
				tmback.tv_usec += 1000000;
				timeout->tv_sec = tmback.tv_sec;
				timeout->tv_usec = tmback.tv_usec;
			}
		}
#ifdef DEBUG
		pool_log("g:after select = {%d, %d}", tv2.tv_sec, tv2.tv_usec);
		pool_log("after select = {%d, %d}", timeout->tv_sec, timeout->tv_usec);
#endif
	}

	errno = save_errno;

	if (fds == -1)
	{
		if (errno == EAGAIN || errno == EINTR)
			return NULL;

		pool_error("select() failed. reason %s", strerror(errno));
		return NULL;
	}

	/* timeout */
	if (fds == 0)
	{
		return NULL;
	}

	if (FD_ISSET(unix_fd, &readmask))
	{
		fd = unix_fd;
	}

	if (FD_ISSET(inet_fd, &readmask))
	{
		fd = inet_fd;
		inet++;
	}

	/*
	 * Note that some SysV systems do not work here. For those
	 * systems, we need some locking mechanism for the fd.
	 */
	memset(&saddr, 0, sizeof(saddr));
	saddr.salen = sizeof(saddr.addr);

#ifdef ACCEPT_PERFORMANCE
	gettimeofday(&now1,0);
#endif

 retry_accept:

	/* wait if recovery is started */
	while (*InRecovery == 1)
	{
		pause();
	}

	afd = accept(fd, (struct sockaddr *)&saddr.addr, &saddr.salen);

	save_errno = errno;
	/* check backend timer is expired */
	if (backend_timer_expired)
	{
		pool_backend_timer();
		backend_timer_expired = 0;
	}
	errno = save_errno;
	if (afd < 0)
	{
		if (errno == EINTR && *InRecovery)
			goto retry_accept;

		/*
		 * "Resource temporarily unavailable" (EAGAIN or EWOULDBLOCK)
		 * can be silently ignored. And EINTR can be ignored.
		 */
		if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR)
			pool_error("accept() failed. reason: %s", strerror(errno));
		return NULL;
	}
#ifdef ACCEPT_PERFORMANCE
	gettimeofday(&now2,0);
	atime += (now2.tv_sec - now1.tv_sec)*1000000 + (now2.tv_usec - now1.tv_usec);
	cnt++;
	if (cnt % 100 == 0)
	{
		pool_log("cnt: %d atime: %ld", cnt, atime);
	}
#endif

	/* reload config file */
	if (got_sighup)
	{
		pool_get_config(get_config_file_name(), RELOAD_CONFIG);
		if (pool_config->enable_pool_hba)
		{
			load_hba(get_hba_file_name());
			if (strcmp("", pool_config->pool_passwd))
				pool_reopen_passwd_file();
		}
		if (pool_config->parallel_mode)
			pool_memset_system_db_info(system_db_info->info);
		got_sighup = 0;
	}

	connection_count_up();
	accepted = 1;

	if (pool_config->parallel_mode)
	{
		/*
		 * do not accept new connection if any of DB node or SystemDB is down when operating in
		 * parallel mode
		 */
		int i;

		for (i=0;i<NUM_BACKENDS;i++)
		{
			if (BACKEND_INFO(i).backend_status == CON_DOWN || SYSDB_STATUS == CON_DOWN)
			{
				StartupPacket *sp;
				char *msg = "pgpool is not available in parallel query mode";

				if (SYSDB_STATUS == CON_DOWN)
					pool_log("Cannot accept() new connection. SystemDB is down");
				else
					pool_log("Cannot accept() new connection. %d th backend is down", i);

				if ((cp = pool_open(afd)) == NULL)
				{
					close(afd);
					child_exit(1);
				}

				sp = read_startup_packet(cp);
				if (sp == NULL)
				{
					/* failed to read the startup packet. exit this child process */
					pool_close(cp);
					child_exit(1);
				}

				pool_debug("do_accept: send error message to frontend");

				if (sp->major == PROTO_MAJOR_V3)
				{
					char buf[256];

					if (SYSDB_STATUS == CON_DOWN)
						snprintf(buf, sizeof(buf), "SystemDB is down");
					else
						snprintf(buf, sizeof(buf), "%d th backend is down", i);

					pool_send_error_message(cp, sp->major, "08S01",
											msg,
											buf,
											((SYSDB_STATUS == CON_DOWN) ? "repair the SystemDB and restart pgpool"
											                           : "repair the backend and restart pgpool"),
											__FILE__,
											__LINE__);
				}
				else
				{
					pool_send_error_message(cp, sp->major,
											0,
											msg,
											"",
											"",
											"",
											0);
				}
				pool_close(cp);
				child_exit(1);
			}
		}
	}
	else
	{
		/*
		 * do not accept new connection if all DB nodes are down when operating in
		 * non parallel mode
		 */
		int i;
		int found = 0;

		for (i=0;i<NUM_BACKENDS;i++)
		{
			if (VALID_BACKEND(i))
			{
				found = 1;
			}
		}
		if (found == 0)
		{
			pool_log("Cannot accept() new connection. all backends are down");
			child_exit(1);
		}
	}

	pool_debug("I am %d accept fd %d", getpid(), afd);

	pool_getnameinfo_all(&saddr, remote_host, remote_port);
	snprintf(remote_ps_data, sizeof(remote_ps_data),
			 remote_port[0] == '\0' ? "%s" : "%s(%s)",
			 remote_host, remote_port);

	set_ps_display("accept connection", false);

	/* log who is connecting */
	if (pool_config->log_connections)
	{
		pool_log("connection received: host=%s%s%s",
				 remote_host, remote_port[0] ? " port=" : "", remote_port);
	}

	/* set NODELAY and KEEPALIVE options if INET connection */
	if (inet)
	{
		int on = 1;

		if (setsockopt(afd, IPPROTO_TCP, TCP_NODELAY,
					   (char *) &on,
					   sizeof(on)) < 0)
		{
			pool_error("do_accept: setsockopt() failed: %s", strerror(errno));
			close(afd);
			return NULL;
		}
		if (setsockopt(afd, SOL_SOCKET, SO_KEEPALIVE,
					   (char *) &on,
					   sizeof(on)) < 0)
		{
			pool_error("do_accept: setsockopt() failed: %s", strerror(errno));
			close(afd);
			return NULL;
		}
	}

	if ((cp = pool_open(afd)) == NULL)
	{
		close(afd);
		return NULL;
	}

	/* save ip address for hba */
	memcpy(&cp->raddr, &saddr, sizeof(SockAddr));
	if (cp->raddr.addr.ss_family == 0)
		cp->raddr.addr.ss_family = AF_UNIX;

	return cp;
}
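The portable remaining-timeout computation inside do_accept is easier to see on its own: sample the clock before and after select(), subtract the elapsed time from the caller's timeout, and clamp the result at zero. A minimal sketch under those assumptions (hypothetical helper name):

#include <sys/time.h>

/*
 * Sketch of the remaining-timeout bookkeeping: subtract the elapsed
 * time (t2 - t1) from *timeout, clamping the result at zero.
 */
static void subtract_elapsed(struct timeval *timeout,
							 const struct timeval *t1,
							 const struct timeval *t2)
{
	long sec  = (long) (timeout->tv_sec  - (t2->tv_sec  - t1->tv_sec));
	long usec = (long) (timeout->tv_usec - (t2->tv_usec - t1->tv_usec));

	if (usec < 0)
	{
		sec--;
		usec += 1000000;
	}
	if (sec < 0)
	{
		sec = 0;
		usec = 0;
	}
	timeout->tv_sec = sec;
	timeout->tv_usec = usec;
}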
Example #20
POOL_STATUS AsciiRow(POOL_CONNECTION *frontend,
					 POOL_CONNECTION_POOL *backend,
					 short num_fields)
{
	static char nullmap[8192], nullmap1[8192];
	int nbytes;
	int i, j;
	unsigned char mask;
	int size, size1 = 0;
	char *buf = NULL, *sendbuf = NULL;
	char msgbuf[1024];

	pool_write(frontend, "D", 1);

	nbytes = (num_fields + 7)/8;

	if (nbytes <= 0)
		return POOL_CONTINUE;

	/* NULL map */
	pool_read(MASTER(backend), nullmap, nbytes);
	memcpy(nullmap1, nullmap, nbytes);
	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (VALID_BACKEND(i) && !IS_MASTER_NODE_ID(i))
		{
			pool_read(CONNECTION(backend, i), nullmap, nbytes);
			if (memcmp(nullmap, nullmap1, nbytes))
			{
				/* XXX: the NULL map may differ among backends.  If we
				   were paranoid, we would have to treat this as a
				   fatal error.  In the real world, however, it is
				   better to tolerate it and just emit a log
				   message... */
				pool_debug("AsciiRow: NULLMAP differ between master and %d th backend", i);
			}
		}
	}

	if (pool_write(frontend, nullmap1, nbytes) < 0)
		return POOL_END;

	mask = 0;

	for (i = 0;i<num_fields;i++)
	{
		if (mask == 0)
			mask = 0x80;

		/* NOT NULL? */
		if (mask & nullmap[i/8])
		{
			/* field size */
			if (pool_read(MASTER(backend), &size, sizeof(int)) < 0)
				return POOL_END;

			size1 = ntohl(size) - 4;

			/* read and send actual data only when size > 0 */
			if (size1 > 0)
			{
				sendbuf = pool_read2(MASTER(backend), size1);
				if (sendbuf == NULL)
					return POOL_END;
			}

			/* forward to frontend */
			pool_write(frontend, &size, sizeof(int));
			pool_write(frontend, sendbuf, size1);
			snprintf(msgbuf, Min(sizeof(msgbuf), size1+1), "%s", sendbuf);
			pool_debug("AsciiRow: len: %d data: %s", size1, msgbuf);

			for (j=0;j<NUM_BACKENDS;j++)
			{
				if (VALID_BACKEND(j) && !IS_MASTER_NODE_ID(j))
				{
					/* field size */
					if (pool_read(CONNECTION(backend, j), &size, sizeof(int)) < 0)
						return POOL_END;

					buf = NULL;
					size = ntohl(size) - 4;

					/* XXX: the field size may differ among backends.
					   If we were paranoid, we would have to treat this
					   as a fatal error.  In the real world, however,
					   it is better to tolerate it and just emit a log
					   message... */
					if (size != size1)
						pool_debug("AsciiRow: %d th field size does not match between master(%d) and %d th backend(%d)",
								   i, ntohl(size), j, ntohl(size1));

					/* read and send actual data only when size > 0 */
					if (size > 0)
					{
						buf = pool_read2(CONNECTION(backend, j), size);
						if (buf == NULL)
							return POOL_END;
					}
				}
			}
		}

		mask >>= 1;
	}

	if (pool_flush(frontend))
		return POOL_END;

	return POOL_CONTINUE;
}
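In the V2 AsciiRow message the leading NULL map packs one bit per field, most significant bit first, which is why (num_fields + 7) / 8 bytes are read and a set bit means the field is NOT NULL. A small sketch of that bit test (hypothetical helper name):

/*
 * Returns nonzero if 0-based field 'field' is NOT NULL according to a
 * V2-protocol NULL map: one bit per field, most significant bit first.
 */
static int field_is_not_null(const unsigned char *nullmap, int field)
{
	unsigned char mask = (unsigned char) (0x80 >> (field % 8));

	return (nullmap[field / 8] & mask) != 0;
}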
Example #21
POOL_STATUS FunctionResultResponse(POOL_CONNECTION *frontend,
								   POOL_CONNECTION_POOL *backend)
{
	char dummy;
	int len;
	char *result = 0;
	int i;

	pool_write(frontend, "V", 1);

	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (VALID_BACKEND(i))
		{
			if (pool_read(CONNECTION(backend, i), &dummy, 1) < 0)
				return POOL_ERROR;
		}
	}
	pool_write(frontend, &dummy, 1);

	/* non empty result? */
	if (dummy == 'G')
	{
		for (i=0;i<NUM_BACKENDS;i++)
		{
			if (VALID_BACKEND(i))
			{
				/* length of result in bytes */
				if (pool_read(CONNECTION(backend, i), &len, sizeof(len)) < 0)
					return POOL_ERROR;
			}
		}
		pool_write(frontend, &len, sizeof(len));

		len = ntohl(len);

		for (i=0;i<NUM_BACKENDS;i++)
		{
			if (VALID_BACKEND(i))
			{
				/* result value itself */
				if ((result = pool_read2(CONNECTION(backend, i), len)) == NULL)
					return POOL_ERROR;
			}
		}
		pool_write(frontend, result, len);
	}

	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (VALID_BACKEND(i))
		{
			/* unused ('0') */
			if (pool_read(CONNECTION(backend, i), &dummy, 1) < 0)
				return POOL_ERROR;
		}
	}
	pool_write(frontend, "0", 1);

	return pool_flush(frontend);
}
Example #22
/*
 * process cancel request
 */
void cancel_request(CancelPacket *sp)
{
	int	len;
	int fd;
	POOL_CONNECTION *con;
	int i,j,k;
	ConnectionInfo *c = NULL;
	CancelPacket cp;
	bool found = false;

	pool_debug("Cancel request received");

	/* look for cancel key from shmem info */
	for (i=0;i<pool_config->num_init_children;i++)
	{
		for (j=0;j<pool_config->max_pool;j++)
		{
			for (k=0;k<NUM_BACKENDS;k++)
			{
				c = pool_coninfo(i, j, k);
				pool_debug("con_info: address:%p database:%s user:%s pid:%d key:%d i:%d",
						   c, c->database, c->user, ntohl(c->pid), ntohl(c->key),i);

				if (c->pid == sp->pid && c->key == sp->key)
				{
					pool_debug("found pid:%d key:%d i:%d",ntohl(c->pid), ntohl(c->key),i);
					c = pool_coninfo(i, j, 0);
					found = true;
					goto found;
				}
			}
		}
	}

 found:
	if (!found)
	{
		pool_error("cancel_request: invalid cancel key: pid:%d key:%d",ntohl(sp->pid), ntohl(sp->key));
		return;	/* invalid key */
	}

	for (i=0;i<NUM_BACKENDS;i++,c++)
	{
		if (!VALID_BACKEND(i))
			continue;

		if (*(BACKEND_INFO(i).backend_hostname) == '/')
			fd = connect_unix_domain_socket(i, TRUE);
		else
			fd = connect_inet_domain_socket(i, TRUE);

		if (fd < 0)
		{
			pool_error("Could not create socket for sending cancel request for backend %d", i);
			return;
		}

		con = pool_open(fd);
		if (con == NULL)
			return;

		len = htonl(sizeof(len) + sizeof(CancelPacket));
		pool_write(con, &len, sizeof(len));

		cp.protoVersion = sp->protoVersion;
		cp.pid = c->pid;
		cp.key = c->key;

		pool_log("cancel_request: canceling backend pid:%d key: %d", ntohl(cp.pid),ntohl(cp.key));

		if (pool_write_and_flush(con, &cp, sizeof(CancelPacket)) < 0)
			pool_error("Could not send cancel request packet for backend %d", i);

		pool_close(con);

		/*
		 * this is needed to ensure that the next DB node executes the
		 * query supposed to be canceled.
		 */
		sleep(1);
	}
}
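The packet forwarded here follows the PostgreSQL CancelRequest layout: a 4-byte length of 16, the cancel request code, then the target backend pid and secret key, all in network byte order (pgpool's PROTO_CANCEL is assumed to carry the standard request code). A hedged sketch of building such a packet from host-order values:

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/*
 * Sketch: fill buf (at least 16 bytes) with a CancelRequest message.
 * 80877102 is the cancel request code defined by the PostgreSQL
 * frontend/backend protocol.
 */
static void build_cancel_request(unsigned char *buf, uint32_t pid, uint32_t key)
{
	uint32_t len  = htonl(16);
	uint32_t code = htonl(80877102);
	uint32_t npid = htonl(pid);
	uint32_t nkey = htonl(key);

	memcpy(buf,      &len,  4);
	memcpy(buf + 4,  &code, 4);
	memcpy(buf + 8,  &npid, 4);
	memcpy(buf + 12, &nkey, 4);
}

Note that cancel_request above copies c->pid and c->key without conversion because they are already stored in network byte order, as the ntohl() calls in its log messages suggest.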
Example #23
/*
 * Reuse existing connection
 */
static bool connect_using_existing_connection(POOL_CONNECTION *frontend, 
											  POOL_CONNECTION_POOL *backend,
											  StartupPacket *sp)
{
	int i, freed = 0;
	/*
	 * Save startup packet info
	 */
	for (i = 0; i < NUM_BACKENDS; i++)
	{
		if (VALID_BACKEND(i))
		{
			if (!freed)
			{
				pool_free_startup_packet(backend->slots[i]->sp);
				freed = 1;
			}
			backend->slots[i]->sp = sp;
		}
	}

	/* Reuse existing connection to backend */

	if (pool_do_reauth(frontend, backend))
	{
		pool_close(frontend);
		connection_count_down();
		return false;
	}

	if (MAJOR(backend) == 3)
	{
		char command_buf[1024];

		/* If we have received application_name in the startup
		 * packet, send a SET command to the backends, and add or
		 * replace the existing application_name parameter.
		 */
		if (sp->application_name)
		{
			snprintf(command_buf, sizeof(command_buf), "SET application_name TO '%s'", sp->application_name);

			for (i=0;i<NUM_BACKENDS;i++)
			{
				if (VALID_BACKEND(i))
					if (do_command(frontend, CONNECTION(backend, i),
							   command_buf, MAJOR(backend),
								   MASTER_CONNECTION(backend)->pid,
								   MASTER_CONNECTION(backend)->key, 0) != POOL_CONTINUE)
					{
						pool_error("connect_using_existing_connection: do_command failed. command: %s", command_buf);
						return false;
					}
			}

			pool_add_param(&MASTER(backend)->params, "application_name", sp->application_name);
		}

		if (send_params(frontend, backend))
		{
			pool_close(frontend);
			connection_count_down();
			return false;
		}
	}

	/* Send ReadyForQuery to frontend */
	pool_write(frontend, "Z", 1);

	if (MAJOR(backend) == 3)
	{
		int len;
		char tstate;

		len = htonl(5);
		pool_write(frontend, &len, sizeof(len));
		tstate = TSTATE(backend, MASTER_NODE_ID);
		pool_write(frontend, &tstate, 1);
	}

	if (pool_flush(frontend) < 0)
	{
		pool_close(frontend);
		connection_count_down();
		return false;
	}
	return true;
}
Example #24
int RowDescription(POOL_CONNECTION *frontend,
				   POOL_CONNECTION_POOL *backend,
				   short *result)
{
	short num_fields, num_fields1 = 0;
	int oid, mod;
	int oid1, mod1;
	short size, size1;
	char *string;
	int len, len1;
	int i;

	pool_read(MASTER(backend), &num_fields, sizeof(short));
	num_fields1 = num_fields;
	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (VALID_BACKEND(i) && !IS_MASTER_NODE_ID(i))
		{
			/* # of fields (could be 0) */
			pool_read(CONNECTION(backend, i), &num_fields, sizeof(short));
			if (num_fields != num_fields1)
			{
				pool_error("RowDescription: num_fields does not match between backends master(%d) and %d th backend(%d)",
						   num_fields1, i, num_fields);
				return POOL_FATAL;
			}
		}
	}

	/* forward it to the frontend */
	pool_write(frontend, "T", 1);
	pool_write(frontend, &num_fields, sizeof(short));
	num_fields = ntohs(num_fields);
	for (i = 0;i<num_fields;i++)
	{
		int j;

		/* field name */
		string = pool_read_string(MASTER(backend), &len, 0);
		if (string == NULL)
			return POOL_END;
		len1 = len;
		if (pool_write(frontend, string, len) < 0)
			return POOL_END;

		for (j=0;j<NUM_BACKENDS;j++)
		{
			if (VALID_BACKEND(j) && !IS_MASTER_NODE_ID(j))
			{
				string = pool_read_string(CONNECTION(backend, j), &len, 0);
				if (string == NULL)
					return POOL_END;

				if (len != len1)
				{
					pool_error("RowDescription: field length does not match between backends master(%d) and %d th backend(%d)",
							   len1, j, len);
					return POOL_FATAL;
				}
			}
		}

		/* type oid */
		pool_read(MASTER(backend), &oid, sizeof(int));
		oid1 = oid;
		pool_debug("RowDescription: type oid: %d", ntohl(oid));
		for (j=0;j<NUM_BACKENDS;j++)
		{
			if (VALID_BACKEND(j) && !IS_MASTER_NODE_ID(j))
			{
				pool_read(CONNECTION(backend, j), &oid, sizeof(int));

				/* we do not regard oid mismatch as fatal */
				if (oid != oid1)
				{
					pool_debug("RowDescription: field oid does not match between backends master(%d) and %d th backend(%d)",
							   ntohl(oid), j, ntohl(oid1));
				}
			}
		}
		if (pool_write(frontend, &oid1, sizeof(int)) < 0)
			return POOL_END;

		/* size */
		pool_read(MASTER(backend), &size, sizeof(short));
		size1 = size;
		for (j=0;j<NUM_BACKENDS;j++)
		{
			if (VALID_BACKEND(j) && !IS_MASTER_NODE_ID(j))
			{
				pool_read(CONNECTION(backend, j), &size, sizeof(short));
				if (size != size1)
				{
					pool_error("RowDescription: field size does not match between backends master(%d) and %d th backend(%d)",
							   ntohs(size1), j, ntohs(size));
					return POOL_FATAL;
				}
			}
		}
		pool_debug("RowDescription: field size: %d", ntohs(size));
		pool_write(frontend, &size1, sizeof(short));

		/* modifier */
		pool_read(MASTER(backend), &mod, sizeof(int));
		pool_debug("RowDescription: modifier: %d", ntohs(mod));
		mod1 = mod;
		for (j=0;j<NUM_BACKENDS;j++)
		{
			if (VALID_BACKEND(j) && !IS_MASTER_NODE_ID(j))
			{
				pool_read(CONNECTION(backend, j), &mod, sizeof(int));
				if (mod != mod1)
				{
					pool_debug("RowDescription: modifier does not match between backends master(%d) and %d th backend(%d)",
							   ntohl(mod), j, ntohl(mod1));
				}
			}
		}
		if (pool_write(frontend, &mod1, sizeof(int)) < 0)
			return POOL_END;
	}

	*result = num_fields;

	return pool_flush(frontend);
}
Example #25
/*
 * create actual connections to backends
 */
static POOL_CONNECTION_POOL *new_connection(POOL_CONNECTION_POOL *p)
{
	POOL_CONNECTION_POOL_SLOT *s;
	int active_backend_count = 0;
	int i;

	for (i=0;i<NUM_BACKENDS;i++)
	{
		pool_debug("new_connection: connecting %d backend", i);

		if (!VALID_BACKEND(i))
		{
			pool_debug("new_connection: skipping slot %d because backend_status = %d",
					   i, BACKEND_INFO(i).backend_status);
			continue;
		}

		s = malloc(sizeof(POOL_CONNECTION_POOL_SLOT));
		if (s == NULL)
		{
			pool_error("new_connection: malloc() failed");
			return NULL;
		}

		if (create_cp(s, i) == NULL)
		{
			/* connection failed. mark this backend down */
			pool_error("new_connection: create_cp() failed");

			/* If fail_over_on_backend_error is true, do failover.
			 * Otherwise, just exit this session.
			 */
			if (pool_config->fail_over_on_backend_error)
			{
				notice_backend_error(i);
			}
			else
			{
				pool_log("new_connection: do not failover because fail_over_on_backend_error is off");
			}
			child_exit(1);
		}

		p->info[i].create_time = time(NULL);
		p->slots[i] = s;

		if (pool_init_params(&s->con->params))
		{
			return NULL;
		}

		BACKEND_INFO(i).backend_status = CON_UP;
		active_backend_count++;
	}

	if (active_backend_count > 0)
	{
		return p;
	}

	return NULL;
}
Example #26
/*
 * create a connection pool by user and database
 */
POOL_CONNECTION_POOL *pool_create_cp(void)
{
	int i, freed = 0;
	time_t closetime;
	POOL_CONNECTION_POOL *oldestp;
	POOL_CONNECTION_POOL *ret;
	ConnectionInfo *info;

	POOL_CONNECTION_POOL *p = pool_connection_pool;

	if (p == NULL)
	{
		pool_error("pool_create_cp: pool_connection_pool is not initialized");
		return NULL;
	}

	for (i=0;i<pool_config->max_pool;i++)
	{
		if (MASTER_CONNECTION(p) == NULL)
		{
			ret = new_connection(p);
			if (ret)
				pool_index = i;
			return ret;
		}
		p++;
	}

	pool_debug("no empty connection slot was found");

	/*
	 * no empty connection slot was found. look for the oldest connection and discard it.
	 */
	oldestp = p = pool_connection_pool;
	closetime = MASTER_CONNECTION(p)->closetime;
	pool_index = 0;

	for (i=0;i<pool_config->max_pool;i++)
	{
		pool_debug("user: %s database: %s closetime: %ld",
				   MASTER_CONNECTION(p)->sp->user,
				   MASTER_CONNECTION(p)->sp->database,
				   MASTER_CONNECTION(p)->closetime);
		if (MASTER_CONNECTION(p)->closetime < closetime)
		{
			closetime = MASTER_CONNECTION(p)->closetime;
			oldestp = p;
			pool_index = i;
		}
		p++;
	}

	p = oldestp;
	pool_send_frontend_exits(p);

	pool_debug("discarding old %zd th connection. user: %s database: %s",
			   oldestp - pool_connection_pool,
			   MASTER_CONNECTION(p)->sp->user,
			   MASTER_CONNECTION(p)->sp->database);

	for (i=0;i<NUM_BACKENDS;i++)
	{
		if (!VALID_BACKEND(i))
			continue;

		if (!freed)
		{
			pool_free_startup_packet(CONNECTION_SLOT(p, i)->sp);
			freed = 1;
		}

		pool_close(CONNECTION(p, i));
		free(CONNECTION_SLOT(p, i));
	}

	info = p->info;
	memset(p, 0, sizeof(POOL_CONNECTION_POOL));
	p->info = info;
	memset(p->info, 0, sizeof(ConnectionInfo) * MAX_NUM_BACKENDS);

	ret = new_connection(p);
	return ret;
}
Example #27
void pool_backend_timer(void)
{
#define TMINTMAX 0x7fffffff

	POOL_CONNECTION_POOL *p = pool_connection_pool;
	int i, j;
	time_t now;
	time_t nearest = TMINTMAX;
	ConnectionInfo *info;

	POOL_SETMASK(&BlockSig);

	now = time(NULL);

	pool_debug("pool_backend_timer_handler called at %ld", now);

	for (i=0;i<pool_config->max_pool;i++, p++)
	{
		if (!MASTER_CONNECTION(p))
			continue;
		if (!MASTER_CONNECTION(p)->sp)
			continue;
		if (MASTER_CONNECTION(p)->sp->user == NULL)
			continue;

		/* timer expire? */
		if (MASTER_CONNECTION(p)->closetime)
		{
			int freed = 0;

			pool_debug("pool_backend_timer_handler: expire time: %ld",
					   MASTER_CONNECTION(p)->closetime+pool_config->connection_life_time);

			if (now >= (MASTER_CONNECTION(p)->closetime+pool_config->connection_life_time))
			{
				/* discard expired connection */
				pool_debug("pool_backend_timer_handler: expires user %s database %s",
						   MASTER_CONNECTION(p)->sp->user, MASTER_CONNECTION(p)->sp->database);

				pool_send_frontend_exits(p);

				for (j=0;j<NUM_BACKENDS;j++)
				{
					if (!VALID_BACKEND(j))
						continue;

					if (!freed)
					{
						pool_free_startup_packet(CONNECTION_SLOT(p, j)->sp);
						freed = 1;
					}

					pool_close(CONNECTION(p, j));
					free(CONNECTION_SLOT(p, j));
				}
				info = p->info;
				memset(p, 0, sizeof(POOL_CONNECTION_POOL));
				p->info = info;
				memset(p->info, 0, sizeof(ConnectionInfo) * MAX_NUM_BACKENDS);

				/* prepare to shutdown connections to system db */
				if(pool_config->system_db_dynamic_connection && (pool_config->parallel_mode || pool_config->enable_query_cache))
				{
					if (system_db_info->pgconn)
						pool_close_libpq_connection();
					if (pool_system_db_connection() && pool_system_db_connection()->con)
					{
						pool_send_frontend_exit(pool_system_db_connection()->con);
						pool_close(pool_system_db_connection()->con);
					}

					if( system_db_info->connection )
					{
						memset(system_db_info->connection, 0, sizeof(POOL_CONNECTION_POOL_SLOT));
						free(system_db_info->connection);
						system_db_info->connection = NULL;
					}
				}
			}
			else
			{
				/* look for nearest timer */
				if (MASTER_CONNECTION(p)->closetime < nearest)
					nearest = MASTER_CONNECTION(p)->closetime;
			}
		}
	}

	/* any remaining timer */
	if (nearest != TMINTMAX)
	{
		nearest = pool_config->connection_life_time - (now - nearest);
		if (nearest <= 0)
		  nearest = 1;
		pool_signal(SIGALRM, pool_backend_timer_handler);
		alarm(nearest);
	}

	POOL_SETMASK(&UnBlockSig);
}
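The alarm re-arm at the end is plain arithmetic: a slot whose master connection was closed at closetime expires at closetime + connection_life_time, so the next SIGALRM is scheduled for the nearest expiry minus the current time, floored at one second. A small sketch of that computation (hypothetical helper name):

#include <time.h>

/*
 * Sketch: seconds until the nearest idle pool slot expires, given the
 * earliest closetime among idle slots; floored at 1 so alarm() never
 * gets a zero argument.
 */
static unsigned int seconds_until_next_expiry(time_t nearest_closetime,
											  time_t now,
											  int connection_life_time)
{
	long remaining = (long) (nearest_closetime + connection_life_time - now);

	return (remaining <= 0) ? 1U : (unsigned int) remaining;
}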
Example #28
/*
 * backend connection error, failover/failback request, if possible
 * failover() must be called under protecting signals.
 */
static void failover(void)
{
	int i;
	int node_id;
	int new_master;
	int nodes[MAX_NUM_BACKENDS];

	pool_debug("failover_handler called");

	memset(nodes, 0, sizeof(int) * MAX_NUM_BACKENDS);

	/*
	 * this could happen in a child process if a signal has been sent
	 * before resetting signal handler
	 */
	if (getpid() != mypid)
	{
		pool_debug("failover_handler: I am not parent");
		kill(pcp_pid, SIGUSR2);
		return;
	}

	/*
	 * processing SIGTERM, SIGINT or SIGQUIT
	 */
	if (exiting)
	{
		pool_debug("failover_handler called while exiting");
		kill(pcp_pid, SIGUSR2);
		return;
	}

	/*
	 * processing fail over or switch over
	 */
	if (switching)
	{
		pool_debug("failover_handler called while switching");
		kill(pcp_pid, SIGUSR2);
		return;
	}

	pool_semaphore_lock(REQUEST_INFO_SEM);

	if (Req_info->kind == CLOSE_IDLE_REQUEST)
	{
		pool_semaphore_unlock(REQUEST_INFO_SEM);
		kill_all_children(SIGUSR1);
		kill(pcp_pid, SIGUSR2);
		return;
	}

	/*
	 * if not in replication mode/master slave mode, we treat this as a
	 * restart request.  otherwise we need to check whether we have
	 * already failed over.
	 */
	pool_debug("failover_handler: starting to select new master node");
	switching = 1;
	node_id = Req_info->node_id[0];

	/* failback request? */
	if (Req_info->kind == NODE_UP_REQUEST)
	{
		if (node_id >= MAX_NUM_BACKENDS ||
			(Req_info->kind == NODE_UP_REQUEST && VALID_BACKEND(node_id)) ||
			(Req_info->kind == NODE_DOWN_REQUEST && !VALID_BACKEND(node_id)))
		{
			pool_semaphore_unlock(REQUEST_INFO_SEM);
			pool_error("failover_handler: invalid node_id %d status:%d MAX_NUM_BACKENDS: %d", node_id,
					   BACKEND_INFO(node_id).backend_status, MAX_NUM_BACKENDS);
			kill(pcp_pid, SIGUSR2);
			switching = 0;
			return;
		}

		pool_log("starting fail back. reconnect host %s(%d)",
				 BACKEND_INFO(node_id).backend_hostname,
				 BACKEND_INFO(node_id).backend_port);
		BACKEND_INFO(node_id).backend_status = CON_CONNECT_WAIT;	/* unset down status */
		trigger_failover_command(node_id, pool_config->failback_command);
	}
	else
	{
		int cnt = 0;

		for (i = 0; i < MAX_NUM_BACKENDS; i++)
		{
			if (Req_info->node_id[i] != -1 &&
				VALID_BACKEND(Req_info->node_id[i]))
			{
				pool_log("starting degeneration. shutdown host %s(%d)",
						 BACKEND_INFO(Req_info->node_id[i]).backend_hostname,
						 BACKEND_INFO(Req_info->node_id[i]).backend_port);


				BACKEND_INFO(Req_info->node_id[i]).backend_status = CON_DOWN;	/* set down status */
				/* save down node */
				nodes[Req_info->node_id[i]] = 1;
				cnt++;
			}
		}

		if (cnt == 0)
		{
			pool_log("failover: no backends are degenerated");
			pool_semaphore_unlock(REQUEST_INFO_SEM);
			kill(pcp_pid, SIGUSR2);
			switching = 0;
			return;
		}
	}

	new_master = get_next_master_node();

	if (new_master == pool_config->backend_desc->num_backends)
	{
		pool_error("failover_handler: no valid DB node found");
	}

/*
 * Before we tried to minimize restarting pgpool to protect existing
 * connections from clients to pgpool children. What we did here was,
 * if children other than master went down, we did not fail over.
 * This is wrong. Think about following scenario. If someone
 * accidentally plugs out the network cable, the TCP/IP stack keeps
 * retrying for long time (typically 2 hours). The only way to stop
 * the retry is restarting the process.  Bottom line is, we need to
 * restart all children in any case.  See pgpool-general list posting
 * "TCP connections are *not* closed when a backend timeout" on Jul 13
 * 2008 for more details.
 */

#ifdef NOT_USED
	else
	{
		if (Req_info->master_node_id == new_master && *InRecovery == 0)
		{
			pool_log("failover_handler: do not restart pgpool. same master node %d was selected", new_master);
			if (Req_info->kind == NODE_UP_REQUEST)
			{
				pool_log("failback done. reconnect host %s(%d)",
						 BACKEND_INFO(node_id).backend_hostname,
						 BACKEND_INFO(node_id).backend_port);
			}
			else
			{
				pool_log("failover done. shutdown host %s(%d)",
						 BACKEND_INFO(node_id).backend_hostname,
						 BACKEND_INFO(node_id).backend_port);
			}

			/* exec failover_command */
			for (i = 0; i < pool_config->backend_desc->num_backends; i++)
			{
				if (nodes[i])
					trigger_failover_command(i, pool_config->failover_command);
			}

			pool_semaphore_unlock(REQUEST_INFO_SEM);
			switching = 0;
			kill(pcp_pid, SIGUSR2);
			switching = 0;
			return;
		}
	}
#endif
	/* kill all children */
	for (i = 0; i < pool_config->num_init_children; i++)
	{
		pid_t pid = pids[i].pid;
		if (pid)
		{
			kill(pid, SIGQUIT);
			pool_debug("failover_handler: kill %d", pid);
		}
	}

	/* exec failover_command */
	for (i = 0; i < pool_config->backend_desc->num_backends; i++)
	{
		if (nodes[i])
			trigger_failover_command(i, pool_config->failover_command);
	}

	pool_log("failover_handler: set new master node: %d", new_master);
	Req_info->master_node_id = new_master;

/* no need to wait since it will be done in reap_handler */
#ifdef NOT_USED
	while (wait(NULL) > 0)
		;

	if (errno != ECHILD)
		pool_error("failover_handler: wait() failed. reason:%s", strerror(errno));
#endif

	memset(Req_info->node_id, -1, sizeof(int) * MAX_NUM_BACKENDS);
	pool_semaphore_unlock(REQUEST_INFO_SEM);

	/* fork the children */
	for (i=0;i<pool_config->num_init_children;i++)
	{
		pids[i].pid = fork_a_child(unix_fd, inet_fd, i);
		pids[i].start_time = time(NULL);
	}

	if (Req_info->kind == NODE_UP_REQUEST)
	{
		pool_log("failback done. reconnect host %s(%d)",
				 BACKEND_INFO(node_id).backend_hostname,
				 BACKEND_INFO(node_id).backend_port);
	}
	else
	{
		pool_log("failover done. shutdown host %s(%d)",
				 BACKEND_INFO(node_id).backend_hostname,
				 BACKEND_INFO(node_id).backend_port);
	}

	switching = 0;

	/* kick wakeup_handler in pcp_child to notice that
	 * failover/failback is done
	 */
	kill(pcp_pid, SIGUSR2);
}
Example #29
/*
 * find connection by user and database
 */
POOL_CONNECTION_POOL *pool_get_cp(char *user, char *database, int protoMajor, int check_socket)
{
#ifdef HAVE_SIGPROCMASK
	sigset_t oldmask;
#else
	int	oldmask;
#endif

	int i, freed = 0;
	ConnectionInfo *info;

	POOL_CONNECTION_POOL *p = pool_connection_pool;

	if (p == NULL)
	{
		pool_error("pool_get_cp: pool_connection_pool is not initialized");
		return NULL;
	}

	POOL_SETMASK2(&BlockSig, &oldmask);

	for (i=0;i<pool_config->max_pool;i++)
	{
		if (MASTER_CONNECTION(p) &&
			MASTER_CONNECTION(p)->sp &&
			MASTER_CONNECTION(p)->sp->major == protoMajor &&
			MASTER_CONNECTION(p)->sp->user != NULL &&
			strcmp(MASTER_CONNECTION(p)->sp->user, user) == 0 &&
			strcmp(MASTER_CONNECTION(p)->sp->database, database) == 0)
		{
			int sock_broken = 0;
			int j;

			/* mark this connection is under use */
			MASTER_CONNECTION(p)->closetime = 0;
			for (j=0;j<NUM_BACKENDS;j++)
			{
				p->info[j].counter++;
			}
			POOL_SETMASK(&oldmask);

			if (check_socket)
			{
				for (j=0;j<NUM_BACKENDS;j++)
				{
					if (!VALID_BACKEND(j))
						continue;

					if  (CONNECTION_SLOT(p, j))
					{
						sock_broken = check_socket_status(CONNECTION(p, j)->fd);
						if (sock_broken < 0)
							break;
					}
					else
					{
						sock_broken = -1;
						break;
					}
				}

				if (sock_broken < 0)
				{
					pool_log("connection closed. retry to create new connection pool.");
					for (j=0;j<NUM_BACKENDS;j++)
					{
						if (!VALID_BACKEND(j) || (CONNECTION_SLOT(p, j) == NULL))
							continue;

						if (!freed)
						{
							pool_free_startup_packet(CONNECTION_SLOT(p, j)->sp);
							freed = 1;
						}

						pool_close(CONNECTION(p, j));
						free(CONNECTION_SLOT(p, j));
					}
					info = p->info;
					memset(p, 0, sizeof(POOL_CONNECTION_POOL));
					p->info = info;
					memset(p->info, 0, sizeof(ConnectionInfo) * MAX_NUM_BACKENDS);
					POOL_SETMASK(&oldmask);
					return NULL;
				}
			}
			POOL_SETMASK(&oldmask);
			pool_index = i;
			return p;
		}
		p++;
	}

	POOL_SETMASK(&oldmask);
	return NULL;
}
Example #30
/*
 * Start online recovery.
 * "recovery_node" is the node to be recovered.
 * Master or primary node is chosen in this function.
 */
int start_recovery(int recovery_node)
{
	int node_id;
	BackendInfo *backend;
	BackendInfo *recovery_backend;
	PGconn *conn;
	int failback_wait_count;
#define FAILBACK_WAIT_MAX_RETRY 5		/* 5 seconds should be enough for failback operation */

	pool_log("starting recovering node %d", recovery_node);

	if (VALID_BACKEND(recovery_node))
	{
		pool_error("start_recovery: backend node %d is alive", recovery_node);
		return 1;
	}

	Req_info->kind = NODE_RECOVERY_REQUEST;

	/* select master/primary node */
	node_id = MASTER_SLAVE ? PRIMARY_NODE_ID : REAL_MASTER_NODE_ID;
	backend = &pool_config->backend_desc->backend_info[node_id];

	/* get node info to be recovered */
	recovery_backend = &pool_config->backend_desc->backend_info[recovery_node];

	conn = connect_backend_libpq(backend);
	if (conn == NULL)
	{
		pool_error("start_recovery: could not connect master node (%d)", node_id);
		return 1;
	}

	/* 1st stage */
	if (REPLICATION)
	{
		if (exec_checkpoint(conn) != 0)
		{
			PQfinish(conn);
			pool_error("start_recovery: CHECKPOINT failed");
			return 1;
		}
		pool_log("CHECKPOINT in the 1st stage done");
	}

	if (exec_recovery(conn, recovery_backend, FIRST_STAGE) != 0)
	{
		PQfinish(conn);
		return 1;
	}

	pool_log("1st stage is done");

	if (REPLICATION)
	{
		pool_log("starting 2nd stage");

		/* 2nd stage */
		*InRecovery = RECOVERY_ONLINE;
		if (pool_config->use_watchdog)
		{
			/* announce start recovery */
			if (WD_OK != wd_start_recovery())
			{
				PQfinish(conn);
				pool_error("start_recovery: timeover for waiting connection closed in the other pgpools");
				return 1;
			}
		}

		if (wait_connection_closed() != 0)
		{
			PQfinish(conn);
			pool_error("start_recovery: timeover for waiting connection closed");
			return 1;
		}

		pool_log("all connections from clients have been closed");

		if (exec_checkpoint(conn) != 0)
		{
			PQfinish(conn);
			pool_error("start_recovery: CHECKPOINT failed");
			return 1;
		}

		pool_log("CHECKPOINT in the 2nd stage done");

		if (exec_recovery(conn, recovery_backend, SECOND_STAGE) != 0)
		{
			PQfinish(conn);
			return 1;
		}
	}

	if (exec_remote_start(conn, recovery_backend) != 0)
	{
		PQfinish(conn);
		pool_error("start_recovery: remote start failed");
		return 1;
	}

	if (check_postmaster_started(recovery_backend))
	{
		PQfinish(conn);
		pool_error("start_recovery: check start failed");
		return 1;
	}

	pool_log("%d node restarted", recovery_node);

	/*
	 * reset failover completion flag.  this is necessary since
	 * previous failover/failback will set the flag to 1.
	 */
	pcp_wakeup_request = 0;

	/* send failback request to pgpool parent */
	send_failback_request(recovery_node);

	/* wait for failback */
	failback_wait_count = 0;
	while (!pcp_wakeup_request)
	{
		struct timeval t = {1, 0};
		/* polling SIGUSR2 signal every 1 sec */
		select(0, NULL, NULL, NULL, &t);
		failback_wait_count++;
		if (failback_wait_count >= FAILBACK_WAIT_MAX_RETRY)
		{
			pool_log("start_recovery: waiting for wake up request is timeout(%d seconds)",
					 FAILBACK_WAIT_MAX_RETRY);
			break;
		}
	}
	pcp_wakeup_request = 0;

	PQfinish(conn);

	pool_log("recovery done");

	return 0;
}