Ejemplo n.º 1
0
/* Detach a node */
static int pool_detach_node(int node_id, bool gracefully)
{
	if (!gracefully)
	{
		degenerate_backend_set_ex(&node_id, 1, true, false);
		return 0;
	}

	/* Check if the NODE DOWN can be executed on
	 * the given node id.
	 */
	degenerate_backend_set_ex(&node_id, 1, true, true);

	/*
	 * Wait until all frontends exit
	 */
	*InRecovery = RECOVERY_DETACH;	/* This wiil ensure that new incoming
									 * connection requests are blocked */

	if (wait_connection_closed())
	{
		/* wait timed out */
		finish_recovery();
		return -1;
	}

	pcp_worker_wakeup_request = 0;

	/*
	 * Now all frontends have gone. Let's do failover.
	 */
	degenerate_backend_set_ex(&node_id, 1, true, false);

	/*
	 * Wait for failover completed.
	 */

	while (!pcp_worker_wakeup_request)
	{
		struct timeval t = {1, 0};
		select(0, NULL, NULL, NULL, &t);
	}
	pcp_worker_wakeup_request = 0;

	/*
	 * Start to accept incoming connections and send SIGUSR2 to pgpool
	 * parent to distribute SIGUSR2 all pgpool children.
	 */
	finish_recovery();
	
	return 0;
}
Ejemplo n.º 2
0
/* Promote a node */
static int pool_promote_node(int node_id, bool gracefully)
{
	if (!gracefully)
	{
		promote_backend(node_id);	/* send promote request */
		return 0;
	}

	/*
	 * Wait until all frontends exit
	 */
	*InRecovery = RECOVERY_PROMOTE;	/* This wiil ensure that new incoming
									 * connection requests are blocked */

	if (wait_connection_closed())
	{
		/* wait timed out */
		finish_recovery();
		return -1;
	}

	/*
	 * Now all frontends have gone. Let's do failover.
	 */
	promote_backend(node_id);		/* send promote request */

	/*
	 * Wait for failover completed.
	 */
	pcp_worker_wakeup_request = 0;

	while (!pcp_worker_wakeup_request)
	{
		struct timeval t = {1, 0};
		select(0, NULL, NULL, NULL, &t);
	}
	pcp_worker_wakeup_request = 0;

	/*
	 * Start to accept incoming connections and send SIGUSR2 to pgpool
	 * parent to distribute SIGUSR2 all pgpool children.
	 */
	finish_recovery();
	return 0;
}
Ejemplo n.º 3
0
static int
wd_send_response(int sock, WdPacket * recv_pack)
{
	int rtn = WD_NG;
	WdInfo * p, *q;
	WdNodeInfo * node;
	WdLockInfo * lock;
	WdPacket send_packet;
	struct timeval tv;
	char pack_str[WD_MAX_PACKET_STRING];
	int pack_str_len;
	char hash[(MD5_PASSWD_LEN+1)*2];
	bool is_node_packet = false;

	if (recv_pack == NULL)
	{
		return rtn;
	}
	memset(&send_packet, 0, sizeof(WdPacket));
	p = &(recv_pack->wd_body.wd_info);	

	/* auhtentication */
	if (strlen(pool_config->wd_authkey))
	{
		/* calculate hash from packet */
		pack_str_len = wd_packet_to_string(*recv_pack, pack_str, sizeof(pack_str));
		wd_calc_hash(pack_str, pack_str_len, hash);

		if (strcmp(recv_pack->hash, hash))
		{
			pool_log("wd_send_response: watchdog authentication failed");
			rtn = wd_authentication_failed(sock);
			return rtn;
		}
	}

	/* set response packet no */
	switch (recv_pack->packet_no)
	{
		/* add request into the watchdog list */
		case WD_ADD_REQ:
			p = &(recv_pack->wd_body.wd_info);	
			if (wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), p->status) > 0)
			{
				send_packet.packet_no = WD_ADD_ACCEPT;
			}
			else
			{
				send_packet.packet_no = WD_ADD_REJECT;
			}
			memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		/* announce candidacy to be the new master */
		case WD_STAND_FOR_MASTER:
			p = &(recv_pack->wd_body.wd_info);	
			wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), p->status);
			/* check exist master */
			if ((q = wd_is_alive_master()) != NULL)
			{
				/* vote against the candidate */
				send_packet.packet_no = WD_MASTER_EXIST;
				memcpy(&(send_packet.wd_body.wd_info), q, sizeof(WdInfo));
			}
			else
			{
				if (WD_MYSELF->tv.tv_sec <= p->tv.tv_sec )
				{
					memcpy(&tv,&(p->tv),sizeof(struct timeval));
					tv.tv_sec += 1;
					wd_set_myself(&tv, WD_NORMAL);
				}
				/* vote for the candidate */
				send_packet.packet_no = WD_VOTE_YOU;
				memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			}
			break;

		/* announce assumption to be the new master */
		case WD_DECLARE_NEW_MASTER:
			p = &(recv_pack->wd_body.wd_info);	
			wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), p->status);
			if (WD_MYSELF->status == WD_MASTER)
			{
				/* resign master server */
				pool_log("wd_declare_new_master: ifconfig down to resign master server");
				wd_IP_down();
				wd_set_myself(NULL, WD_NORMAL);
			}
			send_packet.packet_no = WD_READY;
			memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		/* annouce to assume lock holder */
		case WD_STAND_FOR_LOCK_HOLDER:
			p = &(recv_pack->wd_body.wd_info);	
			wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), p->status);
			/* only master handles lock holder privilege */
			if (WD_MYSELF->status == WD_MASTER)
			{
				/* if theare are no lock holder yet */
				if (wd_get_lock_holder() != NULL)
				{
					send_packet.packet_no = WD_LOCK_HOLDER_EXIST;
				}
			}
			memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		case WD_DECLARE_LOCK_HOLDER:
			p = &(recv_pack->wd_body.wd_info);	
			wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), p->status);
			wd_set_lock_holder(p, true);
			send_packet.packet_no = WD_READY;
			memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		/* annouce to resigne lock holder */
		case WD_RESIGN_LOCK_HOLDER:
			p = &(recv_pack->wd_body.wd_info);	
			wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), p->status);
			wd_set_lock_holder(p, false);
			send_packet.packet_no = WD_READY;
			memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		case WD_START_INTERLOCK:
			p = &(recv_pack->wd_body.wd_info);	
			wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), p->status);
			wd_set_interlocking(p, true);
			break;

		case WD_END_INTERLOCK:
			p = &(recv_pack->wd_body.wd_info);	
			wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), p->status);
			wd_set_interlocking(p, false);
			break;

		/* announce that server is down */
		case WD_SERVER_DOWN:
			p = &(recv_pack->wd_body.wd_info);	
			wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), WD_DOWN);
			send_packet.packet_no = WD_READY;
			memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			if (wd_am_I_oldest() == WD_OK && WD_MYSELF->status != WD_MASTER)
			{
				wd_escalation();
			}
			break;

		/* announce start online recovery */
		case WD_START_RECOVERY:
			if (*InRecovery != RECOVERY_INIT)
			{
				send_packet.packet_no = WD_NODE_FAILED;
			}
			else
			{
				send_packet.packet_no = WD_NODE_READY;
				*InRecovery = RECOVERY_ONLINE;
				if (wait_connection_closed() != 0)
				{
					send_packet.packet_no = WD_NODE_FAILED;
				}
			}
			break;
		case WD_END_RECOVERY:
			send_packet.packet_no = WD_NODE_READY;
			*InRecovery = RECOVERY_INIT;
			kill(wd_ppid, SIGUSR2);
			break;
		case WD_FAILBACK_REQUEST:
			node = &(recv_pack->wd_body.wd_node_info);	
			wd_set_node_mask(WD_FAILBACK_REQUEST,node->node_id_set,node->node_num);
			is_node_packet = true;
			send_packet.packet_no = WD_NODE_READY;
			break;
		case WD_DEGENERATE_BACKEND:
			node = &(recv_pack->wd_body.wd_node_info);	
			wd_set_node_mask(WD_DEGENERATE_BACKEND,node->node_id_set, node->node_num);
			is_node_packet = true;
			send_packet.packet_no = WD_NODE_READY;
			break;
		case WD_PROMOTE_BACKEND:
			node = &(recv_pack->wd_body.wd_node_info);	
			wd_set_node_mask(WD_PROMOTE_BACKEND,node->node_id_set, node->node_num);
			is_node_packet = true;
			send_packet.packet_no = WD_NODE_READY;
			break;

		case WD_UNLOCK_REQUEST:
			lock = &(recv_pack->wd_body.wd_lock_info);	
			wd_set_lock(lock->lock_id, false);
			send_packet.packet_no = WD_LOCK_READY;
			break;

		default:
			send_packet.packet_no = WD_INVALID;
			memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;
	}

	/* send response packet */
	rtn = wd_send_packet(sock, &send_packet);

	/* send node request signal.
	 * wd_node_request_singnal() uses a semaphore lock internally, so should be
	 * called after sending a response pakcet to prevent dead lock.
	 */
	if (is_node_packet)
		wd_node_request_signal(recv_pack->packet_no, node);

	return rtn;
}
Ejemplo n.º 4
0
/*
 * Start online recovery.
 * "recovery_node" is the node to be recovered.
 * Master or primary node is chosen in this function.
 */
void start_recovery(int recovery_node)
{
	int node_id;
	BackendInfo *backend;
	BackendInfo *recovery_backend;
	PGconn *conn;
	int failback_wait_count;
#define FAILBACK_WAIT_MAX_RETRY 5		/* 5 seconds should be enough for failback operation */

	ereport(LOG,
		(errmsg("starting recovering node %d", recovery_node)));

	if ( (recovery_node < 0) || (recovery_node >= pool_config->backend_desc->num_backends) )
		ereport(ERROR,
				(errmsg("node recovery failed, node id: %d is not valid", recovery_node)));

	if (VALID_BACKEND(recovery_node))
		ereport(ERROR,
				(errmsg("node recovery failed, node id: %d is alive", recovery_node)));

	/* select master/primary node */
	node_id = MASTER_SLAVE ? PRIMARY_NODE_ID : REAL_MASTER_NODE_ID;
	backend = &pool_config->backend_desc->backend_info[node_id];

	/* get node info to be recovered */
	recovery_backend = &pool_config->backend_desc->backend_info[recovery_node];

	conn = connect_backend_libpq(backend);
	if (conn == NULL)
		ereport(ERROR,
				(errmsg("node recovery failed, unable to connect to master node: %d ", node_id)));

	PG_TRY();
	{
		/* 1st stage */
		if (REPLICATION)
		{
			exec_checkpoint(conn);
			ereport(LOG,
				(errmsg("node recovery, CHECKPOINT in the 1st stage done")));
		}

		exec_recovery(conn, backend, recovery_backend, FIRST_STAGE);

		ereport(LOG,
			(errmsg("node recovery, 1st stage is done")));

		if (REPLICATION)
		{
			ereport(LOG,
				(errmsg("node recovery, starting 2nd stage")));

			/* 2nd stage */
			*InRecovery = RECOVERY_ONLINE;
			if (pool_config->use_watchdog)
			{
				/* announce start recovery */
				if (WD_OK != wd_start_recovery())
					ereport(ERROR,
							(errmsg("node recovery failed, failed to send start recovery packet")));
			}

			if (wait_connection_closed() != 0)
				ereport(ERROR,
						(errmsg("node recovery failed, waiting connection closed in the other pgpools timeout")));

			ereport(LOG,
				(errmsg("node recovery, all connections from clients have been closed")));

			exec_checkpoint(conn);

			ereport(LOG,
				(errmsg("node recovery"),
					 errdetail("CHECKPOINT in the 2nd stage done")));

			exec_recovery(conn, backend, recovery_backend, SECOND_STAGE);
		}

		exec_remote_start(conn, recovery_backend);

		check_postmaster_started(recovery_backend);

		ereport(LOG,
			(errmsg("node recovery, node: %d restarted", recovery_node)));

		/*
		 * reset failover completion flag.  this is necessary since
		 * previous failover/failback will set the flag to 1.
		 */
		pcp_wakeup_request = 0;

		/* send failback request to pgpool parent */
		send_failback_request(recovery_node);

		/* wait for failback */
		failback_wait_count = 0;
		while (!pcp_wakeup_request)
		{
			struct timeval t = {1, 0};
			/* polling SIGUSR2 signal every 1 sec */
			select(0, NULL, NULL, NULL, &t);
			failback_wait_count++;
			if (failback_wait_count >= FAILBACK_WAIT_MAX_RETRY)
			{
				ereport(LOG,
					(errmsg("node recovery"),
						errdetail("waiting for wake up request is timeout(%d seconds)",
							   FAILBACK_WAIT_MAX_RETRY)));

				break;
			}
		}
		pcp_wakeup_request = 0;
	}
	PG_CATCH();
	{
		PQfinish(conn);
		PG_RE_THROW();
	}
	PG_END_TRY();

	PQfinish(conn);

	ereport(LOG,
			(errmsg("recovery done")));
}
Ejemplo n.º 5
0
/*
 * Start online recovery.
 * "recovery_node" is the node to be recovered.
 * Master or primary node is chosen in this function.
 */
int start_recovery(int recovery_node)
{
	int node_id;
	BackendInfo *backend;
	BackendInfo *recovery_backend;
	PGconn *conn;
	int failback_wait_count;
#define FAILBACK_WAIT_MAX_RETRY 5		/* 5 seconds should be enough for failback operation */

	pool_log("starting recovering node %d", recovery_node);

	if (VALID_BACKEND(recovery_node))
	{
		pool_error("start_recovery: backend node %d is alive", recovery_node);
		return 1;
	}

	Req_info->kind = NODE_RECOVERY_REQUEST;

	/* select master/primary node */
	node_id = MASTER_SLAVE ? PRIMARY_NODE_ID : REAL_MASTER_NODE_ID;
	backend = &pool_config->backend_desc->backend_info[node_id];

	/* get node info to be recovered */
	recovery_backend = &pool_config->backend_desc->backend_info[recovery_node];

	conn = connect_backend_libpq(backend);
	if (conn == NULL)
	{
		pool_error("start_recovery: could not connect master node (%d)", node_id);
		return 1;
	}

	/* 1st stage */
	if (REPLICATION)
	{
		if (exec_checkpoint(conn) != 0)
		{
			PQfinish(conn);
			pool_error("start_recovery: CHECKPOINT failed");
			return 1;
		}
		pool_log("CHECKPOINT in the 1st stage done");
	}

	if (exec_recovery(conn, recovery_backend, FIRST_STAGE) != 0)
	{
		PQfinish(conn);
		return 1;
	}

	pool_log("1st stage is done");

	if (REPLICATION)
	{
		pool_log("starting 2nd stage");

		/* 2nd stage */
		*InRecovery = RECOVERY_ONLINE;
		if (pool_config->use_watchdog)
		{
			/* announce start recovery */
			if (WD_OK != wd_start_recovery())
			{
				PQfinish(conn);
				pool_error("start_recovery: timeover for waiting connection closed in the other pgpools");
				return 1;
			}
		}

		if (wait_connection_closed() != 0)
		{
			PQfinish(conn);
			pool_error("start_recovery: timeover for waiting connection closed");
			return 1;
		}

		pool_log("all connections from clients have been closed");

		if (exec_checkpoint(conn) != 0)
		{
			PQfinish(conn);
			pool_error("start_recovery: CHECKPOINT failed");
			return 1;
		}

		pool_log("CHECKPOINT in the 2nd stage done");

		if (exec_recovery(conn, recovery_backend, SECOND_STAGE) != 0)
		{
			PQfinish(conn);
			return 1;
		}
	}

	if (exec_remote_start(conn, recovery_backend) != 0)
	{
		PQfinish(conn);
		pool_error("start_recovery: remote start failed");
		return 1;
	}

	if (check_postmaster_started(recovery_backend))
	{
		PQfinish(conn);
		pool_error("start_recovery: check start failed");
		return 1;
	}

	pool_log("%d node restarted", recovery_node);

	/*
	 * reset failover completion flag.  this is necessary since
	 * previous failover/failback will set the flag to 1.
	 */
	pcp_wakeup_request = 0;

	/* send failback request to pgpool parent */
	send_failback_request(recovery_node);

	/* wait for failback */
	failback_wait_count = 0;
	while (!pcp_wakeup_request)
	{
		struct timeval t = {1, 0};
		/* polling SIGUSR2 signal every 1 sec */
		select(0, NULL, NULL, NULL, &t);
		failback_wait_count++;
		if (failback_wait_count >= FAILBACK_WAIT_MAX_RETRY)
		{
			pool_log("start_recovery: waiting for wake up request is timeout(%d seconds)",
					 FAILBACK_WAIT_MAX_RETRY);
			break;
		}
	}
	pcp_wakeup_request = 0;

	PQfinish(conn);

	pool_log("recovery done");

	return 0;
}