Ejemplo n.º 1
0
/*
 * wd_lifecheck - run one round of the watchdog life check.
 *
 * Returns WD_NG when this node is already marked WD_DOWN, or when trusted
 * servers are configured and wd_is_upper_ok() does not report WD_OK for
 * them (in which case this node is also switched to WD_DOWN and the other
 * watchdogs are notified).  Returns WD_OK in every other case.
 */
int
wd_lifecheck(void)
{
	struct timeval started_at;

	/* Nothing to check once this node has been declared down. */
	if (WD_MYSELF->status == WD_DOWN)
	{
		ereport(NOTICE,
				(errmsg("watchdog lifecheck, watchdog status is DOWN. You need to restart pgpool")));
		return WD_NG;
	}

	/* Timestamp of this round; handed to wd_set_myself() on failure. */
	gettimeofday(&started_at, NULL);

	/*
	 * Upstream connectivity: only evaluated when trusted_servers is a
	 * non-empty string.
	 */
	if (strlen(pool_config->trusted_servers) != 0 &&
		wd_is_upper_ok(pool_config->trusted_servers) != WD_OK)
	{
		ereport(WARNING,
				(errmsg("watchdog lifecheck, failed to connect to any trusted servers")));

		/* A master with a delegate IP configured releases it first. */
		if (WD_MYSELF->status == WD_MASTER &&
			strlen(pool_config->delegate_IP) != 0)
			wd_IP_down();

		wd_set_myself(&started_at, WD_DOWN);
		wd_notice_server_down();

		return WD_NG;
	}

	/*
	 * The pgpool status check is skipped while a recovery is in progress,
	 * i.e. whenever *InRecovery is anything other than RECOVERY_INIT.
	 */
	if (*InRecovery == RECOVERY_INIT)
		check_pgpool_status();

	return WD_OK;
}
Ejemplo n.º 2
0
/*
 * wd_send_response - act on one received watchdog packet and answer it.
 *
 * sock:      socket the reply is written to.
 * recv_pack: packet received from a peer watchdog; may be NULL.
 *
 * Returns WD_NG when recv_pack is NULL, the result of
 * wd_authentication_failed() when an auth key is configured and the packet
 * hash does not match, and otherwise the result of wd_send_packet().
 *
 * The reply packet starts out zero-filled; a request type that does not
 * assign packet_no (or the wd_info body) therefore sends zeroes there.
 */
static int
wd_send_response(int sock, WdPacket * recv_pack)
{
	WdPacket reply;
	WdInfo *sender;
	WdInfo *master;
	WdNodeInfo *node = NULL;
	WdLockInfo *lock;
	struct timeval tv;
	char pack_str[WD_MAX_PACKET_STRING];
	int pack_str_len;
	char hash[(MD5_PASSWD_LEN+1)*2];
	bool is_node_packet = false;
	int rtn;

	if (recv_pack == NULL)
		return WD_NG;

	memset(&reply, 0, sizeof(WdPacket));

	/* Watchdog info carried in the request; used by most request types. */
	sender = &(recv_pack->wd_body.wd_info);

	/* authentication: only enforced when wd_authkey is a non-empty string */
	if (strlen(pool_config->wd_authkey))
	{
		/* recompute the hash from the packet contents and compare */
		pack_str_len = wd_packet_to_string(*recv_pack, pack_str, sizeof(pack_str));
		wd_calc_hash(pack_str, pack_str_len, hash);

		if (strcmp(recv_pack->hash, hash) != 0)
		{
			pool_log("wd_send_response: watchdog authentication failed");
			return wd_authentication_failed(sock);
		}
	}

	/* choose the reply according to the request type */
	switch (recv_pack->packet_no)
	{
		/* request to be added to the watchdog list */
		case WD_ADD_REQ:
			reply.packet_no =
				(wd_set_wd_list(sender->hostname, sender->pgpool_port, sender->wd_port,
								sender->delegate_ip, &(sender->tv), sender->status) > 0)
				? WD_ADD_ACCEPT
				: WD_ADD_REJECT;
			memcpy(&(reply.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		/* sender announces its candidacy to be the new master */
		case WD_STAND_FOR_MASTER:
			wd_set_wd_list(sender->hostname, sender->pgpool_port, sender->wd_port,
						   sender->delegate_ip, &(sender->tv), sender->status);
			master = wd_is_alive_master();
			if (master == NULL)
			{
				/*
				 * No live master: vote for the candidate.  If our own
				 * timestamp is not later than the candidate's, first move
				 * it to one second after the candidate's.
				 */
				if (WD_MYSELF->tv.tv_sec <= sender->tv.tv_sec)
				{
					memcpy(&tv, &(sender->tv), sizeof(struct timeval));
					tv.tv_sec += 1;
					wd_set_myself(&tv, WD_NORMAL);
				}
				reply.packet_no = WD_VOTE_YOU;
				memcpy(&(reply.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			}
			else
			{
				/* A master is alive: vote against and report that master. */
				reply.packet_no = WD_MASTER_EXIST;
				memcpy(&(reply.wd_body.wd_info), master, sizeof(WdInfo));
			}
			break;

		/* sender announces that it has become the new master */
		case WD_DECLARE_NEW_MASTER:
			wd_set_wd_list(sender->hostname, sender->pgpool_port, sender->wd_port,
						   sender->delegate_ip, &(sender->tv), sender->status);
			if (WD_MYSELF->status == WD_MASTER)
			{
				/* step down: release the IP and go back to normal status */
				pool_log("wd_declare_new_master: ifconfig down to resign master server");
				wd_IP_down();
				wd_set_myself(NULL, WD_NORMAL);
			}
			reply.packet_no = WD_READY;
			memcpy(&(reply.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		/* sender announces its candidacy to be the lock holder */
		case WD_STAND_FOR_LOCK_HOLDER:
			wd_set_wd_list(sender->hostname, sender->pgpool_port, sender->wd_port,
						   sender->delegate_ip, &(sender->tv), sender->status);
			/*
			 * Only a master answers about the lock holder, and only to
			 * report that wd_get_lock_holder() already returns one.  In
			 * every other case packet_no is left at its zeroed value.
			 */
			if (WD_MYSELF->status == WD_MASTER && wd_get_lock_holder() != NULL)
				reply.packet_no = WD_LOCK_HOLDER_EXIST;
			memcpy(&(reply.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		/* sender declares (true) or resigns (false) the lock holder role */
		case WD_DECLARE_LOCK_HOLDER:
		case WD_RESIGN_LOCK_HOLDER:
			wd_set_wd_list(sender->hostname, sender->pgpool_port, sender->wd_port,
						   sender->delegate_ip, &(sender->tv), sender->status);
			wd_set_lock_holder(sender, recv_pack->packet_no == WD_DECLARE_LOCK_HOLDER);
			reply.packet_no = WD_READY;
			memcpy(&(reply.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		/*
		 * sender starts (true) or ends (false) interlocking; neither
		 * packet_no nor the reply body is filled in for these requests
		 */
		case WD_START_INTERLOCK:
		case WD_END_INTERLOCK:
			wd_set_wd_list(sender->hostname, sender->pgpool_port, sender->wd_port,
						   sender->delegate_ip, &(sender->tv), sender->status);
			wd_set_interlocking(sender, recv_pack->packet_no == WD_START_INTERLOCK);
			break;

		/* sender announces that it is down */
		case WD_SERVER_DOWN:
			/* recorded as WD_DOWN regardless of the status in the packet */
			wd_set_wd_list(sender->hostname, sender->pgpool_port, sender->wd_port,
						   sender->delegate_ip, &(sender->tv), WD_DOWN);
			reply.packet_no = WD_READY;
			/* the reply body is captured before any escalation below */
			memcpy(&(reply.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			if (wd_am_I_oldest() == WD_OK && WD_MYSELF->status != WD_MASTER)
				wd_escalation();
			break;

		/* sender announces the start of online recovery */
		case WD_START_RECOVERY:
			if (*InRecovery == RECOVERY_INIT)
			{
				*InRecovery = RECOVERY_ONLINE;
				reply.packet_no = (wait_connection_closed() == 0)
					? WD_NODE_READY
					: WD_NODE_FAILED;
			}
			else
			{
				/* a recovery is already in progress here */
				reply.packet_no = WD_NODE_FAILED;
			}
			break;

		/* sender announces the end of online recovery */
		case WD_END_RECOVERY:
			reply.packet_no = WD_NODE_READY;
			*InRecovery = RECOVERY_INIT;
			kill(wd_ppid, SIGUSR2);
			break;

		/*
		 * backend node requests: record the node mask under the request's
		 * own packet number and defer the signal until the reply is sent
		 */
		case WD_FAILBACK_REQUEST:
		case WD_DEGENERATE_BACKEND:
		case WD_PROMOTE_BACKEND:
			node = &(recv_pack->wd_body.wd_node_info);
			wd_set_node_mask(recv_pack->packet_no, node->node_id_set, node->node_num);
			is_node_packet = true;
			reply.packet_no = WD_NODE_READY;
			break;

		case WD_UNLOCK_REQUEST:
			lock = &(recv_pack->wd_body.wd_lock_info);
			wd_set_lock(lock->lock_id, false);
			reply.packet_no = WD_LOCK_READY;
			break;

		default:
			reply.packet_no = WD_INVALID;
			memcpy(&(reply.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;
	}

	/* send the response packet */
	rtn = wd_send_packet(sock, &reply);

	/*
	 * Send the node request signal.  Per the original note on this code,
	 * wd_node_request_signal() uses a semaphore lock internally, so it must
	 * be called only after the response packet has been sent, to prevent a
	 * deadlock.
	 */
	if (is_node_packet)
		wd_node_request_signal(recv_pack->packet_no, node);

	return rtn;
}
Ejemplo n.º 3
0
static void
check_pgpool_status_by_query(void)
{
	WdInfo * p = WD_List;
	struct timeval tv;
	pthread_attr_t attr;
	pthread_t thread[MAX_WATCHDOG_NUM];
	WdPgpoolThreadArg thread_arg[MAX_WATCHDOG_NUM];
	int rc;
	int i,cnt;

	/* set startup time */
	gettimeofday(&tv, NULL);

	/* thread init */
	pthread_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

	/* send queries to all pgpools using threads */
	cnt = 0;
	while (p->status != WD_END)
	{
		if (p->status != WD_DOWN)
		{
			thread_arg[cnt].conn = create_conn(p->hostname, p->pgpool_port);
			rc = watchdog_thread_create(&thread[cnt], &attr, thread_ping_pgpool, (void*)&thread_arg[cnt]);
		}
		p ++;
		cnt ++;
		if (cnt >= MAX_WATCHDOG_NUM)
		{
			ereport(WARNING,
					(errmsg("checking pgpool status by query, pgpool num is out of range:%d",cnt)));
			break;
		}
	}
	pthread_attr_destroy(&attr);

	/* check results of queries */
	p = WD_List;
	for (i = 0; i < cnt; )
	{
		int result;

		ereport(DEBUG1,
				(errmsg("checking pgpool status by query"),
					errdetail("checking pgpool %d (%s:%d)",
						   i, p->hostname, p->pgpool_port)));

		if (p->status == WD_DOWN)
		{
			ereport(LOG,
				(errmsg("checking pgpool status by query"),
					errdetail("pgpool %d (%s:%d) is in down status",
						   i, p->hostname, p->pgpool_port)));
			i++;
			p++;
			continue;
		}
		else
		{
			rc = pthread_join(thread[i], (void **)&result);
			if ((rc != 0) && (errno == EINTR))
			{
				usleep(100);
				continue;
			}
		}

		if (result == WD_OK)
		{
			ereport(DEBUG1,
				(errmsg("checking pgpool status by query"),
					 errdetail("WD_OK: status: %d", p->status)));

			/* life point init */
			p->life = pool_config->wd_life_point;
		}
		else
		{
			ereport(DEBUG1,
				(errmsg("checking pgpool status by query"),
					 errdetail("NG; status: %d life:%d", p->status, p->life)));
			if (p->life > 0)
			{
				p->life --;
			}

			/* pgpool goes down */
			if (p->life <= 0)
			{
				ereport(LOG,
					(errmsg("checking pgpool status by query"),
						errdetail("lifecheck failed %d times. pgpool %d (%s:%d) seems not to be working",
								   pool_config->wd_life_point, i, p->hostname, p->pgpool_port)));

				/* It's me! */
				if ((i == 0) &&
					(WD_MYSELF->status != WD_DOWN))
				{
					wd_set_myself(&tv, WD_DOWN);
					wd_notice_server_down();
				}

				/* It's other pgpool */
				else if (p->status != WD_DOWN)
					pgpool_down(p);
			}
		}
		i++;
		p++;
	}
}
Ejemplo n.º 4
0
/*
 * check_pgpool_status_by_hb - life check of every entry in WD_List based on
 * heartbeats.
 *
 * For this node's own entry the check is whether the parent process is
 * still alive (is_parent_alive()); if it is not and the node is not already
 * WD_DOWN, the node marks itself down and notifies the others.  For every
 * other entry that is not already WD_DOWN, wd_check_heartbeat() decides; a
 * WD_NG verdict leads to pgpool_down().  At most MAX_WATCHDOG_NUM entries
 * are visited.
 */
static void
check_pgpool_status_by_hb(void)
{
	struct timeval now;
	WdInfo *node;
	int idx = 0;

	/* timestamp handed to wd_set_myself() if this node goes down */
	gettimeofday(&now, NULL);

	for (node = WD_List; node->status != WD_END; node++)
	{
		ereport(DEBUG1,
			(errmsg("watchdog life checking by heartbeat"),
				errdetail("checking pgpool %d (%s:%d)",
					   idx, node->hostname, node->pgpool_port)));

		if (node != WD_MYSELF)
		{
			/* another pgpool: judge it by its latest heartbeat */
			if (node->status == WD_DOWN)
			{
				ereport(LOG,
					(errmsg("checking pgpool status by heartbeat"),
						 errdetail("pgpool: %d at \"%s:%d\" status is down",
								   idx, node->hostname, node->pgpool_port)));
			}
			else if (wd_check_heartbeat(node) != WD_NG)
			{
				ereport(DEBUG1,
					(errmsg("checking pgpool status by heartbeat"),
						 errdetail("OK; status %d", node->status)));
			}
			else
			{
				ereport(DEBUG1,
						(errmsg("checking pgpool status by heartbeat"),
						 errdetail("NG; status %d", node->status)));

				ereport(LOG,
					(errmsg("checking pgpool status by heartbeat"),
						 errdetail("lifecheck failed. pgpool: %d at \"%s:%d\" seems not to be working",
								   idx, node->hostname, node->pgpool_port)));

				/* status is re-read here before declaring the node down */
				if (node->status != WD_DOWN)
					pgpool_down(node);
			}
		}
		else if (is_parent_alive() == WD_NG && WD_MYSELF->status != WD_DOWN)
		{
			/* our own entry and the parent is gone: we are an orphan */
			ereport(LOG,
				(errmsg("checking pgpool status by heartbeat"),
					errdetail("lifecheck failed. pgpool %d (%s:%d) seems not to be working",
						   idx, node->hostname, node->pgpool_port)));

			wd_set_myself(&now, WD_DOWN);
			wd_notice_server_down();
		}
		else
		{
			/* our own entry; otherwise the parent takes care of children */
			ereport(DEBUG1,
				(errmsg("watchdog life checking by heartbeat"),
					 errdetail("OK; status %d", node->status)));
		}

		/* never walk past MAX_WATCHDOG_NUM entries */
		if (++idx >= MAX_WATCHDOG_NUM)
		{
			ereport(WARNING,
					(errmsg("checking pgpool status by heartbeat, pgpool num is out of range:%d",idx)));
			break;
		}
	}
}
Ejemplo n.º 5
0
/*
 * check_pgpool_status_by_hb - heartbeat-based life check over WD_List.
 *
 * This node's own entry is judged by is_parent_alive(): when the parent is
 * gone and the node is not yet WD_DOWN it marks itself down and notifies
 * the other watchdogs.  Every other entry that is not already WD_DOWN is
 * judged by wd_check_heartbeat(), and a WD_NG verdict results in
 * pgpool_down().  The walk stops after MAX_WATCHDOG_NUM entries.
 */
static void
check_pgpool_status_by_hb(void)
{
	WdInfo *wd;
	struct timeval now;
	int n = 0;

	/* timestamp passed to wd_set_myself() if this node goes down */
	gettimeofday(&now, NULL);

	for (wd = WD_List; wd->status != WD_END; wd++)
	{
		pool_debug("check_pgpool_status_by_hb: checking pgpool %d (%s:%d)",
				   n, wd->hostname, wd->pgpool_port);

		if (wd == WD_MYSELF)
		{
			/* our own entry */
			if (is_parent_alive() != WD_NG || WD_MYSELF->status == WD_DOWN)
			{
				/* the parent would take care of its children */
				pool_debug("check_pgpool_status_by_hb: OK; status %d", wd->status);
			}
			else
			{
				/* the parent is dead, so this process is an orphan */
				pool_debug("check_pgpool_status_by_hb: NG; the main pgpool process does't exist.");
				pool_log("check_pgpool_status_by_hb: lifecheck failed. pgpool %d (%s:%d) seems not to be working",
						 n, wd->hostname, wd->pgpool_port);
				wd_set_myself(&now, WD_DOWN);
				wd_notice_server_down();
			}
		}
		else if (wd->status == WD_DOWN)
		{
			/* another pgpool that is already known to be down */
			pool_log("check_pgpool_status_by_hb: pgpool %d (%s:%d) is in down status",
					 n, wd->hostname, wd->pgpool_port);
		}
		else if (wd_check_heartbeat(wd) != WD_NG)
		{
			/* another pgpool whose latest heartbeat is fine */
			pool_debug("check_pgpool_status_by_hb: OK; status %d", wd->status);
		}
		else
		{
			/* another pgpool whose heartbeat check failed */
			pool_debug("check_pgpool_status_by_hb: NG; status %d", wd->status);

			pool_log("check_pgpool_status_by_hb: lifecheck failed. pgpool %d (%s:%d) seems not to be working",
					 n, wd->hostname, wd->pgpool_port);

			/* status is re-read here before declaring the node down */
			if (wd->status != WD_DOWN)
				pgpool_down(wd);
		}

		/* never walk past MAX_WATCHDOG_NUM entries */
		n++;
		if (n >= MAX_WATCHDOG_NUM)
		{
			pool_error("check_pgpool_status_by_hb: pgpool num is out of range(%d)",n);
			break;
		}
	}
}