示例#1
0
/* notice backend connection error using SIGUSR1 */
void degenerate_backend_set(int *node_id_set, int count)
{
	pid_t parent = getppid();
	int i;

	if (pool_config->parallel_mode)
	{
		return;
	}

	pool_semaphore_lock(REQUEST_INFO_SEM);
	Req_info->kind = NODE_DOWN_REQUEST;
	for (i = 0; i < count; i++)
	{
		if (node_id_set[i] < 0 || node_id_set[i] >= MAX_NUM_BACKENDS ||
			!VALID_BACKEND(node_id_set[i]))
		{
			pool_log("notice_backend_error: node %d is not valid backend.", i);
			continue;
		}

		pool_log("notice_backend_error: %d fail over request from pid %d", node_id_set[i], getpid());
		Req_info->node_id[i] = node_id_set[i];
	}
	kill(parent, SIGUSR1);
	pool_semaphore_unlock(REQUEST_INFO_SEM);
}
示例#2
0
/*
 * Count up connection counter (from frontend to pgpool)
 * in shared memory
 */
static void connection_count_up(void)
{
#ifdef HAVE_SIGPROCMASK
	sigset_t oldmask;
#else
	int	oldmask;
#endif

	POOL_SETMASK2(&BlockSig, &oldmask);
	pool_semaphore_lock(CONN_COUNTER_SEM);
	Req_info->conn_counter++;
	pool_semaphore_unlock(CONN_COUNTER_SEM);
	POOL_SETMASK(&oldmask);
}
示例#3
0
/*
 * Count down connection counter (from frontend to pgpool)
 * in shared memory
 */
static void connection_count_down(void)
{
#ifdef HAVE_SIGPROCMASK
	sigset_t oldmask;
#else
	int	oldmask;
#endif

	POOL_SETMASK2(&BlockSig, &oldmask);
	pool_semaphore_lock(CONN_COUNTER_SEM);
	/*
	 * Make sure that we do not decrement too much.  If failed to read
	 * a start up packet, or receive cancel request etc.,
	 * connection_count_down() is called and goes back to the
	 * connection accept loop. Problem is, at the very beginning of
	 * the connection accept loop, if we have received a signal, we
	 * call child_exit() which calls connection_count_down() again.
	 */
	if (Req_info->conn_counter > 0)
		Req_info->conn_counter--;
	pool_semaphore_unlock(CONN_COUNTER_SEM);
	POOL_SETMASK(&oldmask);
}
示例#4
0
/*
 * backend connection error, failover/failback request, if possible
 * failover() must be called under protecting signals.
 */
static void failover(void)
{
	int i;
	int node_id;
	int new_master;
	int nodes[MAX_NUM_BACKENDS];

	pool_debug("failover_handler called");

	memset(nodes, 0, sizeof(int) * MAX_NUM_BACKENDS);

	/*
	 * this could happen in a child process if a signal has been sent
	 * before resetting signal handler
	 */
	if (getpid() != mypid)
	{
		pool_debug("failover_handler: I am not parent");
		kill(pcp_pid, SIGUSR2);
		return;
	}

	/*
	 * processing SIGTERM, SIGINT or SIGQUIT
	 */
	if (exiting)
	{
		pool_debug("failover_handler called while exiting");
		kill(pcp_pid, SIGUSR2);
		return;
	}

	/*
	 * processing fail over or switch over
	 */
	if (switching)
	{
		pool_debug("failover_handler called while switching");
		kill(pcp_pid, SIGUSR2);
		return;
	}

	pool_semaphore_lock(REQUEST_INFO_SEM);

	if (Req_info->kind == CLOSE_IDLE_REQUEST)
	{
		pool_semaphore_unlock(REQUEST_INFO_SEM);
		kill_all_children(SIGUSR1);
		kill(pcp_pid, SIGUSR2);
		return;
	}

	/*
	 * if not in replication mode/master slave mode, we treat this a restart request.
	 * otherwise we need to check if we have already failovered.
	 */
	pool_debug("failover_handler: starting to select new master node");
	switching = 1;
	node_id = Req_info->node_id[0];

	/* failback request? */
	if (Req_info->kind == NODE_UP_REQUEST)
	{
		if (node_id >= MAX_NUM_BACKENDS ||
			(Req_info->kind == NODE_UP_REQUEST && VALID_BACKEND(node_id)) ||
			(Req_info->kind == NODE_DOWN_REQUEST && !VALID_BACKEND(node_id)))
		{
			pool_semaphore_unlock(REQUEST_INFO_SEM);
			pool_error("failover_handler: invalid node_id %d status:%d MAX_NUM_BACKENDS: %d", node_id,
					   BACKEND_INFO(node_id).backend_status, MAX_NUM_BACKENDS);
			kill(pcp_pid, SIGUSR2);
			switching = 0;
			return;
		}

		pool_log("starting fail back. reconnect host %s(%d)",
				 BACKEND_INFO(node_id).backend_hostname,
				 BACKEND_INFO(node_id).backend_port);
		BACKEND_INFO(node_id).backend_status = CON_CONNECT_WAIT;	/* unset down status */
		trigger_failover_command(node_id, pool_config->failback_command);
	}
	else
	{
		int cnt = 0;

		for (i = 0; i < MAX_NUM_BACKENDS; i++)
		{
			if (Req_info->node_id[i] != -1 &&
				VALID_BACKEND(Req_info->node_id[i]))
			{
				pool_log("starting degeneration. shutdown host %s(%d)",
						 BACKEND_INFO(Req_info->node_id[i]).backend_hostname,
						 BACKEND_INFO(Req_info->node_id[i]).backend_port);


				BACKEND_INFO(Req_info->node_id[i]).backend_status = CON_DOWN;	/* set down status */
				/* save down node */
				nodes[Req_info->node_id[i]] = 1;
				cnt++;
			}
		}

		if (cnt == 0)
		{
			pool_log("failover: no backends are degenerated");
			pool_semaphore_unlock(REQUEST_INFO_SEM);
			kill(pcp_pid, SIGUSR2);
			switching = 0;
			return;
		}
	}

	new_master = get_next_master_node();

	if (new_master == pool_config->backend_desc->num_backends)
	{
		pool_error("failover_handler: no valid DB node found");
	}

/*
 * Before we tried to minimize restarting pgpool to protect existing
 * connections from clients to pgpool children. What we did here was,
 * if children other than master went down, we did not fail over.
 * This is wrong. Think about following scenario. If someone
 * accidentally plugs out the network cable, the TCP/IP stack keeps
 * retrying for long time (typically 2 hours). The only way to stop
 * the retry is restarting the process.  Bottom line is, we need to
 * restart all children in any case.  See pgpool-general list posting
 * "TCP connections are *not* closed when a backend timeout" on Jul 13
 * 2008 for more details.
 */

#ifdef NOT_USED
	else
	{
		if (Req_info->master_node_id == new_master && *InRecovery == 0)
		{
			pool_log("failover_handler: do not restart pgpool. same master node %d was selected", new_master);
			if (Req_info->kind == NODE_UP_REQUEST)
			{
				pool_log("failback done. reconnect host %s(%d)",
						 BACKEND_INFO(node_id).backend_hostname,
						 BACKEND_INFO(node_id).backend_port);
			}
			else
			{
				pool_log("failover done. shutdown host %s(%d)",
						 BACKEND_INFO(node_id).backend_hostname,
						 BACKEND_INFO(node_id).backend_port);
			}

			/* exec failover_command */
			for (i = 0; i < pool_config->backend_desc->num_backends; i++)
			{
				if (nodes[i])
					trigger_failover_command(i, pool_config->failover_command);
			}

			pool_semaphore_unlock(REQUEST_INFO_SEM);
			switching = 0;
			kill(pcp_pid, SIGUSR2);
			switching = 0;
			return;
		}
	}
#endif
	/* kill all children */
	for (i = 0; i < pool_config->num_init_children; i++)
	{
		pid_t pid = pids[i].pid;
		if (pid)
		{
			kill(pid, SIGQUIT);
			pool_debug("failover_handler: kill %d", pid);
		}
	}

	/* exec failover_command */
	for (i = 0; i < pool_config->backend_desc->num_backends; i++)
	{
		if (nodes[i])
			trigger_failover_command(i, pool_config->failover_command);
	}

	pool_log("failover_handler: set new master node: %d", new_master);
	Req_info->master_node_id = new_master;

/* no need to wait since it will be done in reap_handler */
#ifdef NOT_USED
	while (wait(NULL) > 0)
		;

	if (errno != ECHILD)
		pool_error("failover_handler: wait() failed. reason:%s", strerror(errno));
#endif

	memset(Req_info->node_id, -1, sizeof(int) * MAX_NUM_BACKENDS);
	pool_semaphore_unlock(REQUEST_INFO_SEM);

	/* fork the children */
	for (i=0;i<pool_config->num_init_children;i++)
	{
		pids[i].pid = fork_a_child(unix_fd, inet_fd, i);
		pids[i].start_time = time(NULL);
	}

	if (Req_info->kind == NODE_UP_REQUEST)
	{
		pool_log("failback done. reconnect host %s(%d)",
				 BACKEND_INFO(node_id).backend_hostname,
				 BACKEND_INFO(node_id).backend_port);
	}
	else
	{
		pool_log("failover done. shutdown host %s(%d)",
				 BACKEND_INFO(node_id).backend_hostname,
				 BACKEND_INFO(node_id).backend_port);
	}

	switching = 0;

	/* kick wakeup_handler in pcp_child to notice that
	 * faiover/failback done
	 */
	kill(pcp_pid, SIGUSR2);
}