Ejemplo n.º 1
0
/* notice backend connection error using SIGUSR1 */
void degenerate_backend_set(int *node_id_set, int count)
{
	pid_t parent = getppid();
	int i;

	if (pool_config->parallel_mode)
	{
		return;
	}

	pool_semaphore_lock(REQUEST_INFO_SEM);
	Req_info->kind = NODE_DOWN_REQUEST;
	for (i = 0; i < count; i++)
	{
		if (node_id_set[i] < 0 || node_id_set[i] >= MAX_NUM_BACKENDS ||
			!VALID_BACKEND(node_id_set[i]))
		{
			pool_log("notice_backend_error: node %d is not valid backend.", i);
			continue;
		}

		pool_log("notice_backend_error: %d fail over request from pid %d", node_id_set[i], getpid());
		Req_info->node_id[i] = node_id_set[i];
	}
	kill(parent, SIGUSR1);
	pool_semaphore_unlock(REQUEST_INFO_SEM);
}
Ejemplo n.º 2
0
/* handle other pgpool's down */
static int
pgpool_down(WdInfo * pool)
{
	int rtn = WD_OK;
	WD_STATUS prev_status;

	pool_log("pgpool_down: %s:%d is going down",
	         pool->hostname, pool->pgpool_port);

	prev_status = pool->status;
	pool->status = WD_DOWN;

	/* the active pgpool goes down and I'm sandby pgpool */
	if (prev_status == WD_MASTER && WD_MYSELF->status == WD_NORMAL)
	{
		if (wd_am_I_oldest() == WD_OK)
		{
			pool_log("pgpool_down: I'm oldest so standing for master");
			/* stand for master */
			rtn = wd_stand_for_master();
			if (rtn == WD_OK)
			{
				/* win */
				wd_escalation();
			}
			else
			{
				/* rejected by others */
				pool->status = prev_status;
			}
		}
	}

	return rtn;
}
Ejemplo n.º 3
0
static RETSIGTYPE exit_handler(int sig)
{
	int i;

	POOL_SETMASK(&AuthBlockSig);

	/*
	 * this could happen in a child process if a signal has been sent
	 * before resetting signal handler
	 */
	if (getpid() != mypid)
	{
		pool_debug("exit_handler: I am not parent");
		POOL_SETMASK(&UnBlockSig);
		pool_shmem_exit(0);
		exit(0);
	}

	if (sig == SIGTERM)
		pool_log("received smart shutdown request");
	else if (sig == SIGINT)
		pool_log("received fast shutdown request");
	else if (sig == SIGQUIT)
		pool_log("received immediate shutdown request");
	else
	{
		pool_error("exit_handler: unknown signal received %d", sig);
		POOL_SETMASK(&UnBlockSig);
		return;
	}

	exiting = 1;

	for (i = 0; i < pool_config->num_init_children; i++)
	{
		pid_t pid = pids[i].pid;
		if (pid)
		{
			kill(pid, sig);
		}
	}

	kill(pcp_pid, sig);

	POOL_SETMASK(&UnBlockSig);

	while (wait(NULL) > 0)
		;

	if (errno != ECHILD)
		pool_error("wait() failed. reason:%s", strerror(errno));

	pids = NULL;
	myexit(0);
}
Ejemplo n.º 4
0
/*
* Read the status file
*/
static int read_status_file(void)
{
	FILE *fd;
	char fnamebuf[POOLMAXPATHLEN];
	int i;

	snprintf(fnamebuf, sizeof(fnamebuf), "%s/%s", pool_config->logdir, STATUS_FILE_NAME);
	fd = fopen(fnamebuf, "r");
	if (!fd)
	{
		pool_log("Backend status file %s does not exist", fnamebuf);
		return -1;
	}
	if (fread(&backend_rec, 1, sizeof(backend_rec), fd) <= 0)
	{
		pool_error("Could not read backend status file as %s. reason: %s",
				   fnamebuf, strerror(errno));
		fclose(fd);
		return -1;
	}
	fclose(fd);

	for (i=0;i< pool_config->backend_desc->num_backends;i++)
	{
		if (backend_rec.status[i] == CON_DOWN)
			BACKEND_INFO(i).backend_status = CON_DOWN;
		else
			BACKEND_INFO(i).backend_status = CON_CONNECT_WAIT;
	}

	return 0;
}
Ejemplo n.º 5
0
/*
 * Do house keeping works when pgpool child process exits
 */
void child_exit(int code)
{
	if (getpid() == mypid)
	{
		pool_log("child_exit: called from pgpool main. ignored.");
		return;
	}

	/* count down global connection counter */
	if (accepted)
		connection_count_down();

	/* prepare to shutdown connections to system db */
	if(pool_config->parallel_mode || pool_config->enable_query_cache)
	{
		if (system_db_info->pgconn)
			pool_close_libpq_connection();
		if (pool_system_db_connection())
			pool_close(pool_system_db_connection()->con);
	}

	if (pool_config->memory_cache_enabled && !pool_is_shmem_cache())
	{
		memcached_disconnect();
	}

	/* let backend know now we are exiting */
	if (pool_connection_pool)
		send_frontend_exits();

	exit(code);
}
Ejemplo n.º 6
0
/*
* find connection by user and database
*/
POOL_CONNECTION_POOL *pool_get_cp(char *user, char *database, int protoMajor, int check_socket)
{
#ifdef HAVE_SIGPROCMASK
	sigset_t oldmask;
#else
	int	oldmask;
#endif

	int i;

	POOL_CONNECTION_POOL *p = pool_connection_pool;

	if (p == NULL)
	{
		pool_error("pool_get_cp: pool_connection_pool is not initialized");
		return NULL;
	}

	POOL_SETMASK2(&BlockSig, &oldmask);

	for (i=0;i<pool_config.max_pool;i++)
	{
		if (MASTER_CONNECTION(p) &&
			MASTER_CONNECTION(p)->sp->major == protoMajor &&
			MASTER_CONNECTION(p)->sp->user != NULL &&
			strcmp(MASTER_CONNECTION(p)->sp->user, user) == 0 &&
			strcmp(MASTER_CONNECTION(p)->sp->database, database) == 0)
		{
			/* mark this connection is under use */
			MASTER_CONNECTION(p)->closetime = 0;
			POOL_SETMASK(&oldmask);

			if (check_socket &&
				(check_socket_status(MASTER(p)->fd) < 0 ||
				 (DUAL_MODE && check_socket_status(MASTER(p)->fd) < 0)))
			{
				pool_log("connection closed. retry to create new connection pool.");
				pool_free_startup_packet(MASTER_CONNECTION(p)->sp);
				pool_close(MASTER_CONNECTION(p)->con);
				free(MASTER_CONNECTION(p));

				if (DUAL_MODE)
				{
					pool_close(SECONDARY_CONNECTION(p)->con);
					free(SECONDARY_CONNECTION(p));
				}

				memset(p, 0, sizeof(POOL_CONNECTION_POOL));
				return NULL;
			}

			return p;
		}
		p++;
	}

	POOL_SETMASK(&oldmask);
	return NULL;
}
Ejemplo n.º 7
0
static void reload_config(void)
{
	pool_log("reload config files.");
	pool_get_config(get_config_file_name(), RELOAD_CONFIG);
	if (pool_config->enable_pool_hba)
		load_hba(get_hba_file_name());
	reload_config_request = 0;
}
Ejemplo n.º 8
0
static void reload_config(void)
{
	pool_log("reload config files.");
	pool_get_config(conf_file, RELOAD_CONFIG);
	if (pool_config->enable_pool_hba)
		load_hba(hba_file);
	if (pool_config->parallel_mode)
		pool_memset_system_db_info(system_db_info->info);
	kill_all_children(SIGHUP);
}
Ejemplo n.º 9
0
int
wd_escalation(void)
{
	int rtn;

	pool_log("wd_escalation: escalating to master pgpool");

	/* clear shared memory cache */
	if (pool_config->memory_cache_enabled && pool_is_shmem_cache() &&
	    pool_config->clear_memqcache_on_escalation)
	{
		pool_log("wd_escalation: clear all the query cache on shared memory");
		pool_clear_memory_cache();
	}

	/* execute escalation command */
	if (strlen(pool_config->wd_escalation_command))
	{
		system(pool_config->wd_escalation_command);
	}

	/* interface up as delegate IP */
	if (strlen(pool_config->delegate_IP) != 0)
		wd_IP_up();

	/* set master status to the wd list */
	wd_set_wd_list(pool_config->wd_hostname, pool_config->port,
	               pool_config->wd_port, pool_config->delegate_IP,
	               NULL, WD_MASTER);

	/* send declare packet */
	rtn = wd_declare();
	if (rtn == WD_OK)
	{
		pool_log("wd_escalation: escalated to master pgpool successfully");
	}

	return rtn;
}
Ejemplo n.º 10
0
/* fork lifecheck process*/
static pid_t
fork_a_lifecheck(int fork_wait_time)
{
	pid_t pid;

	pid = fork();
	if (pid != 0)
	{
		if (pid == -1)
			pool_error("fork_a_lifecheck: fork() failed.");

		return pid;
	}

	if (fork_wait_time > 0) {
		sleep(fork_wait_time);
	}

	myargv = save_ps_display_args(myargc, myargv);

	POOL_SETMASK(&UnBlockSig);

	init_ps_display("", "", "", "");

	signal(SIGTERM, wd_exit);	
	signal(SIGINT, wd_exit);	
	signal(SIGQUIT, wd_exit);	
	signal(SIGCHLD, SIG_DFL);
	signal(SIGHUP, SIG_IGN);	
	signal(SIGPIPE, SIG_IGN);	

	set_ps_display("lifecheck",false);

	/* wait until ready to go */
	while (WD_OK != is_wd_lifecheck_ready())
	{
		sleep(pool_config->wd_interval * 10);
	}

	pool_log("watchdog: lifecheck started");

	/* watchdog loop */
	for (;;)
	{
		/* pgpool life check */
		wd_lifecheck();
		sleep(pool_config->wd_interval);
	}

	return pid;
}
Ejemplo n.º 11
0
int
wd_chk_setuid(void)
{
	char path[128];
	char cmd[128];
	
	/* check setuid bit of ifup command */
	wd_get_cmd(cmd, pool_config->if_up_cmd);
	snprintf(path, sizeof(path), "%s/%s", pool_config->ifconfig_path, cmd);
	if (! has_setuid_bit(path))
	{
		pool_log("wd_chk_setuid: ifup[%s] doesn't have setuid bit", path);
		return 0;
	}

	/* check setuid bit of ifdown command */
	wd_get_cmd(cmd, pool_config->if_down_cmd);
	snprintf(path, sizeof(path), "%s/%s", pool_config->ifconfig_path, cmd);
	if (! has_setuid_bit(path))
	{
		pool_log("wd_chk_setuid: ifdown[%s] doesn't have setuid bit", path);
		return 0;
	}

	/* check setuid bit of arping command */
	wd_get_cmd(cmd, pool_config->arping_cmd);
	snprintf(path, sizeof(path), "%s/%s", pool_config->arping_path, cmd);
	if (! has_setuid_bit(path))
	{
		pool_log("wd_chk_setuid: arping[%s] doesn't have setuid bit", path);
		return 0;
	}

	pool_log("wd_chk_setuid all commands have setuid bit");
	return 1;
}
Ejemplo n.º 12
0
/* send failback request using SIGUSR1 */
void send_failback_request(int node_id)
{
	pid_t parent = getppid();

	pool_log("send_failback_request: fail back %d th node request from pid %d", node_id, getpid());
	Req_info->kind = NODE_UP_REQUEST;
	Req_info->node_id[0] = node_id;

	if (node_id < 0 || node_id >= MAX_NUM_BACKENDS || VALID_BACKEND(node_id))
	{
		pool_error("send_failback_request: node %d is alive.", node_id);
		return;
	}

	kill(parent, SIGUSR1);
}
Ejemplo n.º 13
0
/*
* Read the status file
*/
static int read_status_file(void)
{
	FILE *fd;
	char fnamebuf[POOLMAXPATHLEN];
	int i;
	bool someone_wakeup = false;

	snprintf(fnamebuf, sizeof(fnamebuf), "%s/%s", pool_config->logdir, STATUS_FILE_NAME);
	fd = fopen(fnamebuf, "r");
	if (!fd)
	{
		pool_log("Backend status file %s does not exist", fnamebuf);
		return -1;
	}
	if (fread(&backend_rec, 1, sizeof(backend_rec), fd) != sizeof(backend_rec))
	{
		pool_error("Could not read backend status file as %s. reason: %s",
				   fnamebuf, strerror(errno));
		fclose(fd);
		return -1;
	}
	fclose(fd);

	for (i=0;i< pool_config->backend_desc->num_backends;i++)
	{
		if (backend_rec.status[i] == CON_DOWN)
			BACKEND_INFO(i).backend_status = CON_DOWN;
		else
		{
			BACKEND_INFO(i).backend_status = CON_CONNECT_WAIT;
			someone_wakeup = true;
		}
	}

	/*
	 * If no one woke up, we regard the status file bogus
	 */
	if (someone_wakeup == false)
	{
		for (i=0;i< pool_config->backend_desc->num_backends;i++)
		{
			BACKEND_INFO(i).backend_status = CON_CONNECT_WAIT;
		}
	}

	return 0;
}
Ejemplo n.º 14
0
/*
 * Deletes a shared memory segment (called as an on_shmem_exit callback,
 * hence funny argument list)
 */
static void
IpcMemoryDelete(int status, Datum shmId)
{
  	struct shmid_ds shmStat;

  	/*
  	 * Is a previously-existing shmem segment still existing and in use?
  	 */
  	if (shmctl(shmId, IPC_STAT, &shmStat) < 0
  		&& (errno == EINVAL || errno == EACCES))
  		return;
  	else if (shmStat.shm_nattch != 0)
  		return;

	if (shmctl(shmId, IPC_RMID, NULL) < 0)
		pool_log("shmctl(%lu, %d, 0) failed: %s",
				 shmId, IPC_RMID, strerror(errno));
}
Ejemplo n.º 15
0
/*
 * Call pgpool_recovery() function.
 */
static int exec_recovery(PGconn *conn, BackendInfo *backend, char stage)
{
	PGresult *result;
	char *hostname;
	char *script;
	int r;

	if (strlen(backend->backend_hostname) == 0 || *(backend->backend_hostname) == '/')
		hostname = "localhost";
	else
		hostname = backend->backend_hostname;

	script = (stage == FIRST_STAGE) ?
		pool_config->recovery_1st_stage_command : pool_config->recovery_2nd_stage_command;

	if (script == NULL || strlen(script) == 0)
	{
		/* do not execute script */
		return 0;
	}

	snprintf(recovery_command,
			 sizeof(recovery_command),
			 "SELECT pgpool_recovery('%s', '%s', '%s')",
			 script,
			 hostname,
			 backend->backend_data_directory);

	pool_log("starting recovery command: \"%s\"", recovery_command);

	pool_debug("exec_recovery: start recovery");
	result = PQexec(conn, recovery_command);
	r = (PQresultStatus(result) !=  PGRES_TUPLES_OK);
	if (r != 0)
	{
		pool_error("exec_recovery: %s command failed at %s",
				   script,
				   (stage == FIRST_STAGE) ? "1st stage" : "2nd stage");
	}
	PQclear(result);
	pool_debug("exec_recovery: finish recovery");
	return r;
}
Ejemplo n.º 16
0
int
wd_IP_down(void)
{
	int rtn = WD_OK;
	char path[WD_MAX_PATH_LEN];
	char cmd[128];
	int i;

	if (strlen(pool_config->delegate_IP) == 0)
		return WD_NG;

	if (WD_List->delegate_ip_flag == 1)
	{
		WD_List->delegate_ip_flag = 0;
		wd_get_cmd(cmd,pool_config->if_down_cmd);
		snprintf(path, sizeof(path), "%s/%s", pool_config->ifconfig_path, cmd);
		rtn = exec_ifconfig(path,pool_config->if_down_cmd);

		if (rtn == WD_OK)
		{
			for (i = 0; i < 3; i++)
			{
				if (wd_is_unused_ip(pool_config->delegate_IP))
					break;
			}

			if (i >= 3)
				rtn = WD_NG;
		}

		if (rtn == WD_OK)
			pool_log("wd_IP_down: ifconfig down succeeded");
		else
			pool_error("wd_IP_down: ifconfig down failed");
	}
	else
	{
		pool_debug("wd_IP_down: not delegate IP holder");
	}

	return rtn;
}
Ejemplo n.º 17
0
/*
 * Connect to PostgreSQL server by using UNIX domain socket.
 * If retry is true, retry to call connect() upon receiving EINTR error.
 */
int connect_unix_domain_socket_by_port(int port, char *socket_dir, bool retry)
{
	struct sockaddr_un addr;
	int fd;
	int len;

	fd = socket(AF_UNIX, SOCK_STREAM, 0);
	if (fd == -1)
	{
		pool_error("connect_unix_domain_socket_by_port: socket() failed: %s", strerror(errno));
		return -1;
	}

	memset((char *) &addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/.s.PGSQL.%d", socket_dir, port);
	len = sizeof(struct sockaddr_un);

	for (;;)
	{
		if (exit_request)		/* exit request already sent */
		{
			pool_log("connect_unix_domain_socket_by_port: exit request has been sent");
			close(fd);
			return -1;
		}

		if (connect(fd, (struct sockaddr *)&addr, len) < 0)
		{
			if ((errno == EINTR && retry) || errno == EAGAIN)
				continue;

			pool_error("connect_unix_domain_socket_by_port: connect() failed to %s: %s", addr.sun_path, strerror(errno));
			close(fd);
			return -1;
		}
		break;
	}

	return fd;
}
Ejemplo n.º 18
0
/*
 * Convert logid/recoff style text to 64bit log location (LSN)
 */
static long text_to_lsn(char *text)
{
/*
 * WAL segment size in bytes.  XXX We should fetch this from
 * PostgreSQL, rather than having fixed value.
 */
#define WALSEGMENTSIZE 16 * 1024 * 1024

	unsigned int xlogid;
	unsigned int xrecoff;
	unsigned long long int lsn;

	if (sscanf(text, "%X/%X", &xlogid, &xrecoff) != 2)
	{
		pool_error("text_to_lsn: wrong log location format: %s", text);
		return 0;
	}
	lsn = xlogid * ((unsigned long long int)0xffffffff - WALSEGMENTSIZE) + xrecoff;
#ifdef DEBUG
	pool_log("lsn: %X %X %llX", xlogid, xrecoff, lsn);
#endif
	return lsn;
}
Ejemplo n.º 19
0
/*
 * signal handler for SIGALRM
 *
 */
static RETSIGTYPE authentication_timeout(int sig)
{
	pool_log("authentication is timeout");
	child_exit(1);
}
Ejemplo n.º 20
0
/*
 * flush write buffer
 */
int pool_flush_it(POOL_CONNECTION *cp)
{
	int sts;
	int wlen;
	int offset;
	wlen = cp->wbufpo;

	if (wlen == 0)
	{
		return 0;
	}

	offset = 0;

	for (;;)
	{
		errno = 0;

#ifdef NOT_USED
		if (!cp->isbackend)
		{
			fd_set	writemask;
			fd_set	exceptmask;

			FD_ZERO(&writemask);
			FD_ZERO(&exceptmask);
			FD_SET(cp->fd, &writemask);
			FD_SET(cp->fd, &exceptmask);

			sts = select(cp->fd+1, NULL, &writemask, &exceptmask, NULL);
			if (sts == -1)
			{
				if (errno == EAGAIN || errno == EINTR || errno == EWOULDBLOCK)
					continue;

				pool_error("pool_flush_it: select() failed. reason: %s", strerror(errno));
				cp->wbufpo = 0;
				return -1;
			}
			else if (sts == 0)
			{
				continue;
			}
			else if (FD_ISSET(cp->fd, &exceptmask))
			{
				pool_log("pool_flush_it: exception occured");
				cp->wbufpo = 0;
				return -1;
			}
		}
#endif
		if (cp->ssl_active > 0) {
		  sts = pool_ssl_write(cp, cp->wbuf + offset, wlen);
		} else {
		  sts = write(cp->fd, cp->wbuf + offset, wlen);
		}

		if (sts > 0)
		{
			wlen -= sts;

			if (wlen == 0)
			{
				/* write completed */
				break;
			}

			else if (wlen < 0)
			{
				pool_error("pool_flush_it: invalid write size %d", sts);
				cp->wbufpo = 0;
				return -1;
			}

			else
			{
				/* need to write remaining data */
				offset += sts;
				continue;
			}
		}

		else if (errno == EAGAIN || errno == EINTR)
		{
			continue;
		}

		else
		{
			/* If this is the backend stream, report error. Otherwise
			 * just report debug message.
			 */
			if (cp->isbackend)
				pool_error("pool_flush_it: write failed to backend (%d). reason: %s offset: %d wlen: %d",
						   cp->db_node_id, strerror(errno), offset, wlen);
			else
				pool_debug("pool_flush_it: write failed to frontend. reason: %s offset: %d wlen: %d",
						   strerror(errno), offset, wlen);

			cp->wbufpo = 0;
			return -1;
		}
	}

	cp->wbufpo = 0;

	return 0;
}
Ejemplo n.º 21
0
/*
 * Removes a shared memory segment from process' address spaceq (called as
 * an on_shmem_exit callback, hence funny argument list)
 */
static void
IpcMemoryDetach(int status, Datum shmaddr)
{
	if (shmdt((void *) shmaddr) < 0)
		pool_log("shmdt(%p) failed: %s", (void *) shmaddr, strerror(errno));
}
Ejemplo n.º 22
0
/*
* find connection by user and database
*/
POOL_CONNECTION_POOL *pool_get_cp(char *user, char *database, int protoMajor, int check_socket)
{
#ifdef HAVE_SIGPROCMASK
	sigset_t oldmask;
#else
	int	oldmask;
#endif

	int i, freed = 0;
	ConnectionInfo *info;

	POOL_CONNECTION_POOL *p = pool_connection_pool;

	if (p == NULL)
	{
		pool_error("pool_get_cp: pool_connection_pool is not initialized");
		return NULL;
	}

	POOL_SETMASK2(&BlockSig, &oldmask);

	for (i=0;i<pool_config->max_pool;i++)
	{
		if (MASTER_CONNECTION(p) &&
			MASTER_CONNECTION(p)->sp &&
			MASTER_CONNECTION(p)->sp->major == protoMajor &&
			MASTER_CONNECTION(p)->sp->user != NULL &&
			strcmp(MASTER_CONNECTION(p)->sp->user, user) == 0 &&
			strcmp(MASTER_CONNECTION(p)->sp->database, database) == 0)
		{
			int sock_broken = 0;
			int j;

			/* mark this connection is under use */
			MASTER_CONNECTION(p)->closetime = 0;
			for (j=0;j<NUM_BACKENDS;j++)
			{
				p->info[j].counter++;
			}
			POOL_SETMASK(&oldmask);

			if (check_socket)
			{
				for (j=0;j<NUM_BACKENDS;j++)
				{
					if (!VALID_BACKEND(j))
						continue;

					if  (CONNECTION_SLOT(p, j))
					{
						sock_broken = check_socket_status(CONNECTION(p, j)->fd);
						if (sock_broken < 0)
							break;
					}
					else
					{
						sock_broken = -1;
						break;
					}
				}

				if (sock_broken < 0)
				{
					pool_log("connection closed. retry to create new connection pool.");
					for (j=0;j<NUM_BACKENDS;j++)
					{
						if (!VALID_BACKEND(j) || (CONNECTION_SLOT(p, j) == NULL))
							continue;

						if (!freed)
						{
							pool_free_startup_packet(CONNECTION_SLOT(p, j)->sp);
							freed = 1;
						}

						pool_close(CONNECTION(p, j));
						free(CONNECTION_SLOT(p, j));
					}
					info = p->info;
					memset(p, 0, sizeof(POOL_CONNECTION_POOL_SLOT));
					p->info = info;
					memset(p->info, 0, sizeof(ConnectionInfo) * MAX_NUM_BACKENDS);
					POOL_SETMASK(&oldmask);
					return NULL;
				}
			}
			POOL_SETMASK(&oldmask);
			pool_index = i;
			return p;
		}
		p++;
	}

	POOL_SETMASK(&oldmask);
	return NULL;
}
Ejemplo n.º 23
0
static int
wd_send_response(int sock, WdPacket * recv_pack)
{
	int rtn = WD_NG;
	WdInfo * p, *q;
	WdNodeInfo * node;
	WdLockInfo * lock;
	WdPacket send_packet;
	struct timeval tv;
	char pack_str[WD_MAX_PACKET_STRING];
	int pack_str_len;
	char hash[(MD5_PASSWD_LEN+1)*2];
	bool is_node_packet = false;

	if (recv_pack == NULL)
	{
		return rtn;
	}
	memset(&send_packet, 0, sizeof(WdPacket));
	p = &(recv_pack->wd_body.wd_info);	

	/* auhtentication */
	if (strlen(pool_config->wd_authkey))
	{
		/* calculate hash from packet */
		pack_str_len = wd_packet_to_string(*recv_pack, pack_str, sizeof(pack_str));
		wd_calc_hash(pack_str, pack_str_len, hash);

		if (strcmp(recv_pack->hash, hash))
		{
			pool_log("wd_send_response: watchdog authentication failed");
			rtn = wd_authentication_failed(sock);
			return rtn;
		}
	}

	/* set response packet no */
	switch (recv_pack->packet_no)
	{
		/* add request into the watchdog list */
		case WD_ADD_REQ:
			p = &(recv_pack->wd_body.wd_info);	
			if (wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), p->status) > 0)
			{
				send_packet.packet_no = WD_ADD_ACCEPT;
			}
			else
			{
				send_packet.packet_no = WD_ADD_REJECT;
			}
			memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		/* announce candidacy to be the new master */
		case WD_STAND_FOR_MASTER:
			p = &(recv_pack->wd_body.wd_info);	
			wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), p->status);
			/* check exist master */
			if ((q = wd_is_alive_master()) != NULL)
			{
				/* vote against the candidate */
				send_packet.packet_no = WD_MASTER_EXIST;
				memcpy(&(send_packet.wd_body.wd_info), q, sizeof(WdInfo));
			}
			else
			{
				if (WD_MYSELF->tv.tv_sec <= p->tv.tv_sec )
				{
					memcpy(&tv,&(p->tv),sizeof(struct timeval));
					tv.tv_sec += 1;
					wd_set_myself(&tv, WD_NORMAL);
				}
				/* vote for the candidate */
				send_packet.packet_no = WD_VOTE_YOU;
				memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			}
			break;

		/* announce assumption to be the new master */
		case WD_DECLARE_NEW_MASTER:
			p = &(recv_pack->wd_body.wd_info);	
			wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), p->status);
			if (WD_MYSELF->status == WD_MASTER)
			{
				/* resign master server */
				pool_log("wd_declare_new_master: ifconfig down to resign master server");
				wd_IP_down();
				wd_set_myself(NULL, WD_NORMAL);
			}
			send_packet.packet_no = WD_READY;
			memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		/* annouce to assume lock holder */
		case WD_STAND_FOR_LOCK_HOLDER:
			p = &(recv_pack->wd_body.wd_info);	
			wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), p->status);
			/* only master handles lock holder privilege */
			if (WD_MYSELF->status == WD_MASTER)
			{
				/* if theare are no lock holder yet */
				if (wd_get_lock_holder() != NULL)
				{
					send_packet.packet_no = WD_LOCK_HOLDER_EXIST;
				}
			}
			memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		case WD_DECLARE_LOCK_HOLDER:
			p = &(recv_pack->wd_body.wd_info);	
			wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), p->status);
			wd_set_lock_holder(p, true);
			send_packet.packet_no = WD_READY;
			memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		/* annouce to resigne lock holder */
		case WD_RESIGN_LOCK_HOLDER:
			p = &(recv_pack->wd_body.wd_info);	
			wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), p->status);
			wd_set_lock_holder(p, false);
			send_packet.packet_no = WD_READY;
			memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;

		case WD_START_INTERLOCK:
			p = &(recv_pack->wd_body.wd_info);	
			wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), p->status);
			wd_set_interlocking(p, true);
			break;

		case WD_END_INTERLOCK:
			p = &(recv_pack->wd_body.wd_info);	
			wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), p->status);
			wd_set_interlocking(p, false);
			break;

		/* announce that server is down */
		case WD_SERVER_DOWN:
			p = &(recv_pack->wd_body.wd_info);	
			wd_set_wd_list(p->hostname,p->pgpool_port, p->wd_port, p->delegate_ip, &(p->tv), WD_DOWN);
			send_packet.packet_no = WD_READY;
			memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			if (wd_am_I_oldest() == WD_OK && WD_MYSELF->status != WD_MASTER)
			{
				wd_escalation();
			}
			break;

		/* announce start online recovery */
		case WD_START_RECOVERY:
			if (*InRecovery != RECOVERY_INIT)
			{
				send_packet.packet_no = WD_NODE_FAILED;
			}
			else
			{
				send_packet.packet_no = WD_NODE_READY;
				*InRecovery = RECOVERY_ONLINE;
				if (wait_connection_closed() != 0)
				{
					send_packet.packet_no = WD_NODE_FAILED;
				}
			}
			break;
		case WD_END_RECOVERY:
			send_packet.packet_no = WD_NODE_READY;
			*InRecovery = RECOVERY_INIT;
			kill(wd_ppid, SIGUSR2);
			break;
		case WD_FAILBACK_REQUEST:
			node = &(recv_pack->wd_body.wd_node_info);	
			wd_set_node_mask(WD_FAILBACK_REQUEST,node->node_id_set,node->node_num);
			is_node_packet = true;
			send_packet.packet_no = WD_NODE_READY;
			break;
		case WD_DEGENERATE_BACKEND:
			node = &(recv_pack->wd_body.wd_node_info);	
			wd_set_node_mask(WD_DEGENERATE_BACKEND,node->node_id_set, node->node_num);
			is_node_packet = true;
			send_packet.packet_no = WD_NODE_READY;
			break;
		case WD_PROMOTE_BACKEND:
			node = &(recv_pack->wd_body.wd_node_info);	
			wd_set_node_mask(WD_PROMOTE_BACKEND,node->node_id_set, node->node_num);
			is_node_packet = true;
			send_packet.packet_no = WD_NODE_READY;
			break;

		case WD_UNLOCK_REQUEST:
			lock = &(recv_pack->wd_body.wd_lock_info);	
			wd_set_lock(lock->lock_id, false);
			send_packet.packet_no = WD_LOCK_READY;
			break;

		default:
			send_packet.packet_no = WD_INVALID;
			memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
			break;
	}

	/* send response packet */
	rtn = wd_send_packet(sock, &send_packet);

	/* send node request signal.
	 * wd_node_request_singnal() uses a semaphore lock internally, so should be
	 * called after sending a response pakcet to prevent dead lock.
	 */
	if (is_node_packet)
		wd_node_request_signal(recv_pack->packet_no, node);

	return rtn;
}
Ejemplo n.º 24
0
/*
 * Connect to PostgreSQL server by using INET domain socket.
 * If retry is true, retry to call connect() upon receiving EINTR error.
 */
int connect_inet_domain_socket_by_port(char *host, int port, bool retry)
{
	int fd;
	int len;
	int on = 1;
	struct sockaddr_in addr;
	struct hostent *hp;

	fd = socket(AF_INET, SOCK_STREAM, 0);
	if (fd < 0)
	{
		pool_error("connect_inet_domain_socket_by_port: socket() failed: %s", strerror(errno));
		return -1;
	}

	/* set nodelay */
	if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
				   (char *) &on,
				   sizeof(on)) < 0)
	{
		pool_error("connect_inet_domain_socket_by_port: setsockopt() failed: %s", strerror(errno));
		close(fd);
		return -1;
	}

	memset((char *) &addr, 0, sizeof(addr));
	addr.sin_family = AF_INET;

	addr.sin_port = htons(port);
	len = sizeof(struct sockaddr_in);

	hp = gethostbyname(host);
	if ((hp == NULL) || (hp->h_addrtype != AF_INET))
	{
		pool_error("connect_inet_domain_socket: gethostbyname() failed: %s host: %s", hstrerror(h_errno), host);
		close(fd);
		return -1;
	}
	memmove((char *) &(addr.sin_addr),
			(char *) hp->h_addr,
			hp->h_length);

	pool_set_nonblock(fd);

	for (;;)
	{
		if (exit_request)		/* exit request already sent */
		{
			pool_log("connect_inet_domain_socket_by_port: exit request has been sent");
			close(fd);
			return -1;
		}

		if (health_check_timer_expired)		/* has health check timer expired */
		{
			pool_log("connect_inet_domain_socket_by_port: health check timer expired");
			close(fd);
			return -1;
		}

		if (connect(fd, (struct sockaddr *)&addr, len) < 0)
		{
			if (errno == EISCONN)
			{
				/* Socket is already connected */
				break;
			}

			if ((errno == EINTR && retry) || errno == EAGAIN)
				continue;

			/* Non block fd could return these */
			if (errno == EINPROGRESS || errno == EALREADY)
				continue;

			pool_error("connect_inet_domain_socket: connect() failed: %s",strerror(errno));
			close(fd);
			return -1;
		}
		break;
	}

	pool_unset_nonblock(fd);
	return fd;
}
Ejemplo n.º 25
0
/*
 * create actual connections to backends
 */
static POOL_CONNECTION_POOL *new_connection(POOL_CONNECTION_POOL *p)
{
	POOL_CONNECTION_POOL_SLOT *s;
	int active_backend_count = 0;
	int i;

	for (i=0;i<NUM_BACKENDS;i++)
	{
		pool_debug("new_connection: connecting %d backend", i);

		if (!VALID_BACKEND(i))
		{
			pool_debug("new_connection: skipping slot %d because backend_status = %d",
					   i, BACKEND_INFO(i).backend_status);
			continue;
		}

		s = malloc(sizeof(POOL_CONNECTION_POOL_SLOT));
		if (s == NULL)
		{
			pool_error("new_connection: malloc() failed");
			return NULL;
		}

		if (create_cp(s, i) == NULL)
		{
			/* connection failed. mark this backend down */
			pool_error("new_connection: create_cp() failed");

			/* If fail_over_on_backend_error is true, do failover.
			 * Otherwise, just exit this session.
			 */
			if (pool_config->fail_over_on_backend_error)
			{
				notice_backend_error(i);
			}
			else
			{
				pool_log("new_connection: do not failover because fail_over_on_backend_error is off");
			}
			child_exit(1);
		}

		p->info[i].create_time = time(NULL);
		p->slots[i] = s;

		if (pool_init_params(&s->con->params))
		{
			return NULL;
		}

		BACKEND_INFO(i).backend_status = CON_UP;
		active_backend_count++;
	}

	if (active_backend_count > 0)
	{
		return p;
	}

	return NULL;
}
Ejemplo n.º 26
0
/*
 * read a string until EOF or NULL is encountered.
 * if line is not 0, read until new line is encountered.
*/
char *pool_read_string(POOL_CONNECTION *cp, int *len, int line)
{
	int readp;
	int readsize;
	int readlen;
	int strlength;
	int flag;
	int consume_size;

#ifdef DEBUG
	static char pbuf[READBUFSZ];
#endif

	*len = 0;
	readp = 0;

	/* initialize read buffer */
	if (cp->sbufsz == 0)
	{
		cp->sbuf = malloc(READBUFSZ);
		if (cp->sbuf == NULL)
		{
			pool_error("pool_read_string: malloc failed");
			return NULL;
		}
		cp->sbufsz = READBUFSZ;
		*cp->sbuf = '\0';
	}

	/* any pending data? */
	if (cp->len)
	{
		if (line)
			strlength = mystrlinelen(cp->hp+cp->po, cp->len, &flag);
		else
			strlength = mystrlen(cp->hp+cp->po, cp->len, &flag);

		/* buffer is too small? */
		if ((strlength + 1) > cp->sbufsz)
		{
			cp->sbufsz = ((strlength+1)/READBUFSZ+1)*READBUFSZ;
			cp->sbuf = realloc(cp->sbuf, cp->sbufsz);
			if (cp->sbuf == NULL)
			{
				pool_error("pool_read_string: realloc failed");
				return NULL;
			}
		}

		/* consume pending and save to read string buffer */
		consume_size = consume_pending_data(cp, cp->sbuf, strlength);

		*len = strlength;

		/* is the string null terminated? */
		if (consume_size == strlength && !flag)
		{
			/* not null or line terminated.
			 * we need to read more since we have not encountered NULL or new line yet
			 */
			readsize = cp->sbufsz - strlength;
			readp = strlength;
		}
		else
		{
			pool_debug("pool_read_string: read all from pending data. po:%d len:%d",
					   cp->po, cp->len);
			return cp->sbuf;
		}
	} else
	{
		readsize = cp->sbufsz;
	}

	for (;;)
	{
		if (pool_check_fd(cp))
		{
			if (!IS_MASTER_NODE_ID(cp->db_node_id))
			{
				pool_log("pool_read_string: data is not ready in DB node:%d. abort this session",
						 cp->db_node_id);
				exit(1);
			}
			else
			{
				pool_error("pool_read_string: pool_check_fd failed (%s)", strerror(errno));
			    return NULL;
			}
		}

		if (cp->ssl_active > 0) {
		  readlen = pool_ssl_read(cp, cp->sbuf+readp, readsize);
		} else {
		  readlen = read(cp->fd, cp->sbuf+readp, readsize);
		}

		if (readlen == -1)
		{
			pool_error("pool_read_string: read() failed. reason:%s", strerror(errno));

			if (cp->isbackend)
			{
				notice_backend_error(cp->db_node_id);
				child_exit(1);
			}
			else
			{
			    return NULL;
			}
		}
		else if (readlen == 0)	/* EOF detected */
		{
			/*
			 * just returns an error, not trigger failover or degeneration
			 */
			pool_error("pool_read_string: read () EOF detected");
			return NULL;
		}

		/* check overrun */
		if (line)
			strlength = mystrlinelen(cp->sbuf+readp, readlen, &flag);
		else
			strlength = mystrlen(cp->sbuf+readp, readlen, &flag);

		if (strlength < readlen)
		{
			save_pending_data(cp, cp->sbuf+readp+strlength, readlen-strlength);
			*len += strlength;
			pool_debug("pool_read_string: total result %d with pending data po:%d len:%d", *len, cp->po, cp->len);
			return cp->sbuf;
		}

		*len += readlen;

		/* encountered null or newline? */
		if (flag)
		{
			/* ok we have read all data */
			pool_debug("pool_read_string: total result %d ", *len);
			break;
		}

		readp += readlen;
		readsize = READBUFSZ;

		if ((*len+readsize) > cp->sbufsz)
		{
			cp->sbufsz += READBUFSZ;

			cp->sbuf = realloc(cp->sbuf, cp->sbufsz);
			if (cp->sbuf == NULL)
			{
				pool_error("pool_read_string: realloc failed");
				return NULL;
			}
		}
	}
	return cp->sbuf;
}
Ejemplo n.º 27
0
/*
 * process cancel request
 */
void cancel_request(CancelPacket *sp)
{
	int	len;
	int fd;
	POOL_CONNECTION *con;
	int i,j,k;
	ConnectionInfo *c = NULL;
	CancelPacket cp;
	bool found = false;

	pool_debug("Cancel request received");

	/* look for cancel key from shmem info */
	for (i=0;i<pool_config->num_init_children;i++)
	{
		for (j=0;j<pool_config->max_pool;j++)
		{
			for (k=0;k<NUM_BACKENDS;k++)
			{
				c = pool_coninfo(i, j, k);
				pool_debug("con_info: address:%p database:%s user:%s pid:%d key:%d i:%d",
						   c, c->database, c->user, ntohl(c->pid), ntohl(c->key),i);

				if (c->pid == sp->pid && c->key == sp->key)
				{
					pool_debug("found pid:%d key:%d i:%d",ntohl(c->pid), ntohl(c->key),i);
					c = pool_coninfo(i, j, 0);
					found = true;
					goto found;
				}
			}
		}
	}

 found:
	if (!found)
	{
		pool_error("cancel_request: invalid cancel key: pid:%d key:%d",ntohl(sp->pid), ntohl(sp->key));
		return;	/* invalid key */
	}

	for (i=0;i<NUM_BACKENDS;i++,c++)
	{
		if (!VALID_BACKEND(i))
			continue;

		if (*(BACKEND_INFO(i).backend_hostname) == '/')
			fd = connect_unix_domain_socket(i, TRUE);
		else
			fd = connect_inet_domain_socket(i, TRUE);

		if (fd < 0)
		{
			pool_error("Could not create socket for sending cancel request for backend %d", i);
			return;
		}

		con = pool_open(fd);
		if (con == NULL)
			return;

		len = htonl(sizeof(len) + sizeof(CancelPacket));
		pool_write(con, &len, sizeof(len));

		cp.protoVersion = sp->protoVersion;
		cp.pid = c->pid;
		cp.key = c->key;

		pool_log("cancel_request: canceling backend pid:%d key: %d", ntohl(cp.pid),ntohl(cp.key));

		if (pool_write_and_flush(con, &cp, sizeof(CancelPacket)) < 0)
			pool_error("Could not send cancel request packet for backend %d", i);

		pool_close(con);

		/*
		 * this is needed to ensure that the next DB node executes the
		 * query supposed to be canceled.
		 */
		sleep(1);
	}
}
Ejemplo n.º 28
0
int
wd_reaper_watchdog(pid_t pid, int status)
{
	int i;

	/* watchdog lifecheck process exits */
	if (pid == lifecheck_pid)
	{
		if (WIFSIGNALED(status))
			pool_debug("watchdog lifecheck process %d exits with status %d by signal %d",
			           pid, status, WTERMSIG(status));
		else
			pool_debug("watchdog lifecheck process %d exits with status %d", pid, status);

		lifecheck_pid = fork_a_lifecheck(1);

		if (lifecheck_pid < 0)
		{
			pool_error("wd_reaper: fork a watchdog lifecheck process failed");
			return 0;
		}

		pool_log("fork a new watchdog lifecheck pid %d", lifecheck_pid);
	}

	/* watchdog child process exits */
	else if (pid == child_pid)
	{
		if (WIFSIGNALED(status))
			pool_debug("watchdog child process %d exits with status %d by signal %d",
			           pid, status, WTERMSIG(status));
		else
			pool_debug("watchdog child process %d exits with status %d", pid, status);

		child_pid = wd_child(1);

		if (child_pid < 0)
		{
			pool_error("wd_reaper: fork a watchdog child process failed");
			return 0;
		}

		pool_log("fork a new watchdog child pid %d", child_pid);
	}

	/* receiver/sender process exits */
	else
	{
		for (i = 0; i < pool_config->num_hb_if; i++)
		{
			if (pid == hb_receiver_pid[i])
			{
				if (WIFSIGNALED(status))
					pool_debug("watchdog heartbeat receiver process %d exits with status %d by signal %d",
					           pid, status, WTERMSIG(status));
				else
					pool_debug("watchdog heartbeat receiver process %d exits with status %d", pid, status);

				hb_receiver_pid[i] = wd_hb_receiver(1, pool_config->hb_if[i]);

				if (hb_receiver_pid[i] < 0)
				{
					pool_error("wd_reaper: fork a watchdog heartbeat receiver process failed");
					return 0;
				}
		
				pool_log("fork a new watchdog heartbeat receiver: pid %d", hb_receiver_pid[i]);
				break;
			}

			else if (pid == hb_sender_pid[i])
			{
				if (WIFSIGNALED(status))
					pool_debug("watchdog heartbeat sender process %d exits with status %d by signal %d",
					           pid, status, WTERMSIG(status));
				else
					pool_debug("watchdog heartbeat sender process %d exits with status %d", pid, status);

				hb_sender_pid[i] = wd_hb_sender(1, pool_config->hb_if[i]);

				if (hb_sender_pid[i] < 0)
				{
					pool_error("wd_reaper: fork a watchdog heartbeat sender process failed");
					return 0;
				}
		
				pool_log("fork a new watchdog heartbeat sender: pid %d", hb_sender_pid[i]);
				break;
			}
		}
	}
	
	return 1;
}
Ejemplo n.º 29
0
/*
* perform accept() and return new fd
*/
static POOL_CONNECTION *do_accept(int unix_fd, int inet_fd, struct timeval *timeout)
{
    fd_set	readmask;
    int fds;
	int save_errno;

	SockAddr saddr;
	int fd = 0;
	int afd;
	int inet = 0;
	POOL_CONNECTION *cp;
#ifdef ACCEPT_PERFORMANCE
	struct timeval now1, now2;
	static long atime;
	static int cnt;
#endif
	struct timeval *timeoutval;
	struct timeval tv1, tv2, tmback = {0, 0};

	set_ps_display("wait for connection request", false);

	/* Destroy session context for just in case... */
	pool_session_context_destroy();

	FD_ZERO(&readmask);
	FD_SET(unix_fd, &readmask);
	if (inet_fd)
		FD_SET(inet_fd, &readmask);

	if (timeout->tv_sec == 0 && timeout->tv_usec == 0)
		timeoutval = NULL;
	else
	{
		timeoutval = timeout;
		tmback.tv_sec = timeout->tv_sec;
		tmback.tv_usec = timeout->tv_usec;
		gettimeofday(&tv1, NULL);

#ifdef DEBUG
		pool_log("before select = {%d, %d}", timeoutval->tv_sec, timeoutval->tv_usec);
		pool_log("g:before select = {%d, %d}", tv1.tv_sec, tv1.tv_usec);
#endif
	}

	fds = select(Max(unix_fd, inet_fd)+1, &readmask, NULL, NULL, timeoutval);

	save_errno = errno;
	/* check backend timer is expired */
	if (backend_timer_expired)
	{
		pool_backend_timer();
		backend_timer_expired = 0;
	}

	/*
	 * following code fragment computes remaining timeout val in a
	 * portable way. Linux does this automatically but other platforms do not.
	 */
	if (timeoutval)
	{
		gettimeofday(&tv2, NULL);

		tmback.tv_usec -= tv2.tv_usec - tv1.tv_usec;
		tmback.tv_sec -= tv2.tv_sec - tv1.tv_sec;

		if (tmback.tv_usec < 0)
		{
			tmback.tv_sec--;
			if (tmback.tv_sec < 0)
			{
				timeout->tv_sec = 0;
				timeout->tv_usec = 0;
			}
			else
			{
				tmback.tv_usec += 1000000;
				timeout->tv_sec = tmback.tv_sec;
				timeout->tv_usec = tmback.tv_usec;
			}
		}
#ifdef DEBUG
		pool_log("g:after select = {%d, %d}", tv2.tv_sec, tv2.tv_usec);
		pool_log("after select = {%d, %d}", timeout->tv_sec, timeout->tv_usec);
#endif
	}

	errno = save_errno;

	if (fds == -1)
	{
		if (errno == EAGAIN || errno == EINTR)
			return NULL;

		pool_error("select() failed. reason %s", strerror(errno));
		return NULL;
	}

	/* timeout */
	if (fds == 0)
	{
		return NULL;
	}

	if (FD_ISSET(unix_fd, &readmask))
	{
		fd = unix_fd;
	}

	if (FD_ISSET(inet_fd, &readmask))
	{
		fd = inet_fd;
		inet++;
	}

	/*
	 * Note that some SysV systems do not work here. For those
	 * systems, we need some locking mechanism for the fd.
	 */
	memset(&saddr, 0, sizeof(saddr));
	saddr.salen = sizeof(saddr.addr);

#ifdef ACCEPT_PERFORMANCE
	gettimeofday(&now1,0);
#endif

 retry_accept:

	/* wait if recovery is started */
	while (*InRecovery == 1)
	{
		pause();
	}

	afd = accept(fd, (struct sockaddr *)&saddr.addr, &saddr.salen);

	save_errno = errno;
	/* check backend timer is expired */
	if (backend_timer_expired)
	{
		pool_backend_timer();
		backend_timer_expired = 0;
	}
	errno = save_errno;
	if (afd < 0)
	{
		if (errno == EINTR && *InRecovery)
			goto retry_accept;

		/*
		 * "Resource temporarily unavailable" (EAGAIN or EWOULDBLOCK)
		 * can be silently ignored. And EINTR can be ignored.
		 */
		if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR)
			pool_error("accept() failed. reason: %s", strerror(errno));
		return NULL;
	}
#ifdef ACCEPT_PERFORMANCE
	gettimeofday(&now2,0);
	atime += (now2.tv_sec - now1.tv_sec)*1000000 + (now2.tv_usec - now1.tv_usec);
	cnt++;
	if (cnt % 100 == 0)
	{
		pool_log("cnt: %d atime: %ld", cnt, atime);
	}
#endif

	/* reload config file */
	if (got_sighup)
	{
		pool_get_config(get_config_file_name(), RELOAD_CONFIG);
		if (pool_config->enable_pool_hba)
		{
			load_hba(get_hba_file_name());
			if (strcmp("", pool_config->pool_passwd))
				pool_reopen_passwd_file();
		}
		if (pool_config->parallel_mode)
			pool_memset_system_db_info(system_db_info->info);
		got_sighup = 0;
	}

	connection_count_up();
	accepted = 1;

	if (pool_config->parallel_mode)
	{
		/*
		 * do not accept new connection if any of DB node or SystemDB is down when operating in
		 * parallel mode
		 */
		int i;

		for (i=0;i<NUM_BACKENDS;i++)
		{
			if (BACKEND_INFO(i).backend_status == CON_DOWN || SYSDB_STATUS == CON_DOWN)
			{
				StartupPacket *sp;
				char *msg = "pgpool is not available in parallel query mode";

				if (SYSDB_STATUS == CON_DOWN)
					pool_log("Cannot accept() new connection. SystemDB is down");
				else
					pool_log("Cannot accept() new connection. %d th backend is down", i);

				if ((cp = pool_open(afd)) == NULL)
				{
					close(afd);
					child_exit(1);
				}

				sp = read_startup_packet(cp);
				if (sp == NULL)
				{
					/* failed to read the startup packet. return to the accept() loop */
					pool_close(cp);
					child_exit(1);
				}

				pool_debug("do_accept: send error message to frontend");

				if (sp->major == PROTO_MAJOR_V3)
				{
					char buf[256];

					if (SYSDB_STATUS == CON_DOWN)
						snprintf(buf, sizeof(buf), "SystemDB is down");
					else
						snprintf(buf, sizeof(buf), "%d th backend is down", i);

					pool_send_error_message(cp, sp->major, "08S01",
											msg,
											buf,
											((SYSDB_STATUS == CON_DOWN) ? "repair the SystemDB and restart pgpool"
											                           : "repair the backend and restart pgpool"),
											__FILE__,
											__LINE__);
				}
				else
				{
					pool_send_error_message(cp, sp->major,
											0,
											msg,
											"",
											"",
											"",
											0);
				}
				pool_close(cp);
				child_exit(1);
			}
		}
	}
	else
	{
		/*
		 * do not accept new connection if all DB nodes are down when operating in
		 * non parallel mode
		 */
		int i;
		int found = 0;

		for (i=0;i<NUM_BACKENDS;i++)
		{
			if (VALID_BACKEND(i))
			{
				found = 1;
			}
		}
		if (found == 0)
		{
			pool_log("Cannot accept() new connection. all backends are down");
			child_exit(1);
		}
	}

	pool_debug("I am %d accept fd %d", getpid(), afd);

	pool_getnameinfo_all(&saddr, remote_host, remote_port);
	snprintf(remote_ps_data, sizeof(remote_ps_data),
			 remote_port[0] == '\0' ? "%s" : "%s(%s)",
			 remote_host, remote_port);

	set_ps_display("accept connection", false);

	/* log who is connecting */
	if (pool_config->log_connections)
	{
		pool_log("connection received: host=%s%s%s",
				 remote_host, remote_port[0] ? " port=" : "", remote_port);
	}

	/* set NODELAY and KEEPALIVE options if INET connection */
	if (inet)
	{
		int on = 1;

		if (setsockopt(afd, IPPROTO_TCP, TCP_NODELAY,
					   (char *) &on,
					   sizeof(on)) < 0)
		{
			pool_error("do_accept: setsockopt() failed: %s", strerror(errno));
			close(afd);
			return NULL;
		}
		if (setsockopt(afd, SOL_SOCKET, SO_KEEPALIVE,
					   (char *) &on,
					   sizeof(on)) < 0)
		{
			pool_error("do_accept: setsockopt() failed: %s", strerror(errno));
			close(afd);
			return NULL;
		}
	}

	if ((cp = pool_open(afd)) == NULL)
	{
		close(afd);
		return NULL;
	}

	/* save ip address for hba */
	memcpy(&cp->raddr, &saddr, sizeof(SockAddr));
	if (cp->raddr.addr.ss_family == 0)
		cp->raddr.addr.ss_family = AF_UNIX;

	return cp;
}
Ejemplo n.º 30
0
/*
* child main loop
*/
void do_child(int unix_fd, int inet_fd)
{
	POOL_CONNECTION *frontend;
	POOL_CONNECTION_POOL *backend;
	struct timeval now;
	struct timezone tz;
	struct timeval timeout;
	static int connected;		/* non 0 if has been accepted connections from frontend */
	int connections_count = 0;	/* used if child_max_connections > 0 */
	int found;
	char psbuf[NI_MAXHOST + 128];

	pool_debug("I am %d", getpid());

	/* Identify myself via ps */
	init_ps_display("", "", "", "");

	/* set up signal handlers */
	signal(SIGALRM, SIG_DFL);
	signal(SIGTERM, die);
	signal(SIGINT, die);
	signal(SIGHUP, reload_config_handler);
	signal(SIGQUIT, die);
	signal(SIGCHLD, SIG_DFL);
	signal(SIGUSR1, close_idle_connection);
	signal(SIGUSR2, wakeup_handler);
	signal(SIGPIPE, SIG_IGN);

#ifdef NONE_BLOCK
	/* set listen fds to none-blocking */
	pool_set_nonblock(unix_fd);
	if (inet_fd)
	{
		pool_set_nonblock(inet_fd);
	}
#endif

	/* Initialize my backend status */
	pool_initialize_private_backend_status();

	/* Initialize per process context */
	pool_init_process_context();

	/* initialize random seed */
	gettimeofday(&now, &tz);
#if defined(sun) || defined(__sun)
	srand((unsigned int) now.tv_usec);
#else
	srandom((unsigned int) now.tv_usec);
#endif

	/* initialize system db connection */
	init_system_db_connection();

	/* initialize connection pool */
	if (pool_init_cp())
	{
		child_exit(1);
	}

	/*
	 * Open pool_passwd in child process.  This is necessary to avoid the
	 * file descriptor race condition reported in [pgpool-general: 1141].
	 */
	if (strcmp("", pool_config->pool_passwd))
	{
		pool_reopen_passwd_file();
	}

	timeout.tv_sec = pool_config->child_life_time;
	timeout.tv_usec = 0;

	for (;;)
	{
		StartupPacket *sp;

		idle = 1;

		/* pgpool stop request already sent? */
		check_stop_request();

		/* Check if restart request is set because of failback event
		 * happend.  If so, exit myself with exit code 1 to be
		 * restarted by pgpool parent.
		 */
		if (pool_get_my_process_info()->need_to_restart)
		{
			pool_log("do_child: failback event found. restart myself.");
			pool_get_my_process_info()->need_to_restart = 0;
			child_exit(1);
		}

		accepted = 0;

		/* perform accept() */
		frontend = do_accept(unix_fd, inet_fd, &timeout);

		if (frontend == NULL)	/* connection request from frontend timed out */
		{
			/* check select() timeout */
			if (connected && pool_config->child_life_time > 0 &&
				timeout.tv_sec == 0 && timeout.tv_usec == 0)
			{
				pool_debug("child life %d seconds expired", pool_config->child_life_time);
				/*
				 * Doesn't need to call this. child_exit() calls it.
				 * send_frontend_exits();
				 */
				child_exit(2);
			}
			continue;
		}

		/* set frontend fd to blocking */
		pool_unset_nonblock(frontend->fd);

		/* reset busy flag */
		idle = 0;

		/* check backend timer is expired */
		if (backend_timer_expired)
		{
			pool_backend_timer();
			backend_timer_expired = 0;
		}

		/* read the startup packet */
	retry_startup:
		sp = read_startup_packet(frontend);
		if (sp == NULL)
		{
			/* failed to read the startup packet. return to the accept() loop */
			pool_close(frontend);
			connection_count_down();
			continue;
		}

		/* cancel request? */
		if (sp->major == 1234 && sp->minor == 5678)
		{
			cancel_request((CancelPacket *)sp->startup_packet);

			pool_close(frontend);
			pool_free_startup_packet(sp);
			connection_count_down();
			continue;
		}

		/* SSL? */
		if (sp->major == 1234 && sp->minor == 5679 && !frontend->ssl_active)
		{
			pool_debug("SSLRequest from client");
			pool_ssl_negotiate_serverclient(frontend);
			goto retry_startup;
		}

		if (pool_config->enable_pool_hba)
		{
			/*
			 * do client authentication.
			 * Note that ClientAuthentication does not return if frontend
			 * was rejected; it simply terminates this process.
			 */
			frontend->protoVersion = sp->major;
			frontend->database = strdup(sp->database);
			if (frontend->database == NULL)
			{
				pool_error("do_child: strdup failed: %s\n", strerror(errno));
				child_exit(1);
			}
			frontend->username = strdup(sp->user);
			if (frontend->username == NULL)
			{
				pool_error("do_child: strdup failed: %s\n", strerror(errno));
				child_exit(1);
			}
			ClientAuthentication(frontend);
		}

		/*
		 * Ok, negotiation with frontend has been done. Let's go to the
		 * next step.  Connect to backend if there's no existing
		 * connection which can be reused by this frontend.
		 * Authentication is also done in this step.
		 */

		/* Check if restart request is set because of failback event
		 * happend.  If so, close idle connections to backend and make
		 * a new copy of backend status.
		 */
		if (pool_get_my_process_info()->need_to_restart)
		{
			pool_log("do_child: failback event found. discard existing connections");
			pool_get_my_process_info()->need_to_restart = 0;
			close_idle_connection(0);
			pool_initialize_private_backend_status();
		}

		/*
		 * if there's no connection associated with user and database,
		 * we need to connect to the backend and send the startup packet.
		 */

		/* look for existing connection */
		found = 0;
		backend = pool_get_cp(sp->user, sp->database, sp->major, 1);

		if (backend != NULL)
		{
			found = 1;

			/* existing connection associated with same user/database/major found.
			 * however we should make sure that the startup packet contents are identical.
			 * OPTION data and others might be different.
			 */
			if (sp->len != MASTER_CONNECTION(backend)->sp->len)
			{
				pool_debug("do_child: connection exists but startup packet length is not identical");
				found = 0;
			}
			else if(memcmp(sp->startup_packet, MASTER_CONNECTION(backend)->sp->startup_packet, sp->len) != 0)
			{
				pool_debug("do_child: connection exists but startup packet contents is not identical");
				found = 0;
			}

			if (found == 0)
			{
				/* we need to discard existing connection since startup packet is different */
				pool_discard_cp(sp->user, sp->database, sp->major);
				backend = NULL;
			}
		}

		if (backend == NULL)
		{
			/* create a new connection to backend */
			if ((backend = connect_backend(sp, frontend)) == NULL)
			{
				connection_count_down();
				continue;
			}
		}

		else
		{
			/* reuse existing connection */
			if (!connect_using_existing_connection(frontend, backend, sp))
				continue;
		}

		connected = 1;

 		/* show ps status */
		sp = MASTER_CONNECTION(backend)->sp;
		snprintf(psbuf, sizeof(psbuf), "%s %s %s idle",
				 sp->user, sp->database, remote_ps_data);
		set_ps_display(psbuf, false);

		/*
		 * Initialize per session context
		 */
		pool_init_session_context(frontend, backend);

		/* Mark this connection pool is connected from frontend */
		pool_coninfo_set_frontend_connected(pool_get_process_context()->proc_id, pool_pool_index());

		/* query process loop */
		for (;;)
		{
			POOL_STATUS status;

			status = pool_process_query(frontend, backend, 0);

			sp = MASTER_CONNECTION(backend)->sp;

			switch (status)
			{
				/* client exits */
				case POOL_END:
					/*
					 * do not cache connection if:
					 * pool_config->connection_cahe == 0 or
					 * database name is template0, template1, postgres or regression
					 */
					if (pool_config->connection_cache == 0 ||
						!strcmp(sp->database, "template0") ||
						!strcmp(sp->database, "template1") ||
						!strcmp(sp->database, "postgres") ||
						!strcmp(sp->database, "regression"))
					{
						reset_connection();
						pool_close(frontend);
						pool_send_frontend_exits(backend);
						pool_discard_cp(sp->user, sp->database, sp->major);
					}
					else
					{
						POOL_STATUS status1;

						/* send reset request to backend */
						status1 = pool_process_query(frontend, backend, 1);
						pool_close(frontend);

						/* if we detect errors on resetting connection, we need to discard
						 * this connection since it might be in unknown status
						 */
						if (status1 != POOL_CONTINUE)
						{
							pool_debug("error in resetting connections. discarding connection pools...");
							pool_send_frontend_exits(backend);
							pool_discard_cp(sp->user, sp->database, sp->major);
						}
						else
							pool_connection_pool_timer(backend);
					}
					break;

				/* error occurred. discard backend connection pool
                   and disconnect connection to the frontend */
				case POOL_ERROR:
					pool_log("do_child: exits with status 1 due to error");
					child_exit(1);
					break;

				/* fatal error occurred. just exit myself... */
				case POOL_FATAL:
					notice_backend_error(1);
					child_exit(1);
					break;

				/* not implemented yet */
				case POOL_IDLE:
					do_accept(unix_fd, inet_fd, &timeout);
					pool_debug("accept while idle");
					break;

				default:
					break;
			}

			if (status != POOL_CONTINUE)
				break;
		}

		/* Destroy session context */
		pool_session_context_destroy();

		/* Mark this connection pool is not connected from frontend */
		pool_coninfo_unset_frontend_connected(pool_get_process_context()->proc_id, pool_pool_index());

		accepted = 0;
		connection_count_down();

		timeout.tv_sec = pool_config->child_life_time;
		timeout.tv_usec = 0;

		/* increment queries counter if necessary */
		if ( pool_config->child_max_connections > 0 )
			connections_count++;

		/* check if maximum connections count for this child reached */
		if ( ( pool_config->child_max_connections > 0 ) &&
			( connections_count >= pool_config->child_max_connections ) )
		{
			pool_log("child exiting, %d connections reached", pool_config->child_max_connections);
			send_frontend_exits();
			child_exit(2);
		}
	}
	child_exit(0);
}