/* notify the parent of a backend connection error using SIGUSR1 */
void degenerate_backend_set(int *node_id_set, int count)
{
    pid_t parent = getppid();
    int i;

    if (pool_config->parallel_mode)
    {
        return;
    }

    pool_semaphore_lock(REQUEST_INFO_SEM);
    Req_info->kind = NODE_DOWN_REQUEST;
    for (i = 0; i < count; i++)
    {
        if (node_id_set[i] < 0 || node_id_set[i] >= MAX_NUM_BACKENDS ||
            !VALID_BACKEND(node_id_set[i]))
        {
            /* report the offending node id, not the loop index */
            pool_log("notice_backend_error: node %d is not valid backend.", node_id_set[i]);
            continue;
        }
        pool_log("notice_backend_error: %d fail over request from pid %d",
                 node_id_set[i], getpid());
        Req_info->node_id[i] = node_id_set[i];
    }
    kill(parent, SIGUSR1);
    pool_semaphore_unlock(REQUEST_INFO_SEM);
}
/* handle another pgpool going down */
static int pgpool_down(WdInfo * pool)
{
    int rtn = WD_OK;
    WD_STATUS prev_status;

    pool_log("pgpool_down: %s:%d is going down",
             pool->hostname, pool->pgpool_port);

    prev_status = pool->status;
    pool->status = WD_DOWN;

    /* the active pgpool goes down and I'm a standby pgpool */
    if (prev_status == WD_MASTER && WD_MYSELF->status == WD_NORMAL)
    {
        if (wd_am_I_oldest() == WD_OK)
        {
            pool_log("pgpool_down: I'm the oldest, so standing for master");

            /* stand for master */
            rtn = wd_stand_for_master();
            if (rtn == WD_OK)
            {
                /* won the election */
                wd_escalation();
            }
            else
            {
                /* rejected by others */
                pool->status = prev_status;
            }
        }
    }
    return rtn;
}
static RETSIGTYPE exit_handler(int sig)
{
    int i;

    POOL_SETMASK(&AuthBlockSig);

    /*
     * this could happen in a child process if a signal has been sent
     * before resetting the signal handler
     */
    if (getpid() != mypid)
    {
        pool_debug("exit_handler: I am not parent");
        POOL_SETMASK(&UnBlockSig);
        pool_shmem_exit(0);
        exit(0);
    }

    if (sig == SIGTERM)
        pool_log("received smart shutdown request");
    else if (sig == SIGINT)
        pool_log("received fast shutdown request");
    else if (sig == SIGQUIT)
        pool_log("received immediate shutdown request");
    else
    {
        pool_error("exit_handler: unknown signal received %d", sig);
        POOL_SETMASK(&UnBlockSig);
        return;
    }

    exiting = 1;

    /* forward the shutdown signal to every child process */
    for (i = 0; i < pool_config->num_init_children; i++)
    {
        pid_t pid = pids[i].pid;
        if (pid)
        {
            kill(pid, sig);
        }
    }

    kill(pcp_pid, sig);

    POOL_SETMASK(&UnBlockSig);

    while (wait(NULL) > 0)
        ;

    if (errno != ECHILD)
        pool_error("wait() failed. reason:%s", strerror(errno));

    pids = NULL;
    myexit(0);
}
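/*
 * A minimal stand-alone sketch of the shutdown fan-out pattern used by
 * exit_handler() above: forward the received signal to every child, then
 * block in wait() until no children remain. All names here (NCHILDREN,
 * child_pids, shutdown_handler) are illustrative, not pgpool symbols.
 */
#include <signal.h>
#include <sys/wait.h>
#include <unistd.h>

#define NCHILDREN 4
static pid_t child_pids[NCHILDREN];

static void shutdown_handler(int sig)
{
    int i;

    /* forward the received signal to every child */
    for (i = 0; i < NCHILDREN; i++)
        if (child_pids[i] > 0)
            kill(child_pids[i], sig);

    /* reap until no children remain (wait() returns -1 with ECHILD) */
    while (wait(NULL) > 0)
        ;
    _exit(0);
}

int main(void)
{
    int i;

    signal(SIGTERM, shutdown_handler);
    for (i = 0; i < NCHILDREN; i++)
    {
        pid_t pid = fork();
        if (pid == 0)           /* child: restore default action and wait */
        {
            signal(SIGTERM, SIG_DFL);
            pause();
            _exit(0);
        }
        child_pids[i] = pid;
    }
    pause();                    /* parent waits to be signaled */
    return 0;
}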
/*
 * Read the status file
 */
static int read_status_file(void)
{
    FILE *fd;
    char fnamebuf[POOLMAXPATHLEN];
    int i;

    snprintf(fnamebuf, sizeof(fnamebuf), "%s/%s", pool_config->logdir, STATUS_FILE_NAME);
    fd = fopen(fnamebuf, "r");
    if (!fd)
    {
        pool_log("Backend status file %s does not exist", fnamebuf);
        return -1;
    }
    if (fread(&backend_rec, 1, sizeof(backend_rec), fd) <= 0)
    {
        pool_error("Could not read backend status file %s. reason: %s",
                   fnamebuf, strerror(errno));
        fclose(fd);
        return -1;
    }
    fclose(fd);

    for (i = 0; i < pool_config->backend_desc->num_backends; i++)
    {
        if (backend_rec.status[i] == CON_DOWN)
            BACKEND_INFO(i).backend_status = CON_DOWN;
        else
            BACKEND_INFO(i).backend_status = CON_CONNECT_WAIT;
    }
    return 0;
}
/*
 * Do housekeeping work when a pgpool child process exits
 */
void child_exit(int code)
{
    if (getpid() == mypid)
    {
        pool_log("child_exit: called from pgpool main. ignored.");
        return;
    }

    /* count down the global connection counter */
    if (accepted)
        connection_count_down();

    /* prepare to shut down connections to the system db */
    if (pool_config->parallel_mode || pool_config->enable_query_cache)
    {
        if (system_db_info->pgconn)
            pool_close_libpq_connection();
        if (pool_system_db_connection())
            pool_close(pool_system_db_connection()->con);
    }

    if (pool_config->memory_cache_enabled && !pool_is_shmem_cache())
    {
        memcached_disconnect();
    }

    /* let the backends know we are exiting */
    if (pool_connection_pool)
        send_frontend_exits();

    exit(code);
}
/*
 * find connection by user and database
 */
POOL_CONNECTION_POOL *pool_get_cp(char *user, char *database, int protoMajor, int check_socket)
{
#ifdef HAVE_SIGPROCMASK
    sigset_t oldmask;
#else
    int oldmask;
#endif
    int i;
    POOL_CONNECTION_POOL *p = pool_connection_pool;

    if (p == NULL)
    {
        pool_error("pool_get_cp: pool_connection_pool is not initialized");
        return NULL;
    }

    POOL_SETMASK2(&BlockSig, &oldmask);

    for (i = 0; i < pool_config->max_pool; i++)
    {
        if (MASTER_CONNECTION(p) &&
            MASTER_CONNECTION(p)->sp->major == protoMajor &&
            MASTER_CONNECTION(p)->sp->user != NULL &&
            strcmp(MASTER_CONNECTION(p)->sp->user, user) == 0 &&
            strcmp(MASTER_CONNECTION(p)->sp->database, database) == 0)
        {
            /* mark this connection as in use */
            MASTER_CONNECTION(p)->closetime = 0;
            POOL_SETMASK(&oldmask);

            /* the secondary's socket must be checked too in dual mode */
            if (check_socket &&
                (check_socket_status(MASTER(p)->fd) < 0 ||
                 (DUAL_MODE && check_socket_status(SECONDARY(p)->fd) < 0)))
            {
                pool_log("connection closed. retry to create new connection pool.");
                pool_free_startup_packet(MASTER_CONNECTION(p)->sp);
                pool_close(MASTER_CONNECTION(p)->con);
                free(MASTER_CONNECTION(p));

                if (DUAL_MODE)
                {
                    pool_close(SECONDARY_CONNECTION(p)->con);
                    free(SECONDARY_CONNECTION(p));
                }

                memset(p, 0, sizeof(POOL_CONNECTION_POOL));
                return NULL;
            }
            return p;
        }
        p++;
    }

    POOL_SETMASK(&oldmask);
    return NULL;
}
static void reload_config(void)
{
    pool_log("reload config files.");
    pool_get_config(get_config_file_name(), RELOAD_CONFIG);
    if (pool_config->enable_pool_hba)
        load_hba(get_hba_file_name());
    reload_config_request = 0;
}
static void reload_config(void)
{
    pool_log("reload config files.");
    pool_get_config(conf_file, RELOAD_CONFIG);
    if (pool_config->enable_pool_hba)
        load_hba(hba_file);
    if (pool_config->parallel_mode)
        pool_memset_system_db_info(system_db_info->info);
    kill_all_children(SIGHUP);
}
int wd_escalation(void)
{
    int rtn;

    pool_log("wd_escalation: escalating to master pgpool");

    /* clear shared memory cache */
    if (pool_config->memory_cache_enabled && pool_is_shmem_cache() &&
        pool_config->clear_memqcache_on_escalation)
    {
        pool_log("wd_escalation: clear all the query cache on shared memory");
        pool_clear_memory_cache();
    }

    /* execute escalation command */
    if (strlen(pool_config->wd_escalation_command))
    {
        system(pool_config->wd_escalation_command);
    }

    /* bring the interface up with the delegate IP */
    if (strlen(pool_config->delegate_IP) != 0)
        wd_IP_up();

    /* set master status in the watchdog list */
    wd_set_wd_list(pool_config->wd_hostname,
                   pool_config->port,
                   pool_config->wd_port,
                   pool_config->delegate_IP,
                   NULL, WD_MASTER);

    /* send declare packet */
    rtn = wd_declare();
    if (rtn == WD_OK)
    {
        pool_log("wd_escalation: escalated to master pgpool successfully");
    }

    return rtn;
}
/* fork a lifecheck process */
static pid_t fork_a_lifecheck(int fork_wait_time)
{
    pid_t pid;

    pid = fork();
    if (pid != 0)
    {
        if (pid == -1)
            pool_error("fork_a_lifecheck: fork() failed.");

        return pid;
    }

    if (fork_wait_time > 0)
    {
        sleep(fork_wait_time);
    }

    myargv = save_ps_display_args(myargc, myargv);

    POOL_SETMASK(&UnBlockSig);

    init_ps_display("", "", "", "");

    signal(SIGTERM, wd_exit);
    signal(SIGINT, wd_exit);
    signal(SIGQUIT, wd_exit);
    signal(SIGCHLD, SIG_DFL);
    signal(SIGHUP, SIG_IGN);
    signal(SIGPIPE, SIG_IGN);

    set_ps_display("lifecheck", false);

    /* wait until ready to go */
    while (WD_OK != is_wd_lifecheck_ready())
    {
        sleep(pool_config->wd_interval * 10);
    }

    pool_log("watchdog: lifecheck started");

    /* watchdog loop */
    for (;;)
    {
        /* pgpool life check */
        wd_lifecheck();
        sleep(pool_config->wd_interval);
    }
    return pid;
}
int wd_chk_setuid(void)
{
    char path[128];
    char cmd[128];

    /* check setuid bit of ifup command */
    wd_get_cmd(cmd, pool_config->if_up_cmd);
    snprintf(path, sizeof(path), "%s/%s", pool_config->ifconfig_path, cmd);
    if (!has_setuid_bit(path))
    {
        pool_log("wd_chk_setuid: ifup[%s] doesn't have setuid bit", path);
        return 0;
    }

    /* check setuid bit of ifdown command */
    wd_get_cmd(cmd, pool_config->if_down_cmd);
    snprintf(path, sizeof(path), "%s/%s", pool_config->ifconfig_path, cmd);
    if (!has_setuid_bit(path))
    {
        pool_log("wd_chk_setuid: ifdown[%s] doesn't have setuid bit", path);
        return 0;
    }

    /* check setuid bit of arping command */
    wd_get_cmd(cmd, pool_config->arping_cmd);
    snprintf(path, sizeof(path), "%s/%s", pool_config->arping_path, cmd);
    if (!has_setuid_bit(path))
    {
        pool_log("wd_chk_setuid: arping[%s] doesn't have setuid bit", path);
        return 0;
    }

    pool_log("wd_chk_setuid: all commands have setuid bit");
    return 1;
}
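/*
 * A stand-alone sketch of what a stat()-based has_setuid_bit() helper
 * presumably looks like; the name has_setuid_bit_demo and the stat()
 * approach are assumptions here, not pgpool's actual implementation.
 */
#include <stdbool.h>
#include <sys/types.h>
#include <sys/stat.h>

static bool has_setuid_bit_demo(const char *path)
{
    struct stat buf;

    if (stat(path, &buf) < 0)
        return false;           /* treat unreadable paths as "no setuid" */

    /* S_ISUID is the set-user-ID-on-execute mode bit */
    return (buf.st_mode & S_ISUID) != 0;
}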
/* send failback request using SIGUSR1 */
void send_failback_request(int node_id)
{
    pid_t parent = getppid();

    pool_log("send_failback_request: fail back %d th node request from pid %d",
             node_id, getpid());
    Req_info->kind = NODE_UP_REQUEST;
    Req_info->node_id[0] = node_id;

    /* refuse the request if the node id is out of range or the node is already alive */
    if (node_id < 0 || node_id >= MAX_NUM_BACKENDS || VALID_BACKEND(node_id))
    {
        pool_error("send_failback_request: node %d is alive.", node_id);
        return;
    }
    kill(parent, SIGUSR1);
}
/*
 * Read the status file
 */
static int read_status_file(void)
{
    FILE *fd;
    char fnamebuf[POOLMAXPATHLEN];
    int i;
    bool someone_wakeup = false;

    snprintf(fnamebuf, sizeof(fnamebuf), "%s/%s", pool_config->logdir, STATUS_FILE_NAME);
    fd = fopen(fnamebuf, "r");
    if (!fd)
    {
        pool_log("Backend status file %s does not exist", fnamebuf);
        return -1;
    }
    if (fread(&backend_rec, 1, sizeof(backend_rec), fd) != sizeof(backend_rec))
    {
        pool_error("Could not read backend status file %s. reason: %s",
                   fnamebuf, strerror(errno));
        fclose(fd);
        return -1;
    }
    fclose(fd);

    for (i = 0; i < pool_config->backend_desc->num_backends; i++)
    {
        if (backend_rec.status[i] == CON_DOWN)
            BACKEND_INFO(i).backend_status = CON_DOWN;
        else
        {
            BACKEND_INFO(i).backend_status = CON_CONNECT_WAIT;
            someone_wakeup = true;
        }
    }

    /*
     * If no one woke up, we regard the status file as bogus
     */
    if (someone_wakeup == false)
    {
        for (i = 0; i < pool_config->backend_desc->num_backends; i++)
        {
            BACKEND_INFO(i).backend_status = CON_CONNECT_WAIT;
        }
    }

    return 0;
}
/*
 * Deletes a shared memory segment (called as an on_shmem_exit callback,
 * hence the funny argument list)
 */
static void IpcMemoryDelete(int status, Datum shmId)
{
    struct shmid_ds shmStat;

    /*
     * Is a previously-existing shmem segment still existing and in use?
     */
    if (shmctl(shmId, IPC_STAT, &shmStat) < 0
        && (errno == EINVAL || errno == EACCES))
        return;
    else if (shmStat.shm_nattch != 0)
        return;

    if (shmctl(shmId, IPC_RMID, NULL) < 0)
        pool_log("shmctl(%lu, %d, 0) failed: %s",
                 shmId, IPC_RMID, strerror(errno));
}
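/*
 * A stand-alone sketch of the IPC_STAT / IPC_RMID calls used above:
 * create a System V shared memory segment, inspect its attach count,
 * and remove it when nobody is attached. Illustrative demo only.
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(void)
{
    struct shmid_ds shmStat;
    int shmId = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);

    if (shmId < 0)
    {
        fprintf(stderr, "shmget failed: %s\n", strerror(errno));
        return 1;
    }

    /* shm_nattch tells us how many processes still have it attached */
    if (shmctl(shmId, IPC_STAT, &shmStat) == 0)
    {
        printf("segment %d: %lu attach(es)\n",
               shmId, (unsigned long) shmStat.shm_nattch);

        /* remove the segment once nobody is attached */
        if (shmStat.shm_nattch == 0 && shmctl(shmId, IPC_RMID, NULL) < 0)
            fprintf(stderr, "shmctl(IPC_RMID) failed: %s\n", strerror(errno));
    }
    return 0;
}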
/*
 * Call the pgpool_recovery() function.
 */
static int exec_recovery(PGconn *conn, BackendInfo *backend, char stage)
{
    PGresult *result;
    char *hostname;
    char *script;
    int r;

    if (strlen(backend->backend_hostname) == 0 || *(backend->backend_hostname) == '/')
        hostname = "localhost";
    else
        hostname = backend->backend_hostname;

    script = (stage == FIRST_STAGE) ?
             pool_config->recovery_1st_stage_command : pool_config->recovery_2nd_stage_command;

    if (script == NULL || strlen(script) == 0)
    {
        /* do not execute a script */
        return 0;
    }

    snprintf(recovery_command,
             sizeof(recovery_command),
             "SELECT pgpool_recovery('%s', '%s', '%s')",
             script,
             hostname,
             backend->backend_data_directory);

    pool_log("starting recovery command: \"%s\"", recovery_command);

    pool_debug("exec_recovery: start recovery");

    result = PQexec(conn, recovery_command);
    r = (PQresultStatus(result) != PGRES_TUPLES_OK);
    if (r != 0)
    {
        pool_error("exec_recovery: %s command failed at %s",
                   script,
                   (stage == FIRST_STAGE) ? "1st stage" : "2nd stage");
    }
    PQclear(result);

    pool_debug("exec_recovery: finish recovery");

    return r;
}
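/*
 * A stand-alone sketch of the PQexec()/PQresultStatus() pattern used by
 * exec_recovery() above. Assumes libpq (compile with -lpq) and a locally
 * reachable server; the connection string and query are illustrative.
 */
#include <stdio.h>
#include <libpq-fe.h>

int main(void)
{
    PGconn   *conn = PQconnectdb("dbname=postgres");
    PGresult *res;

    if (PQstatus(conn) != CONNECTION_OK)
    {
        fprintf(stderr, "connection failed: %s", PQerrorMessage(conn));
        PQfinish(conn);
        return 1;
    }

    res = PQexec(conn, "SELECT now()");
    /* a SELECT must come back as PGRES_TUPLES_OK; anything else is an error */
    if (PQresultStatus(res) != PGRES_TUPLES_OK)
        fprintf(stderr, "query failed: %s", PQerrorMessage(conn));
    else
        printf("server time: %s\n", PQgetvalue(res, 0, 0));

    PQclear(res);
    PQfinish(conn);
    return 0;
}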
int wd_IP_down(void)
{
    int rtn = WD_OK;
    char path[WD_MAX_PATH_LEN];
    char cmd[128];
    int i;

    if (strlen(pool_config->delegate_IP) == 0)
        return WD_NG;

    if (WD_List->delegate_ip_flag == 1)
    {
        WD_List->delegate_ip_flag = 0;

        wd_get_cmd(cmd, pool_config->if_down_cmd);
        snprintf(path, sizeof(path), "%s/%s", pool_config->ifconfig_path, cmd);
        rtn = exec_ifconfig(path, pool_config->if_down_cmd);

        if (rtn == WD_OK)
        {
            /* give the interface a few chances to actually release the IP */
            for (i = 0; i < 3; i++)
            {
                if (wd_is_unused_ip(pool_config->delegate_IP))
                    break;
            }

            if (i >= 3)
                rtn = WD_NG;
        }

        if (rtn == WD_OK)
            pool_log("wd_IP_down: ifconfig down succeeded");
        else
            pool_error("wd_IP_down: ifconfig down failed");
    }
    else
    {
        pool_debug("wd_IP_down: not delegate IP holder");
    }
    return rtn;
}
/*
 * Connect to PostgreSQL server by using UNIX domain socket.
 * If retry is true, retry to call connect() upon receiving EINTR error.
 */
int connect_unix_domain_socket_by_port(int port, char *socket_dir, bool retry)
{
    struct sockaddr_un addr;
    int fd;
    int len;

    fd = socket(AF_UNIX, SOCK_STREAM, 0);
    if (fd == -1)
    {
        pool_error("connect_unix_domain_socket_by_port: socket() failed: %s", strerror(errno));
        return -1;
    }

    memset((char *) &addr, 0, sizeof(addr));
    addr.sun_family = AF_UNIX;
    snprintf(addr.sun_path, sizeof(addr.sun_path), "%s/.s.PGSQL.%d", socket_dir, port);
    len = sizeof(struct sockaddr_un);

    for (;;)
    {
        if (exit_request)       /* exit request already sent */
        {
            pool_log("connect_unix_domain_socket_by_port: exit request has been sent");
            close(fd);
            return -1;
        }

        if (connect(fd, (struct sockaddr *)&addr, len) < 0)
        {
            if ((errno == EINTR && retry) || errno == EAGAIN)
                continue;

            pool_error("connect_unix_domain_socket_by_port: connect() failed to %s: %s",
                       addr.sun_path, strerror(errno));
            close(fd);
            return -1;
        }
        break;
    }

    return fd;
}
/*
 * Convert logid/recoff style text to a 64bit log location (LSN)
 */
static unsigned long long int text_to_lsn(char *text)
{
/*
 * WAL segment size in bytes. XXX We should fetch this from
 * PostgreSQL, rather than having a fixed value.
 */
#define WALSEGMENTSIZE 16 * 1024 * 1024

    unsigned int xlogid;
    unsigned int xrecoff;
    unsigned long long int lsn;

    if (sscanf(text, "%X/%X", &xlogid, &xrecoff) != 2)
    {
        pool_error("text_to_lsn: wrong log location format: %s", text);
        return 0;
    }

    /* declared 64 bits wide so the computed lsn is not truncated */
    lsn = xlogid * ((unsigned long long int)0xffffffff - WALSEGMENTSIZE) + xrecoff;
#ifdef DEBUG
    pool_log("lsn: %X %X %llX", xlogid, xrecoff, lsn);
#endif
    return lsn;
}
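/*
 * A stand-alone sketch exercising the "%X/%X" parse above. The sample
 * string mimics the xlogid/xrecoff text PostgreSQL reports for WAL
 * positions; the value and names here are illustrative only.
 */
#include <stdio.h>

int main(void)
{
    unsigned int xlogid, xrecoff;
    const char *text = "16/B374D848";   /* example WAL location text */

    if (sscanf(text, "%X/%X", &xlogid, &xrecoff) != 2)
    {
        fprintf(stderr, "wrong log location format: %s\n", text);
        return 1;
    }
    printf("xlogid=%X xrecoff=%X\n", xlogid, xrecoff);
    return 0;
}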
/*
 * signal handler for SIGALRM
 */
static RETSIGTYPE authentication_timeout(int sig)
{
    pool_log("authentication timed out");
    child_exit(1);
}
/*
 * flush write buffer
 */
int pool_flush_it(POOL_CONNECTION *cp)
{
    int sts;
    int wlen;
    int offset;

    wlen = cp->wbufpo;

    if (wlen == 0)
    {
        return 0;
    }

    offset = 0;

    for (;;)
    {
        errno = 0;

#ifdef NOT_USED
        if (!cp->isbackend)
        {
            fd_set writemask;
            fd_set exceptmask;

            FD_ZERO(&writemask);
            FD_ZERO(&exceptmask);
            FD_SET(cp->fd, &writemask);
            FD_SET(cp->fd, &exceptmask);

            sts = select(cp->fd+1, NULL, &writemask, &exceptmask, NULL);
            if (sts == -1)
            {
                if (errno == EAGAIN || errno == EINTR || errno == EWOULDBLOCK)
                    continue;

                pool_error("pool_flush_it: select() failed. reason: %s", strerror(errno));
                cp->wbufpo = 0;
                return -1;
            }
            else if (sts == 0)
            {
                continue;
            }
            else if (FD_ISSET(cp->fd, &exceptmask))
            {
                pool_log("pool_flush_it: exception occurred");
                cp->wbufpo = 0;
                return -1;
            }
        }
#endif

        if (cp->ssl_active > 0)
        {
            sts = pool_ssl_write(cp, cp->wbuf + offset, wlen);
        }
        else
        {
            sts = write(cp->fd, cp->wbuf + offset, wlen);
        }

        if (sts > 0)
        {
            wlen -= sts;

            if (wlen == 0)
            {
                /* write completed */
                break;
            }
            else if (wlen < 0)
            {
                pool_error("pool_flush_it: invalid write size %d", sts);
                cp->wbufpo = 0;
                return -1;
            }
            else
            {
                /* need to write remaining data */
                offset += sts;
                continue;
            }
        }
        else if (errno == EAGAIN || errno == EINTR)
        {
            continue;
        }
        else
        {
            /*
             * If this is the backend stream, report an error. Otherwise
             * just report a debug message.
             */
            if (cp->isbackend)
                pool_error("pool_flush_it: write failed to backend (%d). reason: %s offset: %d wlen: %d",
                           cp->db_node_id, strerror(errno), offset, wlen);
            else
                pool_debug("pool_flush_it: write failed to frontend. reason: %s offset: %d wlen: %d",
                           strerror(errno), offset, wlen);
            cp->wbufpo = 0;
            return -1;
        }
    }

    cp->wbufpo = 0;
    return 0;
}
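/*
 * The minimal partial-write loop the function above implements, without
 * the pgpool buffer bookkeeping: a stand-alone helper sketch. write()
 * may transfer fewer bytes than requested, so advance an offset and
 * retry; EINTR/EAGAIN are transient and simply retried.
 */
#include <errno.h>
#include <unistd.h>

/* returns 0 on success, -1 on unrecoverable error */
static int write_all(int fd, const char *buf, size_t len)
{
    size_t offset = 0;

    while (offset < len)
    {
        ssize_t n = write(fd, buf + offset, len - offset);

        if (n > 0)
            offset += n;                /* partial write: advance and retry */
        else if (n < 0 && (errno == EINTR || errno == EAGAIN))
            continue;                   /* interrupted: retry the same chunk */
        else
            return -1;                  /* real error (or n == 0) */
    }
    return 0;
}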
/*
 * Removes a shared memory segment from the process' address space (called as
 * an on_shmem_exit callback, hence the funny argument list)
 */
static void IpcMemoryDetach(int status, Datum shmaddr)
{
    if (shmdt((void *) shmaddr) < 0)
        pool_log("shmdt(%p) failed: %s", (void *) shmaddr, strerror(errno));
}
/*
 * find connection by user and database
 */
POOL_CONNECTION_POOL *pool_get_cp(char *user, char *database, int protoMajor, int check_socket)
{
#ifdef HAVE_SIGPROCMASK
    sigset_t oldmask;
#else
    int oldmask;
#endif

    int i, freed = 0;
    ConnectionInfo *info;

    POOL_CONNECTION_POOL *p = pool_connection_pool;

    if (p == NULL)
    {
        pool_error("pool_get_cp: pool_connection_pool is not initialized");
        return NULL;
    }

    POOL_SETMASK2(&BlockSig, &oldmask);

    for (i = 0; i < pool_config->max_pool; i++)
    {
        if (MASTER_CONNECTION(p) &&
            MASTER_CONNECTION(p)->sp &&
            MASTER_CONNECTION(p)->sp->major == protoMajor &&
            MASTER_CONNECTION(p)->sp->user != NULL &&
            strcmp(MASTER_CONNECTION(p)->sp->user, user) == 0 &&
            strcmp(MASTER_CONNECTION(p)->sp->database, database) == 0)
        {
            int sock_broken = 0;
            int j;

            /* mark this connection as in use */
            MASTER_CONNECTION(p)->closetime = 0;
            for (j = 0; j < NUM_BACKENDS; j++)
            {
                p->info[j].counter++;
            }
            POOL_SETMASK(&oldmask);

            if (check_socket)
            {
                for (j = 0; j < NUM_BACKENDS; j++)
                {
                    if (!VALID_BACKEND(j))
                        continue;

                    if (CONNECTION_SLOT(p, j))
                    {
                        sock_broken = check_socket_status(CONNECTION(p, j)->fd);
                        if (sock_broken < 0)
                            break;
                    }
                    else
                    {
                        sock_broken = -1;
                        break;
                    }
                }

                if (sock_broken < 0)
                {
                    pool_log("connection closed. retry to create new connection pool.");

                    for (j = 0; j < NUM_BACKENDS; j++)
                    {
                        if (!VALID_BACKEND(j) || (CONNECTION_SLOT(p, j) == NULL))
                            continue;

                        if (!freed)
                        {
                            pool_free_startup_packet(CONNECTION_SLOT(p, j)->sp);
                            freed = 1;
                        }
                        pool_close(CONNECTION(p, j));
                        free(CONNECTION_SLOT(p, j));
                    }

                    /* clear the whole pool entry, preserving the info pointer */
                    info = p->info;
                    memset(p, 0, sizeof(POOL_CONNECTION_POOL));
                    p->info = info;
                    memset(p->info, 0, sizeof(ConnectionInfo) * MAX_NUM_BACKENDS);
                    POOL_SETMASK(&oldmask);
                    return NULL;
                }
            }
            POOL_SETMASK(&oldmask);
            pool_index = i;
            return p;
        }
        p++;
    }

    POOL_SETMASK(&oldmask);
    return NULL;
}
static int wd_send_response(int sock, WdPacket * recv_pack)
{
    int rtn = WD_NG;
    WdInfo * p, *q;
    WdNodeInfo * node;
    WdLockInfo * lock;
    WdPacket send_packet;
    struct timeval tv;
    char pack_str[WD_MAX_PACKET_STRING];
    int pack_str_len;
    char hash[(MD5_PASSWD_LEN+1)*2];
    bool is_node_packet = false;

    if (recv_pack == NULL)
    {
        return rtn;
    }
    memset(&send_packet, 0, sizeof(WdPacket));
    p = &(recv_pack->wd_body.wd_info);

    /* authentication */
    if (strlen(pool_config->wd_authkey))
    {
        /* calculate hash from packet */
        pack_str_len = wd_packet_to_string(*recv_pack, pack_str, sizeof(pack_str));
        wd_calc_hash(pack_str, pack_str_len, hash);

        if (strcmp(recv_pack->hash, hash))
        {
            pool_log("wd_send_response: watchdog authentication failed");
            rtn = wd_authentication_failed(sock);
            return rtn;
        }
    }

    /* set response packet no */
    switch (recv_pack->packet_no)
    {
        /* add request into the watchdog list */
        case WD_ADD_REQ:
            p = &(recv_pack->wd_body.wd_info);
            if (wd_set_wd_list(p->hostname, p->pgpool_port, p->wd_port,
                               p->delegate_ip, &(p->tv), p->status) > 0)
            {
                send_packet.packet_no = WD_ADD_ACCEPT;
            }
            else
            {
                send_packet.packet_no = WD_ADD_REJECT;
            }
            memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
            break;

        /* announce candidacy to be the new master */
        case WD_STAND_FOR_MASTER:
            p = &(recv_pack->wd_body.wd_info);
            wd_set_wd_list(p->hostname, p->pgpool_port, p->wd_port,
                           p->delegate_ip, &(p->tv), p->status);
            /* check whether a master already exists */
            if ((q = wd_is_alive_master()) != NULL)
            {
                /* vote against the candidate */
                send_packet.packet_no = WD_MASTER_EXIST;
                memcpy(&(send_packet.wd_body.wd_info), q, sizeof(WdInfo));
            }
            else
            {
                if (WD_MYSELF->tv.tv_sec <= p->tv.tv_sec)
                {
                    memcpy(&tv, &(p->tv), sizeof(struct timeval));
                    tv.tv_sec += 1;
                    wd_set_myself(&tv, WD_NORMAL);
                }
                /* vote for the candidate */
                send_packet.packet_no = WD_VOTE_YOU;
                memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
            }
            break;

        /* announce assumption to be the new master */
        case WD_DECLARE_NEW_MASTER:
            p = &(recv_pack->wd_body.wd_info);
            wd_set_wd_list(p->hostname, p->pgpool_port, p->wd_port,
                           p->delegate_ip, &(p->tv), p->status);
            if (WD_MYSELF->status == WD_MASTER)
            {
                /* resign as master server */
                pool_log("wd_declare_new_master: ifconfig down to resign master server");
                wd_IP_down();
                wd_set_myself(NULL, WD_NORMAL);
            }
            send_packet.packet_no = WD_READY;
            memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
            break;

        /* announce candidacy for the lock holder role */
        case WD_STAND_FOR_LOCK_HOLDER:
            p = &(recv_pack->wd_body.wd_info);
            wd_set_wd_list(p->hostname, p->pgpool_port, p->wd_port,
                           p->delegate_ip, &(p->tv), p->status);
            /* only the master handles the lock holder privilege */
            if (WD_MYSELF->status == WD_MASTER)
            {
                /* if a lock holder already exists, reject the request */
                if (wd_get_lock_holder() != NULL)
                {
                    send_packet.packet_no = WD_LOCK_HOLDER_EXIST;
                }
            }
            memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
            break;

        case WD_DECLARE_LOCK_HOLDER:
            p = &(recv_pack->wd_body.wd_info);
            wd_set_wd_list(p->hostname, p->pgpool_port, p->wd_port,
                           p->delegate_ip, &(p->tv), p->status);
            wd_set_lock_holder(p, true);
            send_packet.packet_no = WD_READY;
            memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
            break;

        /* announce resignation of the lock holder role */
        case WD_RESIGN_LOCK_HOLDER:
            p = &(recv_pack->wd_body.wd_info);
            wd_set_wd_list(p->hostname, p->pgpool_port, p->wd_port,
                           p->delegate_ip, &(p->tv), p->status);
            wd_set_lock_holder(p, false);
            send_packet.packet_no = WD_READY;
            memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
            break;

        case WD_START_INTERLOCK:
            p = &(recv_pack->wd_body.wd_info);
            wd_set_wd_list(p->hostname, p->pgpool_port, p->wd_port,
                           p->delegate_ip, &(p->tv), p->status);
            wd_set_interlocking(p, true);
            break;

        case WD_END_INTERLOCK:
            p = &(recv_pack->wd_body.wd_info);
            wd_set_wd_list(p->hostname, p->pgpool_port, p->wd_port,
                           p->delegate_ip, &(p->tv), p->status);
            wd_set_interlocking(p, false);
            break;

        /* announce that a server is down */
        case WD_SERVER_DOWN:
            p = &(recv_pack->wd_body.wd_info);
            wd_set_wd_list(p->hostname, p->pgpool_port, p->wd_port,
                           p->delegate_ip, &(p->tv), WD_DOWN);
            send_packet.packet_no = WD_READY;
            memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
            if (wd_am_I_oldest() == WD_OK && WD_MYSELF->status != WD_MASTER)
            {
                wd_escalation();
            }
            break;

        /* announce the start of online recovery */
        case WD_START_RECOVERY:
            if (*InRecovery != RECOVERY_INIT)
            {
                send_packet.packet_no = WD_NODE_FAILED;
            }
            else
            {
                send_packet.packet_no = WD_NODE_READY;
                *InRecovery = RECOVERY_ONLINE;
                if (wait_connection_closed() != 0)
                {
                    send_packet.packet_no = WD_NODE_FAILED;
                }
            }
            break;

        case WD_END_RECOVERY:
            send_packet.packet_no = WD_NODE_READY;
            *InRecovery = RECOVERY_INIT;
            kill(wd_ppid, SIGUSR2);
            break;

        case WD_FAILBACK_REQUEST:
            node = &(recv_pack->wd_body.wd_node_info);
            wd_set_node_mask(WD_FAILBACK_REQUEST, node->node_id_set, node->node_num);
            is_node_packet = true;
            send_packet.packet_no = WD_NODE_READY;
            break;

        case WD_DEGENERATE_BACKEND:
            node = &(recv_pack->wd_body.wd_node_info);
            wd_set_node_mask(WD_DEGENERATE_BACKEND, node->node_id_set, node->node_num);
            is_node_packet = true;
            send_packet.packet_no = WD_NODE_READY;
            break;

        case WD_PROMOTE_BACKEND:
            node = &(recv_pack->wd_body.wd_node_info);
            wd_set_node_mask(WD_PROMOTE_BACKEND, node->node_id_set, node->node_num);
            is_node_packet = true;
            send_packet.packet_no = WD_NODE_READY;
            break;

        case WD_UNLOCK_REQUEST:
            lock = &(recv_pack->wd_body.wd_lock_info);
            wd_set_lock(lock->lock_id, false);
            send_packet.packet_no = WD_LOCK_READY;
            break;

        default:
            send_packet.packet_no = WD_INVALID;
            memcpy(&(send_packet.wd_body.wd_info), WD_MYSELF, sizeof(WdInfo));
            break;
    }

    /* send response packet */
    rtn = wd_send_packet(sock, &send_packet);

    /*
     * send node request signal.
     * wd_node_request_signal() uses a semaphore lock internally, so it should
     * be called after sending the response packet to prevent a deadlock.
     */
    if (is_node_packet)
        wd_node_request_signal(recv_pack->packet_no, node);

    return rtn;
}
/*
 * Connect to PostgreSQL server by using INET domain socket.
 * If retry is true, retry to call connect() upon receiving EINTR error.
 */
int connect_inet_domain_socket_by_port(char *host, int port, bool retry)
{
    int fd;
    int len;
    int on = 1;
    struct sockaddr_in addr;
    struct hostent *hp;

    fd = socket(AF_INET, SOCK_STREAM, 0);
    if (fd < 0)
    {
        pool_error("connect_inet_domain_socket_by_port: socket() failed: %s", strerror(errno));
        return -1;
    }

    /* set nodelay */
    if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
                   (char *) &on, sizeof(on)) < 0)
    {
        pool_error("connect_inet_domain_socket_by_port: setsockopt() failed: %s", strerror(errno));
        close(fd);
        return -1;
    }

    memset((char *) &addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(port);
    len = sizeof(struct sockaddr_in);

    hp = gethostbyname(host);
    if ((hp == NULL) || (hp->h_addrtype != AF_INET))
    {
        pool_error("connect_inet_domain_socket: gethostbyname() failed: %s host: %s",
                   hstrerror(h_errno), host);
        close(fd);
        return -1;
    }
    memmove((char *) &(addr.sin_addr), (char *) hp->h_addr, hp->h_length);

    pool_set_nonblock(fd);

    for (;;)
    {
        if (exit_request)       /* exit request already sent */
        {
            pool_log("connect_inet_domain_socket_by_port: exit request has been sent");
            close(fd);
            return -1;
        }

        if (health_check_timer_expired)     /* has the health check timer expired? */
        {
            pool_log("connect_inet_domain_socket_by_port: health check timer expired");
            close(fd);
            return -1;
        }

        if (connect(fd, (struct sockaddr *)&addr, len) < 0)
        {
            if (errno == EISCONN)
            {
                /* Socket is already connected */
                break;
            }

            if ((errno == EINTR && retry) || errno == EAGAIN)
                continue;

            /* A non-blocking fd could return these */
            if (errno == EINPROGRESS || errno == EALREADY)
                continue;

            pool_error("connect_inet_domain_socket: connect() failed: %s", strerror(errno));
            close(fd);
            return -1;
        }
        break;
    }

    pool_unset_nonblock(fd);
    return fd;
}
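/*
 * A stand-alone sketch of a gentler way to wait out EINPROGRESS than the
 * retry loop above: select() for writability, then read SO_ERROR for the
 * deferred connect() result. Illustrative only; this is not how pgpool
 * itself structures the wait.
 */
#include <errno.h>
#include <sys/select.h>
#include <sys/socket.h>

/* returns 0 once the pending connect() on fd has completed, -1 on failure */
static int wait_connect_done(int fd)
{
    fd_set wfds;
    int err = 0;
    socklen_t errlen = sizeof(err);

    FD_ZERO(&wfds);
    FD_SET(fd, &wfds);

    /* a nonblocking connect() signals completion by becoming writable */
    if (select(fd + 1, NULL, &wfds, NULL, NULL) <= 0)
        return -1;

    /* SO_ERROR holds the deferred connect() result (0 on success) */
    if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) < 0 || err != 0)
    {
        if (err != 0)
            errno = err;
        return -1;
    }
    return 0;
}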
/*
 * create actual connections to backends
 */
static POOL_CONNECTION_POOL *new_connection(POOL_CONNECTION_POOL *p)
{
    POOL_CONNECTION_POOL_SLOT *s;
    int active_backend_count = 0;
    int i;

    for (i = 0; i < NUM_BACKENDS; i++)
    {
        pool_debug("new_connection: connecting %d backend", i);

        if (!VALID_BACKEND(i))
        {
            pool_debug("new_connection: skipping slot %d because backend_status = %d",
                       i, BACKEND_INFO(i).backend_status);
            continue;
        }

        s = malloc(sizeof(POOL_CONNECTION_POOL_SLOT));
        if (s == NULL)
        {
            pool_error("new_connection: malloc() failed");
            return NULL;
        }

        if (create_cp(s, i) == NULL)
        {
            /* connection failed. mark this backend down */
            pool_error("new_connection: create_cp() failed");

            /*
             * If fail_over_on_backend_error is true, do failover.
             * Otherwise, just exit this session.
             */
            if (pool_config->fail_over_on_backend_error)
            {
                notice_backend_error(i);
            }
            else
            {
                pool_log("new_connection: do not failover because fail_over_on_backend_error is off");
            }
            child_exit(1);
        }

        p->info[i].create_time = time(NULL);
        p->slots[i] = s;

        if (pool_init_params(&s->con->params))
        {
            return NULL;
        }

        BACKEND_INFO(i).backend_status = CON_UP;
        active_backend_count++;
    }

    if (active_backend_count > 0)
    {
        return p;
    }

    return NULL;
}
/*
 * read a string until EOF or NULL is encountered.
 * if line is not 0, read until a new line is encountered.
 */
char *pool_read_string(POOL_CONNECTION *cp, int *len, int line)
{
    int readp;
    int readsize;
    int readlen;
    int strlength;
    int flag;
    int consume_size;

#ifdef DEBUG
    static char pbuf[READBUFSZ];
#endif

    *len = 0;
    readp = 0;

    /* initialize read buffer */
    if (cp->sbufsz == 0)
    {
        cp->sbuf = malloc(READBUFSZ);
        if (cp->sbuf == NULL)
        {
            pool_error("pool_read_string: malloc failed");
            return NULL;
        }
        cp->sbufsz = READBUFSZ;
        *cp->sbuf = '\0';
    }

    /* any pending data? */
    if (cp->len)
    {
        if (line)
            strlength = mystrlinelen(cp->hp+cp->po, cp->len, &flag);
        else
            strlength = mystrlen(cp->hp+cp->po, cp->len, &flag);

        /* buffer is too small? */
        if ((strlength + 1) > cp->sbufsz)
        {
            cp->sbufsz = ((strlength+1)/READBUFSZ+1)*READBUFSZ;
            cp->sbuf = realloc(cp->sbuf, cp->sbufsz);
            if (cp->sbuf == NULL)
            {
                pool_error("pool_read_string: realloc failed");
                return NULL;
            }
        }

        /* consume pending data and save it to the read string buffer */
        consume_size = consume_pending_data(cp, cp->sbuf, strlength);

        *len = strlength;

        /* is the string null terminated? */
        if (consume_size == strlength && !flag)
        {
            /*
             * not null or line terminated.
             * we need to read more since we have not encountered NULL or a new line yet
             */
            readsize = cp->sbufsz - strlength;
            readp = strlength;
        }
        else
        {
            pool_debug("pool_read_string: read all from pending data. po:%d len:%d",
                       cp->po, cp->len);
            return cp->sbuf;
        }
    }
    else
    {
        readsize = cp->sbufsz;
    }

    for (;;)
    {
        if (pool_check_fd(cp))
        {
            if (!IS_MASTER_NODE_ID(cp->db_node_id))
            {
                pool_log("pool_read_string: data is not ready in DB node:%d. abort this session",
                         cp->db_node_id);
                exit(1);
            }
            else
            {
                pool_error("pool_read_string: pool_check_fd failed (%s)", strerror(errno));
                return NULL;
            }
        }

        if (cp->ssl_active > 0)
        {
            readlen = pool_ssl_read(cp, cp->sbuf+readp, readsize);
        }
        else
        {
            readlen = read(cp->fd, cp->sbuf+readp, readsize);
        }

        if (readlen == -1)
        {
            pool_error("pool_read_string: read() failed. reason:%s", strerror(errno));

            if (cp->isbackend)
            {
                notice_backend_error(cp->db_node_id);
                child_exit(1);
            }
            else
            {
                return NULL;
            }
        }
        else if (readlen == 0)      /* EOF detected */
        {
            /*
             * just return an error; do not trigger failover or degeneration
             */
            pool_error("pool_read_string: read() EOF detected");
            return NULL;
        }

        /* check overrun */
        if (line)
            strlength = mystrlinelen(cp->sbuf+readp, readlen, &flag);
        else
            strlength = mystrlen(cp->sbuf+readp, readlen, &flag);

        if (strlength < readlen)
        {
            save_pending_data(cp, cp->sbuf+readp+strlength, readlen-strlength);
            *len += strlength;
            pool_debug("pool_read_string: total result %d with pending data po:%d len:%d",
                       *len, cp->po, cp->len);
            return cp->sbuf;
        }

        *len += readlen;

        /* encountered null or newline? */
        if (flag)
        {
            /* ok we have read all data */
            pool_debug("pool_read_string: total result %d ", *len);
            break;
        }

        readp += readlen;
        readsize = READBUFSZ;

        if ((*len+readsize) > cp->sbufsz)
        {
            cp->sbufsz += READBUFSZ;
            cp->sbuf = realloc(cp->sbuf, cp->sbufsz);
            if (cp->sbuf == NULL)
            {
                pool_error("pool_read_string: realloc failed");
                return NULL;
            }
        }
    }
    return cp->sbuf;
}
/*
 * process cancel request
 */
void cancel_request(CancelPacket *sp)
{
    int len;
    int fd;
    POOL_CONNECTION *con;
    int i, j, k;
    ConnectionInfo *c = NULL;
    CancelPacket cp;
    bool found = false;

    pool_debug("Cancel request received");

    /* look for the cancel key in shmem info */
    for (i = 0; i < pool_config->num_init_children; i++)
    {
        for (j = 0; j < pool_config->max_pool; j++)
        {
            for (k = 0; k < NUM_BACKENDS; k++)
            {
                c = pool_coninfo(i, j, k);
                pool_debug("con_info: address:%p database:%s user:%s pid:%d key:%d i:%d",
                           c, c->database, c->user, ntohl(c->pid), ntohl(c->key), i);

                if (c->pid == sp->pid && c->key == sp->key)
                {
                    pool_debug("found pid:%d key:%d i:%d", ntohl(c->pid), ntohl(c->key), i);
                    c = pool_coninfo(i, j, 0);
                    found = true;
                    goto found;
                }
            }
        }
    }

 found:
    if (!found)
    {
        pool_error("cancel_request: invalid cancel key: pid:%d key:%d",
                   ntohl(sp->pid), ntohl(sp->key));
        return;     /* invalid key */
    }

    for (i = 0; i < NUM_BACKENDS; i++, c++)
    {
        if (!VALID_BACKEND(i))
            continue;

        if (*(BACKEND_INFO(i).backend_hostname) == '/')
            fd = connect_unix_domain_socket(i, TRUE);
        else
            fd = connect_inet_domain_socket(i, TRUE);

        if (fd < 0)
        {
            pool_error("Could not create socket for sending cancel request for backend %d", i);
            return;
        }

        con = pool_open(fd);
        if (con == NULL)
            return;

        len = htonl(sizeof(len) + sizeof(CancelPacket));
        pool_write(con, &len, sizeof(len));

        cp.protoVersion = sp->protoVersion;
        cp.pid = c->pid;
        cp.key = c->key;

        pool_log("cancel_request: canceling backend pid:%d key: %d",
                 ntohl(cp.pid), ntohl(cp.key));

        if (pool_write_and_flush(con, &cp, sizeof(CancelPacket)) < 0)
            pool_error("Could not send cancel request packet for backend %d", i);

        pool_close(con);

        /*
         * this is needed to ensure that the next DB node executes the
         * query supposed to be canceled.
         */
        sleep(1);
    }
}
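/*
 * A stand-alone sketch of the v3 wire format of the cancel request being
 * forwarded above. The packet is 16 bytes: int32 length, int32 cancel
 * request code 80877102 (the 1234/5678 "major/minor" pair seen in
 * do_child), then the backend pid and secret key, all in network byte
 * order. The helper name is illustrative.
 */
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

static void build_cancel_packet(char out[16], uint32_t pid, uint32_t key)
{
    uint32_t len  = htonl(16);
    uint32_t code = htonl((1234 << 16) | 5678);   /* == 80877102 */
    uint32_t npid = htonl(pid);
    uint32_t nkey = htonl(key);

    memcpy(out,      &len,  4);
    memcpy(out + 4,  &code, 4);
    memcpy(out + 8,  &npid, 4);
    memcpy(out + 12, &nkey, 4);
}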
int wd_reaper_watchdog(pid_t pid, int status)
{
    int i;

    /* watchdog lifecheck process exits */
    if (pid == lifecheck_pid)
    {
        if (WIFSIGNALED(status))
            pool_debug("watchdog lifecheck process %d exits with status %d by signal %d",
                       pid, status, WTERMSIG(status));
        else
            pool_debug("watchdog lifecheck process %d exits with status %d", pid, status);

        lifecheck_pid = fork_a_lifecheck(1);
        if (lifecheck_pid < 0)
        {
            pool_error("wd_reaper: fork a watchdog lifecheck process failed");
            return 0;
        }
        pool_log("fork a new watchdog lifecheck pid %d", lifecheck_pid);
    }

    /* watchdog child process exits */
    else if (pid == child_pid)
    {
        if (WIFSIGNALED(status))
            pool_debug("watchdog child process %d exits with status %d by signal %d",
                       pid, status, WTERMSIG(status));
        else
            pool_debug("watchdog child process %d exits with status %d", pid, status);

        child_pid = wd_child(1);
        if (child_pid < 0)
        {
            pool_error("wd_reaper: fork a watchdog child process failed");
            return 0;
        }
        pool_log("fork a new watchdog child pid %d", child_pid);
    }

    /* receiver/sender process exits */
    else
    {
        for (i = 0; i < pool_config->num_hb_if; i++)
        {
            if (pid == hb_receiver_pid[i])
            {
                if (WIFSIGNALED(status))
                    pool_debug("watchdog heartbeat receiver process %d exits with status %d by signal %d",
                               pid, status, WTERMSIG(status));
                else
                    pool_debug("watchdog heartbeat receiver process %d exits with status %d", pid, status);

                hb_receiver_pid[i] = wd_hb_receiver(1, pool_config->hb_if[i]);
                if (hb_receiver_pid[i] < 0)
                {
                    pool_error("wd_reaper: fork a watchdog heartbeat receiver process failed");
                    return 0;
                }
                pool_log("fork a new watchdog heartbeat receiver: pid %d", hb_receiver_pid[i]);
                break;
            }
            else if (pid == hb_sender_pid[i])
            {
                if (WIFSIGNALED(status))
                    pool_debug("watchdog heartbeat sender process %d exits with status %d by signal %d",
                               pid, status, WTERMSIG(status));
                else
                    pool_debug("watchdog heartbeat sender process %d exits with status %d", pid, status);

                hb_sender_pid[i] = wd_hb_sender(1, pool_config->hb_if[i]);
                if (hb_sender_pid[i] < 0)
                {
                    pool_error("wd_reaper: fork a watchdog heartbeat sender process failed");
                    return 0;
                }
                pool_log("fork a new watchdog heartbeat sender: pid %d", hb_sender_pid[i]);
                break;
            }
        }
    }
    return 1;
}
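/*
 * A stand-alone sketch of the generic SIGCHLD reaping loop behind
 * handlers like the one above: collect every exited child without
 * blocking and distinguish signal deaths from normal exits.
 */
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>

static void reap_children(void)
{
    int status;
    pid_t pid;

    /* WNOHANG: return immediately once no more children have exited */
    while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
    {
        if (WIFSIGNALED(status))
            printf("child %d killed by signal %d\n", (int) pid, WTERMSIG(status));
        else if (WIFEXITED(status))
            printf("child %d exited with status %d\n", (int) pid, WEXITSTATUS(status));
    }
}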
/*
 * perform accept() and return new fd
 */
static POOL_CONNECTION *do_accept(int unix_fd, int inet_fd, struct timeval *timeout)
{
    fd_set readmask;
    int fds;
    int save_errno;

    SockAddr saddr;
    int fd = 0;
    int afd;
    int inet = 0;
    POOL_CONNECTION *cp;
#ifdef ACCEPT_PERFORMANCE
    struct timeval now1, now2;
    static long atime;
    static int cnt;
#endif
    struct timeval *timeoutval;
    struct timeval tv1, tv2, tmback = {0, 0};

    set_ps_display("wait for connection request", false);

    /* Destroy session context, just in case... */
    pool_session_context_destroy();

    FD_ZERO(&readmask);
    FD_SET(unix_fd, &readmask);
    if (inet_fd)
        FD_SET(inet_fd, &readmask);

    if (timeout->tv_sec == 0 && timeout->tv_usec == 0)
        timeoutval = NULL;
    else
    {
        timeoutval = timeout;
        tmback.tv_sec = timeout->tv_sec;
        tmback.tv_usec = timeout->tv_usec;
        gettimeofday(&tv1, NULL);

#ifdef DEBUG
        pool_log("before select = {%d, %d}", timeoutval->tv_sec, timeoutval->tv_usec);
        pool_log("g:before select = {%d, %d}", tv1.tv_sec, tv1.tv_usec);
#endif
    }

    fds = select(Max(unix_fd, inet_fd)+1, &readmask, NULL, NULL, timeoutval);

    save_errno = errno;
    /* check if the backend timer has expired */
    if (backend_timer_expired)
    {
        pool_backend_timer();
        backend_timer_expired = 0;
    }

    /*
     * The following code fragment computes the remaining timeout value in a
     * portable way. Linux does this automatically but other platforms do not.
     */
    if (timeoutval)
    {
        gettimeofday(&tv2, NULL);

        tmback.tv_usec -= tv2.tv_usec - tv1.tv_usec;
        tmback.tv_sec -= tv2.tv_sec - tv1.tv_sec;

        if (tmback.tv_usec < 0)
        {
            tmback.tv_sec--;
            if (tmback.tv_sec < 0)
            {
                timeout->tv_sec = 0;
                timeout->tv_usec = 0;
            }
            else
            {
                tmback.tv_usec += 1000000;
                timeout->tv_sec = tmback.tv_sec;
                timeout->tv_usec = tmback.tv_usec;
            }
        }
#ifdef DEBUG
        pool_log("g:after select = {%d, %d}", tv2.tv_sec, tv2.tv_usec);
        pool_log("after select = {%d, %d}", timeout->tv_sec, timeout->tv_usec);
#endif
    }
    errno = save_errno;

    if (fds == -1)
    {
        if (errno == EAGAIN || errno == EINTR)
            return NULL;

        pool_error("select() failed. reason %s", strerror(errno));
        return NULL;
    }

    /* timeout */
    if (fds == 0)
    {
        return NULL;
    }

    if (FD_ISSET(unix_fd, &readmask))
    {
        fd = unix_fd;
    }

    if (FD_ISSET(inet_fd, &readmask))
    {
        fd = inet_fd;
        inet++;
    }

    /*
     * Note that some SysV systems do not work here. For those
     * systems, we need some locking mechanism for the fd.
     */
    memset(&saddr, 0, sizeof(saddr));
    saddr.salen = sizeof(saddr.addr);

#ifdef ACCEPT_PERFORMANCE
    gettimeofday(&now1, 0);
#endif

 retry_accept:

    /* wait if recovery is started */
    while (*InRecovery == 1)
    {
        pause();
    }

    afd = accept(fd, (struct sockaddr *)&saddr.addr, &saddr.salen);

    save_errno = errno;
    /* check if the backend timer has expired */
    if (backend_timer_expired)
    {
        pool_backend_timer();
        backend_timer_expired = 0;
    }
    errno = save_errno;

    if (afd < 0)
    {
        if (errno == EINTR && *InRecovery)
            goto retry_accept;

        /*
         * "Resource temporarily unavailable" (EAGAIN or EWOULDBLOCK)
         * can be silently ignored. And EINTR can be ignored.
         */
        if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR)
            pool_error("accept() failed. reason: %s", strerror(errno));
        return NULL;
    }
#ifdef ACCEPT_PERFORMANCE
    gettimeofday(&now2, 0);
    atime += (now2.tv_sec - now1.tv_sec)*1000000 + (now2.tv_usec - now1.tv_usec);
    cnt++;
    if (cnt % 100 == 0)
    {
        pool_log("cnt: %d atime: %ld", cnt, atime);
    }
#endif

    /* reload config file */
    if (got_sighup)
    {
        pool_get_config(get_config_file_name(), RELOAD_CONFIG);
        if (pool_config->enable_pool_hba)
        {
            load_hba(get_hba_file_name());
            if (strcmp("", pool_config->pool_passwd))
                pool_reopen_passwd_file();
        }
        if (pool_config->parallel_mode)
            pool_memset_system_db_info(system_db_info->info);
        got_sighup = 0;
    }

    connection_count_up();
    accepted = 1;

    if (pool_config->parallel_mode)
    {
        /*
         * do not accept the new connection if any DB node or the SystemDB
         * is down when operating in parallel mode
         */
        int i;

        for (i = 0; i < NUM_BACKENDS; i++)
        {
            if (BACKEND_INFO(i).backend_status == CON_DOWN || SYSDB_STATUS == CON_DOWN)
            {
                StartupPacket *sp;
                char *msg = "pgpool is not available in parallel query mode";

                if (SYSDB_STATUS == CON_DOWN)
                    pool_log("Cannot accept() new connection. SystemDB is down");
                else
                    pool_log("Cannot accept() new connection. %d th backend is down", i);

                if ((cp = pool_open(afd)) == NULL)
                {
                    close(afd);
                    child_exit(1);
                }

                sp = read_startup_packet(cp);
                if (sp == NULL)
                {
                    /* failed to read the startup packet. return to the accept() loop */
                    pool_close(cp);
                    child_exit(1);
                }

                pool_debug("do_accept: send error message to frontend");

                if (sp->major == PROTO_MAJOR_V3)
                {
                    char buf[256];

                    if (SYSDB_STATUS == CON_DOWN)
                        snprintf(buf, sizeof(buf), "SystemDB is down");
                    else
                        snprintf(buf, sizeof(buf), "%d th backend is down", i);

                    pool_send_error_message(cp, sp->major, "08S01",
                                            msg,
                                            buf,
                                            ((SYSDB_STATUS == CON_DOWN) ?
                                             "repair the SystemDB and restart pgpool" :
                                             "repair the backend and restart pgpool"),
                                            __FILE__,
                                            __LINE__);
                }
                else
                {
                    pool_send_error_message(cp, sp->major, 0, msg, "", "", "", 0);
                }
                pool_close(cp);
                child_exit(1);
            }
        }
    }
    else
    {
        /*
         * do not accept the new connection if all DB nodes are down when
         * operating in non parallel mode
         */
        int i;
        int found = 0;

        for (i = 0; i < NUM_BACKENDS; i++)
        {
            if (VALID_BACKEND(i))
            {
                found = 1;
            }
        }
        if (found == 0)
        {
            pool_log("Cannot accept() new connection. all backends are down");
            child_exit(1);
        }
    }

    pool_debug("I am %d accept fd %d", getpid(), afd);

    pool_getnameinfo_all(&saddr, remote_host, remote_port);
    snprintf(remote_ps_data, sizeof(remote_ps_data),
             remote_port[0] == '\0' ? "%s" : "%s(%s)",
             remote_host, remote_port);

    set_ps_display("accept connection", false);

    /* log who is connecting */
    if (pool_config->log_connections)
    {
        pool_log("connection received: host=%s%s%s",
                 remote_host, remote_port[0] ? " port=" : "", remote_port);
    }

    /* set NODELAY and KEEPALIVE options if INET connection */
    if (inet)
    {
        int on = 1;

        if (setsockopt(afd, IPPROTO_TCP, TCP_NODELAY,
                       (char *) &on,
                       sizeof(on)) < 0)
        {
            pool_error("do_accept: setsockopt() failed: %s", strerror(errno));
            close(afd);
            return NULL;
        }
        if (setsockopt(afd, SOL_SOCKET, SO_KEEPALIVE,
                       (char *) &on,
                       sizeof(on)) < 0)
        {
            pool_error("do_accept: setsockopt() failed: %s", strerror(errno));
            close(afd);
            return NULL;
        }
    }

    if ((cp = pool_open(afd)) == NULL)
    {
        close(afd);
        return NULL;
    }

    /* save ip address for hba */
    memcpy(&cp->raddr, &saddr, sizeof(SockAddr));
    if (cp->raddr.addr.ss_family == 0)
        cp->raddr.addr.ss_family = AF_UNIX;

    return cp;
}
/*
 * child main loop
 */
void do_child(int unix_fd, int inet_fd)
{
    POOL_CONNECTION *frontend;
    POOL_CONNECTION_POOL *backend;
    struct timeval now;
    struct timezone tz;
    struct timeval timeout;
    static int connected;       /* non 0 if connections from the frontend have been accepted */
    int connections_count = 0;  /* used if child_max_connections > 0 */
    int found;
    char psbuf[NI_MAXHOST + 128];

    pool_debug("I am %d", getpid());

    /* Identify myself via ps */
    init_ps_display("", "", "", "");

    /* set up signal handlers */
    signal(SIGALRM, SIG_DFL);
    signal(SIGTERM, die);
    signal(SIGINT, die);
    signal(SIGHUP, reload_config_handler);
    signal(SIGQUIT, die);
    signal(SIGCHLD, SIG_DFL);
    signal(SIGUSR1, close_idle_connection);
    signal(SIGUSR2, wakeup_handler);
    signal(SIGPIPE, SIG_IGN);

#ifdef NONE_BLOCK
    /* set listen fds to non-blocking */
    pool_set_nonblock(unix_fd);
    if (inet_fd)
    {
        pool_set_nonblock(inet_fd);
    }
#endif

    /* Initialize my backend status */
    pool_initialize_private_backend_status();

    /* Initialize per process context */
    pool_init_process_context();

    /* initialize random seed */
    gettimeofday(&now, &tz);

#if defined(sun) || defined(__sun)
    srand((unsigned int) now.tv_usec);
#else
    srandom((unsigned int) now.tv_usec);
#endif

    /* initialize system db connection */
    init_system_db_connection();

    /* initialize connection pool */
    if (pool_init_cp())
    {
        child_exit(1);
    }

    /*
     * Open pool_passwd in child process. This is necessary to avoid the
     * file descriptor race condition reported in [pgpool-general: 1141].
     */
    if (strcmp("", pool_config->pool_passwd))
    {
        pool_reopen_passwd_file();
    }

    timeout.tv_sec = pool_config->child_life_time;
    timeout.tv_usec = 0;

    for (;;)
    {
        StartupPacket *sp;

        idle = 1;

        /* pgpool stop request already sent? */
        check_stop_request();

        /*
         * Check if the restart request is set because a failback event
         * happened. If so, exit myself with exit code 1 to be restarted
         * by the pgpool parent.
         */
        if (pool_get_my_process_info()->need_to_restart)
        {
            pool_log("do_child: failback event found. restart myself.");
            pool_get_my_process_info()->need_to_restart = 0;
            child_exit(1);
        }

        accepted = 0;

        /* perform accept() */
        frontend = do_accept(unix_fd, inet_fd, &timeout);

        if (frontend == NULL)   /* connection request from frontend timed out */
        {
            /* check select() timeout */
            if (connected && pool_config->child_life_time > 0 &&
                timeout.tv_sec == 0 && timeout.tv_usec == 0)
            {
                pool_debug("child life %d seconds expired", pool_config->child_life_time);
                /*
                 * Doesn't need to call this. child_exit() calls it.
                 * send_frontend_exits();
                 */
                child_exit(2);
            }
            continue;
        }

        /* set frontend fd to blocking */
        pool_unset_nonblock(frontend->fd);

        /* reset busy flag */
        idle = 0;

        /* check if the backend timer has expired */
        if (backend_timer_expired)
        {
            pool_backend_timer();
            backend_timer_expired = 0;
        }

        /* read the startup packet */
    retry_startup:
        sp = read_startup_packet(frontend);
        if (sp == NULL)
        {
            /* failed to read the startup packet. return to the accept() loop */
            pool_close(frontend);
            connection_count_down();
            continue;
        }

        /* cancel request? */
        if (sp->major == 1234 && sp->minor == 5678)
        {
            cancel_request((CancelPacket *)sp->startup_packet);
            pool_close(frontend);
            pool_free_startup_packet(sp);
            connection_count_down();
            continue;
        }

        /* SSL? */
        if (sp->major == 1234 && sp->minor == 5679 && !frontend->ssl_active)
        {
            pool_debug("SSLRequest from client");
            pool_ssl_negotiate_serverclient(frontend);
            goto retry_startup;
        }

        if (pool_config->enable_pool_hba)
        {
            /*
             * do client authentication.
             * Note that ClientAuthentication does not return if the frontend
             * was rejected; it simply terminates this process.
             */
            frontend->protoVersion = sp->major;
            frontend->database = strdup(sp->database);
            if (frontend->database == NULL)
            {
                pool_error("do_child: strdup failed: %s\n", strerror(errno));
                child_exit(1);
            }
            frontend->username = strdup(sp->user);
            if (frontend->username == NULL)
            {
                pool_error("do_child: strdup failed: %s\n", strerror(errno));
                child_exit(1);
            }
            ClientAuthentication(frontend);
        }

        /*
         * Ok, negotiation with the frontend has been done. Let's go to the
         * next step. Connect to the backend if there's no existing
         * connection which can be reused by this frontend.
         * Authentication is also done in this step.
         */

        /*
         * Check if the restart request is set because a failback event
         * happened. If so, close idle connections to the backend and make
         * a new copy of the backend status.
         */
        if (pool_get_my_process_info()->need_to_restart)
        {
            pool_log("do_child: failback event found. discard existing connections");
            pool_get_my_process_info()->need_to_restart = 0;
            close_idle_connection(0);
            pool_initialize_private_backend_status();
        }

        /*
         * if there's no connection associated with user and database,
         * we need to connect to the backend and send the startup packet.
         */

        /* look for existing connection */
        found = 0;
        backend = pool_get_cp(sp->user, sp->database, sp->major, 1);

        if (backend != NULL)
        {
            found = 1;

            /*
             * An existing connection associated with the same
             * user/database/major was found. However, we should make sure
             * that the startup packet contents are identical. OPTION data
             * and others might be different.
             */
            if (sp->len != MASTER_CONNECTION(backend)->sp->len)
            {
                pool_debug("do_child: connection exists but startup packet length is not identical");
                found = 0;
            }
            else if (memcmp(sp->startup_packet, MASTER_CONNECTION(backend)->sp->startup_packet, sp->len) != 0)
            {
                pool_debug("do_child: connection exists but startup packet contents are not identical");
                found = 0;
            }

            if (found == 0)
            {
                /* we need to discard the existing connection since the startup packet is different */
                pool_discard_cp(sp->user, sp->database, sp->major);
                backend = NULL;
            }
        }

        if (backend == NULL)
        {
            /* create a new connection to backend */
            if ((backend = connect_backend(sp, frontend)) == NULL)
            {
                connection_count_down();
                continue;
            }
        }
        else
        {
            /* reuse existing connection */
            if (!connect_using_existing_connection(frontend, backend, sp))
                continue;
        }

        connected = 1;

        /* show ps status */
        sp = MASTER_CONNECTION(backend)->sp;
        snprintf(psbuf, sizeof(psbuf), "%s %s %s idle",
                 sp->user, sp->database, remote_ps_data);
        set_ps_display(psbuf, false);

        /*
         * Initialize per session context
         */
        pool_init_session_context(frontend, backend);

        /* Mark this connection pool as connected from frontend */
        pool_coninfo_set_frontend_connected(pool_get_process_context()->proc_id, pool_pool_index());

        /* query process loop */
        for (;;)
        {
            POOL_STATUS status;

            status = pool_process_query(frontend, backend, 0);

            sp = MASTER_CONNECTION(backend)->sp;

            switch (status)
            {
                /* client exits */
                case POOL_END:
                    /*
                     * do not cache the connection if:
                     * pool_config->connection_cache == 0 or
                     * the database name is template0, template1, postgres or regression
                     */
                    if (pool_config->connection_cache == 0 ||
                        !strcmp(sp->database, "template0") ||
                        !strcmp(sp->database, "template1") ||
                        !strcmp(sp->database, "postgres") ||
                        !strcmp(sp->database, "regression"))
                    {
                        reset_connection();
                        pool_close(frontend);
                        pool_send_frontend_exits(backend);
                        pool_discard_cp(sp->user, sp->database, sp->major);
                    }
                    else
                    {
                        POOL_STATUS status1;

                        /* send reset request to backend */
                        status1 = pool_process_query(frontend, backend, 1);
                        pool_close(frontend);

                        /*
                         * if we detect errors on resetting the connection, we need to
                         * discard this connection since it might be in an unknown status
                         */
                        if (status1 != POOL_CONTINUE)
                        {
                            pool_debug("error in resetting connections. discarding connection pools...");
                            pool_send_frontend_exits(backend);
                            pool_discard_cp(sp->user, sp->database, sp->major);
                        }
                        else
                            pool_connection_pool_timer(backend);
                    }
                    break;

                /* error occurred. discard backend connection pool and disconnect connection to the frontend */
                case POOL_ERROR:
                    pool_log("do_child: exits with status 1 due to error");
                    child_exit(1);
                    break;

                /* fatal error occurred. just exit myself... */
                case POOL_FATAL:
                    notice_backend_error(1);
                    child_exit(1);
                    break;

                /* not implemented yet */
                case POOL_IDLE:
                    do_accept(unix_fd, inet_fd, &timeout);
                    pool_debug("accept while idle");
                    break;

                default:
                    break;
            }

            if (status != POOL_CONTINUE)
                break;
        }

        /* Destroy session context */
        pool_session_context_destroy();

        /* Mark this connection pool as not connected from frontend */
        pool_coninfo_unset_frontend_connected(pool_get_process_context()->proc_id, pool_pool_index());

        accepted = 0;
        connection_count_down();

        timeout.tv_sec = pool_config->child_life_time;
        timeout.tv_usec = 0;

        /* increment queries counter if necessary */
        if (pool_config->child_max_connections > 0)
            connections_count++;

        /* check if the maximum connections count for this child has been reached */
        if ((pool_config->child_max_connections > 0) &&
            (connections_count >= pool_config->child_max_connections))
        {
            pool_log("child exiting, %d connections reached", pool_config->child_max_connections);
            send_frontend_exits();
            child_exit(2);
        }
    }
    child_exit(0);
}