/*
 * PerformAuthentication -- authenticate a remote client
 *
 * returns: nothing.  Will not return at all if there's any failure.
 */
static void
PerformAuthentication(Port *port)
{
    /* This should be set already, but let's make sure */
    ClientAuthInProgress = true;    /* limit visibility of log messages */

    /*
     * In EXEC_BACKEND case, we didn't inherit the contents of pg_hba.conf
     * etcetera from the postmaster, and have to load them ourselves.  Note we
     * are loading them into the startup transaction's memory context, not
     * PostmasterContext, but that shouldn't matter.
     *
     * FIXME: [fork/exec] Ugh.  Is there a way around this overhead?
     */
#ifdef EXEC_BACKEND
    if (!load_hba())
    {
        /*
         * It makes no sense to continue if we fail to load the HBA file,
         * since there is no way to connect to the database in this case.
         */
        ereport(FATAL,
                (errmsg("could not load pg_hba.conf")));
    }
    load_ident();
#endif

    /*
     * Set up a timeout in case a buggy or malicious client fails to respond
     * during authentication.  Since we're inside a transaction and might do
     * database access, we have to use the statement_timeout infrastructure.
     */
    enable_timeout_after(STATEMENT_TIMEOUT, AuthenticationTimeout * 1000);

    /*
     * Now perform authentication exchange.
     */
    ClientAuthentication(port); /* might not return, if failure */

    /*
     * Done with authentication.  Disable the timeout, and log if needed.
     */
    disable_timeout(STATEMENT_TIMEOUT, false);

    if (Log_connections)
    {
        if (am_walsender)
            ereport(LOG,
                    (errmsg("replication connection authorized: user=%s",
                            port->user_name)));
        else
            ereport(LOG,
                    (errmsg("connection authorized: user=%s database=%s",
                            port->user_name, port->database_name)));
    }

    set_ps_display("startup", false);

    ClientAuthInProgress = false;   /* client_min_messages is active now */
}
static void reload_config(void)
{
    pool_log("reload config files.");
    pool_get_config(get_config_file_name(), RELOAD_CONFIG);

    if (pool_config->enable_pool_hba)
        load_hba(get_hba_file_name());

    reload_config_request = 0;
}
static void reload_config(void)
{
    pool_log("reload config files.");
    pool_get_config(conf_file, RELOAD_CONFIG);

    if (pool_config->enable_pool_hba)
        load_hba(hba_file);

    if (pool_config->parallel_mode)
        pool_memset_system_db_info(system_db_info->info);

    kill_all_children(SIGHUP);
}
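/*
 * Both reload_config() variants above are driven by SIGHUP delivered to the
 * pgpool parent process (main() below registers reload_config_handler for
 * SIGHUP, and the first variant clears reload_config_request once done).
 * The following is a minimal, self-contained sketch of that pattern only;
 * the handler body and main loop here are hypothetical stand-ins, not
 * pgpool's actual code: the signal handler merely records the request, and
 * the normal control flow performs the reload.
 */
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t reload_config_request = 0;

static void reload_config_handler(int sig)
{
    /* async-signal-safe: just record that a reload was requested */
    reload_config_request = 1;
}

static void reload_config(void)
{
    /* stand-in for pool_get_config()/load_hba() in the real code */
    printf("reload config files.\n");
    reload_config_request = 0;
}

int main(void)
{
    signal(SIGHUP, reload_config_handler);

    for (;;)
    {
        if (reload_config_request)
            reload_config();
        pause();                /* wait for the next signal */
    }
    return 0;
}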
/*
 * PerformAuthentication -- authenticate a remote client
 *
 * returns: nothing.  Will not return at all if there's any failure.
 */
static void
PerformAuthentication(Port *port)
{
    /* This should be set already, but let's make sure */
    ClientAuthInProgress = true;    /* limit visibility of log messages */

    /*
     * In EXEC_BACKEND case, we didn't inherit the contents of pg_hba.conf
     * etcetera from the postmaster, and have to load them ourselves.
     *
     * FIXME: [fork/exec] Ugh.  Is there a way around this overhead?
     */
#ifdef EXEC_BACKEND
    if (!load_hba())
    {
        /*
         * It makes no sense to continue if we fail to load the HBA file,
         * since there is no way to connect to the database in this case.
         */
        ereport(FATAL,
                (errmsg("could not load pg_hba.conf")));
    }

    if (!load_ident())
    {
        /*
         * It is ok to continue if we fail to load the IDENT file, although it
         * means that you cannot log in using any of the authentication
         * methods that need a user name mapping.  load_ident() already logged
         * the details of error to the log.
         */
    }
#endif

    /*
     * Set up a timeout in case a buggy or malicious client fails to respond
     * during authentication.  Since we're inside a transaction and might do
     * database access, we have to use the statement_timeout infrastructure.
     */
    enable_timeout_after(STATEMENT_TIMEOUT, AuthenticationTimeout * 1000);

    /*
     * Now perform authentication exchange.
     */
    ClientAuthentication(port); /* might not return, if failure */

    /*
     * Done with authentication.  Disable the timeout, and log if needed.
     */
    disable_timeout(STATEMENT_TIMEOUT, false);

    if (Log_connections)
    {
        if (am_walsender)
        {
#ifdef USE_SSL
            if (port->ssl)
                ereport(LOG,
                        (errmsg("replication connection authorized: user=%s SSL enabled (protocol=%s, cipher=%s, compression=%s)",
                                port->user_name, SSL_get_version(port->ssl),
                                SSL_get_cipher(port->ssl),
                                SSL_get_current_compression(port->ssl) ? _("on") : _("off"))));
            else
#endif
                ereport(LOG,
                        (errmsg("replication connection authorized: user=%s",
                                port->user_name)));
        }
        else
        {
#ifdef USE_SSL
            if (port->ssl)
                ereport(LOG,
                        (errmsg("connection authorized: user=%s database=%s SSL enabled (protocol=%s, cipher=%s, compression=%s)",
                                port->user_name, port->database_name,
                                SSL_get_version(port->ssl),
                                SSL_get_cipher(port->ssl),
                                SSL_get_current_compression(port->ssl) ? _("on") : _("off"))));
            else
#endif
                ereport(LOG,
                        (errmsg("connection authorized: user=%s database=%s",
                                port->user_name, port->database_name)));
        }
    }

    set_ps_display("startup", false);

    ClientAuthInProgress = false;   /* client_min_messages is active now */
}
/*
 * perform accept() and return new fd
 */
static POOL_CONNECTION *do_accept(int unix_fd, int inet_fd, struct timeval *timeout)
{
    fd_set readmask;
    int fds;
    int save_errno;

    SockAddr saddr;
    int fd = 0;
    int afd;
    int inet = 0;
    POOL_CONNECTION *cp;
#ifdef ACCEPT_PERFORMANCE
    struct timeval now1, now2;
    static long atime;
    static int cnt;
#endif
    struct timeval *timeoutval;
    struct timeval tv1, tv2, tmback = {0, 0};

    set_ps_display("wait for connection request", false);

    /* Destroy session context for just in case... */
    pool_session_context_destroy();

    FD_ZERO(&readmask);
    FD_SET(unix_fd, &readmask);
    if (inet_fd)
        FD_SET(inet_fd, &readmask);

    if (timeout->tv_sec == 0 && timeout->tv_usec == 0)
        timeoutval = NULL;
    else
    {
        timeoutval = timeout;
        tmback.tv_sec = timeout->tv_sec;
        tmback.tv_usec = timeout->tv_usec;
        gettimeofday(&tv1, NULL);

#ifdef DEBUG
        pool_log("before select = {%d, %d}", timeoutval->tv_sec, timeoutval->tv_usec);
        pool_log("g:before select = {%d, %d}", tv1.tv_sec, tv1.tv_usec);
#endif
    }

    fds = select(Max(unix_fd, inet_fd)+1, &readmask, NULL, NULL, timeoutval);

    save_errno = errno;
    /* check backend timer is expired */
    if (backend_timer_expired)
    {
        pool_backend_timer();
        backend_timer_expired = 0;
    }

    /*
     * following code fragment computes remaining timeout val in a
     * portable way. Linux does this automatically but other platforms do not.
     */
    if (timeoutval)
    {
        gettimeofday(&tv2, NULL);

        tmback.tv_usec -= tv2.tv_usec - tv1.tv_usec;
        tmback.tv_sec -= tv2.tv_sec - tv1.tv_sec;

        if (tmback.tv_usec < 0)
        {
            tmback.tv_sec--;
            if (tmback.tv_sec < 0)
            {
                timeout->tv_sec = 0;
                timeout->tv_usec = 0;
            }
            else
            {
                tmback.tv_usec += 1000000;
                timeout->tv_sec = tmback.tv_sec;
                timeout->tv_usec = tmback.tv_usec;
            }
        }
#ifdef DEBUG
        pool_log("g:after select = {%d, %d}", tv2.tv_sec, tv2.tv_usec);
        pool_log("after select = {%d, %d}", timeout->tv_sec, timeout->tv_usec);
#endif
    }

    errno = save_errno;

    if (fds == -1)
    {
        if (errno == EAGAIN || errno == EINTR)
            return NULL;

        pool_error("select() failed. reason %s", strerror(errno));
        return NULL;
    }

    /* timeout */
    if (fds == 0)
    {
        return NULL;
    }

    if (FD_ISSET(unix_fd, &readmask))
    {
        fd = unix_fd;
    }

    if (FD_ISSET(inet_fd, &readmask))
    {
        fd = inet_fd;
        inet++;
    }

    /*
     * Note that some SysV systems do not work here. For those
     * systems, we need some locking mechanism for the fd.
     */
    memset(&saddr, 0, sizeof(saddr));
    saddr.salen = sizeof(saddr.addr);

#ifdef ACCEPT_PERFORMANCE
    gettimeofday(&now1,0);
#endif

 retry_accept:

    /* wait if recovery is started */
    while (*InRecovery == 1)
    {
        pause();
    }

    afd = accept(fd, (struct sockaddr *)&saddr.addr, &saddr.salen);

    save_errno = errno;
    /* check backend timer is expired */
    if (backend_timer_expired)
    {
        pool_backend_timer();
        backend_timer_expired = 0;
    }
    errno = save_errno;
    if (afd < 0)
    {
        if (errno == EINTR && *InRecovery)
            goto retry_accept;

        /*
         * "Resource temporarily unavailable" (EAGAIN or EWOULDBLOCK)
         * can be silently ignored. And EINTR can be ignored.
         */
        if (errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR)
            pool_error("accept() failed. reason: %s", strerror(errno));
        return NULL;
    }
#ifdef ACCEPT_PERFORMANCE
    gettimeofday(&now2,0);
    atime += (now2.tv_sec - now1.tv_sec)*1000000 + (now2.tv_usec - now1.tv_usec);
    cnt++;
    if (cnt % 100 == 0)
    {
        pool_log("cnt: %d atime: %ld", cnt, atime);
    }
#endif

    /* reload config file */
    if (got_sighup)
    {
        pool_get_config(get_config_file_name(), RELOAD_CONFIG);
        if (pool_config->enable_pool_hba)
        {
            load_hba(get_hba_file_name());
            if (strcmp("", pool_config->pool_passwd))
                pool_reopen_passwd_file();
        }
        if (pool_config->parallel_mode)
            pool_memset_system_db_info(system_db_info->info);
        got_sighup = 0;
    }

    connection_count_up();
    accepted = 1;

    if (pool_config->parallel_mode)
    {
        /*
         * do not accept new connection if any of DB node or SystemDB is down when operating in
         * parallel mode
         */
        int i;

        for (i=0;i<NUM_BACKENDS;i++)
        {
            if (BACKEND_INFO(i).backend_status == CON_DOWN || SYSDB_STATUS == CON_DOWN)
            {
                StartupPacket *sp;
                char *msg = "pgpool is not available in parallel query mode";

                if (SYSDB_STATUS == CON_DOWN)
                    pool_log("Cannot accept() new connection. SystemDB is down");
                else
                    pool_log("Cannot accept() new connection. %d th backend is down", i);

                if ((cp = pool_open(afd)) == NULL)
                {
                    close(afd);
                    child_exit(1);
                }

                sp = read_startup_packet(cp);
                if (sp == NULL)
                {
                    /* failed to read the startup packet. return to the accept() loop */
                    pool_close(cp);
                    child_exit(1);
                }

                pool_debug("do_accept: send error message to frontend");

                if (sp->major == PROTO_MAJOR_V3)
                {
                    char buf[256];

                    if (SYSDB_STATUS == CON_DOWN)
                        snprintf(buf, sizeof(buf), "SystemDB is down");
                    else
                        snprintf(buf, sizeof(buf), "%d th backend is down", i);

                    pool_send_error_message(cp, sp->major, "08S01",
                                            msg,
                                            buf,
                                            ((SYSDB_STATUS == CON_DOWN) ?
                                             "repair the SystemDB and restart pgpool" :
                                             "repair the backend and restart pgpool"),
                                            __FILE__,
                                            __LINE__);
                }
                else
                {
                    pool_send_error_message(cp, sp->major, 0,
                                            msg,
                                            "",
                                            "",
                                            "",
                                            0);
                }
                pool_close(cp);
                child_exit(1);
            }
        }
    }
    else
    {
        /*
         * do not accept new connection if all DB nodes are down when operating in
         * non parallel mode
         */
        int i;
        int found = 0;

        for (i=0;i<NUM_BACKENDS;i++)
        {
            if (VALID_BACKEND(i))
            {
                found = 1;
            }
        }
        if (found == 0)
        {
            pool_log("Cannot accept() new connection. all backends are down");
            child_exit(1);
        }
    }

    pool_debug("I am %d accept fd %d", getpid(), afd);

    pool_getnameinfo_all(&saddr, remote_host, remote_port);
    snprintf(remote_ps_data, sizeof(remote_ps_data),
             remote_port[0] == '\0' ? "%s" : "%s(%s)",
             remote_host, remote_port);

    set_ps_display("accept connection", false);

    /* log who is connecting */
    if (pool_config->log_connections)
    {
        pool_log("connection received: host=%s%s%s",
                 remote_host, remote_port[0] ? " port=" : "", remote_port);
    }

    /* set NODELAY and KEEPALIVE options if INET connection */
    if (inet)
    {
        int on = 1;

        if (setsockopt(afd, IPPROTO_TCP, TCP_NODELAY,
                       (char *) &on,
                       sizeof(on)) < 0)
        {
            pool_error("do_accept: setsockopt() failed: %s", strerror(errno));
            close(afd);
            return NULL;
        }
        if (setsockopt(afd, SOL_SOCKET, SO_KEEPALIVE,
                       (char *) &on,
                       sizeof(on)) < 0)
        {
            pool_error("do_accept: setsockopt() failed: %s", strerror(errno));
            close(afd);
            return NULL;
        }
    }

    if ((cp = pool_open(afd)) == NULL)
    {
        close(afd);
        return NULL;
    }

    /* save ip address for hba */
    memcpy(&cp->raddr, &saddr, sizeof(SockAddr));
    if (cp->raddr.addr.ss_family == 0)
        cp->raddr.addr.ss_family = AF_UNIX;

    return cp;
}
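/*
 * do_accept() above multiplexes a UNIX-domain socket and an optional INET
 * socket with select(), then accept()s on whichever became readable, and
 * treats timeouts and EINTR/EAGAIN as non-fatal.  The following is a
 * stripped-down, self-contained sketch of just that select-then-accept
 * pattern; listen_and_accept() is a hypothetical name and all pgpool-specific
 * handling (recovery wait, HBA reload, backend checks) is omitted.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/time.h>

/*
 * Wait up to *timeout for a connection on either listening fd; return the
 * accepted fd, or -1 on timeout or a tolerable error (EINTR/EAGAIN).
 */
int listen_and_accept(int unix_fd, int inet_fd, struct timeval *timeout)
{
    fd_set readmask;
    int maxfd = (unix_fd > inet_fd) ? unix_fd : inet_fd;
    int fds, fd, afd;

    FD_ZERO(&readmask);
    FD_SET(unix_fd, &readmask);
    if (inet_fd > 0)
        FD_SET(inet_fd, &readmask);

    fds = select(maxfd + 1, &readmask, NULL, NULL, timeout);
    if (fds == -1)
    {
        if (errno != EAGAIN && errno != EINTR)
            fprintf(stderr, "select() failed: %s\n", strerror(errno));
        return -1;
    }
    if (fds == 0)               /* timeout */
        return -1;

    /* prefer the UNIX socket if both are readable, as the real code does */
    fd = FD_ISSET(unix_fd, &readmask) ? unix_fd : inet_fd;

    afd = accept(fd, NULL, NULL);
    if (afd < 0 && errno != EAGAIN && errno != EWOULDBLOCK && errno != EINTR)
        fprintf(stderr, "accept() failed: %s\n", strerror(errno));
    return afd;
}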
/*
 * pgpool main program
 */
int main(int argc, char **argv)
{
    int opt;
    int i;
    int pid;
    int size;
    int retrycnt;
    int sys_retrycnt;

    myargc = argc;
    myargv = argv;

    snprintf(conf_file, sizeof(conf_file), "%s/%s", DEFAULT_CONFIGDIR, POOL_CONF_FILE_NAME);
    snprintf(pcp_conf_file, sizeof(pcp_conf_file), "%s/%s", DEFAULT_CONFIGDIR, PCP_PASSWD_FILE_NAME);
    snprintf(hba_file, sizeof(hba_file), "%s/%s", DEFAULT_CONFIGDIR, HBA_CONF_FILE_NAME);

    while ((opt = getopt(argc, argv, "a:cdf:F:hm:nv")) != -1)
    {
        switch (opt)
        {
            case 'a':   /* specify hba configuration file */
                if (!optarg)
                {
                    usage();
                    exit(1);
                }
                strncpy(hba_file, optarg, sizeof(hba_file));
                break;

            case 'c':   /* clear cache option */
                clear_cache = 1;
                break;

            case 'd':   /* debug option */
                debug = 1;
                break;

            case 'f':   /* specify configuration file */
                if (!optarg)
                {
                    usage();
                    exit(1);
                }
                strncpy(conf_file, optarg, sizeof(conf_file));
                break;

            case 'F':   /* specify PCP password file */
                if (!optarg)
                {
                    usage();
                    exit(1);
                }
                strncpy(pcp_conf_file, optarg, sizeof(pcp_conf_file));
                break;

            case 'h':
                usage();
                exit(0);
                break;

            case 'm':   /* stop mode */
                if (!optarg)
                {
                    usage();
                    exit(1);
                }
                if (*optarg == 's' || !strcmp("smart", optarg))
                    stop_sig = SIGTERM;     /* smart shutdown */
                else if (*optarg == 'f' || !strcmp("fast", optarg))
                    stop_sig = SIGINT;      /* fast shutdown */
                else if (*optarg == 'i' || !strcmp("immediate", optarg))
                    stop_sig = SIGQUIT;     /* immediate shutdown */
                else
                {
                    usage();
                    exit(1);
                }
                break;

            case 'n':   /* no detaching control ttys */
                not_detach = 1;
                break;

            case 'v':
                show_version();
                exit(0);

            default:
                usage();
                exit(1);
        }
    }

    mypid = getpid();

    if (pool_init_config())
        exit(1);

    if (pool_get_config(conf_file, INIT_CONFIG))
    {
        pool_error("Unable to get configuration. Exiting...");
        exit(1);
    }

    if (pool_config->enable_pool_hba)
        load_hba(hba_file);

    /*
     * If a non-switch argument remains, then it should be either "reload" or "stop".
     */
    if (optind == (argc - 1))
    {
        if (!strcmp(argv[optind], "reload"))
        {
            pid_t pid;

            pid = read_pid_file();
            if (pid < 0)
            {
                pool_error("could not read pid file");
                pool_shmem_exit(1);
                exit(1);
            }

            if (kill(pid, SIGHUP) == -1)
            {
                pool_error("could not reload configuration file pid: %d. reason: %s", pid, strerror(errno));
                pool_shmem_exit(1);
                exit(1);
            }
            pool_shmem_exit(0);
            exit(0);
        }
        if (!strcmp(argv[optind], "stop"))
        {
            stop_me();
            pool_shmem_exit(0);
            exit(0);
        }
        else
        {
            usage();
            pool_shmem_exit(1);
            exit(1);
        }
    }
    /*
     * else if no non-switch argument remains, then it should be a start request
     */
    else if (optind == argc)
    {
        pid = read_pid_file();
        if (pid > 0)
        {
            if (kill(pid, 0) == 0)
            {
                fprintf(stderr, "pid file found. is another pgpool(%d) is running?\n", pid);
                exit(1);
            }
            else
                fprintf(stderr, "pid file found but it seems bogus. Trying to start pgpool anyway...\n");
        }
    }
    /*
     * otherwise an error...
     */
    else
    {
        usage();
        exit(1);
    }

    /* set signal masks */
    poolinitmask();

    if (not_detach)
        write_pid_file();
    else
        daemonize();

    if (pool_semaphore_create(MAX_NUM_SEMAPHORES))
    {
        pool_error("Unable to create semaphores. Exiting...");
        pool_shmem_exit(1);
        exit(1);
    }

    /*
     * Restore previous backend status if possible
     */
    read_status_file();

    /* clear cache */
    if (clear_cache && pool_config->enable_query_cache && SYSDB_STATUS == CON_UP)
    {
        Interval interval[1];

        interval[0].quantity = 0;
        interval[0].unit = second;

        pool_clear_cache_by_time(interval, 1);
    }

    /* set unix domain socket path */
    snprintf(un_addr.sun_path, sizeof(un_addr.sun_path), "%s/.s.PGSQL.%d",
             pool_config->socket_dir,
             pool_config->port);

    /* set up signal handlers */
    pool_signal(SIGPIPE, SIG_IGN);

    /* create unix domain socket */
    unix_fd = create_unix_domain_socket(un_addr);

    /* create inet domain socket if any */
    if (pool_config->listen_addresses[0])
    {
        inet_fd = create_inet_domain_socket(pool_config->listen_addresses, pool_config->port);
    }

    size = pool_config->num_init_children * pool_config->max_pool * sizeof(ConnectionInfo);
    con_info = pool_shared_memory_create(size);
    if (con_info == NULL)
    {
        pool_error("failed to allocate connection informations");
        myexit(1);
    }
    memset(con_info, 0, size);

    size = pool_config->num_init_children * (sizeof(ProcessInfo));
    pids = pool_shared_memory_create(size);
    if (pids == NULL)
    {
        pool_error("failed to allocate pids");
        myexit(1);
    }
    memset(pids, 0, size);
    for (i = 0; i < pool_config->num_init_children; i++)
    {
        pids[i].connection_info = &con_info[i * pool_config->max_pool];
    }

    /* create fail over/switch over event area */
    Req_info = pool_shared_memory_create(sizeof(POOL_REQUEST_INFO));
    if (Req_info == NULL)
    {
        pool_error("failed to allocate Req_info");
        myexit(1);
    }

    /* initialize Req_info */
    Req_info->kind = NODE_UP_REQUEST;
    memset(Req_info->node_id, -1, sizeof(int) * MAX_NUM_BACKENDS);
    Req_info->master_node_id = get_next_master_node();
    Req_info->conn_counter = 0;

    InRecovery = pool_shared_memory_create(sizeof(int));
    if (InRecovery == NULL)
    {
        pool_error("failed to allocate InRecovery");
        myexit(1);
    }
    *InRecovery = 0;

    /*
     * We need to block signal here. Otherwise child might send some
     * signals, for example SIGUSR1(fail over). Children will inherit
     * signal blocking but they do unblock signals at the very beginning
     * of process. So this is harmless.
     */
    POOL_SETMASK(&BlockSig);

    /* fork the children */
    for (i=0;i<pool_config->num_init_children;i++)
    {
        pids[i].pid = fork_a_child(unix_fd, inet_fd, i);
        pids[i].start_time = time(NULL);
    }

    /* set up signal handlers */
    pool_signal(SIGTERM, exit_handler);
    pool_signal(SIGINT, exit_handler);
    pool_signal(SIGQUIT, exit_handler);
    pool_signal(SIGCHLD, reap_handler);
    pool_signal(SIGUSR1, failover_handler);
    pool_signal(SIGUSR2, wakeup_handler);
    pool_signal(SIGHUP, reload_config_handler);

    /* create pipe for delivering event */
    if (pipe(pipe_fds) < 0)
    {
        pool_error("failed to create pipe");
        myexit(1);
    }

    pool_log("pgpool successfully started");

    /* fork a child for PCP handling */
    snprintf(pcp_un_addr.sun_path, sizeof(pcp_un_addr.sun_path), "%s/.s.PGSQL.%d",
             pool_config->pcp_socket_dir,
             pool_config->pcp_port);
    pcp_unix_fd = create_unix_domain_socket(pcp_un_addr);
    /* maybe change "*" to pool_config->pcp_listen_addresses */
    pcp_inet_fd = create_inet_domain_socket("*", pool_config->pcp_port);
    pcp_pid = pcp_fork_a_child(pcp_unix_fd, pcp_inet_fd, pcp_conf_file);

    retrycnt = 0;       /* reset health check retry counter */
    sys_retrycnt = 0;   /* reset SystemDB health check retry counter */

    /*
     * This is the main loop
     */
    for (;;)
    {
        CHECK_REQUEST;

        /* do we need health checking for PostgreSQL? */
        if (pool_config->health_check_period > 0)
        {
            int sts;
            int sys_sts = 0;
            unsigned int sleep_time;

            if (retrycnt == 0)
            {
                pool_debug("starting health checking");
            }
            else
            {
                pool_debug("retrying %d th health checking", retrycnt);
            }

            if (pool_config->health_check_timeout > 0)
            {
                /*
                 * set health checker timeout. we want to detect
                 * communication path failure much earlier before
                 * TCP/IP stack detects it.
                 */
                pool_signal(SIGALRM, health_check_timer_handler);
                alarm(pool_config->health_check_timeout);
            }

            /*
             * do actual health check. trying to connect to the backend
             */
            errno = 0;
            health_check_timer_expired = 0;
            POOL_SETMASK(&UnBlockSig);
            sts = health_check();
            POOL_SETMASK(&BlockSig);

            if (pool_config->parallel_mode || pool_config->enable_query_cache)
                sys_sts = system_db_health_check();

            if ((sts > 0 || sys_sts < 0) &&
                (errno != EINTR || (errno == EINTR && health_check_timer_expired)))
            {
                if (sts > 0)
                {
                    sts--;

                    if (!pool_config->parallel_mode)
                    {
                        pool_log("set %d th backend down status", sts);
                        Req_info->kind = NODE_DOWN_REQUEST;
                        Req_info->node_id[0] = sts;
                        failover();
                        /* need to distribute this info to children */
                    }
                    else
                    {
                        retrycnt++;
                        pool_signal(SIGALRM, SIG_IGN);  /* Cancel timer */

                        if (retrycnt > NUM_BACKENDS)
                        {
                            /* retry count over */
                            pool_log("set %d th backend down status", sts);
                            Req_info->kind = NODE_DOWN_REQUEST;
                            Req_info->node_id[0] = sts;
                            failover();
                            retrycnt = 0;
                        }
                        else
                        {
                            /* continue to retry */
                            sleep_time = pool_config->health_check_period/NUM_BACKENDS;
                            pool_debug("retry sleep time: %d seconds", sleep_time);
                            pool_sleep(sleep_time);
                            continue;
                        }
                    }
                }
                if (sys_sts < 0)
                {
                    sys_retrycnt++;
                    pool_signal(SIGALRM, SIG_IGN);

                    if (sys_retrycnt > NUM_BACKENDS)
                    {
                        pool_log("set SystemDB down status");
                        SYSDB_STATUS = CON_DOWN;
                        sys_retrycnt = 0;
                    }
                    else if (sts == 0)  /* goes to sleep only when SystemDB alone was down */
                    {
                        sleep_time = pool_config->health_check_period/NUM_BACKENDS;
                        pool_debug("retry sleep time: %d seconds", sleep_time);
                        pool_sleep(sleep_time);
                        continue;
                    }
                }
            }

            if (pool_config->health_check_timeout > 0)
            {
                /* seems ok. cancel health check timer */
                pool_signal(SIGALRM, SIG_IGN);
            }

            sleep_time = pool_config->health_check_period;
            pool_sleep(sleep_time);
        }
        else
        {
            for (;;)
            {
                int r;
                struct timeval t = {3, 0};

                POOL_SETMASK(&UnBlockSig);
                r = pool_pause(&t);
                POOL_SETMASK(&BlockSig);
                if (r > 0)
                    break;
            }
        }
    }

    pool_shmem_exit(0);
}