/*
 * Write out everything queued in the connection's output buffer.
 *
 * After an initial write attempt, the function polls the descriptor for
 * POLLOUT (timeout argument -1.0) and retries while data remains.  The
 * "out" lock is released for the duration of each poll and re-taken
 * before the buffer is inspected again.
 *
 * Returns 0 when the output buffer is empty, 1 when the wait ended early
 * (poll failed with EINTR, or returned 0 because the thread was woken up),
 * and -1 on a write error, a poll error, or an invalid descriptor.
 */
int conn_flush(Connection *conn)
{
    int status;
    int events;
    int sock;

    lock_out(conn);
    status = unlocked_write(conn);
    if (status < 0) {
        unlock_out(conn);
        return -1;
    }

    for (;;) {
        /* The "out" lock is held every time this test runs. */
        if (unlocked_outbuf_len(conn) == 0)
            break;

        sock = conn->fd;
        unlock_out(conn);
        events = gwthread_pollfd(sock, POLLOUT, -1.0);

        /* Every return below happens without the lock held. */
        if (events < 0) {
            if (errno == EINTR)
                return 1;
            error(0, "conn_flush: poll failed on fd %d:", sock);
            return -1;
        }
        if (events == 0) {
            /* We were woken up */
            return 1;
        }
        if (events & POLLNVAL) {
            error(0, "conn_flush: fd %d not open.", sock);
            return -1;
        }

        lock_out(conn);
        if ((events & (POLLOUT | POLLERR | POLLHUP)) == 0)
            continue;

        status = unlocked_write(conn);
        if (status < 0) {
            unlock_out(conn);
            return -1;
        }
    }

    unlock_out(conn);
    return 0;
}
/** * @brief * This is the Windows couterpart of acquire_lock * @par * This function creates/opens the lock file, and locks the file. * In case of a failover environment, the whole operation is retried * several times in a loop. * * @param[in] lockfile - Path of db_lock file. * @param[out] reason - Reason for failure, if not able to accquire lock * @param[in] reasonlen - reason buffer legnth. * @param[out] is_lock_hld_by_thishost - This flag is set if the lock is held by the host * requesting accquire_lock in check_mode. * * @return File handle of the open and locked file * @retval INVALID_HANDLE_VALUE : Function failed to acquire lock * @retval INVALID_HANDLE_VALUE : Function succeeded (file handle returned) * * @par MT-safe: Yes */ HANDLE acquire_lock(char *lockfile, char *reason, int reasonlen, int *is_lock_hld_by_thishost) { HANDLE hFile = INVALID_HANDLE_VALUE; int i, j; char who[PBS_MAXHOSTNAME + 10]; DWORD dwNumBytesRead; BOOL fSuccess; char *p; if (reasonlen > 0) reason[0] = '\0'; if (pbs_conf.pbs_secondary == NULL) j = 1; /* not fail over, try lock one time */ else j = MAX_LOCK_ATTEMPTS; /* fail over, try X times */ hFile = CreateFile(lockfile, GENERIC_READ | GENERIC_WRITE, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, 0, NULL); if (hFile == INVALID_HANDLE_VALUE) { snprintf(reason, reasonlen, "Could not access lockfile, errno=%d", GetLastError()); return hFile; } for (i = 0; i < j; i++) { if (i > 0) sleep(1); if (lock_out(hFile, F_WRLCK) == 0) return hFile; /* success */ } /* all attempts to lock failed, try to see who has it locked */ fSuccess = ReadFile(hFile, who, sizeof(who) - 1, &dwNumBytesRead, NULL); CloseHandle(hFile); hFile = INVALID_HANDLE_VALUE; if (fSuccess) { if (dwNumBytesRead > 0) { who[dwNumBytesRead - 1] = '\0'; p = strchr(who, ':'); if (p) { *p = '\0'; snprintf(reason, reasonlen, "Lock seems to be held by pid: %s running on host: %s", (p + 1), who); } else { snprintf(reason, reasonlen, "Lock seems to be held by %s", who); } if 
(is_lock_hld_by_thishost != NULL) { if (strcmp(thishost, who) == 0) *is_lock_hld_by_thishost = 1; else *is_lock_hld_by_thishost = 0; } } } else snprintf(reason, reasonlen, "Could not access lockfile, errno=%d", GetLastError()); return hFile; }
/*
 * Store a new output buffering size on the connection and, while still
 * holding the "out" lock, give the connection a chance to write.
 */
void conn_set_output_buffering(Connection *conn, unsigned int size)
{
    lock_out(conn);

    conn->output_buffering = size;

    /* A smaller threshold may mean buffered data is now due for writing. */
    unlocked_try_write(conn);

    unlock_out(conn);
}
/*
 * Return the value of unlocked_outbuf_len() for this connection, sampled
 * while holding the "out" lock.
 */
long conn_outbuf_len(Connection *conn)
{
    long pending;

    lock_out(conn);
    pending = unlocked_outbuf_len(conn);
    unlock_out(conn);

    return pending;
}
/*
 * Register the connection with an FDSet, or update the callback
 * information of an existing registration with the same FDSet.
 *
 * When re-registering, the previous callback data is handed to the
 * previous destroyer if it is non-NULL, differs from the new data and a
 * destroyer was set.
 *
 * Returns 0 on success.  Returns -1 if the connection has no valid fd or
 * is already registered with a different FDSet.
 */
int conn_register_real(Connection *conn, FDSet *fdset, conn_callback_t callback, void *data, conn_callback_data_destroyer_t *data_destroyer)
{
    int status = 0;
    int mask;

    gw_assert(conn != NULL);

    if (conn->fd < 0)
        return -1;

    /* Registration information is protected by both locks. */
    lock_out(conn);
    lock_in(conn);

    if (conn->registered != NULL && conn->registered != fdset) {
        /* Owned by some other fdset already. */
        status = -1;
    } else if (conn->registered == fdset) {
        /* Same fdset again: only the callback information changes. */
        conn->callback = callback;

        /* Release the old callback data when new data replaces it. */
        if (conn->callback_data != NULL &&
            conn->callback_data != data &&
            conn->callback_data_destroyer != NULL) {
            conn->callback_data_destroyer(conn->callback_data);
        }

        conn->callback_data = data;
        conn->callback_data_destroyer = data_destroyer;
        status = 0;
    } else {
        /* A socket that is not yet connected is watched in both directions. */
        mask = POLLIN | POLLOUT;
        if (conn->connected == yes) {
            mask = 0;
            if (conn->read_eof == 0 && conn->io_error == 0)
                mask |= POLLIN;
            if (unlocked_outbuf_len(conn) > 0)
                mask |= POLLOUT;
        }

        conn->registered = fdset;
        conn->callback = callback;
        conn->callback_data = data;
        conn->callback_data_destroyer = data_destroyer;
        conn->listening_pollin = (mask & POLLIN) != 0;
        conn->listening_pollout = (mask & POLLOUT) != 0;
        fdset_register(fdset, conn->fd, mask, poll_callback, conn);
        status = 0;
    }

    unlock_in(conn);
    unlock_out(conn);

    return status;
}
/*
 * Append `length` bytes from `data` to the connection's output buffer and
 * attempt a write.  Returns whatever unlocked_try_write() returns.
 */
int conn_write_data(Connection *conn, unsigned char *data, long length)
{
    int status;

    lock_out(conn);

    octstr_append_data(conn->outbuf, data, length);
    status = unlocked_try_write(conn);

    unlock_out(conn);

    return status;
}
/*
 * Append the contents of `data` to the connection's output buffer and
 * attempt a write.  Returns whatever unlocked_try_write() returns.
 */
int conn_write(Connection *conn, Octstr *data)
{
    int status;

    lock_out(conn);

    octstr_append(conn->outbuf, data);
    status = unlocked_try_write(conn);

    unlock_out(conn);

    return status;
}
/*
 * Return the connection's io_error field, read while holding both the
 * "out" and "in" locks.
 */
int conn_error(Connection *conn)
{
    int io_status;

    lock_out(conn);
    lock_in(conn);

    io_status = conn->io_error;

    unlock_in(conn);
    unlock_out(conn);

    return io_status;
}
/**
 * @brief
 *	Forks a background process and continues in it, while the foreground
 *	(parent) process exits with status 0. The child then calls setsid().
 * @par
 *	The lock on lockfds is released before the fork and taken again by the
 *	child before setsid(). On success the global already_forked is set to 1.
 *
 * @return	pid_t - result of setsid() in the child process
 * @retval	>0 - sid of the child process.
 * @retval	-1 - Fork or setsid failed.
 */
static pid_t
go_to_background()
{
	pid_t session;
	int child;

	/* release the lock before forking; the child re-takes it below */
	lock_out(lockfds, F_UNLCK);

	child = fork();
	if (child == -1) {
		/* fork failed */
		return ((pid_t) -1);
	}
	if (child > 0) {
		/* parent goes away, allowing booting to continue */
		exit(0);
	}

	/* only the child reaches this point */
	lock_out(lockfds, F_WRLCK);

	session = setsid();
	if (session == -1) {
		fprintf(stderr, "pbs_comm: setsid failed");
		return ((pid_t) -1);
	}

	already_forked = 1;
	return session;
}
/*
 * Queue `data` on the connection preceded by a 4-byte length prefix
 * produced by encode_network_long(), then attempt a write.
 * Returns whatever unlocked_try_write() returns.
 */
int conn_write_withlen(Connection *conn, Octstr *data)
{
    unsigned char prefix[4];
    int status;

    /* The prefix is built before the lock is taken. */
    encode_network_long(prefix, octstr_len(data));

    lock_out(conn);

    octstr_append_data(conn->outbuf, prefix, 4);
    octstr_append(conn->outbuf, data);
    status = unlocked_try_write(conn);

    unlock_out(conn);

    return status;
}
/*
 * Remove the connection from the FDSet it is registered with, if any,
 * and destroy its callback data.
 *
 * The registration fields are cleared under both locks; the FDSet call
 * and the data destroyer run after the locks have been released.
 * Does nothing when conn is NULL or has no valid fd.
 */
void conn_unregister(Connection *conn)
{
    FDSet *owner = NULL;
    int sock = -1;
    void *old_data = NULL;
    conn_callback_data_destroyer_t *old_destroyer = NULL;

    gw_assert(conn != NULL);

    if (conn == NULL || conn->fd < 0)
        return;

    /* Registration information is protected by both locks. */
    lock_out(conn);
    lock_in(conn);

    owner = conn->registered;
    if (owner != NULL) {
        sock = conn->fd;
        conn->registered = NULL;
        conn->callback = NULL;

        /*
         * Keep the data and its destroyer aside instead of destroying
         * now: a callback may be running at this moment.  They are
         * destroyed only after fdset_unregister(), which guarantees the
         * callback is no longer executing.
         */
        old_data = conn->callback_data;
        conn->callback_data = NULL;
        old_destroyer = conn->callback_data_destroyer;
        conn->callback_data_destroyer = NULL;

        conn->listening_pollin = 0;
        conn->listening_pollout = 0;
    }

    unlock_in(conn);
    unlock_out(conn);

    /* Detach from the FDSet now that the locks are released. */
    if (owner != NULL)
        fdset_unregister(owner, sock);

    /* No callback can be running any more; release the data. */
    if (old_data != NULL && old_destroyer != NULL)
        old_destroyer(old_data);
}
/** * @brief * The child part of the monitor functionality on Windows. * * This function is called as a separate executable (child) process * from the parent monitor, and is passed the location of the * data directory. The parent launches this function as a process and waits * to read the stdout of the child process. * @par * This function attempts to lock the lockfile (inside dbstore) and if it * fails, it prints the reason of failure to lock, to its stdout; the child * process also exits in this case. * @par * If the function succeeds in locking the file, it prints "0" to its stdout * resulting in the parent to exit with success to its caller. In that case * this process (child) continues to run in the background, as long as the * monitored database process is still up, holding onto the lock, so that no * other process can lock this file (and thus not be able to start the database). * @par * If and when eventually the database goes down, this function unlocks the file * and quits (allowing others to lock the file and start the database). * * @retval 0 : Function succeeded for the given operation * @retval 1 : Failed (eg to lock the file). * * @par MT-safe: Yes */ int win_db_monitor_child() { HANDLE hFile; BOOL fSuccess = FALSE; pid_t dbpid; int i; char lockfile[MAXPATHLEN + 1]; char reason[RES_BUF_SIZE]; reason[0] = '\0'; /* clear any residual stop db file before starting monitoring */ clear_stop_db_file(); snprintf(lockfile, MAXPATHLEN, "%s\\datastore\\pbs_dblock", pbs_conf.pbs_home_path); hFile = acquire_lock(lockfile, reason, sizeof(reason), NULL); if (hFile == INVALID_HANDLE_VALUE) { printf("%s", reason); fflush(stdout); return 1; } /* set success event */ printf("0"); fflush(stdout); fclose(stdin); fclose(stderr); fclose(stdout); /* dont need stdout after this */ /* * okay, so we locked the file. 
Now find postgres pid * then loop forever as long as pid is up */ dbpid = 0; for (i = 0; i < MAX_DBPID_ATTEMPTS; i++) { if ((dbpid = get_pid()) > 0) break; sleep(1); } if (dbpid == 0) { lock_out(hFile, F_UNLCK); CloseHandle(hFile); unlink(lockfile); return 0; /* this will unlock the lock in the datastore */ } while (1) { if (!checkpid(dbpid)) break; if (!((dbpid = get_pid()) > 0)) break; /* check if stop db file exists */ check_and_stop_db(dbpid); sleep(1); } /* unlock and return */ lock_out(hFile, F_UNLCK); CloseHandle(hFile); unlink(lockfile); /* clear temporary err files created at startup; windows only case */ clear_tmp_files(); return 0; }
/** * @brief * This is the Windows counterpart of the monitoring * code. * @par * This function does the following: * a) Creates/opens a file $PBS_HOME/datastore/pbs_dblock. * b) If mode is "check", attempts to lock the file. If locking * succeeds, unlocks the file and returns success. * c) If mode is "monitor", launches itself with a "monitorchild" * parameter, which calls function "win_db_monitor_child". * d) It launches a child process using win_popen() and reads its stdout. * e) If the child was able to successfully lock the file, it prints "0" * to its stdout. Otherwise it prints the reason for why it could * not acquire the lockfile. * * @param[in] mode - "check" : to just check if lockfile can be locked * "monitor" : to launch a monitoring process that holds * onto the file lock * * @retval 1 : Function failed to acquire lock * @retval 0 : Function succeded in the requested operation * * @par MT-safe: Yes */ int win_db_monitor(char *mode) { int rc; BOOL fSuccess = FALSE; HANDLE hFile; char lockfile[MAXPATHLEN + 1]; char cmd_line[2*MAXPATHLEN + 1]; char result[RES_BUF_SIZE]; int is_lock_local = 0; pio_handles pio; proc_ctrl proc_info; HANDLE hOut, hErr; result[0] = '\0'; snprintf(lockfile, MAXPATHLEN, "%s\\datastore\\pbs_dblock", pbs_conf.pbs_home_path); /* * If mode is check, just attempt to lock the file. * Return success if able to lock, else return failure. */ if (strcmp(mode, "check") == 0) { hFile = acquire_lock(lockfile, result, sizeof(result), &is_lock_local); if (hFile == INVALID_HANDLE_VALUE) { if (is_lock_local) return 0; /* Since lock is already held by this host, return success */ fprintf(stderr, "Failed to acquire lock on %s. 
%s\n", lockfile, result); return 1; } lock_out(hFile, F_UNLCK); CloseHandle(hFile); unlink(lockfile); return 0; } /* monitor part */ proc_info.flags = CREATE_DEFAULT_ERROR_MODE | CREATE_NO_WINDOW; proc_info.bInheritHandle = TRUE; proc_info.bnowait = TRUE; proc_info.need_ptree_termination = FALSE; proc_info.buse_cmd = FALSE; sprintf(cmd_line, "\"%s\" monitorchild", pbs_ds_monitor_exe); /* set the current processes stdout/stderr not be inherited */ hOut = GetStdHandle(STD_OUTPUT_HANDLE); SetHandleInformation(hOut, HANDLE_FLAG_INHERIT, 0); hErr = GetStdHandle(STD_ERROR_HANDLE); SetHandleInformation(hErr, HANDLE_FLAG_INHERIT, 0); /* start child process to lock db lockfile and monitor db process */ if (win_popen(cmd_line, "r", &pio, &proc_info) == 0) { win_pclose(&pio); fprintf(stderr, "Unable to create process, errno = %d\n", errno); return 1; } /* wait and read the info from child whether it was able to acquire lock */ rc = win_pread(&pio, result, sizeof(result) - 1); win_pclose2(&pio); /* close handles but keep process running */ if (rc > 0) { if (result[0] == '0') { /* indicates success */ return 0; } result[rc - 1] = '\0'; } /* failure */ fprintf(stderr, "Failed to acquire lock on %s. %s\n", lockfile, result); return 1; }
/*
 * Entry point of the pbs_comm daemon (the log and lock-file messages below
 * name it "pbs_comm"; it initialises itself as a TPP router node).
 *
 * NOTE(review): this signature sits in the #else arm of a WIN32 conditional
 * whose opening lines are outside this excerpt; in the WIN32 build argc and
 * argv are taken from the arg_param structure reached through `pv`.
 *
 * Sequence, as visible in the body:
 *   - close inherited descriptors, verify privileges, load pbs.conf, set up
 *     the environment;
 *   - determine the server host name from pbs_comm_name, pbs_leaf_name or
 *     gethostname();
 *   - parse options -r (routers), -t (threads), -e (log event mask) and
 *     -N (sets stalone);
 *   - open the log, chdir to the private directory, open the lock file;
 *   - configure TPP, optionally go to background, take the lock, install
 *     signal handlers and start the router;
 *   - loop until get_out is set, re-reading the log event mask whenever
 *     hupped is set; then shut down, unlock and remove the lock file.
 *
 * Returns 0 on normal exit, 1 on configuration/initialisation errors,
 * 2 on privilege or sigaction failures, 3 on a failover configuration error.
 */
int main(int argc, char **argv)
#endif /* WIN32 */
{
#ifdef WIN32
	struct arg_param *p = (struct arg_param *)pv;
	int argc;
	char **argv;
	SERVICE_STATUS ss;
#endif /* WIN32 */
	char *name = NULL;
	struct tpp_config conf;
	int rpp_fd;
	char *pc;
	int numthreads;
	char lockfile[MAXPATHLEN + 1];
	char path_log[MAXPATHLEN + 1];
	char svr_home[MAXPATHLEN + 1];
	char *log_file = 0;
	char *host;
	int port;
	char *routers = NULL;
	int c, i, rc;
	extern char *optarg;
	int are_primary;
	int num_var_env;
#ifndef WIN32
	struct sigaction act;
	struct sigaction oact;
#endif

#ifndef WIN32
	/* the real deal or just pbs_version and exit */
	execution_mode(argc, argv);
#endif

	/* As a security measure and to make sure all file descriptors */
	/* are available to us, close all above stderr */
#ifdef WIN32
	_fcloseall();
#else
	i = sysconf(_SC_OPEN_MAX);
	while (--i > 2)
		(void)close(i); /* close any file desc left open by parent */
#endif

	/* If we are not run with real and effective uid of 0, forget it */
#ifdef WIN32
	argc = p->argc;
	argv = p->argv;

	/* report the current service state to the service control manager */
	ZeroMemory(&ss, sizeof(ss));
	ss.dwCheckPoint = 0;
	ss.dwServiceType = SERVICE_WIN32_OWN_PROCESS;
	ss.dwCurrentState = g_dwCurrentState;
	ss.dwControlsAccepted = SERVICE_ACCEPT_STOP | SERVICE_ACCEPT_SHUTDOWN;
	ss.dwWaitHint = 6000;

	if (g_ssHandle != 0)
		SetServiceStatus(g_ssHandle, &ss);

	if (!isAdminPrivilege(getlogin())) {
		fprintf(stderr, "%s: Must be run by root\n", argv[0]);
		return (2);
	}
#else
	if ((getuid() != 0) || (geteuid() != 0)) {
		fprintf(stderr, "%s: Must be run by root\n", argv[0]);
		return (2);
	}
#endif /* WIN32 */

	/* set standard umask */
#ifndef WIN32
	umask(022);
#endif

	/* load the pbs conf file */
	if (pbs_loadconf(0) == 0) {
		fprintf(stderr, "%s: Configuration error\n", argv[0]);
		return (1);
	}

	/* NOTE(review): on non-Windows builds this repeats the umask call made above */
	umask(022);

#ifdef WIN32
	save_env();
#endif
	/* The following is code to reduce security risks */
	/* start out with standard umask, system resource limit infinite */
	if ((num_var_env = setup_env(pbs_conf.pbs_environment)) == -1) {
#ifdef WIN32
		g_dwCurrentState = SERVICE_STOPPED;
		ss.dwCurrentState = g_dwCurrentState;
		ss.dwWin32ExitCode = ERROR_INVALID_ENVIRONMENT;
		if (g_ssHandle != 0)
			SetServiceStatus(g_ssHandle, &ss);
		return (1);
#else
		exit(1);
#endif /* WIN32 */
	}

#ifndef WIN32
	i = getgid();
	(void)setgroups(1, (gid_t *)&i); /* secure suppl. groups */
#endif

	log_event_mask = &pbs_conf.pbs_comm_log_events;
	tpp_set_logmask(*log_event_mask);

#ifdef WIN32
	winsock_init();
#endif

	/* defaults from pbs.conf; -r and -t on the command line override them below */
	routers = pbs_conf.pbs_comm_routers;
	numthreads = pbs_conf.pbs_comm_threads;

	/*
	 * Work out server_host and name: prefer pbs_comm_name, then the first
	 * entry of pbs_leaf_name (port stripped), then the local host name.
	 */
	server_host[0] = '\0';
	if (pbs_conf.pbs_comm_name) {
		name = pbs_conf.pbs_comm_name;
		host = tpp_parse_hostname(name, &port);
		if (host)
			snprintf(server_host, sizeof(server_host), "%s", host);
		free(host);
		host = NULL;
	} else if (pbs_conf.pbs_leaf_name) {
		char *endp;

		snprintf(server_host, sizeof(server_host), "%s", pbs_conf.pbs_leaf_name);
		endp = strchr(server_host, ','); /* find the first name */
		if (endp)
			*endp = '\0';
		endp = strchr(server_host, ':'); /* cut out the port */
		if (endp)
			*endp = '\0';
		name = server_host;
	} else {
		if (gethostname(server_host, (sizeof(server_host) - 1)) == -1) {
#ifndef WIN32
			sprintf(log_buffer, "Could not determine my hostname, errno=%d", errno);
#else
			sprintf(log_buffer, "Could not determine my hostname, errno=%d", WSAGetLastError());
#endif
			fprintf(stderr, "%s\n", log_buffer);
			return (1);
		}
		if ((get_fullhostname(server_host, server_host, (sizeof(server_host) - 1)) == -1)) {
			sprintf(log_buffer, "Could not determine my hostname");
			fprintf(stderr, "%s\n", log_buffer);
			return (1);
		}
		name = server_host;
	}
	if (server_host[0] == '\0') {
		sprintf(log_buffer, "Could not determine server host");
		fprintf(stderr, "%s\n", log_buffer);
		return (1);
	}

	/* command line options */
	while ((c = getopt(argc, argv, "r:t:e:N")) != -1) {
		switch (c) {
			case 'e': *log_event_mask = strtol(optarg, NULL, 0);
				break;
			case 'r':
				routers = optarg;
				break;
			case 't':
				/* NOTE(review): atol() reports no errors; only the value -1 is rejected */
				numthreads = atol(optarg);
				if (numthreads == -1) {
					usage(argv[0]);
					return (1);
				}
				break;
			case 'N':
				stalone = 1;
				break;
			default:
				usage(argv[0]);
				return (1);
		}
	}

	/*
	 * Daemon name is "Comm@" followed by name, cut at the first '.'.
	 * NOTE(review): strcpy/strcat are unbounded; the size of daemonname is
	 * not visible here - confirm it can hold "Comm@" plus the longest name.
	 */
	(void)strcpy(daemonname, "Comm@");
	(void)strcat(daemonname, name);
	if ((pc = strchr(daemonname, (int)'.')) != NULL)
		*pc = '\0';

	if(set_msgdaemonname(daemonname)) {
		fprintf(stderr, "Out of memory\n");
		return 1;
	}

	(void) snprintf(path_log, sizeof(path_log), "%s/%s", pbs_conf.pbs_home_path, PBS_COMM_LOGDIR);
#ifdef WIN32
	/*
	 * ask the SCM to wait longer for log_open() to complete,
	 * as it does a network interface query which can take time
	 * (the wait hint set below is 60000)
	 */
	ss.dwCheckPoint++;
	ss.dwWaitHint = 60000;
	if (g_ssHandle != 0)
		SetServiceStatus(g_ssHandle, &ss);
#endif
	(void) log_open(log_file, path_log);

	/* set pbs_comm's process limits */
	set_limits(); /* set_limits can call log_record, so call only after opening log file */

	/* set tcp function pointers */
	set_tpp_funcs(log_tppmsg);

	(void) snprintf(svr_home, sizeof(svr_home), "%s/%s", pbs_conf.pbs_home_path, PBS_SVR_PRIVATE);
	if (chdir(svr_home) != 0) {
		(void) sprintf(log_buffer, msg_init_chdir, svr_home);
		log_err(-1, __func__, log_buffer);
		return (1);
	}

	/*
	 * Lock file lives in the private directory; a failover secondary uses a
	 * ".secondary" suffix.
	 * NOTE(review): sprintf/strcat into lockfile are not bounded by its size.
	 */
	(void) sprintf(lockfile, "%s/%s/comm.lock", pbs_conf.pbs_home_path, PBS_SVR_PRIVATE);
	if ((are_primary = are_we_primary()) == FAILOVER_SECONDARY) {
		strcat(lockfile, ".secondary");
	} else if (are_primary == FAILOVER_CONFIG_ERROR) {
		sprintf(log_buffer, "Failover configuration error");
		log_err(-1, __func__, log_buffer);
#ifdef WIN32
		g_dwCurrentState = SERVICE_STOPPED;
		ss.dwCurrentState = g_dwCurrentState;
		ss.dwWin32ExitCode = ERROR_SERVICE_NOT_ACTIVE;
		if (g_ssHandle != 0)
			SetServiceStatus(g_ssHandle, &ss);
#endif
		return (3);
	}

	if ((lockfds = open(lockfile, O_CREAT | O_WRONLY, 0600)) < 0) {
		(void) sprintf(log_buffer, "pbs_comm: unable to open lock file");
		log_err(errno, __func__, log_buffer);
		return (1);
	}

	/* host is allocated here and released after set_tpp_config() succeeds */
	if ((host = tpp_parse_hostname(name, &port)) == NULL) {
		sprintf(log_buffer, "Out of memory parsing leaf name");
		log_err(errno, __func__, log_buffer);
		return (1);
	}

	rc = 0;
	if (pbs_conf.auth_method == AUTH_RESV_PORT) {
		rc = set_tpp_config(&pbs_conf, &conf, host, port, routers, pbs_conf.pbs_use_compression, TPP_AUTH_RESV_PORT, NULL, NULL);
	} else {
		/* for all non-resv-port based authentication use a callback from TPP */
		rc = set_tpp_config(&pbs_conf, &conf, host, port, routers, pbs_conf.pbs_use_compression, TPP_AUTH_EXTERNAL, get_ext_auth_data, validate_ext_auth_data);
	}
	if (rc == -1) {
		/* NOTE(review): host is not freed on this path (the process returns right away) */
		(void) sprintf(log_buffer, "Error setting TPP config");
		log_err(-1, __func__, log_buffer);
		return (1);
	}
	free(host);

	/* print and log every configured router */
	i = 0;
	if (conf.routers) {
		while (conf.routers[i]) {
			sprintf(log_buffer, "Router[%d]:%s", i, conf.routers[i]);
			fprintf(stdout, "%s\n", log_buffer);
			log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, LOG_INFO, msg_daemonname, log_buffer);
			i++;
		}
	}

#ifndef DEBUG
#ifndef WIN32
	/* the return value of go_to_background() is not checked here */
	if (stalone != 1)
		go_to_background();
#endif
#endif

#ifdef WIN32
	ss.dwCheckPoint = 0;
	g_dwCurrentState = SERVICE_RUNNING;
	ss.dwCurrentState = g_dwCurrentState;
	if (g_ssHandle != 0)
		SetServiceStatus(g_ssHandle, &ss);
#endif

	/* take the lock here unless already_forked says a forked child has been set up */
	if (already_forked == 0)
		lock_out(lockfds, F_WRLCK);

	/* go_to_background call creates a forked process,
	 * thus print/log pid only after go_to_background()
	 * has been called
	 */
	sprintf(log_buffer, "%s ready (pid=%d), Proxy Name:%s, Threads:%d", argv[0], getpid(), conf.node_name, numthreads);
	fprintf(stdout, "%s\n", log_buffer);
	log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, LOG_INFO, msg_daemonname, log_buffer);

#ifndef DEBUG
	pbs_close_stdfiles();
#endif

	/* signal handling: hup_me for HUP, stop_me for the termination signals */
#ifdef WIN32
	signal(SIGINT, stop_me);
	signal(SIGTERM, stop_me);
#else
	sigemptyset(&act.sa_mask);
	act.sa_flags = 0;
	act.sa_handler = hup_me;
	if (sigaction(SIGHUP, &act, &oact) != 0) {
		log_err(errno, __func__, "sigaction for HUP");
		return (2);
	}
	act.sa_handler = stop_me;
	if (sigaction(SIGINT, &act, &oact) != 0) {
		log_err(errno, __func__, "sigaction for INT");
		return (2);
	}
	if (sigaction(SIGTERM, &act, &oact) != 0) {
		log_err(errno, __func__, "sigactin for TERM");
		return (2);
	}
	if (sigaction(SIGQUIT, &act, &oact) != 0) {
		log_err(errno, __func__, "sigactin for QUIT");
		return (2);
	}
#ifdef SIGSHUTDN
	if (sigaction(SIGSHUTDN, &act, &oact) != 0) {
		log_err(errno, __func__, "sigactin for SHUTDN");
		return (2);
	}
#endif /* SIGSHUTDN */

	/* PIPE, USR1 and USR2 are ignored */
	act.sa_handler = SIG_IGN;
	if (sigaction(SIGPIPE, &act, &oact) != 0) {
		log_err(errno, __func__, "sigaction for PIPE");
		return (2);
	}
	if (sigaction(SIGUSR1, &act, &oact) != 0) {
		log_err(errno, __func__, "sigaction for USR1");
		return (2);
	}
	if (sigaction(SIGUSR2, &act, &oact) != 0) {
		log_err(errno, __func__, "sigaction for USR2");
		return (2);
	}
#endif /* WIN32 */

	conf.node_type = TPP_ROUTER_NODE;
	conf.numthreads = numthreads;

	if ((rpp_fd = tpp_init_router(&conf)) == -1) {
		log_err(-1, __func__, "tpp init failed\n");
		return 1;
	}

	/* Protect from being killed by kernel */
	daemon_protect(0, PBS_DAEMON_PROTECT_ON);

	/* main loop: wake every 3 seconds until get_out becomes non-zero */
	while (get_out == 0) {
		if (hupped == 1) {
			struct pbs_config pbs_conf_bak;
			int new_logevent;

			hupped = 0; /* reset back */
			memcpy(&pbs_conf_bak, &pbs_conf, sizeof(struct pbs_config));

			if (pbs_loadconf(1) == 0) {
				log_tppmsg(LOG_CRIT, NULL, "Configuration error, ignoring");
				memcpy(&pbs_conf, &pbs_conf_bak, sizeof(struct pbs_config));
			} else {
				/* restore old pbs.conf, keeping only the newly read log event mask */
				new_logevent = pbs_conf.pbs_comm_log_events;
				memcpy(&pbs_conf, &pbs_conf_bak, sizeof(struct pbs_config));
				pbs_conf.pbs_comm_log_events = new_logevent;
				log_tppmsg(LOG_INFO, NULL, "Processed SIGHUP");

				log_event_mask = &pbs_conf.pbs_comm_log_events;
				tpp_set_logmask(*log_event_mask);
			}
		}

		sleep(3);
	}

	tpp_router_shutdown();

	log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, LOG_NOTICE, msg_daemonname, "Exiting");
	log_close(1);

	lock_out(lockfds, F_UNLCK); /* unlock */
	(void)close(lockfds);
	(void)unlink(lockfile);

	return 0;
}
void start_tcl(void) { char *id = "start_tcl"; char buf[BUFSIZ]; int fd; int tot, len; interp = Tcl_CreateInterp(); if (Tcl_Init(interp) == TCL_ERROR) { sprintf(log_buffer, "Tcl_Init error: %s", Tcl_GetStringResult(interp)); log_err(-1, id, log_buffer); die(0); } #if TCLX #if TCL_MINOR_VERSION < 5 && TCL_MAJOR_VERSION < 8 if (TclX_Init(interp) == TCL_ERROR) { #else if (Tclx_Init(interp) == TCL_ERROR) { #endif sprintf(log_buffer, "Tclx_Init error: %s", Tcl_GetStringResult(interp)); log_err(-1, id, log_buffer); die(0); } #endif add_cmds(interp); if (initfil) { int code; code = Tcl_EvalFile(interp, initfil); if (code != TCL_OK) { char *trace; trace = (char *)Tcl_GetVar(interp, "errorInfo", 0); if (trace == NULL) trace = (char *)Tcl_GetStringResult(interp); fprintf(stderr, "%s: TCL error @ line %d: %s\n", initfil, interp->errorLine, trace); sprintf(log_buffer, "%s: TCL error @ line %d: %s", initfil, interp->errorLine, Tcl_GetStringResult(interp)); log_err(-1, id, log_buffer); die(0); } sprintf(log_buffer, "init file %s", initfil); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } if ((fd = open(bodyfil, O_RDONLY)) == -1) { log_err(errno, id, bodyfil); die(0); } sprintf(log_buffer, "body file: %s", bodyfil); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); if (body) free(body); if ((body = malloc(BUFSIZ)) == NULL) { log_err(errno, id, "malloc"); die(0); } for (tot = 0; (len = read(fd, buf, sizeof(buf))) > 0; tot += len) { if ((body = realloc(body, tot + len + 1)) == NULL) { log_err(errno, id, "realloc"); die(0); } memcpy(&body[tot], buf, len); } if (len == -1) { log_err(errno, id, bodyfil); die(0); } body[tot] = '\0'; close(fd); #if TCL_MAJOR_VERSION >= 8 if (body_obj == NULL) { body_obj = Tcl_NewStringObj(body, tot); Tcl_IncrRefCount(body_obj); } else { Tcl_SetStringObj(body_obj, body, tot); } #endif } int addclient(name) char *name; { static char id[] = "addclient"; struct hostent *host, *gethostbyname(); struct in_addr saddr; if 
((host = gethostbyname(name)) == NULL) { sprintf(log_buffer, "host %s not found", name); log_err(-1, id, log_buffer); return -1; } if (numclients >= START_CLIENTS) { pbs_net_t *newclients; newclients = realloc(okclients, sizeof(pbs_net_t) * (numclients + 1)); if (newclients == NULL) return -1; okclients = newclients; } memcpy((char *)&saddr, host->h_addr, host->h_length); okclients[numclients++] = saddr.s_addr; return 0; } /* * read_config - read and process the configuration file (see -c option) * * Currently, the only statement is $clienthost to specify which systems * can contact the scheduler. */ #define CONF_LINE_LEN 120 static int read_config(file) char *file; { static char *id = "read_config"; FILE *conf; int i; char line[CONF_LINE_LEN]; char *token; struct specialconfig { char *name; int (*handler)(); } special[] = { {"clienthost", addclient }, { NULL, NULL } }; #if !defined(DEBUG) && !defined(NO_SECURITY_CHECK) if (chk_file_sec(file, 0, 0, S_IWGRP | S_IWOTH, 1, 0)) return (-1); #endif if ((conf = fopen(file, "r")) == NULL) { log_err(errno, id, "cannot open config file"); return (-1); } while (fgets(line, CONF_LINE_LEN, conf)) { if ((line[0] == '#') || (line[0] == '\n')) continue; /* ignore comment & null line */ else if (line[0] == '$') /* special */ { if ((token = strtok(line, " \t")) == NULL) token = ""; for (i = 0; special[i].name; i++) { if (strcmp(token + 1, special[i].name) == 0) break; } if (special[i].name == NULL) { sprintf(log_buffer, "config name %s not known", token); log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, msg_daemonname, log_buffer); return (-1); } token = strtok(NULL, " \t"); if (*(token + strlen(token) - 1) == '\n') *(token + strlen(token) - 1) = '\0'; if (special[i].handler(token)) { fclose(conf); return (-1); } } else { log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER, msg_daemonname, "invalid line in config file"); fclose(conf); return (-1); } } fclose(conf); return (0); } void restart(sig) int sig; { char *id = "restart"; if 
(sig) { sprintf(log_buffer, "restart on signal %d", sig); log_close(1); log_open(logfile, path_log); } else { sprintf(log_buffer, "restart command"); } log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); Tcl_DeleteInterp(interp); if (configfile) { if (read_config(configfile) != 0) die(0); } start_tcl(); } void badconn(msg) char *msg; { static char id[] = "badconn"; struct in_addr addr; char buf[5*sizeof(addr) + 100]; struct hostent *phe; addr = saddr.sin_addr; phe = gethostbyaddr((void *) & addr, sizeof(addr), AF_INET); if (phe == NULL) { char hold[6]; int i; union { struct in_addr aa; u_char bb[sizeof(addr)]; } uu; uu.aa = addr; sprintf(buf, "%u", uu.bb[0]); for (i = 1; i < (int)sizeof(addr); i++) { sprintf(hold, ".%u", uu.bb[i]); strcat(buf, hold); } } else { strncpy(buf, phe->h_name, sizeof(buf)); buf[sizeof(buf)-1] = '\0'; } sprintf(log_buffer, "%s on port %u %s", buf, ntohs(saddr.sin_port), msg); log_err(-1, id, log_buffer); return; } unsigned int server_command() { static char id[] = "server_command"; int new_socket; int i; torque_socklen_t slen; unsigned int cmd; pbs_net_t addr; slen = sizeof(saddr); new_socket = accept(server_sock, (struct sockaddr *) & saddr, &slen); if (new_socket == -1) { log_err(errno, id, "accept"); return SCH_ERROR; } if (ntohs(saddr.sin_port) >= IPPORT_RESERVED) { badconn("non-reserved port"); close(new_socket); return SCH_ERROR; } addr = (pbs_net_t)saddr.sin_addr.s_addr; for (i = 0; i < numclients; i++) { if (addr == okclients[i]) break; } if (i == numclients) { badconn("unauthorized host"); close(new_socket); return SCH_ERROR; } if ((connector = socket_to_conn(new_socket)) < 0) { log_err(errno, id, "socket_to_conn"); return SCH_ERROR; } if (get_4byte(new_socket, &cmd) != 1) { log_err(errno, id, "get4bytes"); return SCH_ERROR; } return cmd; } /* * lock_out - lock out other daemons from this directory. 
*/ static void lock_out(fds, op) int fds; int op; /* F_WRLCK or F_UNLCK */ { struct flock flock; flock.l_type = op; flock.l_whence = SEEK_SET; flock.l_start = 0; flock.l_len = 0; /* whole file */ if (fcntl(fds, F_SETLK, &flock) < 0) { (void)strcpy(log_buffer, "pbs_sched: another scheduler running\n"); log_err(errno, msg_daemonname, log_buffer); fprintf(stderr, log_buffer); exit(1); } } int main(argc, argv) int argc; char *argv[]; { char *id = "main"; int code; struct hostent *hp; int go, c, errflg = 0; int lockfds; int t = 1; char *ptr; pid_t pid; char *cp, host[100]; char *homedir = PBS_SERVER_HOME; unsigned int port; char path_priv[_POSIX_PATH_MAX]; char *dbfile = "sched_out"; int alarm_time = 180; struct sigaction act; caddr_t curr_brk = 0, next_brk; extern char *optarg; extern int optind, opterr; fd_set fdset; #ifndef DEBUG if (IamRoot() == 0) { return (1); } #endif /* DEBUG */ glob_argv = argv; if ((cp = strrchr(argv[0], '/')) == NULL) cp = argv[0]; else cp++; msg_daemonname = strdup(cp); port = get_svrport(PBS_SCHEDULER_SERVICE_NAME, "tcp", PBS_SCHEDULER_SERVICE_PORT); while ((c = getopt(argc, argv, "L:S:d:i:b:t:p:a:vc:")) != EOF) { switch (c) { case 'L': logfile = optarg; break; case 'S': port = (unsigned int)atoi(optarg); if (port == 0) { fprintf(stderr, "%s: illegal port\n", optarg); errflg = 1; } break; case 'd': homedir = optarg; break; case 'i': /* initialize */ initfil = optarg; break; case 'b': bodyfil = optarg; break; case 't': termfil = optarg; break; case 'p': dbfile = optarg; break; case 'a': alarm_time = strtol(optarg, &ptr, 10); if (alarm_time <= 0 || *ptr != '\0') { fprintf(stderr, "%s: bad alarm time\n", optarg); errflg = 1; } break; case 'c': configfile = optarg; break; case 'v': verbose = 1; break; case '?': errflg = 1; break; } } if (errflg || optind != argc) { static char *options[] = { "[-L logfile]", "[-S port]", "[-d home]", "[-i init]", "[-b body]", "[-t term]", "[-p output]", "[-a alarm]", "[-c configfile]", "[-v]", NULL }; int i; 
fprintf(stderr, "usage: %s\n", argv[0]); for (i = 0; options[i]; i++) fprintf(stderr, "\t%s\n", options[i]); exit(1); } /* Save the original working directory for "restart" */ if ((oldpath = getcwd((char *)NULL, MAXPATHLEN)) == NULL) { fprintf(stderr, "cannot get current working directory\n"); exit(1); } (void)sprintf(path_priv, "%s/sched_priv", homedir); #if !defined(DEBUG) && !defined(NO_SECURITY_CHECK) c = chk_file_sec(path_priv, 1, 0, S_IWGRP | S_IWOTH, 1, 0); c |= chk_file_sec(PBS_ENVIRON, 0, 0, S_IWGRP | S_IWOTH, 0, 0); if (c != 0) exit(1); #endif /* not DEBUG and not NO_SECURITY_CHECK */ if (chdir(path_priv) == -1) { perror(path_priv); exit(1); } (void)sprintf(path_log, "%s/sched_logs", homedir); (void)strcpy(pbs_current_user, "Scheduler"); /* The following is code to reduce security risks */ /* start out with standard umask, system resource limit infinite */ umask(022); if (setup_env(PBS_ENVIRON) == -1) exit(1); c = getgid(); (void)setgroups(1, (gid_t *)&c); /* secure suppl. group ids */ c = sysconf(_SC_OPEN_MAX); while (--c > 2) (void)close(c); /* close any file desc left open by parent */ #ifndef DEBUG #ifdef _CRAY (void)limit(C_JOB, 0, L_CPROC, 0); (void)limit(C_JOB, 0, L_CPU, 0); (void)limit(C_JOBPROCS, 0, L_CPU, 0); (void)limit(C_PROC, 0, L_FD, 255); (void)limit(C_JOB, 0, L_FSBLK, 0); (void)limit(C_JOBPROCS, 0, L_FSBLK, 0); (void)limit(C_JOB, 0, L_MEM , 0); (void)limit(C_JOBPROCS, 0, L_MEM , 0); #else /* not _CRAY */ { struct rlimit rlimit; rlimit.rlim_cur = RLIM_INFINITY; rlimit.rlim_max = RLIM_INFINITY; (void)setrlimit(RLIMIT_CPU, &rlimit); (void)setrlimit(RLIMIT_FSIZE, &rlimit); (void)setrlimit(RLIMIT_DATA, &rlimit); (void)setrlimit(RLIMIT_STACK, &rlimit); #ifdef RLIMIT_RSS (void)setrlimit(RLIMIT_RSS , &rlimit); #endif /* RLIMIT_RSS */ #ifdef RLIMIT_VMEM (void)setrlimit(RLIMIT_VMEM , &rlimit); #endif /* RLIMIT_VMEM */ } #endif /* not _CRAY */ #if !defined(NO_SECURITY_CHECK) c = 0; if (initfil) { if (*initfil != '/') { (void)sprintf(log_buffer, 
"%s/%s", path_priv, initfil); c |= chk_file_sec(log_buffer, 0, 0, S_IWGRP | S_IWOTH, 1, 0); } else { c |= chk_file_sec(initfil, 0, 0, S_IWGRP | S_IWOTH, 1, 0); } } if (bodyfil) { if (*bodyfil != '/') { (void)sprintf(log_buffer, "%s/%s", path_priv, bodyfil); c |= chk_file_sec(log_buffer, 0, 0, S_IWGRP | S_IWOTH, 1, 0); } else { c |= chk_file_sec(bodyfil, 0, 0, S_IWGRP | S_IWOTH, 1, 0); } } if (termfil) { if (*termfil != '/') { (void)sprintf(log_buffer, "%s/%s", path_priv, termfil); c |= chk_file_sec(log_buffer, 0, 0, S_IWGRP | S_IWOTH, 1, 0); } else { c |= chk_file_sec(termfil, 0, 0, S_IWGRP | S_IWOTH, 1, 0); } } if (c) exit(1); #endif /* not NO_SECURITY_CHECK */ #endif /* not DEBUG */ if (log_open(logfile, path_log) == -1) { fprintf(stderr, "%s: logfile could not be opened\n", argv[0]); exit(1); } if (gethostname(host, sizeof(host)) == -1) { char *prob = "gethostname"; log_err(errno, id, prob); perror(prob); die(0); } if ((hp = gethostbyname(host)) == NULL) { char *prob = "gethostbyname"; log_err(errno, id, prob); perror(prob); die(0); } if ((server_sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) { char *prob = "socket"; log_err(errno, id, prob); perror(prob); die(0); } if (setsockopt(server_sock, SOL_SOCKET, SO_REUSEADDR, (char *)&t, sizeof(t)) == -1) { char *prob = "setsockopt"; log_err(errno, id, prob); perror(prob); die(0); } saddr.sin_family = AF_INET; saddr.sin_port = htons((unsigned short)port); memcpy(&saddr.sin_addr, hp->h_addr, hp->h_length); if (bind(server_sock, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) { char *prob = "bind"; log_err(errno, id, prob); perror(prob); die(0); } if (listen(server_sock, 5) < 0) { char *prob = "listen"; log_err(errno, id, prob); perror(prob); die(0); } okclients = (pbs_net_t *)calloc(START_CLIENTS, sizeof(pbs_net_t)); addclient("localhost"); /* who has permission to call MOM */ addclient(host); if (configfile) { if (read_config(configfile) != 0) die(0); } lockfds = open("sched.lock", O_CREAT | O_TRUNC | O_WRONLY, 0644); if 
(lockfds < 0) { char *prob = "lock file"; log_err(errno, id, prob); perror(prob); die(0); } lock_out(lockfds, F_WRLCK); #ifndef DEBUG lock_out(lockfds, F_UNLCK); if ((pid = fork()) == -1) /* error on fork */ { char *prob = "fork"; log_err(errno, id, prob); perror(prob); die(0); } else if (pid > 0) /* parent exits */ exit(0); if ((pid = setsid()) == -1) { log_err(errno, id, "setsid"); die(0); } lock_out(lockfds, F_WRLCK); freopen(dbfile, "a", stdout); setvbuf(stdout, NULL, _IOLBF, 0); dup2(fileno(stdout), fileno(stderr)); #else pid = getpid(); setvbuf(stdout, NULL, _IOLBF, 0); setvbuf(stderr, NULL, _IOLBF, 0); #endif freopen("/dev/null", "r", stdin); /* write schedulers pid into lockfile */ (void)sprintf(log_buffer, "%d\n", pid); (void)write(lockfds, log_buffer, strlen(log_buffer) + 1); #if (PLOCK_DAEMONS & 2) (void)plock(PROCLOCK); /* lock daemon into memory */ #endif sprintf(log_buffer, "%s startup pid %d", argv[0], pid); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); sprintf(log_buffer, "%s using TCL %s (%s)", argv[0], TCL_VERSION, TCL_PATCH_LEVEL); fprintf(stderr, "%s\n", log_buffer); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); fullresp(0); sigemptyset(&allsigs); act.sa_flags = 0; sigaddset(&allsigs, SIGHUP); /* remember to block these */ sigaddset(&allsigs, SIGINT); /* during critical sections */ sigaddset(&allsigs, SIGTERM); /* so we don't get confused */ act.sa_mask = allsigs; act.sa_handler = restart; /* do a restart on SIGHUP */ sigaction(SIGHUP, &act, NULL); act.sa_handler = toolong; /* handle an alarm call */ sigaction(SIGALRM, &act, NULL); act.sa_handler = die; /* bite the biscuit for all following */ sigaction(SIGINT, &act, NULL); sigaction(SIGTERM, &act, NULL); start_tcl(); FD_ZERO(&fdset); for (go = 1; go;) { unsigned int cmd; FD_SET(server_sock, &fdset); if (select(FD_SETSIZE, &fdset, NULL, NULL, NULL) == -1) { if (errno != EINTR) log_err(errno, id, "select"); continue; } if (!FD_ISSET(server_sock, 
&fdset)) continue; cmd = server_command(); if (cmd == (unsigned)SCH_ERROR || cmd == (unsigned)SCH_SCHEDULE_NULL) continue; if (sigprocmask(SIG_BLOCK, &allsigs, &oldsigs) == -1) log_err(errno, id, "sigprocmaskSIG_BLOCK)"); if (verbose) { sprintf(log_buffer, "command %d", cmd); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } switch (cmd) { case SCH_SCHEDULE_NEW: case SCH_SCHEDULE_TERM: case SCH_SCHEDULE_TIME: case SCH_SCHEDULE_RECYC: case SCH_SCHEDULE_CMD: case SCH_SCHEDULE_FIRST: alarm(alarm_time); #if TCL_MAJOR_VERSION >= 8 /* execute compiled body code for TCL-8 */ code = Tcl_EvalObj(interp, body_obj); #else code = Tcl_Eval(interp, body); #endif alarm(0); switch (code) { case TCL_OK: case TCL_RETURN: break; default: { char *trace; char codename[20]; switch (code) { case TCL_BREAK: strcpy(codename, "break"); break; case TCL_CONTINUE: strcpy(codename, "continue"); break; default: strcpy(codename, "<unknown>"); break; } trace = (char *)Tcl_GetVar(interp, "errorInfo", 0); if (trace == NULL) trace = (char *)Tcl_GetStringResult(interp); fprintf(stderr, "%s: TCL interpreter return code %d (%s) @ line %d: %s\n", bodyfil, code, codename, interp->errorLine, trace); sprintf(log_buffer, "%s: TCL error @ line %d: %s", bodyfil, interp->errorLine, Tcl_GetStringResult(interp)); log_err(-1, id, log_buffer); die(0); } } break; case SCH_CONFIGURE: case SCH_RULESET: restart(0); break; case SCH_QUIT: go = 0; break; default: log_err(-1, id, "unknown command"); break; } if (connector >= 0 && server_disconnect(connector)) { log_err(errno, id, "server_disconnect"); die(0); } connector = -1; if (verbose) { next_brk = (caddr_t)sbrk(0); if (next_brk > curr_brk) { sprintf(log_buffer, "brk point %p", next_brk); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); curr_brk = next_brk; } } if (sigprocmask(SIG_SETMASK, &oldsigs, NULL) == -1) log_err(errno, id, "sigprocmask(SIG_SETMASK)"); } if (termfil) { code = Tcl_EvalFile(interp, termfil); if (code != 
TCL_OK) { char *trace; trace = (char *)Tcl_GetVar(interp, "errorInfo", 0); if (trace == NULL) trace = (char *)Tcl_GetStringResult(interp); fprintf(stderr, "%s: TCL error @ line %d: %s\n", termfil, interp->errorLine, trace); sprintf(log_buffer, "%s: TCL error @ line %d: %s", termfil, interp->errorLine, Tcl_GetStringResult(interp)); log_err(-1, id, log_buffer); die(0); } sprintf(log_buffer, "term file: %s", termfil); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); } sprintf(log_buffer, "%s normal finish pid %d", argv[0], pid); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); (void)close(server_sock); exit(0); }
/*
 * Flush pending output if possible, otherwise poll the connection's
 * descriptor for up to `seconds` and service whatever becomes ready.
 *
 * Returns:
 *   -1  the initial write failed, the poll failed (other than EINTR),
 *       the descriptor is not open, or POLLERR/POLLHUP was reported;
 *    0  data was written without polling, the poll was interrupted
 *       (EINTR), or the reported POLLOUT/POLLIN events were serviced;
 *    1  gwthread_pollfd() reported no events at all.
 */
int conn_wait(Connection *conn, double seconds)
{
    int fd;
    int wanted;
    int revents;
    int written;

    /* First try to push out anything that is still queued for sending. */
    lock_out(conn);
    written = unlocked_write(conn);
    if (written != 0) {
        /* Either an error (< 0) or useful work (> 0): no polling needed. */
        unlock_out(conn);
        return (written < 0) ? -1 : 0;
    }

    fd = conn->fd;

    /*
     * Wait for writability only while output remains buffered.  The
     * connection is not kept locked during the wait itself.
     */
    wanted = (unlocked_outbuf_len(conn) > 0) ? POLLOUT : 0;
    unlock_out(conn);

    /*
     * Wait for readability unless the input side is finished (EOF or
     * I/O error), because poll would then keep reporting POLLIN.  If
     * there is nothing to write either, poll for input anyway: the
     * caller asked to wait and apparently does not trust the EOF.
     * read_eof and io_error are examined under the "in" lock.
     */
    lock_in(conn);
    if (wanted == 0 || (conn->read_eof == 0 && conn->io_error == 0))
        wanted |= POLLIN;
    unlock_in(conn);

    revents = gwthread_pollfd(fd, wanted, seconds);

    if (revents < 0) {
        if (errno == EINTR)
            return 0;
        error(0, "conn_wait: poll failed on fd %d:", fd);
        return -1;
    }

    if (revents == 0)
        return 1;

    if (revents & POLLNVAL) {
        error(0, "conn_wait: fd %d not open.", fd);
        return -1;
    }

    if (revents & (POLLERR | POLLHUP)) {
        /*
         * Let unlocked_read() report and handle the specific error.
         * The condition may have changed since the lock was released,
         * so the poll result alone is not relied upon.
         */
        lock_in(conn);
        unlocked_read(conn);
        unlock_in(conn);
        return -1;
    }

    /* POLLOUT is only ever requested when there was something to write. */
    if (revents & POLLOUT) {
        lock_out(conn);
        unlocked_write(conn);
        unlock_out(conn);
    }

    /*
     * Consume readable data here.  A caller that loops on conn_wait()
     * without calling conn_read*() in between would otherwise be woken
     * for the same data again and again.
     */
    if (revents & POLLIN) {
        lock_in(conn);
        unlocked_read(conn);
        unlock_in(conn);
    }

    return 0;
}
static void poll_callback(int fd, int revents, void *data) { Connection *conn; int do_callback = 0; conn = data; if (conn == NULL) { error(0, "poll_callback called with NULL connection."); return; } if (conn->fd != fd) { error(0, "poll_callback called on wrong connection."); return; } /* Get result of nonblocking connect, before any reads and writes * we must check result (it must be handled in initial callback) */ if (conn->connected == no) { if (conn->callback) conn->callback(conn, conn->callback_data); return; } /* If got POLLERR or POLHUP, then unregister the descriptor from the * fdset and set the error condition variable to let the upper layer * close and destroy the connection. */ if (revents & (POLLERR|POLLHUP)) { lock_out(conn); lock_in(conn); if (conn->listening_pollin) unlocked_register_pollin(conn, 0); if (conn->listening_pollout) unlocked_register_pollout(conn, 0); conn->io_error = 1; unlock_in(conn); unlock_out(conn); do_callback = 1; } /* If unlocked_write manages to write all pending data, it will * tell the fdset to stop listening for POLLOUT. */ if (revents & POLLOUT) { lock_out(conn); unlocked_write(conn); if (unlocked_outbuf_len(conn) == 0) do_callback = 1; unlock_out(conn); } /* We read only in unlocked_read in we received POLLIN, cause the * descriptor is already broken and of no use anymore. */ if (revents & POLLIN) { lock_in(conn); unlocked_read(conn); unlock_in(conn); do_callback = 1; } if (do_callback && conn->callback) conn->callback(conn, conn->callback_data); }
/** * @brief * This is the Unix counterpart of acquire_lock * @par * This function creates/opens the lock file, and locks the file. * In case of a failover environment, the whole operation is retried * several times in a loop. * * @param[in] lockfile - Path of db_lock file. * @param[out] reason - Reason for failure, if not able to accquire lock * @param[in] reasonlen - reason buffer legnth. * @param[out] is_lock_hld_by_thishost - This flag is set if the lock is held by the host * requesting accquire_lock in check_mode. * * @return File descriptor of the open and locked file * @retval -1 : Function failed to acquire lock * @retval !=-1 : Function succeeded (file descriptor returned) * * @par MT-safe: Yes */ int acquire_lock(char *lockfile, char *reason, int reasonlen, int *is_lock_hld_by_thishost) { int fd; struct stat st; int i, j; time_t lasttime = 0; int rc; char who[PBS_MAXHOSTNAME + 10]; char *p; if (reasonlen > 0) reason[0] = '\0'; if (pbs_conf.pbs_secondary == NULL) j = 1; /* not fail over, try lock one time */ else j = MAX_LOCK_ATTEMPTS; /* fail over, try X times */ #ifndef O_RSYNC #define O_RSYNC 0 #endif again: if ((fd = open(lockfile, O_RDWR | O_CREAT | O_RSYNC, 0600)) == -1) { snprintf(reason, reasonlen, "Could not access lockfile, errno=%d", errno); return -1; } /* check time stamp of lock file */ if (fstat(fd, &st) == -1) { snprintf(reason, reasonlen, "Failed to stat lockfile, errno=%d", errno); close(fd); return -1; } /* record the last modified timestamp */ lasttime = st.st_mtime; for (i=0; i < j; i++) { /* try X times where X is MAX_LOCK_ATTEMPTS */ if (i > 0) sleep(1); /* attempt to lock the datastore directory */ if (lock_out(fd, F_WRLCK) == 0) return fd; } /* do this only if failover is configured */ if (pbs_conf.pbs_secondary != NULL) { /* * Came here, means we could not lock even after j attempts. 
* * 2 levels of check will be performed (based on the last modified timestamp): * * 1) Check the lock file's modified timestamp and compare with "lasttime" to see if the file was modified * in between. If the file was modified, then the other side up and so we give up. * * 2) We know that the modified timestamp is not updating however we need to make * sure that the other side is really gone. Therefore we check the difference of last * updated timestamp from now (current system time). If the difference > (4*j) seconds, * then the other side has vanished at the OS level itself, and NFS cannot unlock it. * So delete the lockfile and start afresh. For this to work, make sure that the * time on primary, secondary and the pbs_home server (NFS server) are synced. */ /* Re-check time stamp of lock file */ if (fstat(fd, &st) == -1) { snprintf(reason, reasonlen, "Failed to stat lockfile, errno=%d", errno); close(fd); return -1; } /* Check if time stamp of lock file has updated at all */ if (st.st_mtime == lasttime) { /* Modified times stamp did not update in the given window. Re-check how long it has been stale */ if (time(0) - lasttime >= (MAX_LOCK_ATTEMPTS * 4)) { /* other side is long dead, clear up stuff */ close(fd); unlink(lockfile); fd = -1; lasttime = 0; goto again; } } } /* all attempts to lock failed, try to see who has it locked */ (void) lseek(fd, (off_t) 0, SEEK_SET); if ((rc = read(fd, who, sizeof(who) - 1)) > 0) { who[rc - 1] = '\0'; p = strchr(who, ':'); if (p) { *p = '\0'; snprintf(reason, reasonlen, "Lock seems to be held by pid: %s running on host: %s", (p + 1), who); } else { snprintf(reason, reasonlen, "Lock seems to be held by %s", who); } if (is_lock_hld_by_thishost != NULL) { if (strcmp(thishost, who) == 0) *is_lock_hld_by_thishost = 1; else *is_lock_hld_by_thishost = 0; } } close(fd); fd = -1; return fd; }
Octstr *conn_read_withlen(Connection *conn) { Octstr *result = NULL; unsigned char lengthbuf[4]; long length = 0; /* for compiler please */ int try, retry; lock_in(conn); for (try = 1; try <= 2; try++) { if (try > 1) unlocked_read(conn); do { retry = 0; /* First get the length. */ if (unlocked_inbuf_len(conn) < 4) continue; octstr_get_many_chars(lengthbuf, conn->inbuf, conn->inbufpos, 4); length = decode_network_long(lengthbuf); if (length < 0) { warning(0, "conn_read_withlen: got negative length, skipping"); conn->inbufpos += 4; retry = 1; } } while(retry == 1); /* Then get the data. */ if (unlocked_inbuf_len(conn) - 4 < length) continue; conn->inbufpos += 4; result = unlocked_get(conn, length); gw_claim_area(result); break; } unlock_in(conn); return result; } Octstr *conn_read_packet(Connection *conn, int startmark, int endmark) { int startpos, endpos; Octstr *result = NULL; int try; lock_in(conn); for (try = 1; try <= 2; try++) { if (try > 1) unlocked_read(conn); /* Find startmark, and discard everything up to it */ if (startmark >= 0) { startpos = octstr_search_char(conn->inbuf, startmark, conn->inbufpos); if (startpos < 0) { conn->inbufpos = octstr_len(conn->inbuf); continue; } else { conn->inbufpos = startpos; } } else { startpos = conn->inbufpos; } /* Find first endmark after startmark */ endpos = octstr_search_char(conn->inbuf, endmark, conn->inbufpos); if (endpos < 0) continue; result = unlocked_get(conn, endpos - startpos + 1); gw_claim_area(result); break; } unlock_in(conn); return result; } #ifdef HAVE_LIBSSL X509 *conn_get_peer_certificate(Connection *conn) { /* Don't know if it needed to be locked , but better safe as crash */ lock_out(conn); lock_in(conn); if (conn->peer_certificate == NULL && conn->ssl != NULL) conn->peer_certificate = SSL_get_peer_certificate(conn->ssl); unlock_in(conn); unlock_out(conn); return conn->peer_certificate; } /* * XXX Alex decalred the RSA callback routine static and now we're getting * warning messages for our 
automatic compilation tests. So we are commenting * the function out to avoid the warnings. * static RSA *tmp_rsa_callback(SSL *ssl, int export, int key_len) { static RSA *rsa = NULL; debug("gwlib.http", 0, "SSL: Generating new RSA key (export=%d, keylen=%d)", export, key_len); if (export) { rsa = RSA_generate_key(key_len, RSA_F4, NULL, NULL); } else { debug("gwlib.http", 0, "SSL: Export not set"); } return rsa; } */ static Mutex **ssl_static_locks = NULL; /* the call-back function for the openssl crypto thread locking */ static void openssl_locking_function(int mode, int n, const char *file, int line) { if (mode & CRYPTO_LOCK) mutex_lock(ssl_static_locks[n-1]); else mutex_unlock(ssl_static_locks[n-1]); } void openssl_init_locks(void) { int c, maxlocks = CRYPTO_num_locks(); gw_assert(ssl_static_locks == NULL); ssl_static_locks = gw_malloc(sizeof(Mutex *) * maxlocks); for (c = 0; c < maxlocks; c++) ssl_static_locks[c] = mutex_create(); /* after the mutexes have been created, apply the call-back to it */ CRYPTO_set_locking_callback(openssl_locking_function); CRYPTO_set_id_callback((CRYPTO_CALLBACK_PTR)gwthread_self); }
/** * @brief This is the Unix couterpart of the monitoring * code. * @par * This function does the following: * a) Creates a pipe, forks itself, parent waits to read on pipe. * b) Child creates/opens a file $PBS_HOME/datastore/pbs_dblock. * c) Attempts to lock the file. If locking * succeeds, unlocks the file and writes 0 (success) to the write * end of the pipe. If locking fails, writes 1 (failure) to pipe. * Parent reads from pipe and exits with the code read from pipe. * d) If mode is "check" then child quits. * e) If mode is "monitor", continues in the background, checking * the database pid in a loop forever. If database pid goes * down, then it unlocks the file and exits. * * @param[in] mode - "check" - to just check if lockfile can be locked * "monitor" - to launch a monitoring child process that * holds onto the file lock. * * @retval 1 - Function failed to acquire lock * @retval 0 - Function succeded in the requested operation * @par * The return values are not used by the caller (parent process) since * in the success case this function does not return. Instead, the parent * waits on the read end of the pipe to read a status from the monitoring * child process. 
* * @par MT-safe: Yes */ int unix_db_monitor(char *mode) { int fd; int rc; int i; pid_t dbpid; char lockfile[MAXPATHLEN + 1]; int pipefd[2]; int res; int is_lock_local = 0; char reason[RES_BUF_SIZE]; reason[0] = '\0'; if (pipe(pipefd) != 0) { fprintf(stderr, "Unable to create pipe, errno = %d\n", errno); return 1; } snprintf(lockfile, MAXPATHLEN, "%s/datastore/pbs_dblock", pbs_conf.pbs_home_path); /* first fork off */ rc = fork(); if (rc == -1) { fprintf(stderr, "Unable to create process, errno = %d\n", errno); return 1; } if (rc > 0) { close(pipefd[1]); /* * child can continue to execute in case of "monitor", * so dont wait for child to exit, rather read code * from pipe that child will write to */ if (read(pipefd[0], &res, sizeof(int)) != sizeof(int)) return 1; if (res != 0) { read(pipefd[0], &reason, sizeof(reason)); fprintf(stderr, "Failed to acquire lock on %s. %s\n", lockfile, reason); } return (res); /* return parent with success */ } close(pipefd[0]); /* child */ if (setsid() == -1) { close(pipefd[1]); return 1; } (void)fclose(stdin); (void)fclose(stdout); (void)fclose(stderr); /* Protect from being killed by kernel */ daemon_protect(0, PBS_DAEMON_PROTECT_ON); if ((fd = acquire_lock(lockfile, reason, sizeof(reason), &is_lock_local)) == -1) { if (is_lock_local && strcmp(mode, "check") == 0) { /* write success to parent since lock is already held by the localhost */ res = 0; write(pipefd[1], &res, sizeof(int)); close(pipefd[1]); return 0; } res = 1; write(pipefd[1], &res, sizeof(int)); write(pipefd[1], reason, sizeof(reason)); close(pipefd[1]); return 1; } /* unlock before writing success to parent, to avoid race */ if (strcmp(mode, "check") == 0) { lock_out(fd, F_UNLCK); close(fd); unlink(lockfile); } /* write success to parent since we acquired the lock */ res = 0; write(pipefd[1], &res, sizeof(int)); close(pipefd[1]); if (strcmp(mode, "check") == 0) return 0; /* clear any residual stop db file before starting monitoring */ clear_stop_db_file(); /* * first 
find out the pid of the postgres process from dbstore/postmaster.pid * wait for a while till it is found * if not found within MAX_DBPID_ATTEMPTS then break with error * if found, start monitoring the pid * */ dbpid = 0; for (i = 0; i < MAX_DBPID_ATTEMPTS; i++) { if ((dbpid = get_pid()) > 0) break; (void)utimes(lockfile, NULL); sleep(1); } if (dbpid == 0) { /* database did not come up, so quit after unlocking file */ lock_out(fd, F_UNLCK); close(fd); unlink(lockfile); return 0; } while (1) { (void)utimes(lockfile, NULL); if (kill(dbpid, 0) != 0) break; if (!((dbpid = get_pid()) > 0)) break; /* check if stop db file exists */ check_and_stop_db(dbpid); sleep(1); } lock_out(fd, F_UNLCK); close(fd); unlink(lockfile); return 0; }
int main( int argc, char *argv[]) { char *id = "main"; struct hostent *hp; int go, c, errflg = 0; int lockfds; int t = 1; pid_t pid; char host[100]; char *homedir = PBS_SERVER_HOME; unsigned int port; char *dbfile = "sched_out"; struct sigaction act; sigset_t oldsigs; caddr_t curr_brk = 0; caddr_t next_brk; extern char *optarg; extern int optind, opterr; extern int rpp_fd; fd_set fdset; int schedinit(int argc, char **argv); int schedule(int com, int connector); glob_argv = argv; alarm_time = 180; /* The following is code to reduce security risks */ /* move this to a place where nss_ldap doesn't hold a socket yet */ c = sysconf(_SC_OPEN_MAX); while (--c > 2) (void)close(c); /* close any file desc left open by parent */ port = get_svrport(PBS_SCHEDULER_SERVICE_NAME, "tcp", PBS_SCHEDULER_SERVICE_PORT); pbs_rm_port = get_svrport(PBS_MANAGER_SERVICE_NAME, "tcp", PBS_MANAGER_SERVICE_PORT); strcpy(pbs_current_user, "Scheduler"); msg_daemonname = strdup("pbs_sched"); opterr = 0; while ((c = getopt(argc, argv, "L:S:R:d:p:c:a:-:")) != EOF) { switch (c) { case '-': if ((optarg == NULL) || (optarg[0] == '\0')) { errflg = 1; } if (!strcmp(optarg, "version")) { fprintf(stderr, "version: %s\n", PACKAGE_VERSION); exit(0); } else { errflg = 1; } break; case 'L': logfile = optarg; break; case 'S': port = atoi(optarg); if (port == 0) { fprintf(stderr, "%s: illegal port\n", optarg); errflg = 1; } break; case 'R': if ((pbs_rm_port = atoi(optarg)) == 0) { (void)fprintf(stderr, "%s: bad -R %s\n", argv[0], optarg); return 1; } break; case 'd': homedir = optarg; break; case 'p': dbfile = optarg; break; case 'c': configfile = optarg; break; case 'a': alarm_time = atoi(optarg); if (alarm_time == 0) { fprintf(stderr, "%s: bad alarm time\n", optarg); errflg = 1; } break; case '?': errflg = 1; break; } } if (errflg) { fprintf(stderr, "usage: %s %s\n", argv[0], usage); exit(1); } #ifndef DEBUG if (IamRoot() == 0) { return (1); } #endif /* DEBUG */ /* Save the original working directory for 
"restart" */ if ((oldpath = getcwd((char *)NULL, MAXPATHLEN)) == NULL) { fprintf(stderr, "cannot get current working directory\n"); exit(1); } (void)sprintf(log_buffer, "%s/sched_priv", homedir); #if !defined(DEBUG) && !defined(NO_SECURITY_CHECK) c = chk_file_sec(log_buffer, 1, 0, S_IWGRP | S_IWOTH, 1, NULL); c |= chk_file_sec(PBS_ENVIRON, 0, 0, S_IWGRP | S_IWOTH, 0, NULL); if (c != 0) exit(1); #endif /* not DEBUG and not NO_SECURITY_CHECK */ if (chdir(log_buffer) == -1) { perror("chdir"); exit(1); } (void)sprintf(path_log, "%s/sched_logs", homedir); (void)sprintf(path_acct, "%s/%s", log_buffer, PBS_ACCT); /* The following is code to reduce security risks */ /* start out with standard umask, system resource limit infinite */ umask(022); if (setup_env(PBS_ENVIRON) == -1) exit(1); c = getgid(); (void)setgroups(1, (gid_t *)&c); /* secure suppl. groups */ #ifndef DEBUG #ifdef _CRAY (void)limit(C_JOB, 0, L_CPROC, 0); (void)limit(C_JOB, 0, L_CPU, 0); (void)limit(C_JOBPROCS, 0, L_CPU, 0); (void)limit(C_PROC, 0, L_FD, 255); (void)limit(C_JOB, 0, L_FSBLK, 0); (void)limit(C_JOBPROCS, 0, L_FSBLK, 0); (void)limit(C_JOB, 0, L_MEM , 0); (void)limit(C_JOBPROCS, 0, L_MEM , 0); #else /* not _CRAY */ { struct rlimit rlimit; rlimit.rlim_cur = RLIM_INFINITY; rlimit.rlim_max = RLIM_INFINITY; (void)setrlimit(RLIMIT_CPU, &rlimit); (void)setrlimit(RLIMIT_FSIZE, &rlimit); (void)setrlimit(RLIMIT_DATA, &rlimit); (void)setrlimit(RLIMIT_STACK, &rlimit); #ifdef RLIMIT_RSS (void)setrlimit(RLIMIT_RSS , &rlimit); #endif /* RLIMIT_RSS */ #ifdef RLIMIT_VMEM (void)setrlimit(RLIMIT_VMEM , &rlimit); #endif /* RLIMIT_VMEM */ } #endif /* not _CRAY */ #endif /* DEBUG */ if (log_open(logfile, path_log) == -1) { fprintf(stderr, "%s: logfile could not be opened\n", argv[0]); exit(1); } if (gethostname(host, sizeof(host)) == -1) { log_err(errno, id, "gethostname"); die(0); } if ((hp = gethostbyname(host)) == NULL) { log_err(errno, id, "gethostbyname"); die(0); } if ((server_sock = socket(AF_INET, SOCK_STREAM, 
0)) < 0) { log_err(errno, id, "socket"); die(0); } if (setsockopt(server_sock, SOL_SOCKET, SO_REUSEADDR, (char *)&t, sizeof(t)) == -1) { log_err(errno, id, "setsockopt"); die(0); } saddr.sin_family = AF_INET; saddr.sin_port = htons(port); memcpy(&saddr.sin_addr, hp->h_addr, hp->h_length); if (bind(server_sock, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) { log_err(errno, id, "bind"); die(0); } if (listen(server_sock, 5) < 0) { log_err(errno, id, "listen"); die(0); } okclients = (pbs_net_t *)calloc(START_CLIENTS, sizeof(pbs_net_t)); addclient("localhost"); /* who has permission to call MOM */ addclient(host); if (configfile) { if (read_config(configfile) != 0) die(0); } lockfds = open("sched.lock", O_CREAT | O_TRUNC | O_WRONLY, 0644); if (lockfds < 0) { log_err(errno, id, "open lock file"); exit(1); } lock_out(lockfds, F_WRLCK); fullresp(0); if (sigemptyset(&allsigs) == -1) { perror("sigemptyset"); exit(1); } if (sigprocmask(SIG_SETMASK, &allsigs, NULL) == -1) /* unblock */ { perror("sigprocmask"); exit(1); } act.sa_flags = 0; sigaddset(&allsigs, SIGHUP); /* remember to block these */ sigaddset(&allsigs, SIGINT); /* during critical sections */ sigaddset(&allsigs, SIGTERM); /* so we don't get confused */ act.sa_mask = allsigs; act.sa_handler = restart; /* do a restart on SIGHUP */ sigaction(SIGHUP, &act, NULL); act.sa_handler = toolong; /* handle an alarm call */ sigaction(SIGALRM, &act, NULL); act.sa_handler = die; /* bite the biscuit for all following */ sigaction(SIGINT, &act, NULL); sigaction(SIGTERM, &act, NULL); /* * Catch these signals to ensure we core dump even if * our rlimit for core dumps is set to 0 initially. 
* * Chris Samuel - VPAC * [email protected] - 29th July 2003 * * Now conditional on the PBSCOREDUMP environment variable */ if (getenv("PBSCOREDUMP")) { act.sa_handler = catch_abort; /* make sure we core dump */ sigaction(SIGSEGV, &act, NULL); sigaction(SIGBUS, &act, NULL); sigaction(SIGFPE, &act, NULL); sigaction(SIGILL, &act, NULL); sigaction(SIGTRAP, &act, NULL); sigaction(SIGSYS, &act, NULL); } /* * Local initialization stuff */ if (schedinit(argc, argv)) { (void) sprintf(log_buffer, "local initialization failed, terminating"); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); exit(1); } if (getenv("PBSDEBUG") == NULL) { lock_out(lockfds, F_UNLCK); #ifdef DISABLE_DAEMONS pid = getpid(); #else if ((pid = fork()) == -1) { /* error on fork */ perror("fork"); exit(1); } else if (pid > 0) /* parent exits */ { exit(0); } if ((pid = setsid()) == -1) { perror("setsid"); exit(1); } #endif /* DISABLE_DAEMONS */ lock_out(lockfds, F_WRLCK); if (freopen(dbfile, "a", stdout) == NULL) { perror("opening lockfile"); exit(1); } setvbuf(stdout, NULL, _IOLBF, 0); dup2(fileno(stdout), fileno(stderr)); } else { setvbuf(stdout, NULL, _IOLBF, 0); setvbuf(stderr, NULL, _IOLBF, 0); pid = getpid(); } if (freopen("/dev/null", "r", stdin) == NULL) { perror("opening /dev/null"); exit(1); } /* write scheduler's pid into lockfile */ (void)sprintf(log_buffer, "%ld\n", (long)pid); if (write(lockfds, log_buffer, strlen(log_buffer) + 1) != (ssize_t)(strlen(log_buffer) + 1)) { perror("writing to lockfile"); exit(1); } #if (PLOCK_DAEMONS & 2) (void)plock(PROCLOCK); /* lock daemon into memory */ #endif sprintf(log_buffer, "%s startup pid %ld", argv[0], (long)pid); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); FD_ZERO(&fdset); for (go = 1;go;) { int cmd; if (rpp_fd != -1) FD_SET(rpp_fd, &fdset); FD_SET(server_sock, &fdset); if (select(FD_SETSIZE, &fdset, NULL, NULL, NULL) == -1) { if (errno != EINTR) { log_err(errno, id, "select"); die(0); } continue; } if 
(rpp_fd != -1 && FD_ISSET(rpp_fd, &fdset)) { if (rpp_io() == -1) log_err(errno, id, "rpp_io"); } if (!FD_ISSET(server_sock, &fdset)) continue; cmd = server_command(); if (sigprocmask(SIG_BLOCK, &allsigs, &oldsigs) == -1) log_err(errno, id, "sigprocmaskSIG_BLOCK)"); alarm(alarm_time); if (schedule(cmd, connector)) /* magic happens here */ go = 0; alarm(0); if (connector >= 0 && server_disconnect(connector)) { log_err(errno, id, "server_disconnect"); die(0); } next_brk = (caddr_t)sbrk(0); if (next_brk > curr_brk) { sprintf(log_buffer, "brk point %ld", (long)next_brk); log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_SERVER, id, log_buffer); curr_brk = next_brk; } if (sigprocmask(SIG_SETMASK, &oldsigs, NULL) == -1) log_err(errno, id, "sigprocmask(SIG_SETMASK)"); } sprintf(log_buffer, "%s normal finish pid %ld", argv[0], (long)pid); log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer); close(server_sock); exit(0); } /* END main() */