Ejemplo n.º 1
0
int conn_flush(Connection *conn)
{
    int ret;
    int revents;
    int fd;

    lock_out(conn);
    ret = unlocked_write(conn);
    if (ret < 0) {
        unlock_out(conn);
        return -1;
    }

    while (unlocked_outbuf_len(conn) != 0) {
        fd = conn->fd;

        unlock_out(conn);
        revents = gwthread_pollfd(fd, POLLOUT, -1.0);

        /* Note: Make sure we have the "out" lock when
         * going through the loop again, because the 
         * loop condition needs it. */

        if (revents < 0) {
            if (errno == EINTR)
                return 1;
            error(0, "conn_flush: poll failed on fd %d:", fd);
            return -1;
        }

        if (revents == 0) {
            /* We were woken up */
            return 1;
        }

        if (revents & POLLNVAL) {
            error(0, "conn_flush: fd %d not open.", fd);
            return -1;
        }

        lock_out(conn);

        if (revents & (POLLOUT | POLLERR | POLLHUP)) {
            ret = unlocked_write(conn);
            if (ret < 0) {
                unlock_out(conn);
                return -1;
            }
        }
    }

    unlock_out(conn);

    return 0;
}
Ejemplo n.º 2
0
/**
 * @brief
 * 		This is the Windows couterpart of acquire_lock
 * @par
 *  	This function creates/opens the lock file, and locks the file.
 *  	In case of a failover environment, the whole operation is retried
 *  	several times in a loop.
 *
 * @param[in]  lockfile         - Path of db_lock file.
 * @param[out] reason           - Reason for failure, if not able to accquire lock
 * @param[in]  reasonlen        - reason buffer legnth.
 * @param[out] is_lock_hld_by_thishost  - This flag is set if the lock is held by the host
 *                                          requesting accquire_lock in check_mode.
 *
 * @return	File handle of the open and locked file
 * @retval	INVALID_HANDLE_VALUE	: Function failed to acquire lock
 * @retval	INVALID_HANDLE_VALUE	: Function succeeded (file handle returned)
 *
 * @par MT-safe:	Yes
 */
HANDLE
acquire_lock(char *lockfile, char *reason, int reasonlen, int *is_lock_hld_by_thishost)
{
	HANDLE hFile = INVALID_HANDLE_VALUE;
	int i, j;
	char who[PBS_MAXHOSTNAME + 10];
	DWORD dwNumBytesRead;
	BOOL fSuccess;
	char *p;

	if (reasonlen > 0)
		reason[0] = '\0';

	if (pbs_conf.pbs_secondary == NULL)
		j = 1;	/* not fail over, try lock one time */
	else
		j = MAX_LOCK_ATTEMPTS;	/* fail over, try X times */

	hFile = CreateFile(lockfile, GENERIC_READ | GENERIC_WRITE,
		FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_ALWAYS, 0, NULL);
	if (hFile == INVALID_HANDLE_VALUE) {
		snprintf(reason, reasonlen, "Could not access lockfile, errno=%d", GetLastError());
		return hFile;
	}

	for (i = 0; i < j; i++) {
		if (i > 0)
			sleep(1);

		if (lock_out(hFile, F_WRLCK) == 0)
			return hFile; /* success */
	}

	/* all attempts to lock failed, try to see who has it locked */
	fSuccess = ReadFile(hFile, who, sizeof(who) - 1, &dwNumBytesRead, NULL);
	CloseHandle(hFile);
	hFile = INVALID_HANDLE_VALUE;

	if (fSuccess) {
		if (dwNumBytesRead > 0) {
			who[dwNumBytesRead - 1] = '\0';
			p = strchr(who, ':');
			if (p) {
				*p = '\0';
				snprintf(reason, reasonlen, "Lock seems to be held by pid: %s running on host: %s", (p + 1), who);
			} else {
				snprintf(reason, reasonlen, "Lock seems to be held by %s", who);
			}
			if (is_lock_hld_by_thishost != NULL) {
				if (strcmp(thishost, who) == 0)
					*is_lock_hld_by_thishost = 1;
				else
					*is_lock_hld_by_thishost = 0;
			}
		}
	} else
		snprintf(reason, reasonlen, "Could not access lockfile, errno=%d", GetLastError());

	return hFile;
}
Ejemplo n.º 3
0
void conn_set_output_buffering(Connection *conn, unsigned int size)
{
    lock_out(conn);
    conn->output_buffering = size;
    /* If the buffer size is smaller, we may have to write immediately. */
    unlocked_try_write(conn);
    unlock_out(conn);
}
Ejemplo n.º 4
0
long conn_outbuf_len(Connection *conn)
{
    long len;

    lock_out(conn);
    len = unlocked_outbuf_len(conn);
    unlock_out(conn);

    return len;
}
Ejemplo n.º 5
0
int conn_register_real(Connection *conn, FDSet *fdset,
                  conn_callback_t callback, void *data, conn_callback_data_destroyer_t *data_destroyer)
{
    int events;
    int result = 0;

    gw_assert(conn != NULL);

    if (conn->fd < 0)
        return -1;

    /* We need both locks if we want to update the registration
     * information. */
    lock_out(conn);
    lock_in(conn);

    if (conn->registered == fdset) {
        /* Re-registering.  Change only the callback info. */
        conn->callback = callback;
        /* call data destroyer if new data supplied */
        if (conn->callback_data != NULL && conn->callback_data != data && conn->callback_data_destroyer != NULL)
            conn->callback_data_destroyer(conn->callback_data);
        conn->callback_data = data;
        conn->callback_data_destroyer = data_destroyer;
        result = 0;
    } else if (conn->registered) {
        /* Already registered to a different fdset. */
        result = -1;
    } else {
        events = 0;
	/* For nonconnected socket we must lesten both directions */
        if (conn->connected == yes) {
            if (conn->read_eof == 0 && conn->io_error == 0)
                events |= POLLIN;
            if (unlocked_outbuf_len(conn) > 0)
                events |= POLLOUT;
        } else {
          events |= POLLIN | POLLOUT;
        }

        conn->registered = fdset;
        conn->callback = callback;
        conn->callback_data = data;
        conn->callback_data_destroyer = data_destroyer;
        conn->listening_pollin = (events & POLLIN) != 0;
        conn->listening_pollout = (events & POLLOUT) != 0;
        fdset_register(fdset, conn->fd, events, poll_callback, conn);
        result = 0;
    }

    unlock_in(conn);
    unlock_out(conn);

    return result;
}
Ejemplo n.º 6
0
int conn_write_data(Connection *conn, unsigned char *data, long length)
{
    int ret;

    lock_out(conn);
    octstr_append_data(conn->outbuf, data, length);
    ret = unlocked_try_write(conn);
    unlock_out(conn);

    return ret;
}
Ejemplo n.º 7
0
int conn_write(Connection *conn, Octstr *data)
{
    int ret;

    lock_out(conn);
    octstr_append(conn->outbuf, data);
    ret = unlocked_try_write(conn);
    unlock_out(conn);

    return ret;
}
Ejemplo n.º 8
0
int conn_error(Connection *conn)
{
    int err;

    lock_out(conn);
    lock_in(conn);
    err = conn->io_error;
    unlock_in(conn);
    unlock_out(conn);

    return err;
}
Ejemplo n.º 9
0
/**
 * @brief
 *		Forks a background process and continues on that, while
 * 		exiting the foreground process. It also sets the child process to
 * 		become the session leader. This function is avaible only on Non-Windows
 * 		platforms and in non-debug mode.
 *
 * @return -  pid_t	- sid of the child process (result of setsid)
 * @retval       >0	- sid of the child process.
 * @retval       -1	- Fork or setsid failed.
 */
static pid_t
go_to_background()
{
	pid_t sid = -1;
	int rc;

	lock_out(lockfds, F_UNLCK);
	rc = fork();
	if (rc == -1) /* fork failed */
		return ((pid_t) -1);
	if (rc > 0)
		exit(0); /* parent goes away, allowing booting to continue */

	lock_out(lockfds, F_WRLCK);
	if ((sid = setsid()) == -1) {
		fprintf(stderr, "pbs_comm: setsid failed");
		return ((pid_t) -1);
	}
	already_forked = 1;
	return sid;
}
Ejemplo n.º 10
0
int conn_write_withlen(Connection *conn, Octstr *data)
{
    int ret;
    unsigned char lengthbuf[4];

    encode_network_long(lengthbuf, octstr_len(data));
    lock_out(conn);
    octstr_append_data(conn->outbuf, lengthbuf, 4);
    octstr_append(conn->outbuf, data);
    ret = unlocked_try_write(conn);
    unlock_out(conn);

    return ret;
}
Ejemplo n.º 11
0
void conn_unregister(Connection *conn)
{
    FDSet *set = NULL;
    int fd = -1;
    void *data = NULL;
    conn_callback_data_destroyer_t *destroyer = NULL;
    
    gw_assert(conn != NULL);

    if (conn == NULL || conn->fd < 0)
        return;

    /* We need both locks to update the registration information */
    lock_out(conn);
    lock_in(conn);

    if (conn->registered) {
        set = conn->registered;
        fd = conn->fd;
        conn->registered = NULL;
        conn->callback = NULL;
        /*
         * remember and don't destroy data and data_destroyer because we
         * may be in callback right now. So destroy only after fdset_unregister
         * call which guarantee us we are not in callback anymore.
         */
        data = conn->callback_data;
        conn->callback_data = NULL;
        destroyer = conn->callback_data_destroyer;
        conn->callback_data_destroyer = NULL;
        conn->listening_pollin = 0;
        conn->listening_pollout = 0;
    }

    unlock_in(conn);
    unlock_out(conn);

    /* now unregister from FDSet */
    if (set != NULL)
        fdset_unregister(set, fd);

    /* ok we are not in callback anymore, destroy data if any */
    if (data != NULL && destroyer != NULL)
        destroyer(data);
}
Ejemplo n.º 12
0
/**
 * @brief
 * 		The child part of the monitor functionality on Windows.
 *
 *		This function is called as a separate executable (child) process
 *		from the parent monitor, and is passed the location of the
 *		data directory. The parent launches this function as a process and waits
 *		to read the stdout of the child process.
 * @par
 *		This function attempts to lock the lockfile (inside dbstore) and if it
 *		fails, it prints the reason of failure to lock, to its stdout; the child
 *		process also exits in this case.
 * @par
 *		If the function succeeds in locking the file, it prints "0" to its stdout
 *		resulting in the parent to exit with success to its caller. In that case
 *		this process (child) continues to run in the background, as long as the
 *		monitored database process is still up, holding onto the lock, so that no
 *		other process can lock this file (and thus not be able to start the database).
 * @par
 *		If and when eventually the database goes down, this function unlocks the file
 *		and quits (allowing others to lock the file and start the database).
 *
 * @retval	0	: Function succeeded for the given operation
 * @retval	1	: Failed (eg to lock the file).
 *
 * @par MT-safe:	Yes
 */
int
win_db_monitor_child()
{
	HANDLE hFile;
	BOOL fSuccess = FALSE;
	pid_t dbpid;
	int i;
	char lockfile[MAXPATHLEN + 1];
	char reason[RES_BUF_SIZE];

	reason[0] = '\0';

	/* clear any residual stop db file before starting monitoring */
	clear_stop_db_file();

	snprintf(lockfile, MAXPATHLEN, "%s\\datastore\\pbs_dblock", pbs_conf.pbs_home_path);
	hFile = acquire_lock(lockfile, reason, sizeof(reason), NULL);
	if (hFile == INVALID_HANDLE_VALUE) {
		printf("%s", reason);
		fflush(stdout);
		return 1;
	}

	/* set success event */
	printf("0");
	fflush(stdout);
	fclose(stdin);
	fclose(stderr);
	fclose(stdout); /* dont need stdout after this */

	/*
	 * okay, so we locked the file. Now find postgres pid
	 * then loop forever as long as pid is up
	 */
	dbpid = 0;
	for (i = 0; i < MAX_DBPID_ATTEMPTS; i++) {
		if ((dbpid = get_pid()) > 0)
			break;
		sleep(1);
	}

	if (dbpid == 0) {
		lock_out(hFile, F_UNLCK);
		CloseHandle(hFile);
		unlink(lockfile);
		return 0; /* this will unlock the lock in the datastore */
	}

	while (1) {
		if (!checkpid(dbpid))
			break;
		if (!((dbpid = get_pid()) > 0))
			break;

		/* check if stop db file exists */
		check_and_stop_db(dbpid);

		sleep(1);
	}

	/* unlock and return */
	lock_out(hFile, F_UNLCK);
	CloseHandle(hFile);
	unlink(lockfile);

	/* clear temporary err files created at startup; windows only case */
	clear_tmp_files();
	return 0;
}
Ejemplo n.º 13
0
/**
 * @brief
 * 		This is the Windows counterpart of the monitoring
 *		code.
 * @par
 *		This function does the following:
 *		a) Creates/opens a file $PBS_HOME/datastore/pbs_dblock.
 *		b) If mode is "check", attempts to lock the file. If locking
 *		succeeds, unlocks the file and returns success.
 *		c) If mode is "monitor", launches itself with a "monitorchild"
 *		parameter, which calls function "win_db_monitor_child".
 *		d) It launches a child process using win_popen() and reads its stdout.
 *		e) If the child was able to successfully lock the file, it prints "0"
 *	   to its stdout. Otherwise it prints the reason for why it could
 *	   not acquire the lockfile.
 *
 * @param[in]	mode	-	"check"	: to just check if lockfile can be locked
 *		     				"monitor"	:  to launch a monitoring process that holds
 *				 			onto the file lock
 *
 * @retval	1	: Function failed to acquire lock
 * @retval	0	: Function succeded in the requested operation
 *
 * @par MT-safe:	Yes
 */
int
win_db_monitor(char *mode)
{
	int rc;
	BOOL fSuccess = FALSE;
	HANDLE hFile;
	char lockfile[MAXPATHLEN + 1];
	char cmd_line[2*MAXPATHLEN + 1];
	char result[RES_BUF_SIZE];
	int is_lock_local = 0;
	pio_handles pio;
	proc_ctrl proc_info;
	HANDLE hOut, hErr;

	result[0] = '\0';
	snprintf(lockfile, MAXPATHLEN, "%s\\datastore\\pbs_dblock", pbs_conf.pbs_home_path);

	/*
	 * If mode is check, just attempt to lock the file.
	 * Return success if able to lock, else return failure.
	 */
	if (strcmp(mode, "check") == 0) {
		hFile = acquire_lock(lockfile, result, sizeof(result), &is_lock_local);
		if (hFile == INVALID_HANDLE_VALUE) {
			if (is_lock_local)
				return 0; /* Since lock is already held by this host, return success */
			fprintf(stderr, "Failed to acquire lock on %s. %s\n", lockfile, result);
			return 1;
		}

		lock_out(hFile, F_UNLCK);
		CloseHandle(hFile);
		unlink(lockfile);
		return 0;
	}

	/* monitor part */
	proc_info.flags = CREATE_DEFAULT_ERROR_MODE | CREATE_NO_WINDOW;
	proc_info.bInheritHandle = TRUE;
	proc_info.bnowait = TRUE;
	proc_info.need_ptree_termination = FALSE;
	proc_info.buse_cmd = FALSE;

	sprintf(cmd_line, "\"%s\" monitorchild", pbs_ds_monitor_exe);

	/* set the current processes stdout/stderr not be inherited */
	hOut = GetStdHandle(STD_OUTPUT_HANDLE);
	SetHandleInformation(hOut, HANDLE_FLAG_INHERIT, 0);

	hErr = GetStdHandle(STD_ERROR_HANDLE);
	SetHandleInformation(hErr, HANDLE_FLAG_INHERIT, 0);

	/* start child process to lock db lockfile and monitor db process */
	if (win_popen(cmd_line, "r", &pio, &proc_info) == 0) {
		win_pclose(&pio);
		fprintf(stderr, "Unable to create process, errno = %d\n", errno);
		return 1;
	}

	/* wait and read the info from child whether it was able to acquire lock */
	rc = win_pread(&pio, result, sizeof(result) - 1);

	win_pclose2(&pio); /* close handles but keep process running */

	if (rc > 0) {
		if (result[0] == '0') { /* indicates success */
			return 0;
		}
		result[rc - 1] = '\0';
	}

	/* failure */
	fprintf(stderr, "Failed to acquire lock on %s. %s\n", lockfile, result);
	return 1;
}
Ejemplo n.º 14
0
int
main(int argc, char **argv)
#endif	/* WIN32 */
{
#ifdef	WIN32
	struct arg_param *p = (struct arg_param *)pv;
	int      		argc;
	char			**argv;
	SERVICE_STATUS          ss;
#endif	/* WIN32 */
	char *name = NULL;
	struct tpp_config conf;
	int rpp_fd;
	char *pc;
	int numthreads;
	char lockfile[MAXPATHLEN + 1];
	char path_log[MAXPATHLEN + 1];
	char svr_home[MAXPATHLEN + 1];
	char *log_file = 0;
	char *host;
	int port;
	char *routers = NULL;
	int c, i, rc;
	extern char *optarg;
	int	are_primary;
	int	num_var_env;
#ifndef WIN32
	struct sigaction act;
	struct sigaction oact;
#endif

#ifndef WIN32
	/*the real deal or just pbs_version and exit*/

	execution_mode(argc, argv);
#endif

	/* As a security measure and to make sure all file descriptors	*/
	/* are available to us,  close all above stderr			*/
#ifdef WIN32
	_fcloseall();
#else
	i = sysconf(_SC_OPEN_MAX);
	while (--i > 2)
		(void)close(i); /* close any file desc left open by parent */
#endif

	/* If we are not run with real and effective uid of 0, forget it */
#ifdef WIN32
	argc = p->argc;
	argv = p->argv;

	ZeroMemory(&ss, sizeof(ss));
	ss.dwCheckPoint = 0;
	ss.dwServiceType = SERVICE_WIN32_OWN_PROCESS;
	ss.dwCurrentState = g_dwCurrentState;
	ss.dwControlsAccepted = SERVICE_ACCEPT_STOP | SERVICE_ACCEPT_SHUTDOWN;
	ss.dwWaitHint = 6000;

	if (g_ssHandle != 0)
		SetServiceStatus(g_ssHandle, &ss);

	if (!isAdminPrivilege(getlogin())) {
		fprintf(stderr, "%s: Must be run by root\n", argv[0]);
		return (2);
	}

#else
	if ((getuid() != 0) || (geteuid() != 0)) {
		fprintf(stderr, "%s: Must be run by root\n", argv[0]);
		return (2);
	}
#endif	/* WIN32 */

	/* set standard umask */
#ifndef WIN32
	umask(022);
#endif

	/* load the pbs conf file */
	if (pbs_loadconf(0) == 0) {
		fprintf(stderr, "%s: Configuration error\n", argv[0]);
		return (1);
	}

	umask(022);

#ifdef	WIN32
	save_env();
#endif
	/* The following is code to reduce security risks                */
	/* start out with standard umask, system resource limit infinite */
	if ((num_var_env = setup_env(pbs_conf.pbs_environment)) == -1) {
#ifdef	WIN32
		g_dwCurrentState = SERVICE_STOPPED;
		ss.dwCurrentState = g_dwCurrentState;
		ss.dwWin32ExitCode = ERROR_INVALID_ENVIRONMENT;
		if (g_ssHandle != 0) SetServiceStatus(g_ssHandle, &ss);
		return (1);
#else
		exit(1);
#endif	/* WIN32 */
	}

#ifndef WIN32
	i = getgid();
	(void)setgroups(1, (gid_t *)&i);	/* secure suppl. groups */
#endif

	log_event_mask = &pbs_conf.pbs_comm_log_events;
	tpp_set_logmask(*log_event_mask);

#ifdef WIN32
	winsock_init();
#endif

	routers = pbs_conf.pbs_comm_routers;
	numthreads = pbs_conf.pbs_comm_threads;

	server_host[0] = '\0';
	if (pbs_conf.pbs_comm_name) {
		name = pbs_conf.pbs_comm_name;
		host = tpp_parse_hostname(name, &port);
		if (host)
			snprintf(server_host, sizeof(server_host), "%s", host);
		free(host);
		host = NULL;
	} else if (pbs_conf.pbs_leaf_name) {
		char *endp;

		snprintf(server_host, sizeof(server_host), "%s", pbs_conf.pbs_leaf_name);
		endp = strchr(server_host, ','); /* find the first name */
		if (endp)
			*endp = '\0';
		endp = strchr(server_host, ':'); /* cut out the port */
		if (endp)
			*endp = '\0';
		name = server_host;
	} else {
		if (gethostname(server_host, (sizeof(server_host) - 1)) == -1) {
#ifndef WIN32
			sprintf(log_buffer, "Could not determine my hostname, errno=%d", errno);
#else
			sprintf(log_buffer, "Could not determine my hostname, errno=%d", WSAGetLastError());
#endif
			fprintf(stderr, "%s\n", log_buffer);
			return (1);
		}
		if ((get_fullhostname(server_host, server_host, (sizeof(server_host) - 1)) == -1)) {
			sprintf(log_buffer, "Could not determine my hostname");
			fprintf(stderr, "%s\n", log_buffer);
			return (1);
		}
		name = server_host;
	}
	if (server_host[0] == '\0') {
		sprintf(log_buffer, "Could not determine server host");
		fprintf(stderr, "%s\n", log_buffer);
		return (1);
	}

	while ((c = getopt(argc, argv, "r:t:e:N")) != -1) {
		switch (c) {
			case 'e': *log_event_mask = strtol(optarg, NULL, 0);
				break;
			case 'r':
				routers = optarg;
				break;
			case 't':
				numthreads = atol(optarg);
				if (numthreads == -1) {
					usage(argv[0]);
					return (1);
				}
				break;
			case 'N':
				stalone = 1;
				break;
			default:
				usage(argv[0]);
				return (1);
		}
	}

	(void)strcpy(daemonname, "Comm@");
	(void)strcat(daemonname, name);
	if ((pc = strchr(daemonname, (int)'.')) != NULL)
		*pc = '\0';

	if(set_msgdaemonname(daemonname)) {
		fprintf(stderr, "Out of memory\n");
		return 1;
	}

	(void) snprintf(path_log, sizeof(path_log), "%s/%s", pbs_conf.pbs_home_path, PBS_COMM_LOGDIR);
#ifdef WIN32
	/*
	 * let SCM wait 10 seconds for log_open() to complete
	 * as it does network interface query which can take time
	 */

	ss.dwCheckPoint++;
	ss.dwWaitHint = 60000;
	if (g_ssHandle != 0) SetServiceStatus(g_ssHandle, &ss);
#endif
	(void) log_open(log_file, path_log);

	/* set pbs_comm's process limits */
	set_limits(); /* set_limits can call log_record, so call only after opening log file */

	/* set tcp function pointers */
	set_tpp_funcs(log_tppmsg);

	(void) snprintf(svr_home, sizeof(svr_home), "%s/%s", pbs_conf.pbs_home_path, PBS_SVR_PRIVATE);
	if (chdir(svr_home) != 0) {
		(void) sprintf(log_buffer, msg_init_chdir, svr_home);
		log_err(-1, __func__, log_buffer);
		return (1);
	}

	(void) sprintf(lockfile, "%s/%s/comm.lock", pbs_conf.pbs_home_path, PBS_SVR_PRIVATE);
	if ((are_primary = are_we_primary()) == FAILOVER_SECONDARY) {
		strcat(lockfile, ".secondary");
	} else if (are_primary == FAILOVER_CONFIG_ERROR) {
		sprintf(log_buffer, "Failover configuration error");
		log_err(-1, __func__, log_buffer);
#ifdef WIN32
		g_dwCurrentState = SERVICE_STOPPED;
		ss.dwCurrentState = g_dwCurrentState;
		ss.dwWin32ExitCode = ERROR_SERVICE_NOT_ACTIVE;
		if (g_ssHandle != 0) SetServiceStatus(g_ssHandle, &ss);
#endif
		return (3);
	}

	if ((lockfds = open(lockfile, O_CREAT | O_WRONLY, 0600)) < 0) {
		(void) sprintf(log_buffer, "pbs_comm: unable to open lock file");
		log_err(errno, __func__, log_buffer);
		return (1);
	}

	if ((host = tpp_parse_hostname(name, &port)) == NULL) {
		sprintf(log_buffer, "Out of memory parsing leaf name");
		log_err(errno, __func__, log_buffer);
		return (1);
	}

	rc = 0;
	if (pbs_conf.auth_method == AUTH_RESV_PORT) {
		rc = set_tpp_config(&pbs_conf, &conf, host, port, routers, pbs_conf.pbs_use_compression,
				TPP_AUTH_RESV_PORT, NULL, NULL);
	} else {
		/* for all non-resv-port based authentication use a callback from TPP */
		rc = set_tpp_config(&pbs_conf, &conf, host, port, routers, pbs_conf.pbs_use_compression,
				TPP_AUTH_EXTERNAL, get_ext_auth_data, validate_ext_auth_data);
	}
	if (rc == -1) {
		(void) sprintf(log_buffer, "Error setting TPP config");
		log_err(-1, __func__, log_buffer);
		return (1);
	}
	free(host);

	i = 0;
	if (conf.routers) {
		while (conf.routers[i]) {
			sprintf(log_buffer, "Router[%d]:%s", i, conf.routers[i]);
			fprintf(stdout, "%s\n", log_buffer);
			log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, LOG_INFO, msg_daemonname, log_buffer);
			i++;
		}
	}

#ifndef DEBUG
#ifndef WIN32
	if (stalone != 1)
		go_to_background();
#endif
#endif


#ifdef WIN32
	ss.dwCheckPoint = 0;
	g_dwCurrentState = SERVICE_RUNNING;
	ss.dwCurrentState = g_dwCurrentState;
	if (g_ssHandle != 0) SetServiceStatus(g_ssHandle, &ss);
#endif

	if (already_forked == 0)
		lock_out(lockfds, F_WRLCK);

	/* go_to_backgroud call creates a forked process,
	 * thus print/log pid only after go_to_background()
	 * has been called
	 */
	sprintf(log_buffer, "%s ready (pid=%d), Proxy Name:%s, Threads:%d", argv[0], getpid(), conf.node_name, numthreads);
	fprintf(stdout, "%s\n", log_buffer);
	log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, LOG_INFO, msg_daemonname, log_buffer);

#ifndef DEBUG
	pbs_close_stdfiles();
#endif

#ifdef WIN32
	signal(SIGINT, stop_me);
	signal(SIGTERM, stop_me);
#else
	sigemptyset(&act.sa_mask);
	act.sa_flags = 0;
	act.sa_handler = hup_me;
	if (sigaction(SIGHUP, &act, &oact) != 0) {
		log_err(errno, __func__, "sigaction for HUP");
		return (2);
	}
	act.sa_handler = stop_me;
	if (sigaction(SIGINT, &act, &oact) != 0) {
		log_err(errno, __func__, "sigaction for INT");
		return (2);
	}
	if (sigaction(SIGTERM, &act, &oact) != 0) {
		log_err(errno, __func__, "sigactin for TERM");
		return (2);
	}
	if (sigaction(SIGQUIT, &act, &oact) != 0) {
		log_err(errno, __func__, "sigactin for QUIT");
		return (2);
	}
#ifdef SIGSHUTDN
	if (sigaction(SIGSHUTDN, &act, &oact) != 0) {
		log_err(errno, __func__, "sigactin for SHUTDN");
		return (2);
	}
#endif	/* SIGSHUTDN */

	act.sa_handler = SIG_IGN;
	if (sigaction(SIGPIPE, &act, &oact) != 0) {
		log_err(errno, __func__, "sigaction for PIPE");
		return (2);
	}
	if (sigaction(SIGUSR1, &act, &oact) != 0) {
		log_err(errno, __func__, "sigaction for USR1");
		return (2);
	}
	if (sigaction(SIGUSR2, &act, &oact) != 0) {
		log_err(errno, __func__, "sigaction for USR2");
		return (2);
	}
#endif 	/* WIN32 */

	conf.node_type = TPP_ROUTER_NODE;
	conf.numthreads = numthreads;

	if ((rpp_fd = tpp_init_router(&conf)) == -1) {
		log_err(-1, __func__, "tpp init failed\n");
		return 1;
	}

	/* Protect from being killed by kernel */
	daemon_protect(0, PBS_DAEMON_PROTECT_ON);

	/* go in a while loop */
	while (get_out == 0) {

		if (hupped == 1) {
			struct pbs_config pbs_conf_bak;
			int new_logevent;

			hupped = 0; /* reset back */
			memcpy(&pbs_conf_bak, &pbs_conf, sizeof(struct pbs_config));

			if (pbs_loadconf(1) == 0) {
				log_tppmsg(LOG_CRIT, NULL, "Configuration error, ignoring");
				memcpy(&pbs_conf, &pbs_conf_bak, sizeof(struct pbs_config));
			} else {
				/* restore old pbs.conf */
				new_logevent = pbs_conf.pbs_comm_log_events;
				memcpy(&pbs_conf, &pbs_conf_bak, sizeof(struct pbs_config));
				pbs_conf.pbs_comm_log_events = new_logevent;
				log_tppmsg(LOG_INFO, NULL, "Processed SIGHUP");

				log_event_mask = &pbs_conf.pbs_comm_log_events;
				tpp_set_logmask(*log_event_mask);
			}
		}

		sleep(3);
	}

	tpp_router_shutdown();

	log_event(PBSEVENT_SYSTEM | PBSEVENT_FORCE, PBS_EVENTCLASS_SERVER, LOG_NOTICE, msg_daemonname, "Exiting");
	log_close(1);

	lock_out(lockfds, F_UNLCK);	/* unlock  */
	(void)close(lockfds);
	(void)unlink(lockfile);

	return 0;
}
Ejemplo n.º 15
0
void
start_tcl(void)
{
    char *id = "start_tcl";
    char buf[BUFSIZ];
    int fd;
    int tot, len;

    interp = Tcl_CreateInterp();

    if (Tcl_Init(interp) == TCL_ERROR)
    {
        sprintf(log_buffer, "Tcl_Init error: %s",
                Tcl_GetStringResult(interp));
        log_err(-1, id, log_buffer);
        die(0);
    }

#if TCLX
#if TCL_MINOR_VERSION < 5  && TCL_MAJOR_VERSION < 8
    if (TclX_Init(interp) == TCL_ERROR)
    {
#else

    if (Tclx_Init(interp) == TCL_ERROR)
    {
#endif
        sprintf(log_buffer, "Tclx_Init error: %s",
                Tcl_GetStringResult(interp));
        log_err(-1, id, log_buffer);
        die(0);
    }

#endif
    add_cmds(interp);

    if (initfil)
    {
        int  code;

        code = Tcl_EvalFile(interp, initfil);

        if (code != TCL_OK)
        {
            char *trace;

            trace = (char *)Tcl_GetVar(interp, "errorInfo", 0);

            if (trace == NULL)
                trace = (char *)Tcl_GetStringResult(interp);

            fprintf(stderr, "%s: TCL error @ line %d: %s\n",
                    initfil, interp->errorLine, trace);

            sprintf(log_buffer, "%s: TCL error @ line %d: %s",
                    initfil, interp->errorLine,
                    Tcl_GetStringResult(interp));

            log_err(-1, id, log_buffer);

            die(0);
        }

        sprintf(log_buffer, "init file %s", initfil);

        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,
                   id, log_buffer);
    }

    if ((fd = open(bodyfil, O_RDONLY)) == -1)
    {
        log_err(errno, id, bodyfil);
        die(0);
    }

    sprintf(log_buffer, "body file: %s", bodyfil);

    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    if (body)
        free(body);

    if ((body = malloc(BUFSIZ)) == NULL)
    {
        log_err(errno, id, "malloc");
        die(0);
    }

    for (tot = 0; (len = read(fd, buf, sizeof(buf))) > 0; tot += len)
    {
        if ((body = realloc(body, tot + len + 1)) == NULL)
        {
            log_err(errno, id, "realloc");
            die(0);
        }

        memcpy(&body[tot], buf, len);
    }

    if (len == -1)
    {
        log_err(errno, id, bodyfil);
        die(0);
    }

    body[tot] = '\0';

    close(fd);

#if TCL_MAJOR_VERSION >= 8

    if (body_obj == NULL)
    {
        body_obj = Tcl_NewStringObj(body, tot);
        Tcl_IncrRefCount(body_obj);
    }
    else
    {
        Tcl_SetStringObj(body_obj, body, tot);
    }

#endif
}

int

addclient(name)
char *name;
{
    static char id[] = "addclient";

    struct hostent  *host, *gethostbyname();

    struct  in_addr saddr;

    if ((host = gethostbyname(name)) == NULL)
    {
        sprintf(log_buffer, "host %s not found", name);
        log_err(-1, id, log_buffer);
        return -1;
    }

    if (numclients >= START_CLIENTS)
    {
        pbs_net_t *newclients;

        newclients = realloc(okclients,
                             sizeof(pbs_net_t) * (numclients + 1));

        if (newclients == NULL)
            return -1;

        okclients = newclients;
    }

    memcpy((char *)&saddr, host->h_addr, host->h_length);

    okclients[numclients++] = saddr.s_addr;
    return 0;
}

/*
 * read_config - read and process the configuration file (see -c option)
 *
 * Currently, the only statement is $clienthost to specify which systems
 * can contact the scheduler.
 */
#define CONF_LINE_LEN 120

static
int
read_config(file)
char *file;
{
    static char *id = "read_config";
    FILE *conf;
    int i;
    char line[CONF_LINE_LEN];
    char *token;

    struct specialconfig
    {
        char *name;
        int (*handler)();
    } special[] =

    {
        {"clienthost", addclient },
        { NULL,  NULL }
    };


#if !defined(DEBUG) && !defined(NO_SECURITY_CHECK)

    if (chk_file_sec(file, 0, 0, S_IWGRP | S_IWOTH, 1, 0))
        return (-1);

#endif

    if ((conf = fopen(file, "r")) == NULL)
    {
        log_err(errno, id, "cannot open config file");
        return (-1);
    }

    while (fgets(line, CONF_LINE_LEN, conf))
    {

        if ((line[0] == '#') || (line[0] == '\n'))
            continue;  /* ignore comment & null line */
        else if (line[0] == '$')   /* special */
        {

            if ((token = strtok(line, " \t")) == NULL)
                token = "";

            for (i = 0; special[i].name; i++)
            {
                if (strcmp(token + 1, special[i].name) == 0)
                    break;
            }

            if (special[i].name == NULL)
            {
                sprintf(log_buffer, "config name %s not known",
                        token);
                log_record(PBSEVENT_ERROR,
                           PBS_EVENTCLASS_SERVER,
                           msg_daemonname, log_buffer);
                return (-1);
            }

            token = strtok(NULL, " \t");

            if (*(token + strlen(token) - 1) == '\n')
                *(token + strlen(token) - 1) = '\0';

            if (special[i].handler(token))
            {
                fclose(conf);
                return (-1);
            }

        }
        else
        {
            log_record(PBSEVENT_ERROR, PBS_EVENTCLASS_SERVER,
                       msg_daemonname,
                       "invalid line in config file");
            fclose(conf);
            return (-1);
        }
    }

    fclose(conf);

    return (0);
}

void
restart(sig)
int sig;
{
    char    *id = "restart";

    if (sig)
    {
        sprintf(log_buffer, "restart on signal %d", sig);
        log_close(1);
        log_open(logfile, path_log);
    }
    else
    {
        sprintf(log_buffer, "restart command");
    }

    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    Tcl_DeleteInterp(interp);

    if (configfile)
    {
        if (read_config(configfile) != 0)
            die(0);
    }

    start_tcl();
}

void
badconn(msg)
char *msg;
{
    static char id[] = "badconn";

    struct in_addr addr;
    char  buf[5*sizeof(addr) + 100];

    struct hostent *phe;

    addr = saddr.sin_addr;
    phe = gethostbyaddr((void *) & addr, sizeof(addr), AF_INET);

    if (phe == NULL)
    {
        char hold[6];
        int i;
        union
        {

            struct in_addr aa;
            u_char  bb[sizeof(addr)];
        } uu;

        uu.aa = addr;
        sprintf(buf, "%u", uu.bb[0]);

        for (i = 1; i < (int)sizeof(addr); i++)
        {
            sprintf(hold, ".%u", uu.bb[i]);
            strcat(buf, hold);
        }
    }
    else
    {
        strncpy(buf, phe->h_name, sizeof(buf));
        buf[sizeof(buf)-1] = '\0';
    }

    sprintf(log_buffer, "%s on port %u %s", buf, ntohs(saddr.sin_port), msg);

    log_err(-1, id, log_buffer);
    return;
}

unsigned int
server_command()
{
    static char id[] = "server_command";
    int  new_socket;
    int  i;
    torque_socklen_t slen;
    unsigned int  cmd;
    pbs_net_t addr;

    slen = sizeof(saddr);
    new_socket = accept(server_sock,
                        (struct sockaddr *) & saddr, &slen);

    if (new_socket == -1)
    {
        log_err(errno, id, "accept");
        return SCH_ERROR;
    }

    if (ntohs(saddr.sin_port) >= IPPORT_RESERVED)
    {
        badconn("non-reserved port");
        close(new_socket);
        return SCH_ERROR;
    }

    addr = (pbs_net_t)saddr.sin_addr.s_addr;

    for (i = 0; i < numclients; i++)
    {
        if (addr == okclients[i])
            break;
    }

    if (i == numclients)
    {
        badconn("unauthorized host");
        close(new_socket);
        return SCH_ERROR;
    }

    if ((connector = socket_to_conn(new_socket)) < 0)
    {
        log_err(errno, id, "socket_to_conn");
        return SCH_ERROR;
    }

    if (get_4byte(new_socket, &cmd) != 1)
    {
        log_err(errno, id, "get4bytes");
        return SCH_ERROR;
    }

    return cmd;
}


/*
 * lock_out - lock out other daemons from this directory.
 */

static void lock_out(fds, op)
int fds;
int op;  /* F_WRLCK  or  F_UNLCK */
{

    struct flock flock;

    flock.l_type   = op;
    flock.l_whence = SEEK_SET;
    flock.l_start  = 0;
    flock.l_len    = 0; /* whole file */

    if (fcntl(fds, F_SETLK, &flock) < 0)
    {
        (void)strcpy(log_buffer, "pbs_sched: another scheduler running\n");
        log_err(errno, msg_daemonname, log_buffer);
        fprintf(stderr, log_buffer);
        exit(1);
    }
}

int main(argc, argv)
int  argc;
char *argv[];
{
    char  *id = "main";
    int  code;

    struct hostent *hp;
    int  go, c, errflg = 0;
    int  lockfds;
    int  t = 1;
    char  *ptr;
    pid_t  pid;
    char  *cp, host[100];
    char  *homedir = PBS_SERVER_HOME;
    unsigned int port;
    char  path_priv[_POSIX_PATH_MAX];
    char  *dbfile = "sched_out";
    int  alarm_time = 180;

    struct sigaction act;
    caddr_t  curr_brk = 0, next_brk;
    extern char *optarg;
    extern int optind, opterr;
    fd_set  fdset;

#ifndef DEBUG
    if (IamRoot() == 0)
    {
        return (1);
    }
#endif /* DEBUG */

    glob_argv = argv;

    if ((cp = strrchr(argv[0], '/')) == NULL)
        cp = argv[0];
    else
        cp++;

    msg_daemonname = strdup(cp);

    port = get_svrport(PBS_SCHEDULER_SERVICE_NAME, "tcp",
                       PBS_SCHEDULER_SERVICE_PORT);

    while ((c = getopt(argc, argv, "L:S:d:i:b:t:p:a:vc:")) != EOF)
    {
        switch (c)
        {

        case 'L':
            logfile = optarg;
            break;

        case 'S':
            port = (unsigned int)atoi(optarg);

            if (port == 0)
            {
                fprintf(stderr,
                        "%s: illegal port\n", optarg);
                errflg = 1;
            }

            break;

        case 'd':
            homedir = optarg;
            break;

        case 'i':  /* initialize */
            initfil = optarg;
            break;

        case 'b':
            bodyfil = optarg;
            break;

        case 't':
            termfil = optarg;
            break;

        case 'p':
            dbfile = optarg;
            break;

        case 'a':
            alarm_time = strtol(optarg, &ptr, 10);

            if (alarm_time <= 0 || *ptr != '\0')
            {
                fprintf(stderr,
                        "%s: bad alarm time\n", optarg);
                errflg = 1;
            }

            break;

        case 'c':
            configfile = optarg;
            break;

        case 'v':
            verbose = 1;
            break;

        case '?':
            errflg = 1;
            break;
        }
    }

    if (errflg || optind != argc)
    {
        static char *options[] =
        {
            "[-L logfile]",
            "[-S port]",
            "[-d home]",
            "[-i init]",
            "[-b body]",
            "[-t term]",
            "[-p output]",
            "[-a alarm]",
            "[-c configfile]",
            "[-v]",
            NULL
        };
        int i;

        fprintf(stderr, "usage: %s\n", argv[0]);

        for (i = 0; options[i]; i++)
            fprintf(stderr, "\t%s\n", options[i]);

        exit(1);
    }

    /* Save the original working directory for "restart" */
    if ((oldpath = getcwd((char *)NULL, MAXPATHLEN)) == NULL)
    {
        fprintf(stderr, "cannot get current working directory\n");
        exit(1);
    }

    (void)sprintf(path_priv, "%s/sched_priv", homedir);
#if !defined(DEBUG) && !defined(NO_SECURITY_CHECK)
    c  = chk_file_sec(path_priv, 1, 0, S_IWGRP | S_IWOTH, 1, 0);
    c |= chk_file_sec(PBS_ENVIRON, 0, 0, S_IWGRP | S_IWOTH, 0, 0);

    if (c != 0) exit(1);

#endif  /* not DEBUG and not NO_SECURITY_CHECK */
    if (chdir(path_priv) == -1)
    {
        perror(path_priv);
        exit(1);
    }

    (void)sprintf(path_log, "%s/sched_logs", homedir);
    (void)strcpy(pbs_current_user, "Scheduler");

    /* The following is code to reduce security risks                */
    /* start out with standard umask, system resource limit infinite */

    umask(022);

    if (setup_env(PBS_ENVIRON) == -1)
        exit(1);

    c = getgid();

    (void)setgroups(1, (gid_t *)&c); /* secure suppl. group ids */

    c = sysconf(_SC_OPEN_MAX);

    while (--c > 2)
        (void)close(c); /* close any file desc left open by parent */

#ifndef DEBUG
#ifdef _CRAY
    (void)limit(C_JOB,      0, L_CPROC, 0);

    (void)limit(C_JOB,      0, L_CPU,   0);

    (void)limit(C_JOBPROCS, 0, L_CPU,   0);

    (void)limit(C_PROC,     0, L_FD,  255);

    (void)limit(C_JOB,      0, L_FSBLK, 0);

    (void)limit(C_JOBPROCS, 0, L_FSBLK, 0);

    (void)limit(C_JOB,      0, L_MEM  , 0);

    (void)limit(C_JOBPROCS, 0, L_MEM  , 0);

#else /* not  _CRAY */
    {

        struct rlimit rlimit;

        rlimit.rlim_cur = RLIM_INFINITY;
        rlimit.rlim_max = RLIM_INFINITY;
        (void)setrlimit(RLIMIT_CPU,   &rlimit);
        (void)setrlimit(RLIMIT_FSIZE, &rlimit);
        (void)setrlimit(RLIMIT_DATA,  &rlimit);
        (void)setrlimit(RLIMIT_STACK, &rlimit);
#ifdef  RLIMIT_RSS
        (void)setrlimit(RLIMIT_RSS  , &rlimit);
#endif  /* RLIMIT_RSS */
#ifdef  RLIMIT_VMEM
        (void)setrlimit(RLIMIT_VMEM  , &rlimit);
#endif  /* RLIMIT_VMEM */
    }
#endif /* not _CRAY */

#if !defined(NO_SECURITY_CHECK)
    c = 0;

    if (initfil)
    {
        if (*initfil != '/')
        {
            (void)sprintf(log_buffer, "%s/%s", path_priv, initfil);
            c |= chk_file_sec(log_buffer, 0, 0, S_IWGRP | S_IWOTH, 1, 0);
        }
        else
        {
            c |= chk_file_sec(initfil, 0, 0, S_IWGRP | S_IWOTH, 1, 0);
        }
    }

    if (bodyfil)
    {
        if (*bodyfil != '/')
        {
            (void)sprintf(log_buffer, "%s/%s", path_priv, bodyfil);
            c |= chk_file_sec(log_buffer, 0, 0, S_IWGRP | S_IWOTH, 1, 0);
        }
        else
        {
            c |= chk_file_sec(bodyfil, 0, 0, S_IWGRP | S_IWOTH, 1, 0);
        }
    }

    if (termfil)
    {
        if (*termfil != '/')
        {
            (void)sprintf(log_buffer, "%s/%s", path_priv, termfil);
            c |= chk_file_sec(log_buffer, 0, 0, S_IWGRP | S_IWOTH, 1, 0);
        }
        else
        {
            c |= chk_file_sec(termfil, 0, 0, S_IWGRP | S_IWOTH, 1, 0);
        }
    }

    if (c) exit(1);

#endif /* not NO_SECURITY_CHECK */
#endif /* not DEBUG */

    if (log_open(logfile, path_log) == -1)
    {
        fprintf(stderr, "%s: logfile could not be opened\n", argv[0]);
        exit(1);
    }

    if (gethostname(host, sizeof(host)) == -1)
    {
        char *prob = "gethostname";

        log_err(errno, id, prob);
        perror(prob);
        die(0);
    }

    if ((hp = gethostbyname(host)) == NULL)
    {
        char *prob = "gethostbyname";

        log_err(errno, id, prob);
        perror(prob);
        die(0);
    }

    if ((server_sock = socket(AF_INET, SOCK_STREAM, 0)) < 0)
    {
        char *prob = "socket";

        log_err(errno, id, prob);
        perror(prob);
        die(0);
    }

    if (setsockopt(server_sock, SOL_SOCKET, SO_REUSEADDR,
                   (char *)&t, sizeof(t)) == -1)
    {
        char *prob = "setsockopt";

        log_err(errno, id, prob);
        perror(prob);
        die(0);
    }

    saddr.sin_family = AF_INET;

    saddr.sin_port = htons((unsigned short)port);
    memcpy(&saddr.sin_addr, hp->h_addr, hp->h_length);

    if (bind(server_sock, (struct sockaddr *)&saddr, sizeof(saddr)) < 0)
    {
        char *prob = "bind";

        log_err(errno, id, prob);
        perror(prob);
        die(0);
    }

    if (listen(server_sock, 5) < 0)
    {
        char *prob = "listen";

        log_err(errno, id, prob);
        perror(prob);
        die(0);
    }

    okclients = (pbs_net_t *)calloc(START_CLIENTS, sizeof(pbs_net_t));

    addclient("localhost");   /* who has permission to call MOM */
    addclient(host);

    if (configfile)
    {
        if (read_config(configfile) != 0)
            die(0);
    }

    lockfds = open("sched.lock", O_CREAT | O_TRUNC | O_WRONLY, 0644);

    if (lockfds < 0)
    {
        char *prob = "lock file";

        log_err(errno, id, prob);
        perror(prob);
        die(0);
    }

    lock_out(lockfds, F_WRLCK);

#ifndef DEBUG
    lock_out(lockfds, F_UNLCK);

    if ((pid = fork()) == -1)       /* error on fork */
    {
        char *prob = "fork";

        log_err(errno, id, prob);
        perror(prob);
        die(0);
    }
    else if (pid > 0)               /* parent exits */
        exit(0);

    if ((pid = setsid()) == -1)
    {
        log_err(errno, id, "setsid");
        die(0);
    }

    lock_out(lockfds, F_WRLCK);

    freopen(dbfile, "a", stdout);
    setvbuf(stdout, NULL, _IOLBF, 0);
    dup2(fileno(stdout), fileno(stderr));
#else
    pid = getpid();
    setvbuf(stdout, NULL, _IOLBF, 0);
    setvbuf(stderr, NULL, _IOLBF, 0);
#endif
    freopen("/dev/null", "r", stdin);

    /* write schedulers pid into lockfile */
    (void)sprintf(log_buffer, "%d\n", pid);
    (void)write(lockfds, log_buffer, strlen(log_buffer) + 1);

#if (PLOCK_DAEMONS & 2)
    (void)plock(PROCLOCK); /* lock daemon into memory */
#endif

    sprintf(log_buffer, "%s startup pid %d", argv[0], pid);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    sprintf(log_buffer, "%s using TCL %s (%s)", argv[0],
            TCL_VERSION, TCL_PATCH_LEVEL);
    fprintf(stderr, "%s\n", log_buffer);
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    fullresp(0);
    sigemptyset(&allsigs);
    act.sa_flags = 0;
    sigaddset(&allsigs, SIGHUP);    /* remember to block these */
    sigaddset(&allsigs, SIGINT);    /* during critical sections */
    sigaddset(&allsigs, SIGTERM);   /* so we don't get confused */
    act.sa_mask = allsigs;

    act.sa_handler = restart;       /* do a restart on SIGHUP */
    sigaction(SIGHUP, &act, NULL);

    act.sa_handler = toolong; /* handle an alarm call */
    sigaction(SIGALRM, &act, NULL);

    act.sa_handler = die;           /* bite the biscuit for all following */
    sigaction(SIGINT, &act, NULL);
    sigaction(SIGTERM, &act, NULL);

    start_tcl();

    FD_ZERO(&fdset);

    for (go = 1; go;)
    {
        unsigned int cmd;


        FD_SET(server_sock, &fdset);

        if (select(FD_SETSIZE, &fdset, NULL, NULL, NULL) == -1)
        {
            if (errno != EINTR)
                log_err(errno, id, "select");

            continue;
        }

        if (!FD_ISSET(server_sock, &fdset))
            continue;

        cmd = server_command();

        if (cmd == (unsigned)SCH_ERROR || cmd == (unsigned)SCH_SCHEDULE_NULL)
            continue;

        if (sigprocmask(SIG_BLOCK, &allsigs, &oldsigs) == -1)
            log_err(errno, id, "sigprocmaskSIG_BLOCK)");

        if (verbose)
        {
            sprintf(log_buffer, "command %d", cmd);
            log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,
                       id, log_buffer);
        }

        switch (cmd)
        {

        case SCH_SCHEDULE_NEW:

        case SCH_SCHEDULE_TERM:

        case SCH_SCHEDULE_TIME:

        case SCH_SCHEDULE_RECYC:

        case SCH_SCHEDULE_CMD:

        case SCH_SCHEDULE_FIRST:
            alarm(alarm_time);

#if TCL_MAJOR_VERSION >= 8
            /* execute compiled body code for TCL-8 */
            code = Tcl_EvalObj(interp, body_obj);
#else
            code = Tcl_Eval(interp, body);
#endif
            alarm(0);

            switch (code)
            {

            case TCL_OK:

            case TCL_RETURN:
                break;

            default:
            {

                char *trace;
                char  codename[20];

                switch (code)
                {

                case TCL_BREAK:
                    strcpy(codename, "break");
                    break;

                case TCL_CONTINUE:
                    strcpy(codename, "continue");
                    break;

                default:
                    strcpy(codename, "<unknown>");
                    break;
                }

                trace = (char *)Tcl_GetVar(interp, "errorInfo", 0);

                if (trace == NULL)
                    trace = (char *)Tcl_GetStringResult(interp);

                fprintf(stderr, "%s: TCL interpreter return code %d (%s) @ line %d: %s\n",
                        bodyfil, code, codename,
                        interp->errorLine, trace);

                sprintf(log_buffer,
                        "%s: TCL error @ line %d: %s",
                        bodyfil, interp->errorLine,
                        Tcl_GetStringResult(interp));

                log_err(-1, id, log_buffer);

                die(0);
            }
            }

            break;

        case SCH_CONFIGURE:

        case SCH_RULESET:
            restart(0);
            break;

        case SCH_QUIT:
            go = 0;
            break;

        default:
            log_err(-1, id, "unknown command");
            break;
        }

        if (connector >= 0 && server_disconnect(connector))
        {
            log_err(errno, id, "server_disconnect");
            die(0);
        }

        connector = -1;

        if (verbose)
        {
            next_brk = (caddr_t)sbrk(0);

            if (next_brk > curr_brk)
            {
                sprintf(log_buffer, "brk point %p", next_brk);
                log_record(PBSEVENT_SYSTEM,
                           PBS_EVENTCLASS_SERVER,
                           id, log_buffer);
                curr_brk = next_brk;
            }
        }

        if (sigprocmask(SIG_SETMASK, &oldsigs, NULL) == -1)
            log_err(errno, id, "sigprocmask(SIG_SETMASK)");
    }

    if (termfil)
    {
        code = Tcl_EvalFile(interp, termfil);

        if (code != TCL_OK)
        {
            char *trace;

            trace = (char *)Tcl_GetVar(interp, "errorInfo", 0);

            if (trace == NULL)
                trace = (char *)Tcl_GetStringResult(interp);

            fprintf(stderr, "%s: TCL error @ line %d: %s\n",
                    termfil, interp->errorLine, trace);

            sprintf(log_buffer, "%s: TCL error @ line %d: %s",
                    termfil, interp->errorLine,
                    Tcl_GetStringResult(interp));

            log_err(-1, id, log_buffer);

            die(0);
        }

        sprintf(log_buffer, "term file: %s", termfil);

        log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER,
                   id, log_buffer);
    }

    sprintf(log_buffer, "%s normal finish pid %d", argv[0], pid);

    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

    (void)close(server_sock);
    exit(0);
}
Ejemplo n.º 16
0
int conn_wait(Connection *conn, double seconds)
{
    int events;
    int ret;
    int fd;

    lock_out(conn);

    /* Try to write any data that might still be waiting to be sent */
    ret = unlocked_write(conn);
    if (ret < 0) {
        unlock_out(conn);
        return -1;
    }
    if (ret > 0) {
        /* We did something useful.  No need to poll or wait now. */
        unlock_out(conn);
        return 0;
    }

    fd = conn->fd;

    /* Normally, we block until there is more data available.  But
     * if any data still needs to be sent, we block until we can
     * send it (or there is more data available).  We always block
     * for reading, unless we know there is no more data coming.
     * (Because in that case, poll will keep reporting POLLIN to
     * signal the end of the file).  If the caller explicitly wants
     * to wait even though there is no data to write and we're at
     * end of file, then poll for new data anyway because the caller
     * apparently doesn't trust eof. */
    events = 0;
    if (unlocked_outbuf_len(conn) > 0)
        events |= POLLOUT;
    /* Don't keep the connection locked while we wait */
    unlock_out(conn);

    /* We need the in lock to query read_eof */
    lock_in(conn);
    if ((conn->read_eof == 0 && conn->io_error == 0) || events == 0)
        events |= POLLIN;
    unlock_in(conn);

    ret = gwthread_pollfd(fd, events, seconds);
    if (ret < 0) {
        if (errno == EINTR)
            return 0;
        error(0, "conn_wait: poll failed on fd %d:", fd);
        return -1;
    }

    if (ret == 0)
        return 1;

    if (ret & POLLNVAL) {
        error(0, "conn_wait: fd %d not open.", fd);
        return -1;
    }

    if (ret & (POLLERR | POLLHUP)) {
        /* Call unlocked_read to report the specific error,
         * and handle the results of the error.  We can't be
         * certain that the error still exists, because we
         * released the lock for a while. */
        lock_in(conn);
        unlocked_read(conn);
        unlock_in(conn);
        return -1;
    }

    /* If POLLOUT is on, then we must have wanted
     * to write something. */
    if (ret & POLLOUT) {
        lock_out(conn);
        unlocked_write(conn);
        unlock_out(conn);
    }

    /* Since we normally select for reading, we must
     * try to read here.  Otherwise, if the caller loops
     * around conn_wait without making conn_read* calls
     * in between, we will keep polling this same data. */
    if (ret & POLLIN) {
        lock_in(conn);
        unlocked_read(conn);
        unlock_in(conn);
    }

    return 0;
}
Ejemplo n.º 17
0
static void poll_callback(int fd, int revents, void *data)
{
    Connection *conn;
    int do_callback = 0;

    conn = data;

    if (conn == NULL) {
        error(0, "poll_callback called with NULL connection.");
        return;
    }

    if (conn->fd != fd) {
        error(0, "poll_callback called on wrong connection.");
        return;
    }

    /* Get result of nonblocking connect, before any reads and writes
     * we must check result (it must be handled in initial callback) */
    if (conn->connected == no) {
        if (conn->callback)
            conn->callback(conn, conn->callback_data);
        return;
    }

    /* If got POLLERR or POLHUP, then unregister the descriptor from the
     * fdset and set the error condition variable to let the upper layer
     * close and destroy the connection. */
    if (revents & (POLLERR|POLLHUP)) {
        lock_out(conn);
        lock_in(conn);
        if (conn->listening_pollin)
            unlocked_register_pollin(conn, 0);
        if (conn->listening_pollout)
            unlocked_register_pollout(conn, 0);
        conn->io_error = 1;
        unlock_in(conn);
        unlock_out(conn);
        do_callback = 1;
    }

    /* If unlocked_write manages to write all pending data, it will
     * tell the fdset to stop listening for POLLOUT. */
    if (revents & POLLOUT) {
        lock_out(conn);
        unlocked_write(conn);
        if (unlocked_outbuf_len(conn) == 0)
            do_callback = 1;
        unlock_out(conn);
    }

    /* We read only in unlocked_read in we received POLLIN, cause the 
     * descriptor is already broken and of no use anymore. */
    if (revents & POLLIN) {
        lock_in(conn);
        unlocked_read(conn);
        unlock_in(conn);
        do_callback = 1;
    }

    if (do_callback && conn->callback)
        conn->callback(conn, conn->callback_data);
}
Ejemplo n.º 18
0
/**
 * @brief
 * 		This is the Unix counterpart of acquire_lock
 * @par
 *  	This function creates/opens the lock file, and locks the file.
 *  	In case of a failover environment, the whole operation is retried
 *  	several times in a loop.
 *
 * @param[in]  lockfile         - Path of db_lock file.
 * @param[out] reason           - Reason for failure, if not able to accquire lock
 * @param[in]  reasonlen        - reason buffer legnth.
 * @param[out] is_lock_hld_by_thishost  - This flag is set if the lock is held by the host
 *                                         requesting accquire_lock in check_mode.
 *
 * @return	File descriptor of the open and locked file
 * @retval	-1	: Function failed to acquire lock
 * @retval	!=-1	: Function succeeded (file descriptor returned)
 *
 * @par MT-safe:	Yes
 */
int
acquire_lock(char *lockfile, char *reason, int reasonlen, int *is_lock_hld_by_thishost)
{
	int fd;
	struct stat st;
	int i, j;
	time_t	lasttime = 0;
	int rc;
	char who[PBS_MAXHOSTNAME + 10];
	char *p;

	if (reasonlen > 0)
		reason[0] = '\0';

	if (pbs_conf.pbs_secondary == NULL)
		j = 1;		/* not fail over, try lock one time */
	else
		j = MAX_LOCK_ATTEMPTS;	/* fail over, try X times */

#ifndef O_RSYNC
#define O_RSYNC 0
#endif

again:
	if ((fd = open(lockfile, O_RDWR | O_CREAT | O_RSYNC, 0600)) == -1) {
		snprintf(reason, reasonlen, "Could not access lockfile, errno=%d", errno);
		return -1;
	}

	/* check time stamp of lock file */
	if (fstat(fd, &st) == -1) {
		snprintf(reason, reasonlen, "Failed to stat lockfile, errno=%d", errno);
		close(fd);
		return -1;
	}

	/* record the last modified timestamp */
	lasttime = st.st_mtime;

	for (i=0; i < j; i++) { /* try X times where X is MAX_LOCK_ATTEMPTS */
		if (i > 0)
			sleep(1);
		/* attempt to lock the datastore directory */
		if (lock_out(fd, F_WRLCK) == 0)
			return fd;
	}

	/* do this only if failover is configured */
	if (pbs_conf.pbs_secondary != NULL) {
		/*
		 * Came here, means we could not lock even after j attempts.
		 *
		 * 2 levels of check will be performed (based on the last modified timestamp):
		 *
		 * 1) Check the lock file's modified timestamp and compare with "lasttime" to see if the file was modified
		 *    in between. If the file was modified, then the other side up and so we give up.
		 *
		 * 2) We know that the modified timestamp is not updating however we need to make
		 *    sure that the other side is really gone. Therefore we check the difference of last
		 *    updated timestamp from now (current system time). If the difference > (4*j) seconds,
		 *    then the other side has vanished at the OS level itself, and NFS cannot unlock it.
		 *    So delete the lockfile and start afresh. For this to work, make sure that the
		 *    time on primary, secondary and the pbs_home server (NFS server) are synced.
		 */

		/* Re-check time stamp of lock file */
		if (fstat(fd, &st) == -1) {
			snprintf(reason, reasonlen, "Failed to stat lockfile, errno=%d", errno);
			close(fd);
			return -1;
		}

		/* Check if time stamp of lock file has updated at all */
		if (st.st_mtime == lasttime) {
			/* Modified times stamp did not update in the given window. Re-check how long it has been stale */
			if (time(0) - lasttime >= (MAX_LOCK_ATTEMPTS * 4)) {
				/* other side is long dead, clear up stuff */
				close(fd);
				unlink(lockfile);
				fd = -1;
				lasttime = 0;
				goto again;
			}
		}
	}

	/* all attempts to lock failed, try to see who has it locked */
	(void) lseek(fd, (off_t) 0, SEEK_SET);
	if ((rc = read(fd, who, sizeof(who) - 1)) > 0) {
		who[rc - 1] = '\0';
		p = strchr(who, ':');
		if (p) {
			*p = '\0';
			snprintf(reason, reasonlen,
					"Lock seems to be held by pid: %s running on host: %s",
					(p + 1), who);
		} else {
			snprintf(reason, reasonlen, "Lock seems to be held by %s", who);
		}
		if (is_lock_hld_by_thishost != NULL) {
			if (strcmp(thishost, who) == 0)
				*is_lock_hld_by_thishost = 1;
			else
				*is_lock_hld_by_thishost = 0;
		}
	}

	close(fd);
	fd = -1;

	return fd;
}
Ejemplo n.º 19
0
Octstr *conn_read_withlen(Connection *conn)
{
    Octstr *result = NULL;
    unsigned char lengthbuf[4];
    long length = 0; /* for compiler please */
    int try, retry;

    lock_in(conn);

    for (try = 1; try <= 2; try++) {
        if (try > 1)
            unlocked_read(conn);

        do {
            retry = 0;
            /* First get the length. */
            if (unlocked_inbuf_len(conn) < 4)
                continue;

            octstr_get_many_chars(lengthbuf, conn->inbuf, conn->inbufpos, 4);
            length = decode_network_long(lengthbuf);

            if (length < 0) {
                warning(0, "conn_read_withlen: got negative length, skipping");
                conn->inbufpos += 4;
                retry = 1;
             }
        } while(retry == 1);

        /* Then get the data. */
        if (unlocked_inbuf_len(conn) - 4 < length)
            continue;

        conn->inbufpos += 4;
        result = unlocked_get(conn, length);
        gw_claim_area(result);
        break;
    }

    unlock_in(conn);
    return result;
}

Octstr *conn_read_packet(Connection *conn, int startmark, int endmark)
{
    int startpos, endpos;
    Octstr *result = NULL;
    int try;

    lock_in(conn);

    for (try = 1; try <= 2; try++) {
        if (try > 1)
            unlocked_read(conn);

        /* Find startmark, and discard everything up to it */
        if (startmark >= 0) {
            startpos = octstr_search_char(conn->inbuf, startmark, conn->inbufpos);
            if (startpos < 0) {
                conn->inbufpos = octstr_len(conn->inbuf);
                continue;
            } else {
                conn->inbufpos = startpos;
            }
        } else {
           startpos = conn->inbufpos;
        }

        /* Find first endmark after startmark */
        endpos = octstr_search_char(conn->inbuf, endmark, conn->inbufpos);
        if (endpos < 0)
            continue;

        result = unlocked_get(conn, endpos - startpos + 1);
        gw_claim_area(result);
        break;
    }

    unlock_in(conn);
    return result;
}

#ifdef HAVE_LIBSSL
X509 *conn_get_peer_certificate(Connection *conn) 
{
    /* Don't know if it needed to be locked , but better safe as crash */
    lock_out(conn);
    lock_in(conn);
    if (conn->peer_certificate == NULL && conn->ssl != NULL)
        conn->peer_certificate = SSL_get_peer_certificate(conn->ssl);
    unlock_in(conn);
    unlock_out(conn);
    
    return conn->peer_certificate;
}

/*
 * XXX Alex decalred the RSA callback routine static and now we're getting 
 * warning messages for our automatic compilation tests. So we are commenting
 * the function out to avoid the warnings.
 *

static RSA *tmp_rsa_callback(SSL *ssl, int export, int key_len)
{
    static RSA *rsa = NULL;
    debug("gwlib.http", 0, "SSL: Generating new RSA key (export=%d, keylen=%d)", export, key_len);
    if (export) {
	   rsa = RSA_generate_key(key_len, RSA_F4, NULL, NULL);
    } else {
	   debug("gwlib.http", 0, "SSL: Export not set");
    }
    return rsa;
}
*/

static Mutex **ssl_static_locks = NULL;

/* the call-back function for the openssl crypto thread locking */
static void openssl_locking_function(int mode, int n, const char *file, int line)
{
    if (mode & CRYPTO_LOCK)
        mutex_lock(ssl_static_locks[n-1]);
    else
        mutex_unlock(ssl_static_locks[n-1]);
}

void openssl_init_locks(void)
{
    int c, maxlocks = CRYPTO_num_locks();

    gw_assert(ssl_static_locks == NULL);

    ssl_static_locks = gw_malloc(sizeof(Mutex *) * maxlocks);
    for (c = 0; c < maxlocks; c++)
        ssl_static_locks[c] = mutex_create();

    /* after the mutexes have been created, apply the call-back to it */
    CRYPTO_set_locking_callback(openssl_locking_function);
    CRYPTO_set_id_callback((CRYPTO_CALLBACK_PTR)gwthread_self);
}
Ejemplo n.º 20
0
/**
 * @brief	This is the Unix couterpart of the monitoring
 *			code.
 * @par
 *		This function does the following:
 *		a) Creates a pipe, forks itself, parent waits to read on pipe.
 *		b) Child creates/opens a file $PBS_HOME/datastore/pbs_dblock.
 *		c) Attempts to lock the file. If locking
 *			succeeds, unlocks the file and writes 0 (success) to the write
 *			end of the pipe. If locking fails, writes 1 (failure) to pipe.
 *			Parent reads from pipe and exits with the code read from pipe.
 *		d) If mode is "check" then child quits.
 *		e) If mode is "monitor", continues in the background, checking
 *			the database pid in a loop forever. If database pid goes
 *			down, then it unlocks the file and exits.
 *
 * @param[in]	mode	-	"check" - to just check if lockfile can be locked
 *		     				"monitor" - to launch a monitoring child process that
 *							holds onto the file lock.
 *
 * @retval	1	-	Function failed to acquire lock
 * @retval	0	-	Function succeded in the requested operation
 * @par
 * 		The return values are not used by the caller (parent process) since
 * 		in the success case this function does not return. Instead, the parent
 * 		waits on the read end of the pipe to read a status from the monitoring
 * 		child process.
 *
 * @par MT-safe: Yes
 */
int
unix_db_monitor(char *mode)
{
	int fd;
	int rc;
	int i;
	pid_t dbpid;
	char lockfile[MAXPATHLEN + 1];
	int pipefd[2];
	int res;
	int is_lock_local = 0;
	char reason[RES_BUF_SIZE];

	reason[0] = '\0';

	if (pipe(pipefd) != 0) {
		fprintf(stderr, "Unable to create pipe, errno = %d\n", errno);
		return 1;
	}

	snprintf(lockfile, MAXPATHLEN, "%s/datastore/pbs_dblock", pbs_conf.pbs_home_path);

	/* first fork off */
	rc = fork();
	if (rc == -1) {
		fprintf(stderr, "Unable to create process, errno = %d\n", errno);
		return 1;
	}

	if (rc > 0) {
		close(pipefd[1]);
		/*
		 * child can continue to execute in case of "monitor",
		 * so dont wait for child to exit, rather read code
		 * from pipe that child will write to
		 */
		if (read(pipefd[0], &res, sizeof(int)) != sizeof(int))
			return 1;

		if (res != 0) {
			read(pipefd[0], &reason, sizeof(reason));
			fprintf(stderr, "Failed to acquire lock on %s. %s\n", lockfile, reason);
		}

		return (res); /* return parent with success */
	}

	close(pipefd[0]);

	/* child */
	if (setsid() == -1) {
		close(pipefd[1]);
		return 1;
	}

	(void)fclose(stdin);
	(void)fclose(stdout);
	(void)fclose(stderr);

	/* Protect from being killed by kernel */
	daemon_protect(0, PBS_DAEMON_PROTECT_ON);

	if ((fd = acquire_lock(lockfile, reason, sizeof(reason), &is_lock_local)) == -1) {
		if (is_lock_local && strcmp(mode, "check") == 0) {
			/* write success to parent since lock is already held by the localhost */
			res = 0;
			write(pipefd[1], &res, sizeof(int));
			close(pipefd[1]);
			return 0;
		}
		res = 1;
		write(pipefd[1], &res, sizeof(int));
		write(pipefd[1], reason, sizeof(reason));
		close(pipefd[1]);
		return 1;
	}

	/* unlock before writing success to parent, to avoid race */
	if (strcmp(mode, "check") == 0) {
		lock_out(fd, F_UNLCK);
		close(fd);
		unlink(lockfile);
	}

	/* write success to parent since we acquired the lock */
	res = 0;
	write(pipefd[1], &res, sizeof(int));
	close(pipefd[1]);

	if (strcmp(mode, "check") == 0)
		return 0;

	/* clear any residual stop db file before starting monitoring */
	clear_stop_db_file();

	/*
	 * first find out the pid of the postgres process from dbstore/postmaster.pid
	 * wait for a while till it is found
	 * if not found within MAX_DBPID_ATTEMPTS then break with error
	 * if found, start monitoring the pid
	 *
	 */
	dbpid = 0;
	for (i = 0; i < MAX_DBPID_ATTEMPTS; i++) {
		if ((dbpid = get_pid()) > 0)
			break;
		(void)utimes(lockfile, NULL);
		sleep(1);
	}

	if (dbpid == 0) {
		/* database did not come up, so quit after unlocking file */
		lock_out(fd, F_UNLCK);
		close(fd);
		unlink(lockfile);
		return 0;
	}

	while (1) {
		(void)utimes(lockfile, NULL);

		if (kill(dbpid, 0) != 0)
			break;
		if (!((dbpid = get_pid()) > 0))
			break;

		/* check if stop db file exists */
		check_and_stop_db(dbpid);

		sleep(1);
	}

	lock_out(fd, F_UNLCK);
	close(fd);
	unlink(lockfile);

	return 0;
}
Ejemplo n.º 21
0
int main(

  int   argc,
  char *argv[])

  {
  char  *id = "main";

  struct hostent *hp;
  int  go, c, errflg = 0;
  int  lockfds;
  int  t = 1;
  pid_t  pid;
  char  host[100];
  char  *homedir = PBS_SERVER_HOME;
  unsigned int port;
  char  *dbfile = "sched_out";

  struct sigaction act;
  sigset_t oldsigs;
  caddr_t curr_brk = 0;
  caddr_t next_brk;
  extern char *optarg;
  extern int optind, opterr;
  extern int rpp_fd;
  fd_set fdset;

  int  schedinit(int argc, char **argv);
  int  schedule(int com, int connector);

  glob_argv = argv;
  alarm_time = 180;

  /* The following is code to reduce security risks                */
  /* move this to a place where nss_ldap doesn't hold a socket yet */

  c = sysconf(_SC_OPEN_MAX);

  while (--c > 2)
    (void)close(c); /* close any file desc left open by parent */

  port = get_svrport(PBS_SCHEDULER_SERVICE_NAME, "tcp",
                     PBS_SCHEDULER_SERVICE_PORT);

  pbs_rm_port = get_svrport(PBS_MANAGER_SERVICE_NAME, "tcp",
                            PBS_MANAGER_SERVICE_PORT);

  strcpy(pbs_current_user, "Scheduler");

  msg_daemonname = strdup("pbs_sched");

  opterr = 0;

  while ((c = getopt(argc, argv, "L:S:R:d:p:c:a:-:")) != EOF)
    {
    switch (c)
      {

      case '-':

        if ((optarg == NULL) || (optarg[0] == '\0'))
          {
          errflg = 1;
          }

        if (!strcmp(optarg, "version"))
          {
          fprintf(stderr, "version: %s\n", PACKAGE_VERSION);
          exit(0);
          }
        else
          {
          errflg = 1;
          }

        break;

      case 'L':
        logfile = optarg;
        break;

      case 'S':
        port = atoi(optarg);

        if (port == 0)
          {
          fprintf(stderr,
                  "%s: illegal port\n", optarg);
          errflg = 1;
          }

        break;

      case 'R':

        if ((pbs_rm_port = atoi(optarg)) == 0)
          {
          (void)fprintf(stderr, "%s: bad -R %s\n",
                        argv[0], optarg);
          return 1;
          }

        break;

      case 'd':
        homedir = optarg;
        break;

      case 'p':
        dbfile = optarg;
        break;

      case 'c':
        configfile = optarg;
        break;

      case 'a':
        alarm_time = atoi(optarg);

        if (alarm_time == 0)
          {
          fprintf(stderr,
                  "%s: bad alarm time\n", optarg);
          errflg = 1;
          }

        break;

      case '?':
        errflg = 1;
        break;
      }
    }

  if (errflg)
    {
    fprintf(stderr, "usage: %s %s\n", argv[0], usage);
    exit(1);
    }

#ifndef DEBUG
  if (IamRoot() == 0)
    {
        return (1);
    }
#endif        /* DEBUG */

  /* Save the original working directory for "restart" */
  if ((oldpath = getcwd((char *)NULL, MAXPATHLEN)) == NULL)
    {
    fprintf(stderr, "cannot get current working directory\n");
    exit(1);
    }

  (void)sprintf(log_buffer, "%s/sched_priv", homedir);
#if !defined(DEBUG) && !defined(NO_SECURITY_CHECK)
  c  = chk_file_sec(log_buffer, 1, 0, S_IWGRP | S_IWOTH, 1, NULL);
  c |= chk_file_sec(PBS_ENVIRON, 0, 0, S_IWGRP | S_IWOTH, 0, NULL);

  if (c != 0) exit(1);

#endif  /* not DEBUG and not NO_SECURITY_CHECK */
  if (chdir(log_buffer) == -1)
    {
    perror("chdir");
    exit(1);
    }

  (void)sprintf(path_log,   "%s/sched_logs", homedir);
  (void)sprintf(path_acct,   "%s/%s", log_buffer, PBS_ACCT);


  /* The following is code to reduce security risks                */
  /* start out with standard umask, system resource limit infinite */

  umask(022);

  if (setup_env(PBS_ENVIRON) == -1)
    exit(1);

  c = getgid();

  (void)setgroups(1, (gid_t *)&c); /* secure suppl. groups */

#ifndef DEBUG
#ifdef _CRAY
  (void)limit(C_JOB,      0, L_CPROC, 0);

  (void)limit(C_JOB,      0, L_CPU,   0);

  (void)limit(C_JOBPROCS, 0, L_CPU,   0);

  (void)limit(C_PROC,     0, L_FD,  255);

  (void)limit(C_JOB,      0, L_FSBLK, 0);

  (void)limit(C_JOBPROCS, 0, L_FSBLK, 0);

  (void)limit(C_JOB,      0, L_MEM  , 0);

  (void)limit(C_JOBPROCS, 0, L_MEM  , 0);

#else /* not  _CRAY */
    {

    struct rlimit rlimit;

    rlimit.rlim_cur = RLIM_INFINITY;
    rlimit.rlim_max = RLIM_INFINITY;
    (void)setrlimit(RLIMIT_CPU,   &rlimit);
    (void)setrlimit(RLIMIT_FSIZE, &rlimit);
    (void)setrlimit(RLIMIT_DATA,  &rlimit);
    (void)setrlimit(RLIMIT_STACK, &rlimit);
#ifdef  RLIMIT_RSS
    (void)setrlimit(RLIMIT_RSS  , &rlimit);
#endif  /* RLIMIT_RSS */
#ifdef  RLIMIT_VMEM
    (void)setrlimit(RLIMIT_VMEM  , &rlimit);
#endif  /* RLIMIT_VMEM */
    }
#endif /* not _CRAY */
#endif /* DEBUG */

  if (log_open(logfile, path_log) == -1)
    {
    fprintf(stderr, "%s: logfile could not be opened\n", argv[0]);
    exit(1);
    }

  if (gethostname(host, sizeof(host)) == -1)
    {
    log_err(errno, id, "gethostname");
    die(0);
    }

  if ((hp = gethostbyname(host)) == NULL)
    {
    log_err(errno, id, "gethostbyname");
    die(0);
    }

  if ((server_sock = socket(AF_INET, SOCK_STREAM, 0)) < 0)
    {
    log_err(errno, id, "socket");
    die(0);
    }

  if (setsockopt(server_sock, SOL_SOCKET, SO_REUSEADDR,
                 (char *)&t, sizeof(t)) == -1)
    {
    log_err(errno, id, "setsockopt");
    die(0);
    }

  saddr.sin_family = AF_INET;

  saddr.sin_port = htons(port);
  memcpy(&saddr.sin_addr, hp->h_addr, hp->h_length);

  if (bind(server_sock, (struct sockaddr *)&saddr, sizeof(saddr)) < 0)
    {
    log_err(errno, id, "bind");
    die(0);
    }

  if (listen(server_sock, 5) < 0)
    {
    log_err(errno, id, "listen");
    die(0);
    }

  okclients = (pbs_net_t *)calloc(START_CLIENTS, sizeof(pbs_net_t));

  addclient("localhost");   /* who has permission to call MOM */
  addclient(host);

  if (configfile)
    {
    if (read_config(configfile) != 0)
      die(0);
    }

  lockfds = open("sched.lock", O_CREAT | O_TRUNC | O_WRONLY, 0644);

  if (lockfds < 0)
    {
    log_err(errno, id, "open lock file");
    exit(1);
    }

  lock_out(lockfds, F_WRLCK);

  fullresp(0);

  if (sigemptyset(&allsigs) == -1)
    {
    perror("sigemptyset");
    exit(1);
    }

  if (sigprocmask(SIG_SETMASK, &allsigs, NULL) == -1)   /* unblock */
    {
    perror("sigprocmask");
    exit(1);
    }

  act.sa_flags = 0;

  sigaddset(&allsigs, SIGHUP);    /* remember to block these */
  sigaddset(&allsigs, SIGINT);    /* during critical sections */
  sigaddset(&allsigs, SIGTERM);   /* so we don't get confused */
  act.sa_mask = allsigs;

  act.sa_handler = restart;       /* do a restart on SIGHUP */
  sigaction(SIGHUP, &act, NULL);

  act.sa_handler = toolong; /* handle an alarm call */
  sigaction(SIGALRM, &act, NULL);

  act.sa_handler = die;           /* bite the biscuit for all following */
  sigaction(SIGINT, &act, NULL);
  sigaction(SIGTERM, &act, NULL);

  /*
   * Catch these signals to ensure we core dump even if
   * our rlimit for core dumps is set to 0 initially.
   *
   * Chris Samuel - VPAC
   * [email protected] - 29th July 2003
   *
   * Now conditional on the PBSCOREDUMP environment variable
   */

  if (getenv("PBSCOREDUMP"))
    {
    act.sa_handler = catch_abort;   /* make sure we core dump */

    sigaction(SIGSEGV, &act, NULL);
    sigaction(SIGBUS, &act, NULL);
    sigaction(SIGFPE, &act, NULL);
    sigaction(SIGILL, &act, NULL);
    sigaction(SIGTRAP, &act, NULL);
    sigaction(SIGSYS, &act, NULL);
    }

  /*
   *  Local initialization stuff
   */

  if (schedinit(argc, argv))
    {
    (void) sprintf(log_buffer,
                   "local initialization failed, terminating");
    log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);
    exit(1);
    }

  if (getenv("PBSDEBUG") == NULL)
    {
    lock_out(lockfds, F_UNLCK);

#ifdef DISABLE_DAEMONS
    pid = getpid();
#else
    if ((pid = fork()) == -1)
      {
      /* error on fork */
      perror("fork");

      exit(1);
      }
    else
    if (pid > 0)               /* parent exits */
      {
      exit(0);
      }

    if ((pid = setsid()) == -1)
      {
      perror("setsid");

      exit(1);
      }
#endif  /*  DISABLE_DAEMONS  */

    lock_out(lockfds, F_WRLCK);

    if (freopen(dbfile, "a", stdout) == NULL)
      {
      perror("opening lockfile");

      exit(1);
      }


    setvbuf(stdout, NULL, _IOLBF, 0);

    dup2(fileno(stdout), fileno(stderr));
    }
  else
    {
    setvbuf(stdout, NULL, _IOLBF, 0);
    setvbuf(stderr, NULL, _IOLBF, 0);

    pid = getpid();
    }

  if (freopen("/dev/null", "r", stdin) == NULL)
    {
    perror("opening /dev/null");

    exit(1);
    }

  /* write scheduler's pid into lockfile */

  (void)sprintf(log_buffer, "%ld\n", (long)pid);

  if (write(lockfds, log_buffer, strlen(log_buffer) + 1) != (ssize_t)(strlen(log_buffer) + 1))
    {
    perror("writing to lockfile");

    exit(1);
    }

#if (PLOCK_DAEMONS & 2)
  (void)plock(PROCLOCK); /* lock daemon into memory */

#endif

  sprintf(log_buffer, "%s startup pid %ld", argv[0], (long)pid);

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

  FD_ZERO(&fdset);

  for (go = 1;go;)
    {
    int cmd;

    if (rpp_fd != -1)
      FD_SET(rpp_fd, &fdset);

    FD_SET(server_sock, &fdset);

    if (select(FD_SETSIZE, &fdset, NULL, NULL, NULL) == -1)
      {
      if (errno != EINTR)
        {
        log_err(errno, id, "select");
        die(0);
        }

      continue;
      }

    if (rpp_fd != -1 && FD_ISSET(rpp_fd, &fdset))
      {
      if (rpp_io() == -1)
        log_err(errno, id, "rpp_io");
      }

    if (!FD_ISSET(server_sock, &fdset))
      continue;

    cmd = server_command();

    if (sigprocmask(SIG_BLOCK, &allsigs, &oldsigs) == -1)
      log_err(errno, id, "sigprocmaskSIG_BLOCK)");

    alarm(alarm_time);

    if (schedule(cmd, connector)) /* magic happens here */
      go = 0;

    alarm(0);

    if (connector >= 0 && server_disconnect(connector))
      {
      log_err(errno, id, "server_disconnect");
      die(0);
      }

    next_brk = (caddr_t)sbrk(0);

    if (next_brk > curr_brk)
      {
      sprintf(log_buffer, "brk point %ld", (long)next_brk);
      log_record(PBSEVENT_DEBUG, PBS_EVENTCLASS_SERVER,
                 id, log_buffer);
      curr_brk = next_brk;
      }

    if (sigprocmask(SIG_SETMASK, &oldsigs, NULL) == -1)
      log_err(errno, id, "sigprocmask(SIG_SETMASK)");
    }

  sprintf(log_buffer, "%s normal finish pid %ld",

          argv[0],
          (long)pid);

  log_record(PBSEVENT_SYSTEM, PBS_EVENTCLASS_SERVER, id, log_buffer);

  close(server_sock);

  exit(0);
  }  /* END main() */