/****** shepherd/qrsh/write_exit_code_to_qrsh() *******************************
*  NAME
*     write_exit_code_to_qrsh -- write an exit code to qrsh
*
*  SYNOPSIS
*     void write_exit_code_to_qrsh(int exit_code)
*
*  FUNCTION
*     Formats <exit_code> as a decimal number string and passes that
*     string to write_to_qrsh().
*
*     If write_to_qrsh() reports a failure (non-zero return value), the
*     failure is written to the shepherd trace only; it is not reported
*     to the caller.
*
*  INPUTS
*     exit_code - status of the calling process
*
*  NOTES
*     This function always sends the value of <exit_code>. It does not
*     read an exit code from a file and does not check the configuration
*     itself; any such decision has to be made by the caller.
*
*  SEE ALSO
*     shepherd/qrsh/write_to_qrsh()
******************************************************************************/
void write_exit_code_to_qrsh(int exit_code)
{
   char buffer[1024];

   shepherd_trace("write_exit_code_to_qrsh(%d)", exit_code);

   /* write exit code as string number to qrsh */
   snprintf(buffer, sizeof(buffer), "%d", exit_code);

   if (write_to_qrsh(buffer) != 0) {
      shepherd_trace("writing exit code to qrsh failed");
   }
}
/****** shepherd/qrsh/qlogin_starter() **************************************** * * NAME * qlogin_starter -- short description * * SYNOPSIS * #include "qlogin_starter.h" * int qlogin_starter(const char *cwd, char *daemon); * * FUNCTION * The function is called from shepherd to start a protocol daemon * like telnetd, rshd or rlogind. * The mechanism used to call these daemons is that of inetd: * - a socket is created (server side, any free port is assigned * by the operating system) * - qlogin_starter waits for someone to connect to this socket * - the socket file handles are redirected to stdin, stdout * and stderr * - the daemon process is started * Additionally to the inetd mechanism, the port number and some * other information is sent to the qrsh process that initiated * (over qmaster, schedd, execd, shepherd) the qlogin_starter call. * * INPUTS * cwd - the current working directory (the active_jobs directory) * daemon - name and path of the daemon to start * * RESULT * on success, the function will not return (it exec's) * 4, if there is a problem with permissions * 5, if a socket cannot be allocated * 6, if a socket bind fails * 7, if socket name (port) cannot be determined * 8, if environment (to be passed to qrsh) cannot be read * 9, if sending information to qrsh fails * 10, if nobody connects to the socket within a one minute * 11, if the acception of a connecting client fails * 12, if the execution of the daemon fails ******************************************************************************/ int qlogin_starter(const char *cwd, char *daemon, char** env) { int ret; int port; int fd; int maxfd; int sockfd; int on = 1; int sso = 1; int newsfd; fd_set fds; struct sockaddr_in serv_addr; struct timeval timeout; char buffer[2048]; char *args[20]; /* JG: TODO: should be dynamically allocated */ int argc = 0; const char *sge_root = NULL; const char *arch = NULL; #if defined(IRIX65) || defined(INTERIX) || defined(DARWIN6) || defined(ALPHA5) || defined(HP1164) 
int length; int len; #else socklen_t length; socklen_t len; #endif len = sizeof(serv_addr); /* must be root because we must access /dev/something */ if( setgid(SGE_SUPERUSER_GID) || setuid(SGE_SUPERUSER_UID) || setegid(SGE_SUPERUSER_GID) || seteuid(SGE_SUPERUSER_UID)) { shepherd_trace("cannot change uid/gid\n"); return 4; } shepherd_trace("uid = "uid_t_fmt", euid = "uid_t_fmt", gid = "gid_t_fmt ", egid = "gid_t_fmt, getuid(), geteuid(), getgid(), getegid()); /* socket stuff from here */ sockfd = socket(AF_INET, SOCK_STREAM, 0); if (sockfd == -1) { shepherd_trace("cannot open socket."); return 5; } shepherd_trace("using sfd %d", sockfd); setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, (char *) &on, sizeof(on)); /* bind an address to any socket */ memset((char *) &serv_addr, 0, sizeof(serv_addr)); serv_addr.sin_port = 0; serv_addr.sin_family = AF_INET; serv_addr.sin_addr.s_addr = INADDR_ANY; ret = bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr)); if (ret != 0) { shepherd_trace("cannot bind socket: %s", strerror(errno)); shutdown(sockfd, 2); close(sockfd); return 6; } /* find out assigned port number and pass it to caller */ length = sizeof(serv_addr); if (getsockname(sockfd,(struct sockaddr *) &serv_addr, &length) == -1) { shepherd_trace("getting socket name failed: %s", strerror(errno)); shutdown(sockfd, 2); close(sockfd); return 7; } /* listen on socked - make connections be accepted */ if (listen(sockfd, 1) != 0) { shepherd_trace("listen failed: %s", strerror(errno)); shutdown(sockfd, 2); close(sockfd); return 8; } /* send necessary info to qrsh: port + utilbin directory + active job * directory */ port = ntohs(serv_addr.sin_port); shepherd_trace("bound to port %d", port); sge_root = sge_get_root_dir(0, NULL, 0, 1); arch = sge_get_arch(); if (sge_root == NULL || arch == NULL) { shepherd_trace("reading environment SGE_ROOT and ARC failed"); shutdown(sockfd, 2); close(sockfd); return 9; } snprintf(buffer, 2048, "0:%d:%s/utilbin/%s:%s:%s", port, sge_root, 
arch, cwd, get_conf_val("host")); if (write_to_qrsh(buffer) != 0) { shepherd_trace("communication with qrsh failed"); shutdown(sockfd, 2); close(sockfd); return 10; } /* wait for connection */ shepherd_trace("waiting for connection."); /* use a reasonable timeout (60 seconds) to prevent hanging here forever */ FD_ZERO(&fds); FD_SET(sockfd, &fds); timeout.tv_sec = 60; timeout.tv_usec = 0; if (select(sockfd+1, &fds, NULL, NULL, &timeout) < 1) { shepherd_trace("nobody connected to the socket"); shutdown(sockfd, 2); close(sockfd); return 11; } /* accept connection */ newsfd = accept(sockfd, (struct sockaddr *)(&serv_addr), &len); if (newsfd == -1) { shepherd_trace("error when accepting socket conection"); shutdown(sockfd, 2); close(sockfd); return 12; } shepherd_trace("accepted connection on fd %d", newsfd); /* now we have a connection and do no longer need the "well known" port * free this resource. */ shutdown(sockfd, 2); close(sockfd); /* don't close on exec */ fcntl( newsfd, F_SETFD, 0 ); /* speed up ;-) */ setsockopt(newsfd, IPPROTO_TCP, TCP_NODELAY, (const char *) &sso, sizeof(int)); /* use this fd as stdin,out,err */ dup2( newsfd, 0 ); dup2( newsfd, 1 ); dup2( newsfd, 2 ); /* close all the rest */ #ifndef WIN32NATIVE maxfd = sysconf(_SC_OPEN_MAX); #else /* WIN32NATIVE */ maxfd = FD_SETSIZE; /* detect maximal number of fds under NT/W2000 (env: Files)*/ #endif /* WIN32NATIVE */ /* we do not use any FD_SET call it is ok to use _SC_OPEN_MAX */ for (fd=3; fd<maxfd; fd++) { close(fd); } shepherd_trace("daemon to start: |%s|", daemon); /* split daemon commandline into single arguments */ /* JG: TODO: might contain quoted arguments containing spaces * make function to split or use an already existing one */ args[argc++] = strtok(daemon, " "); while ((args[argc++] = strtok(NULL, " ")) != NULL); #if 0 { int i = 0; shepherd_trace("daemon commandline split to %d arguments", argc); while (args[i] != NULL) { shepherd_trace("daemon argv[%d] = |%s|", i, args[i]); i++; } } 
#endif /* that it. */ execve(args[0], args, env); /* oh oh, exec failed */ /* no way to tell anyone, becuase all FDs are closed */ /* last chance -> tell parent process */ shutdown(newsfd, 2); close(newsfd); return 13; }
/****** shepherd_error ******************************************************** * NAME * shepherd_error() -- Write a line to the error file and exit program. * * SYNOPSIS * void shepherd_error(bool do_exit, const char *format, ...) * * FUNCTION * Writes a line to the error file, preceding it with a * date, time, uid and pid stamp, and exits the program. stops execution. * * INPUTS * do_exit: If true, this function calls exit(2). * format: The format string of the line to be written to the error file. * ...: The parameters to the format string. See printf(3c). * * RESULT * void - none *******************************************************************************/ void shepherd_error(int do_exit, const char *format, ...) { dstring ds; dstring message = DSTRING_INIT; char buffer[128]; char header_str[256]; struct stat statbuf; if (format != NULL) { va_list ap; va_start(ap, format); sge_dstring_vsprintf(&message, format, ap); va_end(ap); } shepherd_trace(sge_dstring_get_string(&message)); /* File was closed (e.g. by an exec()) but fp was not set to NULL */ if (shepherd_error_fp && fstat(fileno(shepherd_error_fp), &statbuf) == -1 && errno==EBADF) { shepherd_error_fp = NULL; } if (shepherd_error_fp == NULL) { shepherd_error_fp = shepherd_trace_init_intern(st_error); } if (shepherd_error_fp != NULL) { sge_dstring_init(&ds, buffer, sizeof(buffer)); sprintf(header_str, "%s ["uid_t_fmt":"pid_t_fmt"]: ", sge_ctime(0, &ds), geteuid(), getpid()); sh_str2file(header_str, sge_dstring_get_string(&message), shepherd_error_fp); } if (foreground) { fprintf(stderr, "%s%s\n", header_str, sge_dstring_get_string(&message)); } /* File was closed (e.g. 
by an exec()) but fp was not set to NULL */ if (shepherd_exit_status_fp && fstat(fileno(shepherd_exit_status_fp), &statbuf) == -1 && errno==EBADF ) { shepherd_exit_status_fp = NULL; } if (shepherd_exit_status_fp == NULL) { shepherd_exit_status_fp = shepherd_trace_init_intern(st_exit_status); } if (shepherd_exit_status_fp != NULL) { sprintf(header_str, "%d", shepherd_state); sh_str2file(header_str, NULL, shepherd_exit_status_fp); } if (coshepherd_pid > 0) { sge_switch2start_user(); kill(coshepherd_pid, SIGTERM); sge_switch2admin_user(); } if (g_new_interactive_job_support == false && search_conf_val("qrsh_control_port") != NULL) { char buffer[1024]; snprintf(buffer, sizeof(buffer), "1:%s", sge_dstring_get_string(&message)); write_to_qrsh(buffer); } sge_dstring_free(&message); if (do_exit) { /* close all trace files before exit */ shepherd_trace_exit(); exit(shepherd_state); } /* There are cases where we have to open and close the files * for every write. */ if (!g_keep_files_open) { shepherd_error_exit(); } }