Пример #1
0
/****** shepherd/qrsh/write_exit_code_to_qrsh() *******************************
*  NAME
*     write_exit_code_to_qrsh -- send an exit code to qrsh
*
*  SYNOPSIS
*     void write_exit_code_to_qrsh(int exit_code)
*
*  FUNCTION
*     Formats <exit_code> as a decimal string and hands it to
*     write_to_qrsh(). A trace line is written on entry, and a second
*     one if write_to_qrsh() reports a failure (non-zero return).
*     The failure is only traced; it is not reported to the caller.
*
*  INPUTS
*     exit_code - the exit code to transmit
*
*  SEE ALSO
*     shepherd/qrsh/write_to_qrsh()
******************************************************************************/
void write_exit_code_to_qrsh(int exit_code)
{
    char code_str[1024] = "";

    shepherd_trace("write_exit_code_to_qrsh(%d)", exit_code);

    /* the exit code travels as its decimal text representation */
    snprintf(code_str, sizeof(code_str), "%d", exit_code);

    if (write_to_qrsh(code_str) == 0) {
        return;
    }

    shepherd_trace("writing exit code to qrsh failed");
}
Пример #2
0
/****** shepherd/qrsh/qlogin_starter() ****************************************
*
*  NAME
*     qlogin_starter -- start a protocol daemon on an accepted connection
*
*  SYNOPSIS
*     #include "qlogin_starter.h"
*     int qlogin_starter(const char *cwd, char *daemon, char **env);
*
*  FUNCTION
*     The function is called from shepherd to start a protocol daemon
*     like telnetd, rshd or rlogind.
*     The mechanism used to call these daemons is that of inetd:
*        - a socket is created (server side, any free port is assigned
*          by the operating system)
*        - qlogin_starter waits for someone to connect to this socket
*        - the socket file handles are redirected to stdin, stdout
*          and stderr
*        - the daemon process is started
*     Additionally to the inetd mechanism, the port number and some
*     other information is sent to the qrsh process that initiated
*     (over qmaster, schedd, execd, shepherd) the qlogin_starter call.
*
*  INPUTS
*     cwd    - the current working directory (the active_jobs directory)
*     daemon - command line of the daemon to start: path of the daemon
*              followed by blank separated arguments. The string is
*              modified in place (it is split with strtok()).
*     env    - environment handed to execve() for the daemon
*
*  RESULT
*     on success, the function will not return (it exec's)
*      4, if there is a problem with permissions
*      5, if a socket cannot be allocated
*      6, if a socket bind fails
*      7, if socket name (port) cannot be determined
*      8, if listening on the socket fails
*      9, if environment (to be passed to qrsh) cannot be read
*     10, if sending information to qrsh fails
*     11, if nobody connects to the socket within one minute
*     12, if the acception of a connecting client fails
*     13, if the execution of the daemon fails; this includes an empty
*         daemon command line and one with too many arguments
*
*  NOTES
*     The daemon command line may consist of at most 19 blank separated
*     tokens (daemon path plus 18 arguments). Quoted arguments containing
*     blanks are not supported.
******************************************************************************/
int qlogin_starter(const char *cwd, char *daemon, char** env)
{
    int ret;
    int port;
    int fd;
    int maxfd;
    int sockfd;
    int on = 1;
    int sso = 1;
    int newsfd;
    fd_set fds;
    struct sockaddr_in serv_addr;
    struct timeval timeout;
    char buffer[2048];
    char *args[20]; /* JG: TODO: should be dynamically allocated */
    const int max_args = (int)(sizeof(args) / sizeof(args[0]));
    char *token;
    int argc = 0;
    const char *sge_root = NULL;
    const char *arch = NULL;

#if defined(IRIX65) || defined(INTERIX) || defined(DARWIN6) || defined(ALPHA5) || defined(HP1164)
    int length;
    int len;
#else
    socklen_t length;
    socklen_t len;
#endif

    len = sizeof(serv_addr);

    /* must be root because we must access /dev/something */
    if( setgid(SGE_SUPERUSER_GID) ||
            setuid(SGE_SUPERUSER_UID) ||
            setegid(SGE_SUPERUSER_GID) ||
            seteuid(SGE_SUPERUSER_UID)) {
        shepherd_trace("cannot change uid/gid\n");
        return 4;
    }
    shepherd_trace("uid = "uid_t_fmt", euid = "uid_t_fmt", gid = "gid_t_fmt
                   ", egid = "gid_t_fmt, getuid(), geteuid(), getgid(), getegid());

    /* socket stuff from here */
    sockfd = socket(AF_INET, SOCK_STREAM, 0);

    if (sockfd == -1) {
        shepherd_trace("cannot open socket.");
        return 5;
    }
    shepherd_trace("using sfd %d", sockfd);

    setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, (char *) &on, sizeof(on));

    /* bind the socket to any local address; port 0 lets the OS pick a port */
    memset((char *) &serv_addr, 0, sizeof(serv_addr));
    serv_addr.sin_port = 0;
    serv_addr.sin_family = AF_INET;
    serv_addr.sin_addr.s_addr = INADDR_ANY;
    ret = bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr));
    if (ret != 0) {
        shepherd_trace("cannot bind socket: %s", strerror(errno));
        shutdown(sockfd, 2);
        close(sockfd);
        return 6;
    }

    /* find out assigned port number and pass it to caller */
    length = sizeof(serv_addr);
    if (getsockname(sockfd,(struct sockaddr *) &serv_addr, &length) == -1) {
        shepherd_trace("getting socket name failed: %s", strerror(errno));
        shutdown(sockfd, 2);
        close(sockfd);
        return 7;
    }

    /* listen on socket - make connections be accepted */
    if (listen(sockfd, 1) != 0) {
        shepherd_trace("listen failed: %s", strerror(errno));
        shutdown(sockfd, 2);
        close(sockfd);
        return 8;
    }

    /* send necessary info to qrsh: port + utilbin directory + active job
     * directory
     */
    port = ntohs(serv_addr.sin_port);
    shepherd_trace("bound to port %d", port);

    sge_root = sge_get_root_dir(0, NULL, 0, 1);
    arch = sge_get_arch();

    if (sge_root == NULL || arch == NULL) {
        shepherd_trace("reading environment SGE_ROOT and ARC failed");
        shutdown(sockfd, 2);
        close(sockfd);
        return 9;
    }

    snprintf(buffer, sizeof(buffer), "0:%d:%s/utilbin/%s:%s:%s",
             port, sge_root, arch, cwd, get_conf_val("host"));

    if (write_to_qrsh(buffer) != 0) {
        shepherd_trace("communication with qrsh failed");
        shutdown(sockfd, 2);
        close(sockfd);
        return 10;
    }

    /* wait for connection */
    shepherd_trace("waiting for connection.");
    /* use a reasonable timeout (60 seconds) to prevent hanging here forever */
    FD_ZERO(&fds);
    FD_SET(sockfd, &fds);
    timeout.tv_sec = 60;
    timeout.tv_usec = 0;
    /* select() returns 0 on timeout and -1 on error; both are failures here */
    if (select(sockfd+1, &fds, NULL, NULL, &timeout) < 1) {
        shepherd_trace("nobody connected to the socket");
        shutdown(sockfd, 2);
        close(sockfd);
        return 11;
    }

    /* accept connection */
    newsfd = accept(sockfd, (struct sockaddr *)(&serv_addr), &len);
    if (newsfd == -1) {
        shepherd_trace("error when accepting socket conection");
        shutdown(sockfd, 2);
        close(sockfd);
        return 12;
    }
    shepherd_trace("accepted connection on fd %d", newsfd);

    /* now we have a connection and do no longer need the "well known" port
     * free this resource.
     */
    shutdown(sockfd, 2);
    close(sockfd);

    /* don't close on exec */
    fcntl( newsfd, F_SETFD, 0 );

    /* speed up ;-) */
    setsockopt(newsfd, IPPROTO_TCP, TCP_NODELAY, (const char *) &sso, sizeof(sso));

    /* use this fd as stdin,out,err */
    dup2( newsfd, 0 );
    dup2( newsfd, 1 );
    dup2( newsfd, 2 );

    /* close all the rest */
#ifndef WIN32NATIVE
    maxfd = sysconf(_SC_OPEN_MAX);
#else /* WIN32NATIVE */
    maxfd = FD_SETSIZE;
    /* detect maximal number of fds under NT/W2000 (env: Files)*/
#endif /* WIN32NATIVE */

    /* we do not use any FD_SET call it is ok to use _SC_OPEN_MAX */
    for (fd=3; fd<maxfd; fd++) {
        close(fd);
    }

    shepherd_trace("daemon to start: |%s|", daemon);

    /* split daemon commandline into single arguments */
    /* JG: TODO: might contain quoted arguments containing spaces
     *           make function to split or use an already existing one
     */
    /* Never store more than max_args - 1 tokens: the last slot of args[]
     * is reserved for the NULL terminator execve() requires.
     */
    token = strtok(daemon, " ");
    while (token != NULL && argc < max_args - 1) {
        args[argc++] = token;
        token = strtok(NULL, " ");
    }
    args[argc] = NULL;

    /* argc == 0: nothing to execute.
     * token != NULL: tokens are left over that do not fit into args[];
     * starting the daemon with a truncated argument list would be wrong,
     * so treat it like a failed exec.
     */
    if (argc == 0 || token != NULL) {
        shepherd_trace("daemon commandline is empty or has more than %d arguments",
                       max_args - 1);
        shutdown(newsfd, 2);
        close(newsfd);
        return 13;
    }
#if 0
    {
        int i = 0;
        shepherd_trace("daemon commandline split to %d arguments", argc);
        while (args[i] != NULL) {
            shepherd_trace("daemon argv[%d] = |%s|", i, args[i]);
            i++;
        }
    }
#endif

    /* that's it. */
    execve(args[0], args, env);

    /* oh oh, exec failed */
    /* no way to tell anyone, because all FDs are closed */
    /* last chance -> tell parent process */
    /* NOTE(review): if newsfd is >= 3 it was already closed by the
     * "close all the rest" loop above, so these two calls presumably act
     * on a stale descriptor number - confirm whether fd 0 was intended.
     */
    shutdown(newsfd, 2);
    close(newsfd);
    return 13;
}
Пример #3
0
/****** shepherd_error ********************************************************
*  NAME
*     shepherd_error() -- Write a line to the error file, optionally exit.
*
*  SYNOPSIS
*     void shepherd_error(int do_exit, const char *format, ...)
*
*  FUNCTION
*     Formats the message and then
*        - passes it to shepherd_trace(),
*        - writes it to the error file, preceded by a date, time, uid
*          and pid stamp (the file is (re)opened if necessary),
*        - prints the same line to stderr if "foreground" is set,
*        - writes the current value of shepherd_state to the exit
*          status file,
*        - sends SIGTERM to the coshepherd if coshepherd_pid > 0,
*        - sends "1:<message>" to qrsh if the new interactive job
*          support is off and "qrsh_control_port" is configured.
*     Finally it either exits the program or, if files are not kept
*     open, closes the error file again.
*
*  INPUTS
*     do_exit: If non-zero, the trace files are closed and the function
*              calls exit(shepherd_state).
*     format: The format string of the line to be written to the error
*             file. May be NULL, in which case the message is empty.
*     ...: The parameters to the format string. See printf(3c).
*
*  RESULT
*     void - none
*******************************************************************************/
void shepherd_error(int do_exit, const char *format, ...)
{
   dstring     ds;
   dstring     message = DSTRING_INIT;
   char        buffer[128];
   char        header_str[256];
   char        state_str[32];
   const char *msg;
   const char *msg_text;
   struct stat statbuf;

   if (format != NULL)
   {
      va_list     ap;

      va_start(ap, format);
      sge_dstring_vsprintf(&message, format, ap);
      va_end(ap);
   }

   /* msg is handed unchanged to sh_str2file(); msg_text is the variant
    * that is safe to feed to a "%s" conversion.
    * NOTE(review): sge_dstring_get_string() presumably returns NULL for a
    * dstring that was never written to (format == NULL) - confirm.
    */
   msg = sge_dstring_get_string(&message);
   msg_text = (msg != NULL) ? msg : "";

   /* The message is already formatted: pass it as an argument, never as
    * the format string, so that a '%' inside it is not interpreted again.
    */
   shepherd_trace("%s", msg_text);

   /* File was closed (e.g. by an exec()) but fp was not set to NULL */
   if (shepherd_error_fp && fstat(fileno(shepherd_error_fp), &statbuf) == -1 && errno==EBADF)
   {
      shepherd_error_fp = NULL;
   }

   if (shepherd_error_fp == NULL)
   {
      shepherd_error_fp = shepherd_trace_init_intern(st_error);
   }

   /* Build the header unconditionally: it is needed for the error file
    * and for the foreground output below, which must not depend on the
    * error file being available.
    */
   sge_dstring_init(&ds, buffer, sizeof(buffer));
   snprintf(header_str, sizeof(header_str),
            "%s [" uid_t_fmt ":" pid_t_fmt "]: ",
            sge_ctime(0, &ds), geteuid(), getpid());

   if (shepherd_error_fp != NULL)
   {
      sh_str2file(header_str, msg, shepherd_error_fp);
   }

   if (foreground)
   {
      fprintf(stderr, "%s%s\n", header_str, msg_text);
   }

   /* File was closed (e.g. by an exec()) but fp was not set to NULL */
   if (shepherd_exit_status_fp && fstat(fileno(shepherd_exit_status_fp), &statbuf) == -1 && errno==EBADF )
   {
      shepherd_exit_status_fp = NULL;
   }

   if (shepherd_exit_status_fp == NULL)
   {
      shepherd_exit_status_fp = shepherd_trace_init_intern(st_exit_status);
   }

   if (shepherd_exit_status_fp != NULL)
   {
      /* the exit status file receives the numeric shepherd state only */
      snprintf(state_str, sizeof(state_str), "%d", shepherd_state);
      sh_str2file(state_str, NULL, shepherd_exit_status_fp);
   }

   if (coshepherd_pid > 0)
   {
      /* the signal is sent between switching to the start user and back */
      sge_switch2start_user();
      kill(coshepherd_pid, SIGTERM);
      sge_switch2admin_user();
   }

   if (g_new_interactive_job_support == false &&
      search_conf_val("qrsh_control_port") != NULL)
   {
      /* "1:" prefix followed by the message; truncated to the buffer size */
      char qrsh_msg[1024];
      snprintf(qrsh_msg, sizeof(qrsh_msg), "1:%s", msg_text);
      write_to_qrsh(qrsh_msg);
   }

   /* msg and msg_text may point into message - do not use them below */
   sge_dstring_free(&message);

   if (do_exit)
   {
      /* close all trace files before exit */
      shepherd_trace_exit();
      exit(shepherd_state);
   }

   /* There are cases where we have to open and close the files
    * for every write.
    */
   if (!g_keep_files_open)
   {
      shepherd_error_exit();
   }
}