Beispiel #1
0
/*
 * Assign the job to the processor range named by the "processors" config
 * entry.
 *
 * The body is only compiled on the platforms listed in the #if below; on
 * every other platform this function does nothing. Nothing is done either
 * when "processors" is set to "UNDEFINED" (compared case-insensitively).
 *
 * set_processor_range() is called as start user; the admin user is
 * restored afterwards regardless of the outcome. A PROC_SET_WARNING result
 * is only traced. Any other non-OK result sets shepherd_state to
 * SSTATE_PROCSET_NOTSET and is reported through shepherd_error().
 */
void sge_pset_create_processor_set(void) 
{
#if defined(__sgi) || defined(ALPHA) || defined(SOLARIS64) || defined(SOLARISAMD64)
   char err_str[2*SGE_PATH_MAX+128];

   /* make sure we never report uninitialized stack contents */
   err_str[0] = '\0';

   /* SGI IRIX processor set stuff */
   if (strcasecmp("UNDEFINED",get_conf_val("processors"))) {
      int ret;

      sge_switch2start_user();
      ret = set_processor_range(get_conf_val("processors"),
                                (int) strtol(get_conf_val("job_id"), NULL, 10),
                                err_str);
      sge_switch2admin_user();

      if (ret == PROC_SET_WARNING) {
         /* not critical - e.g. not root */
         shepherd_trace("warning: processor set not set in set_processor_range");
      } else if (ret != PROC_SET_OK) {
         /* critical --> use err_str to indicate error */
         shepherd_trace("critical error in set_processor_range - bailing out");
         shepherd_state = SSTATE_PROCSET_NOTSET;
         /* err_str is data, not a format: never pass it as the format string */
         shepherd_error(1, "%s", err_str);
      }
   }
#endif

}
/*
 * Release the processor set that was set up for the job.
 *
 * The body is only compiled on the platforms listed in the #if below; on
 * every other platform this function does nothing. Nothing is done either
 * when the "processors" config entry is "UNDEFINED" (case-insensitive).
 *
 * free_processor_set() is called as start user; the admin user is restored
 * afterwards. A PROC_SET_WARNING result is only traced. Every other non-OK
 * result, as well as a failure to switch to the start user, sets
 * shepherd_state to SSTATE_PROCSET_NOTFREED and is reported through
 * shepherd_error().
 */
void sge_pset_free_processor_set(void)
{
#if defined(__sgi) || defined(ALPHA) || defined(SOLARIS64) || defined(SOLARISAMD64)
   /* SGI IRIX processor set stuff */
   if (strcasecmp("UNDEFINED",get_conf_val("processors"))) {
      char err_str[2*SGE_PATH_MAX+128];
      int ret;

      /* make sure we never report uninitialized stack contents */
      err_str[0] = '\0';

      errno = 0;
      if (sge_switch2start_user()) {
         /* remember errno: the trace call below may overwrite it */
         int saved_errno = errno;

         shepherd_trace("failed to switch user in free_processor_set: %s",
                        strerror(saved_errno));
         shepherd_state = SSTATE_PROCSET_NOTFREED;
         shepherd_error(1, "%s", strerror(saved_errno));
         return;
      }

      ret = free_processor_set(err_str);
      sge_switch2admin_user();

      switch (ret) {
      case PROC_SET_OK:
         break;
      case PROC_SET_WARNING: /* not critical - e.g. not root */
         shepherd_trace("warning: processor set not freed in free_processor_set - "
                        "did no exist, probably");
         break;
      case PROC_SET_ERROR: /* critical - err_str indicates error */
         shepherd_trace("critical error in free_processor_set - bailing out");
         shepherd_state = SSTATE_PROCSET_NOTFREED;
         /* err_str is data, not a format string */
         shepherd_error(1, "%s", err_str);
         break;
      case PROC_SET_BUSY: /* still processes running in processor set */
         shepherd_trace("error in releasing processor set");
         shepherd_state = SSTATE_PROCSET_NOTFREED;
         shepherd_error(1, "%s", err_str);
         break;
      default: /* should not occur */
         snprintf(err_str, sizeof(err_str),
            "internal error after free_processor_set - ret=%d", ret);
         shepherd_state = SSTATE_PROCSET_NOTFREED;
         shepherd_error(1, "%s", err_str);
         break;
      }
   }
#endif
}
Beispiel #3
0
/****** Interactive/qrsh/writeExitCode() ***************************************
*
*  NAME
*    writeExitCode() -- write exit code of child process to file
*
*  SYNOPSIS
*     static int writeExitCode(int myExitCode, int programExitCode)
*
*  FUNCTION
*     If myExitCode != EXIT_SUCCESS, i.e. an error occurred in qrsh_starter
*     itself, MAKEEXITSTATUS(myExitCode) is written to the file,
*     otherwise the exit code of the child process (programExitCode).
*
*     The file lives in the directory named by the config entry
*     "qrsh_tmpdir". Its name is "qrsh_exit_code.<pe_task_id>" if the config
*     entry "pe_task_id" is available, "qrsh_exit_code" otherwise.
*     The file is opened in append mode and created if necessary.
*
*  INPUTS
*     myExitCode      - status of qrsh_starter
*     programExitCode - status of the child process
*
*  RESULT
*     EXIT_SUCCESS, if all actions could be performed,
*     EXIT_FAILURE, if one of the following errors occurred:
*        - the config entry "qrsh_tmpdir" cannot be read
*        - the exit code file cannot be opened
*        - the exit code cannot be written completely
*
****************************************************************************
*/
static int writeExitCode(int myExitCode, int programExitCode) 
{   
   int exitCode;
   int result = EXIT_SUCCESS;
   char exitCode_str[20];
   char *tmpdir = NULL;
   char *taskid = NULL;
   int  file;
   size_t length;
   ssize_t written;
   char fileName[SGE_PATH_MAX];

   if(myExitCode != EXIT_SUCCESS) {
      exitCode = MAKEEXITSTATUS(myExitCode);
   } else {
      exitCode = programExitCode;
   }

   if((tmpdir = search_conf_val("qrsh_tmpdir")) == NULL) {
      qrsh_error(MSG_CONF_NOCONFVALUE_S, "qrsh_tmpdir");
      return EXIT_FAILURE;
   }
  
   taskid = get_conf_val("pe_task_id");
   
   if(taskid != NULL) {
      snprintf(fileName, sizeof(fileName), "%s/qrsh_exit_code.%s", tmpdir, taskid);
   } else {
      snprintf(fileName, sizeof(fileName), "%s/qrsh_exit_code", tmpdir);
   }

   if((file = SGE_OPEN3(fileName, O_WRONLY | O_APPEND | O_CREAT, 00744)) == -1) {
      dstring ds = DSTRING_INIT;
      qrsh_error(MSG_QRSH_STARTER_CANNOTOPENFILE_SS, fileName, sge_strerror(errno, &ds));
      sge_dstring_free(&ds);
      return EXIT_FAILURE;
   }
 
   snprintf(exitCode_str, sizeof(exitCode_str), "%d", exitCode);
   length = strlen(exitCode_str);

   /* a failed or short write must be reported to the caller */
   written = write(file, exitCode_str, length);
   if (written < 0 || (size_t)written != length) {
      dstring ds = DSTRING_INIT;
      qrsh_error(MSG_FILE_CANNOT_WRITE_SS, fileName, sge_strerror(errno, &ds));
      sge_dstring_free(&ds);
      result = EXIT_FAILURE;
   }
   SGE_CLOSE(file);
   
   return result;
}
Beispiel #4
0
/*
 * Read one config value via get_conf_val() and, if one was produced,
 * append it to *list with the given include flag.
 *
 * If values from the command line are still pending (got_args and
 * *got_args are set and args is given), they are freed and reset first, so
 * that *args, *got_args and *count start from scratch before the new value
 * is added.
 *
 * Returns 0 on success (including "no value produced") and -1 if
 * get_conf_val() or strlist_add() fails. The temporary string is released
 * on both the success and the strlist_add() failure path.
 */
static int get_conf_val_args(const char *field, const char *value, const char *opt, struct strlist ***args, int *got_args, int *count, struct strlist ***list, int include)
{
	int ret=0;
	char *tmp=NULL;

	if(get_conf_val(field, value, opt, &tmp)) return -1;
	if(!tmp) return 0;

	if(got_args && *got_args && args)
	{
		strlists_free(*args, *count);
		*got_args=0;
		*args=NULL;
		*count=0;
	}

	/* tmp is freed below after a successful add as well, so it is ours to
	   release in the failure case too */
	if(strlist_add(list, count, tmp, include)) ret=-1;
	free(tmp);
	return ret;
}
Beispiel #5
0
/****** Interactive/qrsh/changeDirectory() *****************************************
*  NAME
*     changeDirectory() -- change to directory named in job config
*
*  SYNOPSIS
*     static int changeDirectory(void) 
*
*  FUNCTION
*     Looks up the config entry "cwd" and makes that directory the current
*     working directory of the process.
*     A missing entry is reported through qrsh_error(), a failing chdir()
*     is reported on stderr.
*
*  RESULT
*     static int - 0, if an error occurred
*                  1, if function completed without errors
*  SEE ALSO
*     Interactive/qrsh/readConfig()
*
*******************************************************************************/
static int changeDirectory(void) 
{
   char *target_dir = get_conf_val("cwd");

   if (target_dir == NULL) {
      /* NOTE(review): the message id is passed as a quoted string here,
       * so the literal text is what reaches qrsh_error() - confirm whether
       * a MSG_ macro was intended.
       */
      qrsh_error("MSG_QRSH_STARTER_NOCWDINCONFIG");
      return 0;
   }

   if (chdir(target_dir) != -1) {
      return 1;
   }

   /* chdir failed: message followed by a newline on stderr */
   fprintf(stderr, MSG_QRSH_STARTER_CANNOTCHANGEDIR_SS, target_dir, strerror(errno));
   fputc('\n', stderr);
   return 0;
}
Beispiel #6
0
/****** shepherd/shepconf/shepconf_has_to_notify_before_signal() **************
*  NAME
*     shepconf_has_to_notify_before_signal() -- Get notification time 
*
*  SYNOPSIS
*     int shepconf_has_to_notify_before_signal(int *seconds) 
*
*  FUNCTION
*     Reads the config entry "notify" and stores its integer value in
*     "seconds". If the entry is not available, "seconds" is set to 0.
*     The notification mechanism counts as enabled when the value is > 0.
*
*  INPUTS
*     int *seconds - out: time to elapse between notification and final
*                    signal; must not be NULL
*
*  RESULT
*     int - true (1) if "seconds" is > 0, false (0) otherwise
*******************************************************************************/
int shepconf_has_to_notify_before_signal(int *seconds) 
{
   const char *notify = get_conf_val("notify");

   /* a missing entry means "no notification" instead of atoi(NULL) */
   *seconds = (notify != NULL) ? atoi(notify) : 0;

   return (*seconds > 0);
} 
Beispiel #7
0
/****** shepherd/qrsh/qlogin_starter() ****************************************
*
*  NAME
*     qlogin_starter -- start a protocol daemon on an accepted connection
*
*  SYNOPSIS
*     #include "qlogin_starter.h"
*     int qlogin_starter(const char *cwd, char *daemon, char **env);
*
*  FUNCTION
*     The function is called from shepherd to start a protocol daemon
*     like telnetd, rshd or rlogind.
*     The mechanism used to call these daemons is that of inetd:
*        - a socket is created (server side, any free port is assigned
*          by the operating system)
*        - qlogin_starter waits for someone to connect to this socket
*        - the socket file handles are redirected to stdin, stdout
*          and stderr
*        - the daemon process is started
*     Additionally to the inetd mechanism, the port number and some
*     other information is sent to the qrsh process that initiated
*     (over qmaster, schedd, execd, shepherd) the qlogin_starter call.
*
*  INPUTS
*     cwd    - the current working directory (the active_jobs directory)
*     daemon - name and path of the daemon to start, followed by its
*              arguments separated by blanks; the string is modified
*              in place while it is split
*     env    - environment passed to execve() for the daemon
*
*  RESULT
*     on success, the function will not return (it exec's)
*      4, if there is a problem with permissions
*      5, if a socket cannot be allocated
*      6, if a socket bind fails
*      7, if socket name (port) cannot be determined
*      8, if listening on the socket fails
*      9, if SGE_ROOT or the architecture cannot be determined
*     10, if sending information to qrsh fails
*     11, if nobody connects to the socket within one minute
*     12, if accepting a connecting client fails
*     13, if the execution of the daemon fails (this includes an empty
*         command line and one with more arguments than can be stored)
******************************************************************************/
int qlogin_starter(const char *cwd, char *daemon, char** env)
{
    int ret;
    int port;
    int fd;
    int maxfd;
    int sockfd;
    int on = 1;
    int sso = 1;
    int newsfd;
    fd_set fds;
    struct sockaddr_in serv_addr;
    struct timeval timeout;
    char buffer[2048];
    char *args[20]; /* JG: TODO: should be dynamically allocated */
    const int max_args = (int) (sizeof(args) / sizeof(args[0]));
    char *token;
    int argc = 0;
    const char *sge_root = NULL;
    const char *arch = NULL;

#if defined(IRIX65) || defined(INTERIX) || defined(DARWIN6) || defined(ALPHA5) || defined(HP1164)
    int length;
    int len;
#else
    socklen_t length;
    socklen_t len;
#endif

    len = sizeof(serv_addr);

    /* must be root because we must access /dev/something */
    if( setgid(SGE_SUPERUSER_GID) ||
            setuid(SGE_SUPERUSER_UID) ||
            setegid(SGE_SUPERUSER_GID) ||
            seteuid(SGE_SUPERUSER_UID)) {
        shepherd_trace("cannot change uid/gid\n");
        return 4;
    }
    shepherd_trace("uid = "uid_t_fmt", euid = "uid_t_fmt", gid = "gid_t_fmt
                   ", egid = "gid_t_fmt, getuid(), geteuid(), getgid(), getegid());

    /* socket stuff from here */
    sockfd = socket(AF_INET, SOCK_STREAM, 0);

    if (sockfd == -1) {
        shepherd_trace("cannot open socket.");
        return 5;
    }
    shepherd_trace("using sfd %d", sockfd);

    setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, (char *) &on, sizeof(on));

    /* bind an address to any socket */
    memset((char *) &serv_addr, 0, sizeof(serv_addr));
    serv_addr.sin_port = 0;
    serv_addr.sin_family = AF_INET;
    serv_addr.sin_addr.s_addr = INADDR_ANY;
    ret = bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr));
    if (ret != 0) {
        shepherd_trace("cannot bind socket: %s", strerror(errno));
        shutdown(sockfd, 2);
        close(sockfd);
        return 6;
    }

    /* find out assigned port number and pass it to caller */
    length = sizeof(serv_addr);
    if (getsockname(sockfd,(struct sockaddr *) &serv_addr, &length) == -1) {
        shepherd_trace("getting socket name failed: %s", strerror(errno));
        shutdown(sockfd, 2);
        close(sockfd);
        return 7;
    }

    /* listen on socket - make connections be accepted */
    if (listen(sockfd, 1) != 0) {
        shepherd_trace("listen failed: %s", strerror(errno));
        shutdown(sockfd, 2);
        close(sockfd);
        return 8;
    }

    /* send necessary info to qrsh: port + utilbin directory + active job
     * directory
     */
    port = ntohs(serv_addr.sin_port);
    shepherd_trace("bound to port %d", port);

    sge_root = sge_get_root_dir(0, NULL, 0, 1);
    arch = sge_get_arch();

    if (sge_root == NULL || arch == NULL) {
        shepherd_trace("reading environment SGE_ROOT and ARC failed");
        shutdown(sockfd, 2);
        close(sockfd);
        return 9;
    }

    snprintf(buffer, sizeof(buffer), "0:%d:%s/utilbin/%s:%s:%s",
             port, sge_root, arch, cwd, get_conf_val("host"));

    if (write_to_qrsh(buffer) != 0) {
        shepherd_trace("communication with qrsh failed");
        shutdown(sockfd, 2);
        close(sockfd);
        return 10;
    }

    /* wait for connection */
    shepherd_trace("waiting for connection.");
    /* use a reasonable timeout (60 seconds) to prevent hanging here forever */
    FD_ZERO(&fds);
    FD_SET(sockfd, &fds);
    timeout.tv_sec = 60;
    timeout.tv_usec = 0;
    if (select(sockfd+1, &fds, NULL, NULL, &timeout) < 1) {
        shepherd_trace("nobody connected to the socket");
        shutdown(sockfd, 2);
        close(sockfd);
        return 11;
    }

    /* accept connection */
    newsfd = accept(sockfd, (struct sockaddr *)(&serv_addr), &len);
    if (newsfd == -1) {
        shepherd_trace("error when accepting socket conection");
        shutdown(sockfd, 2);
        close(sockfd);
        return 12;
    }
    shepherd_trace("accepted connection on fd %d", newsfd);

    /* now we have a connection and do no longer need the "well known" port
     * free this resource.
     */
    shutdown(sockfd, 2);
    close(sockfd);

    /* don't close on exec */
    fcntl( newsfd, F_SETFD, 0 );

    /* speed up ;-) */
    setsockopt(newsfd, IPPROTO_TCP, TCP_NODELAY, (const char *) &sso, sizeof(int));

    /* use this fd as stdin,out,err */
    dup2( newsfd, 0 );
    dup2( newsfd, 1 );
    dup2( newsfd, 2 );

    /* close all the rest */
#ifndef WIN32NATIVE
    maxfd = sysconf(_SC_OPEN_MAX);
#else /* WIN32NATIVE */
    maxfd = FD_SETSIZE;
    /* detect maximal number of fds under NT/W2000 (env: Files)*/
#endif /* WIN32NATIVE */

    /* we do not use any FD_SET call it is ok to use _SC_OPEN_MAX */
    for (fd=3; fd<maxfd; fd++) {
        close(fd);
    }

    shepherd_trace("daemon to start: |%s|", daemon);

    /* split daemon commandline into single arguments */
    /* JG: TODO: might contain quoted arguments containing spaces
     *           make function to split or use an already existing one
     */
    /* one slot of args[] is reserved for the terminating NULL that
     * execve() requires, so stop before the array can overflow
     */
    token = strtok(daemon, " ");
    while (token != NULL && argc < max_args - 1) {
        args[argc++] = token;
        token = strtok(NULL, " ");
    }
    args[argc] = NULL;
#if 0
    {
        int i = 0;
        shepherd_trace("daemon commandline split to %d arguments", argc);
        while (args[i] != NULL) {
            shepherd_trace("daemon argv[%d] = |%s|", i, args[i]);
            i++;
        }
    }
#endif

    if (token != NULL) {
        /* tokens are left over: starting a truncated command line would
         * run something different from what was configured
         */
        shepherd_trace("daemon commandline has more than %d arguments",
                       max_args - 1);
    } else if (argc == 0) {
        shepherd_trace("daemon commandline is empty");
    } else {
        /* that it. */
        execve(args[0], args, env);
    }

    /* oh oh, exec failed */
    /* no way to tell anyone, becuase all FDs are closed */
    /* last chance -> tell parent process */
    shutdown(newsfd, 2);
    close(newsfd);
    return 13;
}
Beispiel #8
0
/****** shepherd/qrsh/write_to_qrsh() *****************************************
*  NAME
*     write_to_qrsh -- send a string to the controlling qrsh process
*
*  SYNOPSIS
*    int write_to_qrsh(const char *data);
*
*  FUNCTION
*     Writes the contents of <data>, including the terminating null byte,
*     to an other (remote) process over a socket connection.
*     Host and port of the communication partner are read from the
*     configuration entry "qrsh_control_port" (format "host:port").
*     A socket client connection is opened to the named host and port,
*     and the data is written.
*     The configuration value is split in place only temporarily and is
*     restored before the function returns, so the function can be called
*     more than once.
*
*  INPUTS
*     data - null terminated string with data to write
*
*  RESULT
*     0, if function finishes correctly
*     1, if the config entry qrsh_control_port does not exist
*     2, if qrsh_control_port contains illegal data (no ':' separator or
*        a port outside 1..65535)
*     3, if opening the socket failed
*     4, if the hostname cannot be resolved to a usable address
*     5, if connecting to the socket fails
*     6, if writing the data fails
******************************************************************************/
int write_to_qrsh(const char *data)
{
    char *address = NULL;
    char *host;
    char *c;
    int   port    = 0;
    int   sock    = -1;
    int   datalen = 0;
    int   ret     = 0;
    struct sockaddr_in server;
    struct hostent *hp;

    shepherd_trace("write_to_qrsh - data = %s", data);

    /* read destination host and port from config */
    address = get_conf_val("qrsh_control_port");

    if (address == NULL) {
        shepherd_trace("config does not contain entry for qrsh_control_port");
        return 1;
    }

    shepherd_trace("write_to_qrsh - address = %s", address);

    c = strchr(address, ':');
    if (c == NULL) {
        shepherd_trace("illegal value for qrsh_control_port: \"%s\". "
                       "Should be host:port", address);
        return 2;
    }

    port = atoi(c + 1);
    if (port <= 0 || port > 65535) {
        /* htons() would silently truncate anything else */
        shepherd_trace("illegal value for qrsh_control_port: \"%s\". "
                       "Should be host:port", address);
        return 2;
    }

    /* split "host:port" in place; the ':' is put back at "out" */
    *c = 0;
    host = address;

    shepherd_trace("write_to_qrsh - host = %s, port = %d", host, port);

    /* create socket. */
    sock = socket( AF_INET, SOCK_STREAM, 0);
    if (sock == -1) {
        shepherd_trace("error opening stream socket: %s", strerror(errno));
        ret = 3;
        goto out;
    }

    /* connect socket using name specified by command line. */
    memset(&server, 0, sizeof(server));
    server.sin_family = AF_INET;
    hp = gethostbyname(host);

    /*
    * gethostbyname returns a structure including the network address
    * of the specified host.
    */
    if (hp == (struct hostent *) 0) {
        shepherd_trace("%s: unknown host", host);
        ret = 4;
        goto out;
    }

    /* never copy more than the destination can hold */
    if (hp->h_length <= 0 || hp->h_length > (int) sizeof(server.sin_addr)) {
        shepherd_trace("%s: unknown host", host);
        ret = 4;
        goto out;
    }

    memcpy((char *) &server.sin_addr, (char *) hp->h_addr, hp->h_length);
    server.sin_port = htons(port);

    if (connect(sock, (struct sockaddr *) &server, sizeof server) == -1) {
        shepherd_trace("error connecting stream socket: %s", strerror(errno));
        ret = 5;
        goto out;
    }

    /* write data */
    datalen = strlen(data) + 1;
    if (write(sock, data, datalen) != datalen) {
        shepherd_trace("error writing data to qrsh_control_port");
        ret = 6;
        goto out;
    }

out:
    /* close connection */
    if (sock != -1) {
        close(sock);
    }
    /* undo the in-place split so the config value stays "host:port" */
    *c = ':';
    return ret;
}
Beispiel #9
0
/****** shepherd_binding/do_core_binding() *************************************
*  NAME
*     do_core_binding() -- Performs the core binding task for the Solaris OS. 
*
*  SYNOPSIS
*     int do_core_binding(void) 
*
*  FUNCTION
*     Performs core binding on shepherd side. The config file value
*     "binding" is read; if it contains "psrset:<id>", the shepherd is
*     attached to the processor set with that id. The attach is done as
*     start user, afterwards the admin user is restored.
*     An id of -1 means that no processor set exists for the job and
*     nothing has to be attached.
*
*     This function is Solaris specific.
*
*     DG TODO change return value to bool
*
*  RESULT
*     int - -1 if "binding" is missing, is "no_job_binding" or "NULL",
*           does not contain "psrset:", or the id cannot be extracted.
*           0 otherwise. A failing attach is only traced and still
*           yields 0.
*
*  NOTES
*     MT-NOTE: do_core_binding() is not MT safe 
*
*******************************************************************************/
int do_core_binding(void)
{
   int status = 0;

   /* the execution daemon hands over the binding request in "config" */
   char *binding = get_conf_val("binding");

   if (binding == NULL) {
      shepherd_trace("do_core_binding: \"binding\" parameter not found in config file");
      status = -1;
   } else if (!strcasecmp("no_job_binding", binding) || !strcasecmp("NULL", binding)) {
      shepherd_trace("do_core_binding: skip binding - no core binding configured");
      status = -1;
   }

   if (status != 0 || strstr(binding, "psrset:") == NULL) {
      /* nothing usable: either rejected above or no "psrset" in the value */
      shepherd_trace("do_core_binding: no processor set found in config file! do nothing");
      status = -1;
   } else {
      char *id_token = NULL;

      shepherd_trace("do_core_binding: psrset found - attaching to it!");

      /* first token is the keyword, second token is the processor set id */
      if (sge_strtok(binding, ":") == NULL) {
         shepherd_trace("do_core_binding: found string \"psrset:\" but no \":\" - almost impossible");
         status = -1;
      } else if ((id_token = sge_strtok(NULL, ":")) == NULL) {
         shepherd_trace("do_core_binding: couldn't find the psrset id after \"psrset:\" in config file (binding)");
         status = -1;
      } else {
         int pset_id = atoi(id_token);

         if (pset_id == -1) {
            /* Special id: the processor set would have needed all of the
               remaining cores. Such a set cannot be created because one
               processor has to be left for the OS. The job is bound
               implicitly, since it cannot use processors that belong to
               other processor sets. */
            shepherd_trace("do_core_binding: psrset not created since all remaining processors would be used");
            shepherd_trace("do_core_binding: binding is done implicitly");
         } else {
            /* start user rights (root) are required for processor sets */
            sge_switch2start_user();

            if (!bind_shepherd_to_pset(pset_id)) {
               shepherd_trace("do_core_binding: couldn't bind to existing processor set!");
            } else {
               shepherd_trace("do_core_binding: successfully bound to existing processor set!");
            }

            /* back to admin user */
            sge_switch2admin_user();
         }
      }
   }

   shepherd_trace("do_core_binding: finishing");

   return status;
}
Beispiel #10
0
/*
 * Linear strategy: parse amount, socket offset and core offset from the
 * binding string and bind the current process accordingly.
 * Returns -1 if one of the values cannot be parsed, 0 otherwise (the
 * outcome of the binding itself is only traced).
 */
static int do_core_binding_linear(char *binding, binding_type_t type)
{
   int amount;
   int socket;
   int core;

   shepherd_trace("do_core_binding: do linear");

   /* get the amount of cores to bind on */
   if ((amount = binding_linear_parse_amount(binding)) < 0) {
      shepherd_trace("do_core_binding: couldn't parse the amount of cores from config file");
      return -1;
   } 

   /* get the socket to begin binding with (chosen by execution daemon) */
   if ((socket = binding_linear_parse_socket_offset(binding)) < 0) {
      shepherd_trace("do_core_binding: couldn't get the socket number from config file");
      return -1;
   }

   /* get the core to begin binding with (chosen by execution daemon) */
   if ((core = binding_linear_parse_core_offset(binding)) < 0) {
      shepherd_trace("do_core_binding: couldn't get the core number from config file");
      return -1;
   }

   /* perform core binding on current process */
   if (binding_set_linear_linux(socket, core, amount, 1, type) == false) {
      /* core binding was not successful */
      if (type == BINDING_TYPE_SET) {
         shepherd_trace("do_core_binding: linear binding was not successful");
      } else if (type == BINDING_TYPE_ENV) {
         shepherd_trace("do_core_binding: couldn't set SGE_BINDING environment variable");
      } else if (type == BINDING_TYPE_PE) {
         shepherd_trace("do_core_binding: couldn't produce rankfile");
      }
   } else {
      if (type == BINDING_TYPE_SET) {
         shepherd_trace("do_core_binding: job successfully bound");
      } else if (type == BINDING_TYPE_ENV) {
         shepherd_trace("do_core_binding: SGE_BINDING environment variable created");
      } else if (type == BINDING_TYPE_PE) {
         shepherd_trace("do_core_binding: rankefile produced");
      }
   }

   return 0;
}

/*
 * Striding strategy: parse amount, step size, first socket and first core
 * from the binding string and bind the current process accordingly.
 * Returns -1 if one of the values cannot be parsed, 0 otherwise (the
 * outcome of the binding itself is only traced).
 */
static int do_core_binding_striding(char *binding, binding_type_t type)
{
   int amount = binding_striding_parse_amount(binding);
   int stepsize = binding_striding_parse_step_size(binding);

   /* these are the real start parameters */
   int first_socket = 0, first_core = 0;

   shepherd_trace("do_core_binding: striding");

   if (amount <= 0) {
      shepherd_trace("do_core_binding: error parsing <amount>");
      return -1;
   }

   if (stepsize < 0) {
      shepherd_trace("do_core_binding: error parsing <stepsize>");
      return -1;
   }

   first_socket = binding_striding_parse_first_socket(binding);
   if (first_socket < 0) {
      shepherd_trace("do_core_binding: error parsing <socket>");
      return -1;
   }

   first_core   = binding_striding_parse_first_core(binding);
   if (first_core < 0) {
      shepherd_trace("do_core_binding: error parsing <core>");
      return -1;
   }

   /* a step size of 0 is treated as 1: stepsize must be >= 1 */
   if (stepsize == 0) {
      stepsize = 1;
   }

   shepherd_trace("do_core_binding: striding set binding: first_core: %d first_socket %d amount %d stepsize %d", 
      first_core, first_socket, amount, stepsize);

   /* perform core binding on current process */
   if (binding_set_striding_linux(first_socket, first_core, amount, 0, stepsize, type)) {
      shepherd_trace("do_core_binding: striding: binding done");
   } else {
      shepherd_trace("do_core_binding: striding: binding not done");
   }

   return 0;
}

/*
 * Explicit strategy: extract the <socket>,<core> tuples from the binding
 * string and bind the current process to them. All problems are only
 * traced; the extracted lists are always released.
 */
static void do_core_binding_explicit(char *binding, binding_type_t type)
{
   /* list with the sockets (first part of the <socket>,<core> tuples) */
   int* sockets = NULL;
   /* length of sockets list */
   int nr_of_sockets = 0;
   /* list with the cores to be bound on the sockets */
   int* cores = NULL;
   /* length of cores list */
   int nr_of_cores = 0;

   shepherd_trace("do_core_binding: explicit");

   /* get <socket>,<core> pairs out of binding string */ 
   if (binding_explicit_extract_sockets_cores(binding, &sockets, &nr_of_sockets,
         &cores, &nr_of_cores) == true) {

      if (nr_of_sockets == 0 && nr_of_cores == 0) {
         /* no cores and no sockets are found */
         shepherd_trace("do_core_binding: explicit: no socket or no core was specified");
      } else if (nr_of_sockets != nr_of_cores) {
         shepherd_trace("do_core_binding: explicit: unequal amount of specified sockets and cores");
      } else {
         /* do core binding according the <socket>,<core> tuples */
         if (binding_explicit(sockets, nr_of_sockets, cores, nr_of_cores, type) == true) {
            shepherd_trace("do_core_binding: explicit: binding done");
         } else {
            shepherd_trace("do_core_binding: explicit: no core binding done");
         }
      }

   } else {
      shepherd_trace("do_core_binding: explicit: couldn't extract <socket>,<core> pair");
   }

   sge_free(&sockets);
   sge_free(&cores);
}

/****** shepherd_binding/do_core_binding() *************************************
*  NAME
*     do_core_binding() -- Performs the core binding task for the Linux OS. 
*
*  SYNOPSIS
*     int do_core_binding(void) 
*
*  FUNCTION
*     Performs core binding on shepherd side. All information required for  
*     the binding is communicated from execd to shepherd in the config 
*     file value "binding". If the value is missing, "NULL" or
*     "no_job_binding", no core binding is done.
* 
*     This function is Linux specific.
*
*     The strategy is selected by the keyword found in the binding string:
*     "linear", "striding" or "explicit". Any other value is traced as a
*     warning.
*
*     DG TODO change return value to bool
* 
*  RESULT
*     int - -1 if no binding is configured or the parameters of the linear
*           or striding strategy cannot be parsed.
*           0 otherwise. A binding attempt that fails, an explicit request
*           that cannot be evaluated and an unknown strategy are only
*           traced and still yield 0.
*
*  NOTES
*     MT-NOTE: do_core_binding() is not MT safe 
*
*******************************************************************************/
int do_core_binding(void) 
{
   /* Check if "binding" parameter in 'config' file 
    * is available and not set to "binding=no_job_binding".
    * If so, we do an early abortion. 
    */
   char *binding = get_conf_val("binding");
   binding_type_t type;
   int result = 0;

   if (binding == NULL || strcasecmp("NULL", binding) == 0) {
      shepherd_trace("do_core_binding: \"binding\" parameter not found in config file");
      return -1;
   }
   
   if (strcasecmp("no_job_binding", binding) == 0) {
      shepherd_trace("do_core_binding: skip binding - no core binding configured");
      return -1;
   }
   
   /* get the binding type (set = 0 | env = 1 | pe = 2) where default is 0 */
   type = binding_parse_type(binding); 

   /* do a binding according to the strategy */
   if (strstr(binding, "linear") != NULL) {
      result = do_core_binding_linear(binding, type);
   } else if (strstr(binding, "striding") != NULL) {
      result = do_core_binding_striding(binding, type);
   } else if (strstr(binding, "explicit") != NULL) {
      do_core_binding_explicit(binding, type);
   } else {
      /* binding cannot be NULL here, that case returned above */
      shepherd_trace("do_core_binding: WARNING: unknown \"binding\" parameter: %s", 
         binding);
   }

   if (result != 0) {
      return -1;
   }
   
   shepherd_trace("do_core_binding: finishing");

   return 0;
}
Beispiel #11
0
/*
 * setosjobid() -- record an operating system level job identifier
 *
 * Depending on the platform one of two files is written into the current
 * working directory:
 *
 *   "addgrpid" - SOLARIS, ALPHA, LINUX, FREEBSD, DARWIN: the value of the
 *                configuration entry "add_grp_id" (0 if there is none),
 *                which is also stored in *add_grp_id_ptr.
 *                HP1164, AIX: the process group id of the caller.
 *   "osjobid"  - all other platforms: a platform specific job id (IRIX array
 *                session handle, CRAY job id, NEC Super-UX job id), "sid" on
 *                the remaining platforms, or "0" if switching to the start
 *                user failed.
 *
 * sid            - session id, used as the default os-jobid
 * add_grp_id_ptr - out: additional group id; only written in the first
 *                  platform branch
 * pw             - passwd entry; only pw->pw_uid is read (CRAY branch)
 *
 * Failures to open or close the file are reported with shepherd_error(1, ...).
 * NOTE(review): the code relies on shepherd_error(1, ...) not returning,
 * since "fp" is used unconditionally afterwards - confirm.
 */
void setosjobid(pid_t sid, gid_t *add_grp_id_ptr, struct passwd *pw)
{
   FILE *fp=NULL;

   shepherd_trace("setosjobid: uid = "pid_t_fmt", euid = "pid_t_fmt, getuid(), geteuid());

#  if defined(SOLARIS) || defined(ALPHA) || defined(LINUX) || defined(FREEBSD) || defined(DARWIN)
      /* Read SgeId from config-File and create Addgrpid-File */
      {  
         char *cp;
         if ((cp = search_conf_val("add_grp_id")))
            *add_grp_id_ptr = atol(cp);
         else
            *add_grp_id_ptr = 0;
      }
      if ((fp = fopen("addgrpid", "w")) == NULL) {
         shepherd_error(1, "can't open \"addgrpid\" file");   
      }
      fprintf(fp, gid_t_fmt"\n", *add_grp_id_ptr);
      FCLOSE(fp);   
# elif defined(HP1164) || defined(AIX)
    {
      /* no additional group id here: the process group id is recorded */
      if ((fp = fopen("addgrpid", "w")) == NULL) {
         shepherd_error(1, "can't open \"addgrpid\" file");
      }
      fprintf(fp, pid_t_fmt"\n", getpgrp());
      FCLOSE(fp);
    }
#  else
   {
      char osjobid[100];
      if ((fp = fopen("osjobid", "w")) == NULL) {
         shepherd_error(1, "can't open \"osjobid\" file");
      }

      /* the platform specific calls below are made as start user */
      if(sge_switch2start_user() == 0) {
#     if defined(IRIX)
      {
         /* The following block contains the operations necessary for
          * IRIX6.2 (and later) to set array session handles (ASHs) and
          * service provider info (SPI) records
          */
         struct acct_spi spi;
         int ret;
         char *cp;

         shepherd_trace("in irix code");
         /* get _local_ array session id */
         if ((ret=newarraysess())) {
            shepherd_error(1, "error: can't create ASH; errno=%d", ret);
         }

         /* retrieve array session id we just assigned to the process and
          * write it to the os-jobid file
          */
         sprintf(osjobid, "%lld", getash());
         /* never hand a data buffer to a printf-style function as format */
         shepherd_trace("%s", osjobid); 
         /* set service provider information (spi) record */
         strncpy(spi.spi_company, "SGE", 8);
         strncpy(spi.spi_initiator, get_conf_val("spi_initiator"), 8);
         strncpy(spi.spi_origin, get_conf_val("queue"),16);
         strcpy(spi.spi_spi, "Job ");
         strncat(spi.spi_spi, get_conf_val("job_id"),11);
         if ((ret=setspinfo(&spi))) {
            shepherd_error(1, "error: can't set SPI; errno=%d", ret);
         }
         
         if ((cp = search_conf_val("acct_project"))) {
            prid_t proj; 
            if (strcasecmp(cp, "none") && ((proj = projid(cp)) >= 0)) {
               shepherd_trace("setting project \"%s\" to id %lld", cp, proj);
               if (setprid(proj) == -1)
                  shepherd_trace("failed setting project id");
            }
            else {   
               shepherd_trace("can't get id for project \"%s\"", cp);
            }
         } else {
            shepherd_trace("can't get configuration entry for projects");
         }
      }
#     elif defined(CRAY)
      {
         char *cp;
         {
            int jobid;

            if ((jobid=setjob(pw->pw_uid, 0)) < 0) {
               shepherd_error(1, "error: can't set job ID; errno = %d", errno);
            }

            if (sesscntl(jobid, S_ADDFL, S_BATCH) == -1) {
               /* report the id that was actually passed to sesscntl() */
               shepherd_error(1, "error: sesscntl(%d, S_ADDFL, S_BATCH) failed,"
                                 " errno = %d", jobid, errno);
            } 
            sprintf(osjobid, "%d", jobid);
         }

         if ((cp = search_conf_val("acct_project"))) {
            int proj; 
            if (strcasecmp(cp, "none") && ((proj = nam2acid(cp)) >= 0)) {
               shepherd_trace("setting project \"%s\" to acid %d", cp, proj);
               if (acctid(0, proj) == -1) {
                  shepherd_trace("failed setting project id (acctid)");
               }
            } else {   
               shepherd_trace("can't get id for project \"%s\"", cp);
            }
         } else {
            shepherd_trace("can't get configuration entry for projects");
         }
      }
#     elif defined(NECSX4) || defined(NECSX5)
      {
         id_t jobid = 0;
         dispset2_t attr;
         int value;

         /*
          * Create new Super-UX job
          */
         if (setjid() == -1) {
            shepherd_trace("ERROR: can't set jobid: %s[%d]", strerror(errno), errno);
         } else {
            jobid = getjid(0);
            shepherd_trace("Created job with id: "sge_u32, (u_long32) jobid);
         }  
         sprintf(osjobid, sge_u32, (u_long32) jobid); 

         /*
          * We will use limits for the whole job
          */
         set_rlimits_os_job_id(jobid);

         /*
          * The job will use the resources of the configured 
          * Resource Sharing Group (rsg)
          */ 
         {
            char *rsg_id_string;
            int rsg_id;
            char fsg_dev_string[256];

            rsg_id_string  = get_conf_val("processors");
            rsg_id = atoi(rsg_id_string);
            if (rsg_id) {
               int fd;

               sprintf(fsg_dev_string, "/dev/rsg/%d", rsg_id);
               fd = open(fsg_dev_string, O_RDONLY);
               /* open() reports failure with -1; 0 is a valid descriptor */
               if (fd < 0) {
                  shepherd_trace("ERROR: can't switch to rsg%d because can't open "
                                 "device: %s[%d]", rsg_id, strerror(errno), errno);
               } else {
                  if (ioctl(fd, RSG_JUMP, NULL) == -1) {
                     /* close() may overwrite errno, so remember it first */
                     int ioctl_errno = errno;

                     close(fd);
                     shepherd_trace("ERROR: can't switch to rsg%d: %s[%d]", 
                                    rsg_id, strerror(ioctl_errno), ioctl_errno);
                     /* leave like every other exit path: back as admin
                      * user and with the os-jobid file closed */
                     sge_switch2admin_user();
                     FCLOSE(fp);
                     return;
                  } else {
                     close(fd);
                     shepherd_trace("switched to rsg%d", rsg_id);
                  }
               }
            } else {
               shepherd_trace("using default rsg");
            }
         } 

         /*
          * Set scheduling parameter for job
          * (only if none of the values equals NEC_UNDEF_VALUE)
          */
         if (((attr.basepri = atoi(get_conf_val("nec_basepriority"))) != NEC_UNDEF_VALUE)
            && ((attr.modcpu = atoi(get_conf_val("nec_modcpu"))) != NEC_UNDEF_VALUE)
            && ((attr.tickcnt = atoi(get_conf_val("nec_tickcnt"))) != NEC_UNDEF_VALUE)
            && ((attr.dcyfctr = atoi(get_conf_val("nec_dcyfctr"))) != NEC_UNDEF_VALUE)
            && ((attr.dcyintvl = atoi(get_conf_val("nec_dcyintvl"))) != NEC_UNDEF_VALUE)
            && ((attr.tmslice = atoi(get_conf_val("nec_timeslice"))) != NEC_UNDEF_VALUE)
            && ((attr.mempri = atoi(get_conf_val("nec_memorypriority"))) != NEC_UNDEF_VALUE)
            && ((attr.szefctmrt = atoi(get_conf_val("nec_mrt_size_effct"))) != NEC_UNDEF_VALUE)
            && ((attr.priefctmrt = atoi(get_conf_val("nec_mrt_pri_effct"))) != NEC_UNDEF_VALUE)
            && ((attr.minmrt = atoi(get_conf_val("nec_mrt_minimum"))) != NEC_UNDEF_VALUE)
            && ((attr.agrange = atoi(get_conf_val("nec_aging_range"))) != NEC_UNDEF_VALUE)
            && ((attr.spinherit = atoi(get_conf_val("nec_slavepriority"))) != NEC_UNDEF_VALUE)
            && ((attr.concpu = atoi(get_conf_val("nec_cpu_count"))) != NEC_UNDEF_VALUE)) {
            if (dispcntl(SG_JID, getjid(0), DCNTL_SET2, &attr) == -1) {
               shepherd_trace("ERROR: can't set scheduling parameter: %s[%d]",
                              strerror(errno), errno);
            } else {
               shepherd_trace("control parameters for active process scheduling modified");
               print_scheduling_parameters(attr);
            }
         } else {
            shepherd_trace("we do not control active process scheduling");
         }
      }               
#     else
         /* write a default os-jobid to file */
         sprintf(osjobid, pid_t_fmt, sid);
#     endif
         sge_switch2admin_user();
      } 
      else /* not running as super user --> we want a default os-jobid */
         sprintf(osjobid, "0");
      
      if(fprintf(fp, "%s\n", osjobid) < 0)
         shepherd_trace("error writing osjobid file");
         
      FCLOSE(fp); /* Close os-jobid file */   
   }
#  endif
   return;
FCLOSE_ERROR:
   /* target of the FCLOSE() macro when closing the file fails */
   shepherd_error(1, "can't close file"); 
}
Beispiel #12
0
/****** Interactive/qrsh/setEnvironment() ***************************************
*
*  NAME
*     setEnvironment() -- set environment from file
*
*  SYNOPSIS
*     static char *setEnvironment(const char *jobdir, char **wrapper);
*
*  FUNCTION
*     Reads environment variables and their values from the file
*     <jobdir>/environment and sets them in the actual process environment.
*     The file format conforms to the sge environment file format:
*     Each line contains a tuple:
*        <name>=<value>
*     Trailing control characters (\n, \r, ...) are stripped from each line
*     and the two character sequence "\n" is replaced by a real newline
*     before the variable is set.
*     Special handling for variable QRSH_COMMAND: is the command to be executed
*     by qrsh_starter. It is not put into the environment; its value will be
*     returned as command, or NULL, if an error occurs.
*     Special handling for variable QRSH_WRAPPER: this is a wrapper to be called
*     instead of a shell to execute the command.
*     If this variable is contained in the file, it is not put into the
*     environment but returned in the parameter wrapper. Memory will be
*     allocated to hold the value, it is in the responsibility of the caller
*     to free this memory. An empty QRSH_WRAPPER only prints a message.
*     If QRSH_WRAPPER is not given and the configuration value
*     "starter_method" is set to something else than "none", wrapper is set
*     to that configuration value and the variables SGE_STARTER_SHELL_PATH,
*     SGE_STARTER_SHELL_START_MODE and SGE_STARTER_USE_LOGIN_SHELL are set.
*     Special handling for variable DISPLAY: if it is already set, do not 
*     overwrite it. Usually  it is not set, but if ssh is used as transport
*     mechanism for qrsh, the ssh -X option can be used to enable 
*     X11 forwarding.
*
*  INPUTS
*     jobdir - the jobs spool directory
*     wrapper - out: path and name of a wrapper script, or NULL
*
*  RESULT
*     command (newly allocated), if all actions could be performed
*     NULL,    if an error occurred; possible errors are:
*                 - the environment file cannot be opened or closed
*                 - necessary memory cannot be allocated
*                 - setting an environment variable fails
*                 - the variable QRSH_COMMAND is not found
*
*  NOTES
*     In the starter_method case wrapper holds the pointer obtained from
*     get_conf_val(), not a private copy.
*     NOTE(review): callers that free wrapper unconditionally should be
*     checked against this case.
*
****************************************************************************
*/
static char *setEnvironment(const char *jobdir, char **wrapper)
{
   char envFileName[SGE_PATH_MAX];
   FILE *envFile = NULL;
   char *line = NULL;
   char *command   = NULL;
   SGE_STRUCT_STAT statbuf;
   int size;
   bool set_display = true;

   *wrapper = NULL;

   /* don't set DISPLAY, if it is already set (e.g. by ssh) */
   if (getenv("DISPLAY") != NULL) {
      set_display = false;
   }

   snprintf(envFileName, SGE_PATH_MAX, "%s/environment", jobdir);
  
   /* check if environment file exists and
    * retrieve file size. We will take file size as maximum possible line length
    */
   if (SGE_STAT(envFileName, &statbuf) != 0) {
      qrsh_error(MSG_QRSH_STARTER_CANNOTOPENFILE_SS, envFileName, strerror(errno));
      return NULL;
   } 
   
   /*
    * A line can be as long as the whole file. One additional byte holds the
    * terminating NUL; a second one keeps the size handed to fgets() at two
    * or more, so that an empty file yields EOF instead of an empty string.
    */
   size = statbuf.st_size;
   line = (char *)malloc(size + 2);
   if (line == NULL) {
      qrsh_error(MSG_QRSH_STARTER_MALLOCFAILED_S, strerror(errno));
      return NULL;
   }

   /* open sge environment file */
   if ((envFile = fopen(envFileName, "r")) == NULL) {
      qrsh_error(MSG_QRSH_STARTER_CANNOTOPENFILE_SS, envFileName, strerror(errno));
      sge_free(&line);
      return NULL;
   }

   /* set all environment variables; fgets() reads at most size + 1 chars,
    * so even a file consisting of one single line is read in one piece
    */
   while (fgets(line, size + 2, envFile) != NULL) {
      /* clean trailing garbage (\n, \r, EOF ...), never stepping in front
       * of the buffer when the line consists of control characters only
       */
      char *c = &line[strlen(line)];
      while (c > line && iscntrl((unsigned char)*(c - 1))) {
         *(--c) = 0;
      }

      /* skip setting of display variable */
      if (strncmp(line, "DISPLAY=", 8) == 0 && !set_display) {
         continue;
      }
      
      if (strncmp(line, "QRSH_COMMAND=", 13) == 0) {
         /* a repeated entry replaces the earlier one */
         sge_free(&command);
         if ((command = (char *)malloc(strlen(line) - 13 + 1)) == NULL) {
            qrsh_error(MSG_QRSH_STARTER_MALLOCFAILED_S, strerror(errno));
            sge_free(&line);
            FCLOSE(envFile);
            goto ERROR_EXIT;
         }
         strcpy(command, line + 13);
      } else if (strncmp(line, "QRSH_WRAPPER=", 13) == 0) {
         if (*(line + 13) == 0) {
            fprintf(stderr, "%s\n", MSG_QRSH_STARTER_EMPTY_WRAPPER);
         } else {
            /* inside this loop *wrapper is either NULL or was allocated
             * here, so a repeated entry may release the earlier value
             */
            sge_free(wrapper);
            if ((*wrapper = (char *)malloc(strlen(line) - 13 + 1)) == NULL) {
               qrsh_error(MSG_QRSH_STARTER_MALLOCFAILED_S, strerror(errno));
               sge_free(&line);
               FCLOSE(envFile); 
               goto ERROR_EXIT;
            }
            strcpy(*wrapper, line + 13);
         }
      } else {
         /* turn the escaped "\n" sequences back into real newlines;
          * NULL means there is no replaced copy and the line is used as is
          */
         const char *new_line = sge_replace_substring(line, "\\n", "\n");
         int put_ret;
         /* set variable */
         if (new_line != NULL) {
            put_ret = sge_putenv(new_line);
            sge_free(&new_line);
         } else {
            put_ret = sge_putenv(line);
         }
         if (put_ret == 0) {
            sge_free(&line);
            FCLOSE(envFile); 
            goto ERROR_EXIT;
         }
      }
   }

   sge_free(&line);
   FCLOSE(envFile); 

   /* 
    * Use starter_method if it is supplied
    * and not overridden by QRSH_WRAPPER
    */
    
   if (*wrapper == NULL) {
      char *starter_method = get_conf_val("starter_method");
      if (starter_method != NULL && strcasecmp(starter_method, "none") != 0) { 
         char buffer[128];
         *wrapper = starter_method;
         snprintf(buffer, 128, "%s=%s", "SGE_STARTER_SHELL_PATH", ""); sge_putenv(buffer);
         snprintf(buffer, 128, "%s=%s", "SGE_STARTER_SHELL_START_MODE", "unix_behavior"); sge_putenv(buffer);
         snprintf(buffer, 128, "%s=%s", "SGE_STARTER_USE_LOGIN_SHELL", "false"); sge_putenv(buffer);
      } 
   }
   
   return command;
FCLOSE_ERROR:
   /* target of the FCLOSE() macro when closing the file fails */
   qrsh_error(MSG_FILE_ERRORCLOSEINGXY_SS, envFileName, strerror(errno));
ERROR_EXIT:
   /* the command is not handed to the caller on failure, so release it */
   sge_free(&command);
   return NULL;
}