コード例 #1
0
ファイル: sge_fileio.c プロジェクト: BlueBolt/BB_GridEngine
/*
 * Read a single pid from the file pid_file_name into *qrsh_pid.
 *
 * Returns true when exactly one value could be parsed and the file was
 * closed without error.  On a parse failure *qrsh_pid is set to 0.  When
 * the file cannot be opened *qrsh_pid is left untouched.  All failures are
 * only traced (never raised through shepherd_error), see CR 6588743.
 *
 * NOTE(review): FCLOSE is a macro defined elsewhere; it is expected to
 * jump to the FCLOSE_ERROR label when closing fails.
 */
bool 
shepherd_read_qrsh_file(const char* pid_file_name, pid_t *qrsh_pid)
{
   bool success = true;
   FILE *pid_file = fopen(pid_file_name, "r");

   if (pid_file == NULL) {
      /*
       * CR 6588743 - raising a shepherd_error here would set the queue in
       *              error state and rerun the job
       */
      shepherd_trace(MSG_FILE_NOOPEN_SS, pid_file_name, strerror(errno));
      return false;
   }

   /* exactly one pid has to be parsed from the file */
   if (fscanf(pid_file, pid_t_fmt, qrsh_pid) != 1) {
      shepherd_trace("could not read qrsh_pid_file '%s'", pid_file_name);
      *qrsh_pid = 0;
      success = false;
   }

   FCLOSE(pid_file);
   return success;

FCLOSE_ERROR:
   /*
    * CR 6588743 - raising a shepherd_error here would set the queue in
    *              error state and rerun the job
    */
   shepherd_trace(MSG_FILE_NOCLOSE_SS, pid_file_name, strerror(errno));
   return false;
}
コード例 #2
0
ファイル: sge_pset.c プロジェクト: BlueBolt/BB_GridEngine
/*
 * Assign the processor range configured for the job, on platforms with
 * processor set support (__sgi, ALPHA, SOLARIS64, SOLARISAMD64).  On all
 * other platforms this function does nothing.
 *
 * Nothing is done when the configuration value "processors" equals
 * "UNDEFINED" (case-insensitive).  Otherwise set_processor_range() is
 * called as start user; a PROC_SET_WARNING result is only traced, any
 * other failure sets shepherd_state to SSTATE_PROCSET_NOTSET and is
 * reported through shepherd_error().
 */
void sge_pset_create_processor_set(void) 
{
#if defined(__sgi) || defined(ALPHA) || defined(SOLARIS64) || defined(SOLARISAMD64)
   char err_str[2*SGE_PATH_MAX+128];

   /* make sure the buffer is a valid string even if the callee leaves it untouched */
   err_str[0] = '\0';

   /* SGI IRIX processor set stuff */
   if (strcasecmp("UNDEFINED",get_conf_val("processors"))) {
      int ret;

      sge_switch2start_user();
      ret = set_processor_range(get_conf_val("processors"),
                                (int) strtol(get_conf_val("job_id"), NULL, 10),
                                err_str);
      /* always drop back to the admin user before reporting anything */
      sge_switch2admin_user();

      if (ret != PROC_SET_OK) {
         if (ret == PROC_SET_WARNING) { /* not critical - e.g. not root */
            shepherd_trace("warning: processor set not set in set_processor_range");
         } else { /* critical --> use err_str to indicate error */
            shepherd_trace("critical error in set_processor_range - bailing out");
            shepherd_state = SSTATE_PROCSET_NOTSET;
            /* err_str is data, never a format string */
            shepherd_error(1, "%s", err_str);
         }
      }
   }
#endif

}
コード例 #3
0
/****** shepherd/qrsh/write_exit_code_to_qrsh() *******************************
*  NAME
*     write_exit_code_to_qrsh -- write an exit code to qrsh
*
*  SYNOPSIS
*     void write_exit_code_to_qrsh(int exit_code)
*
*  FUNCTION
*     Formats <exit_code> as a decimal string and hands that string to
*     write_to_qrsh(). If write_to_qrsh() reports a failure (return value
*     != 0), a trace message is written; the failure is not propagated
*     to the caller.
*
*  INPUTS
*     exit_code - the exit code to be sent
*
*  SEE ALSO
*     shepherd/qrsh/write_to_qrsh()
******************************************************************************/
void write_exit_code_to_qrsh(int exit_code)
{
    char exit_code_str[1024];

    exit_code_str[0] = '\0';

    shepherd_trace("write_exit_code_to_qrsh(%d)", exit_code);

    /* the exit code travels to qrsh as a decimal number string */
    sprintf(exit_code_str, "%d", exit_code);

    if (write_to_qrsh(exit_code_str) == 0) {
        return;
    }
    shepherd_trace("writing exit code to qrsh failed");
}
コード例 #4
0
/****** shepherd/qrsh/get_error_of_qrsh_starter() *************************
*  NAME
*     get_error_of_qrsh_starter -- get error message from qrsh_starter
*
*  SYNOPSIS
*     #include "qlogin_starter.h"
*     const char *
*     get_error_of_qrsh_starter(void);
*
*  FUNCTION
*     If the configuration value "rsh_daemon" is set, the first line of the
*     file <qrsh_tmpdir>/qrsh_error.<qrsh_task_id> (or <qrsh_tmpdir>/qrsh_error
*     when no "qrsh_task_id" is configured) is read and the file is removed
*     afterwards.
*
*  RESULT
*     a copy of the first line of the error file or
*     NULL, if "rsh_daemon" or "qrsh_tmpdir" is not configured, the file
*     could not be opened, or nothing could be read from it
*
*  NOTE
*     The returned string is dynamically allocated (strdup). It is in the
*     responsibility of the caller to free it.
*     If closing the file fails the file is not removed.
******************************************************************************/
const char *get_error_of_qrsh_starter(void)
{
    char error_path[SGE_PATH_MAX];
    char *ret = NULL;
    char *tmpdir;
    char *taskid;
    FILE *errorfile;
    int path_len;

    error_path[0] = '\0';

    /* rshd exited with OK: try to get error messages from qrsh_starter file */
    shepherd_trace("get_error_of_qrsh_starter()");

    /* we only have an error file in TMPDIR in case of rsh */
    if (search_conf_val("rsh_daemon") == NULL) {
        return NULL;
    }

    tmpdir = search_conf_val("qrsh_tmpdir");
    taskid = search_conf_val("qrsh_task_id");
    shepherd_trace("get_error_of_qrsh_starter - TMPDIR = %s, qrsh_task_id = %s",
                   tmpdir ? tmpdir : "0", taskid ? taskid : "0");
    if (tmpdir == NULL) {
        return NULL;
    }

    /* bounded formatting: tmpdir and taskid come from the configuration */
    if (taskid != NULL) {
        path_len = snprintf(error_path, sizeof(error_path),
                            "%s/qrsh_error.%s", tmpdir, taskid);
    } else {
        path_len = snprintf(error_path, sizeof(error_path),
                            "%s/qrsh_error", tmpdir);
    }
    if (path_len < 0 || (size_t)path_len >= sizeof(error_path)) {
        /* never open or unlink a truncated path */
        shepherd_trace("path of qrsh error file is too long");
        return NULL;
    }

    errorfile = fopen(error_path, "r");
    if (errorfile == NULL) {
        return NULL;
    }

    {
        /*
         * Separate buffer for the file content. It must not hide the path
         * buffer: the path is still needed below to unlink the file.
         */
        char message[MAX_STRING_SIZE];

        if (fgets(message, sizeof(message), errorfile) != NULL) {
            shepherd_trace("error string from qrsh_starter is %s", message);
            ret = strdup(message);
        }
    }

    FCLOSE(errorfile);
    if (unlink(error_path) != 0) {
        shepherd_trace("can't delete %s", error_path);
    }
    return ret;

FCLOSE_ERROR:
    shepherd_trace(MSG_FILE_NOCLOSE_SS, error_path, strerror(errno));
    return ret;

}
コード例 #5
0
ファイル: sge_processes_irix.c プロジェクト: HPCKP/gridengine
/*
 * Send signal sig to the processes belonging to ash.
 *
 * If the environment variable SGE_IRIX_KILL_COMMAND is set, it is used as
 * a printf-style template (arguments: sig, ash) for a command that is run
 * through system().  Otherwise the process list of the job is fetched via
 * psGetOneJob() and kill() is issued for every listed process; this is
 * repeated once per second while until_vanished is non-zero and the job
 * still reports processes.
 *
 * Always returns 0; individual kill() failures are only traced.
 */
int kill_ash(ash_t ash, int sig, int until_vanished)
{
   struct psJob_s *jp = NULL;
   struct psProc_s *pp;
   char *cmd;

   if ((cmd=getenv("SGE_IRIX_KILL_COMMAND"))) {

       char buf[2048];

       /*
        * NOTE(review): the environment value is deliberately used as the
        * format template for the kill command. It must come from a trusted
        * environment; the output is at least bounded to the buffer size.
        */
       snprintf(buf, sizeof(buf), cmd, sig, ash);
       system(buf);
       /* the command text is data, not a format string */
       shepherd_trace("kill_ash: %s", buf);

   } else {

      /* use PDC to get process info */

      psStartCollector();
      psWatchJob(ash);

      do {
         if ((jp=psGetOneJob(ash))) {
            if (!jp->jd_proccount)
               until_vanished = 0;
            else {
               int j;
               pp = (struct psProc_s *)((char *)jp + jp->jd_length);
               for(j=0; j<jp->jd_proccount; j++) {
                  if (kill(pp->pd_pid, sig)<0)
                     shepherd_trace(MSG_PROC_KILL_IIS, (int) pp->pd_pid, sig, strerror(errno));
                  else
                     shepherd_trace(MSG_PROC_KILLISSUED_II , (int) pp->pd_pid, sig);
                  INCPROCPTR(pp, pp->pd_length);
               }
            }
         }
         sleep(1);
      } while (until_vanished);

      sge_free(&jp);
      psIgnoreJob(ash);
   }

   return 0;
}
コード例 #6
0
ファイル: sge_fileio.c プロジェクト: BlueBolt/BB_GridEngine
/*
 * Read the first integer from the file "exit_status" (relative to the
 * current working directory) into *return_code.
 *
 * Returns true when a value was parsed and the file was closed without
 * error.  On a parse failure *return_code is set to ESSTATE_NO_EXITSTATUS
 * and false is returned.  Open and close failures are reported through
 * shepherd_error(1, ...).
 */
bool 
shepherd_read_exit_status_file(int *return_code)
{
   bool ret = true;
   FILE *fp = NULL;
   const char *const filename = "exit_status";

   fp = fopen(filename, "r");
   if (fp != NULL) {
      /* retrieve first exit status from exit status file */
      int arguments = fscanf(fp, "%d\n", return_code);

      if (arguments != 1) {
         shepherd_trace("could not read exit_status file");
         *return_code = ESSTATE_NO_EXITSTATUS;
         ret = false;
      }
      /* close only a stream that was actually opened */
      FCLOSE(fp);
   } else {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      ret = false;
   }
   return ret;
FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
コード例 #7
0
ファイル: shepherd_binding.c プロジェクト: HPCKP/gridengine
/****** shepherd_binding/create_binding_env_linux() ****************************
*  NAME
*     create_binding_env_linux() -- Creates SGE_BINDING env variable. 
*
*  SYNOPSIS
*     bool create_binding_env_linux(const int* proc_id, const int amount) 
*
*  FUNCTION
*     Creates the SGE_BINDING environment variable on Linux operating system. 
*     This environment variable contains a space separated list of Linux 
*     internal processor ids given as input parameter. The ids are separated
*     by exactly one blank; there is no leading or trailing blank.
*
*  INPUTS
*     const int* proc_id - List of processor ids. 
*     const int amount   - Length of processor id list. 
*
*  RESULT
*     bool - true when SGE_BINDING env var could be generated false if not
*
*  NOTES
*     MT-NOTE: create_binding_env_linux() is MT safe 
*
*******************************************************************************/
bool create_binding_env_linux(const int* proc_id, const int amount)
{
   bool retval          = true;
   dstring sge_binding  = DSTRING_INIT;
   dstring proc         = DSTRING_INIT;
   int i;

   for (i = 0; i < amount; i++) {
      sge_dstring_clear(&proc);
      /* the blank goes in front of every id but the first one */
      if (i == 0) {
         sge_dstring_sprintf(&proc, "%d", proc_id[i]);
      } else {
         sge_dstring_sprintf(&proc, " %d", proc_id[i]);
      }
      sge_dstring_append_dstring(&sge_binding, &proc);
   }

   if (sge_setenv("SGE_BINDING", sge_dstring_get_string(&sge_binding)) != 1) {
      /* setting env var was not successful */
      retval = false;
      shepherd_trace("create_binding_env_linux: Couldn't set environment variable!");
   }

   sge_dstring_free(&sge_binding);
   sge_dstring_free(&proc);

   return retval;
}
コード例 #8
0
ファイル: sge_fileio.c プロジェクト: BlueBolt/BB_GridEngine
/*
 * Read the first integer from the file "processor_set_number" (relative
 * to the current working directory) into *proc_set.
 *
 * Returns true when a value was parsed and the file was closed without
 * error.  On a parse failure *proc_set is set to 0 and false is returned.
 * Open and close failures are reported through shepherd_error(1, ...).
 */
bool 
shepherd_read_processor_set_number_file(int *proc_set)
{
   bool ret = true;
   FILE *fp = NULL;
   const char *const filename = "processor_set_number";

   fp = fopen(filename, "r");
   if (fp != NULL) {
      int arguments = fscanf(fp, "%d", proc_set);

      if (arguments != 1) {
         shepherd_trace("could not read processor_set_number file");
         *proc_set = 0;
         ret = false;
      } 
      /* close only a stream that was actually opened */
      FCLOSE(fp);
   } else {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      ret = false;
   }
   return ret;
FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
コード例 #9
0
/****** shepherd_binding/create_binding_env() ****************************
*  NAME
*     create_binding_env() -- Creates SGE_BINDING env variable.
*
*  SYNOPSIS
*     bool create_binding_env(hwloc_const_bitmap_t set)
*
*  FUNCTION
*     Walks over all indexes contained in the given bitmap and joins them,
*     separated by single blanks, into one string. That string is stored
*     in the environment variable SGE_BINDING via sge_setenv().
*
*  INPUTS
*     hwloc_const_bitmap_t set - CPU set to use
*
*  RESULT
*     bool - true when sge_setenv() returned 1, false otherwise (a trace
*            message is written in that case)
*
*  NOTES
*     MT-NOTE: create_binding_env() is MT safe
*
*******************************************************************************/
static bool
create_binding_env(hwloc_const_bitmap_t set)
{
   dstring env_value   = DSTRING_INIT;
   dstring entry       = DSTRING_INIT;
   bool    need_blank  = false;
   bool    success;
   unsigned cpu;

   hwloc_bitmap_foreach_begin(cpu, set)
      /* every entry except the very first one is preceded by a blank */
      if (need_blank) {
         sge_dstring_sprintf(&entry, " %d", cpu);
      } else {
         sge_dstring_sprintf(&entry, "%d", cpu);
         need_blank = true;
      }
      sge_dstring_append_dstring(&env_value, &entry);
   hwloc_bitmap_foreach_end();

   success = (sge_setenv("SGE_BINDING", sge_dstring_get_string(&env_value)) == 1);
   if (!success) {
      /* the variable could not be stored in the environment */
      shepherd_trace("create_binding_env: Couldn't set environment variable!");
   }

   sge_dstring_free(&env_value);
   sge_dstring_free(&entry);

   return success;
}
コード例 #10
0
/*
 * Release the processor set of the job, on platforms with processor set
 * support (__sgi, ALPHA, SOLARIS64, SOLARISAMD64).  On all other
 * platforms this function does nothing.
 *
 * Nothing is done when the configuration value "processors" equals
 * "UNDEFINED" (case-insensitive).  Otherwise free_processor_set() is
 * called as start user.  A PROC_SET_WARNING result is only traced; every
 * other failure sets shepherd_state to SSTATE_PROCSET_NOTFREED and is
 * reported through shepherd_error().
 */
void sge_pset_free_processor_set(void)
{
#if defined(__sgi) || defined(ALPHA) || defined(SOLARIS64) || defined(SOLARISAMD64)
   /* SGI IRIX processor set stuff */
   if (strcasecmp("UNDEFINED",get_conf_val("processors"))) {
      char err_str[2*SGE_PATH_MAX+128];
      int ret;

      /* the callee may return an error without filling the buffer */
      err_str[0] = '\0';

      errno = 0;
      if (sge_switch2start_user()) {
         /* keep errno: the trace call below may overwrite it */
         int saved_errno = errno;

         shepherd_trace("failed to switch user in free_processor_set: %s",
                        strerror(saved_errno));
         shepherd_state = SSTATE_PROCSET_NOTFREED;
         shepherd_error(1, "%s", strerror(saved_errno));
         return;
      }

      ret = free_processor_set(err_str);
      /* always drop back to the admin user before reporting anything */
      sge_switch2admin_user();

      if (ret != PROC_SET_OK) {
         switch (ret) {
         case PROC_SET_WARNING: /* not critical - e.g. not root */
            shepherd_trace("warning: processor set not freed in free_processor_set - "
                           "did no exist, probably");
            break;
         case PROC_SET_ERROR: /* critical - err_str indicates error */
            shepherd_trace("critical error in free_processor_set - bailing out");
            shepherd_state = SSTATE_PROCSET_NOTFREED;
            /* err_str is data, never a format string */
            shepherd_error(1, "%s", err_str);
            break;
         case PROC_SET_BUSY: /* still processes running in processor set */
            shepherd_trace("error in releasing processor set");
            shepherd_state = SSTATE_PROCSET_NOTFREED;
            shepherd_error(1, "%s", err_str);
            break;
         default: /* should not occur */
            snprintf(err_str, sizeof(err_str),
               "internal error after free_processor_set - ret=%d", ret);
            shepherd_state = SSTATE_PROCSET_NOTFREED;
            shepherd_error(1, "%s", err_str);
            break;
         }
      }
   }
#endif
}
コード例 #11
0
ファイル: sge_fileio.c プロジェクト: BlueBolt/BB_GridEngine
/*
 * Read the OS job id from the file "osjobid" (relative to the current
 * working directory) into *return_code.  The type of return_code depends
 * on the platform (IRIX, NECSX4/NECSX5, CRAY).
 *
 * Returns true when a value was parsed and the file was closed without
 * error.  On a parse failure *return_code is set to 0 and false is
 * returned.  When the file cannot be opened, is_error selects whether
 * this is reported through shepherd_error(1, ...) or only traced.
 *
 * NOTE(review): on non-IRIX platforms the value is scanned with "%d";
 * confirm that this matches id_t on NECSX4/NECSX5.
 */
bool 
shepherd_read_osjobid_file(
#if defined(IRIX)
   ash_t *return_code,
#elif defined(NECSX4) || defined(NECSX5)
   id_t *return_code,
#elif defined(CRAY)
   int *return_code,
#endif
   bool is_error
)
{
   bool ret = true;
   FILE *fp = NULL;
   const char *const filename = "osjobid";

   fp = fopen(filename, "r");
   if (fp != NULL) {
      int arguments = 0;

      /* same preprocessor test as for the parameter type above */
#if defined(IRIX)
      arguments = fscanf(fp, "%lld\n", return_code);
#else
      arguments = fscanf(fp, "%d\n", return_code);
#endif

      if (arguments != 1) {
         shepherd_trace("could not read osjobid file");
         *return_code = 0;
         ret = false;
      }
      FCLOSE(fp);
   } else {
      if (is_error == true) {
         shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      } else {
         shepherd_trace(MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      }
      ret = false;
   }
   return ret;
FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
コード例 #12
0
/****** qrsh_starter/delete_qrsh_pid_file() *****************************************
*  NAME
*     delete_qrsh_pid_file() -- delete the qrsh pid file
*
*  SYNOPSIS
*     int delete_qrsh_pid_file()
*
*  FUNCTION
*     Deletes the file whose name is stored in the configuration value
*     "qrsh_pid_file".
*
*  RESULT
*     1, if the file could be deleted
*     0, if an error occurred. Possible error situations are:
*           - the configuration value "qrsh_pid_file" is not set
*           - the file cannot be deleted
*
*  SEE ALSO
*  qrsh_starter
*******************************************************************************/
int delete_qrsh_pid_file()
{
    char *pid_file_name = search_conf_val("qrsh_pid_file");

    if (pid_file_name == NULL) {
        /* report the name of the missing value, not the NULL result */
        shepherd_trace("cannot get variable %s", "qrsh_pid_file");
        return 0;
    }

    if (unlink(pid_file_name) != 0) {
        shepherd_trace("cannot delete qrsh pid file %s", pid_file_name);
        return 0;
    }

    return 1;

}
コード例 #13
0
ファイル: signal_queue.c プロジェクト: BlueBolt/BB_GridEngine
/*
 * Write the content of the signal queue to the shepherd trace.
 *
 * Starting at next_sig, one trace line "<index>. <signal>" is written for
 * each of the n_sigs queued entries.  When the queue is empty a single
 * line saying so is written instead.
 */
void report_signal_queue()
{
   int remaining;
   int idx;

   if (n_sigs==0) {
      shepherd_trace("no signals in queue");
      return;
   }

   idx = next_sig;
   remaining = n_sigs;

   while (remaining) {
      shepherd_trace("%d. %d", idx, sig_queue[idx]);
      idx = NEXT_INDEX(idx);
      remaining--;
   }
}
コード例 #14
0
ファイル: sge_pset.c プロジェクト: BlueBolt/BB_GridEngine
/****** shepherd/pset/free_processor_set() ************************************
*  NAME
*     free_processor_set() -- Release the previously occupied proc set. 
*
*  SYNOPSIS
*     int free_processor_set(char *err_str) 
*
*  FUNCTION
*     Release the previously occupied processor set. The unique 
*     processor set number is read from the file "processor_set_number"
*     which has to be located in the current working directory.
*     If the file does not contain a number, processor set number 0 is
*     assumed and a trace message is written.
*
*  INPUTS
*     char *err_str - The error message string to be used by the calling
*                     routine if return value != PROC_SET_OK. It is always
*                     set to a valid string (empty on success). May be NULL.
*                     NOTE(review): the buffer size is not passed in; only
*                     short fixed messages are written - confirm all callers
*                     provide at least 128 bytes.
*
*  RESULT
*     int - Error state
*        PROC_SET_OK      - Ok
*        PROC_SET_BUSY    - The processor set is still in use, i.e.
*                           processes originating from the job have not 
*                           finished.
*        PROC_SET_ERROR   - A critical error occurred. During execution
*                           of sysmp() calls.
*        PROC_SET_WARNING - A non-critical error occurred (e.g. the
*                           procedure is executed as unpriviliged user)
******************************************************************************/
static int free_processor_set(char *err_str) 
{
   FILE *fp;
   int proc_set_num = 0;

   /* callers read err_str on failure, so it must always be a valid string */
   if (err_str != NULL) {
      err_str[0] = '\0';
   }

   /* read unique processor set number from file */
   if ((fp = fopen("processor_set_number","r"))) {
      if (fscanf(fp, "%d", &proc_set_num) != 1) {
         /* do not continue with an indeterminate value */
         shepherd_trace("MPPS_CREATE: could not parse file processor_set_number");
         proc_set_num = 0;
      }
      FCLOSE_IGNORE_ERROR(fp);
   } else {
      shepherd_trace("MPPS_CREATE: failed reading from file processor_set_number");
      if (err_str != NULL) {
         sprintf(err_str, "failed reading from file processor_set_number");
      }
      return PROC_SET_ERROR;
   }

#if defined(ALPHA)
   if (proc_set_num) {
      int ret;
      pid_t pid_list[1];
      pid_list[0] = getpid();

      /* assign shepherd back to default processor set */
      if ((ret=assign_pid_to_pset(pid_list, 1, 0, 0))<0) {
         print_pset_error(ret); /* prints error to stdout */
         shepherd_trace("MPPS_CREATE: failed assigning processors to processor set");
         if (err_str != NULL) {
            sprintf(err_str, "failed assigning processors to processor set");
         }
         return PROC_SET_ERROR;
      }

      /* NOTE(review): the trace text below repeats the one above - confirm wording */
      if ((ret = destroy_pset(proc_set_num, 1))==PROCESSOR_SET_ACTIVE) {
         print_pset_error(ret);
         shepherd_trace("MPPS_CREATE: failed assigning processors to processor set");
         if (err_str != NULL) {
            sprintf(err_str, "failed destroying processor set %d", proc_set_num);
         }
         return PROC_SET_ERROR;
      }
   }
#elif defined(SOLARIS64) || defined(SOLARISAMD64)
   /*
    * We do not release a processor set here
    * The system administrator is responsible to do this
    */
#endif
   return PROC_SET_OK;
}
コード例 #15
0
ファイル: sge_fileio.c プロジェクト: BlueBolt/BB_GridEngine
/*
 * Read a pid from the file filename into *qrsh_pid and register it as
 * configuration entry "job_pid".
 *
 * On success *replace_qrsh_pid is set to 0 and true is returned.  When
 * the file cannot be opened, no pid can be parsed, or closing fails,
 * false is returned and the problem is only traced (see CR 6588743);
 * *replace_qrsh_pid is not written when opening or parsing fails.
 */
bool
shepherd_read_qrsh_pid_file(const char *filename, pid_t *qrsh_pid,
                            int *replace_qrsh_pid)
{
   bool success = false;
   FILE *pid_file = fopen(filename, "r");

   if (pid_file == NULL) {
      /*
       * CR 6588743 - raising a shepherd_error here would set the queue in
       *              error state and rerun the job
       */
      shepherd_trace(MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      return false;
   }

   if (fscanf(pid_file, pid_t_fmt, qrsh_pid) != 1) {
      shepherd_trace("could not read qrsh_pid file");
   } else {
      char pid_str[50];

      /* set pid from qrsh_starter as job_pid */
      sprintf(pid_str, pid_t_fmt, *qrsh_pid);
      /* TODO: should better be add_or_replace */
      add_config_entry("job_pid", pid_str);
      *replace_qrsh_pid = 0;
      success = true;
   }

   FCLOSE(pid_file);
   return success;

FCLOSE_ERROR:
   /*
    * CR 6588743 - raising a shepherd_error here would set the queue in
    *              error state and rerun the job
    */
   shepherd_trace(MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
コード例 #16
0
ファイル: signal_queue.c プロジェクト: BlueBolt/BB_GridEngine
/****** shepherd/signal/queue/add_signal() ************************************
*  NAME
*     add_signal() -- store signal in queue 
*
*  SYNOPSIS
*     int add_signal(int signal) 
*
*  FUNCTION
*     Store an additional signal in queue and write a trace message
*     naming the queued signal. Nothing is stored or traced when the
*     queue already holds SGE_MAXSIG entries.
*
*  INPUTS
*     int signal - signal number 
*
*  RESULT
*     int - error state
*        0 - successful
*       -1 - buffer is full 
*******************************************************************************/
int add_signal(int signal)
{
   if (n_sigs == SGE_MAXSIG) {
      /* queue is full */
      return -1;
   }

   n_sigs++;
   sig_queue[free_sig] = signal;
   free_sig = NEXT_INDEX(free_sig);

   /* format directly: the signal name is data, not a format string */
   shepherd_trace("queued signal %s", sge_sys_sig2str(signal));

   return 0;
}  
コード例 #17
0
ファイル: err_trace.c プロジェクト: BlueBolt/BB_GridEngine
/****** shepherd_write_exit_status ********************************************
*  NAME
*     shepherd_write_exit_status() -- Write exit status to exit_status file.
*
*  SYNOPSIS
*     void shepherd_write_exit_status(const char *exit_status)
*
*  FUNCTION
*     Writes the given string to the exit_status file. The file is
*     (re)opened via shepherd_trace_init_intern() when it is not open or
*     its descriptor became invalid. While writing, the effective uid is
*     switched to SGE_SUPERUSER_UID if the real uid is SGE_SUPERUSER_UID,
*     and restored afterwards. Nothing is done when exit_status is NULL.
*
*  INPUTS
*     exit_status: The exit status of the shepherd.
*
*  RESULT
*     void - none
*******************************************************************************/
void shepherd_write_exit_status(const char *exit_status)
{
   struct stat file_info;
   int saved_euid = SGE_SUPERUSER_UID;

   if (exit_status == NULL) {
      return;
   }

   /* on filesystems where root is mapped to nobody this will not work */
   /* set euid=0. Local files: root can write to every file.
    * NFS files: everyone is allowed to write to exit_status file.
    */
   if (getuid() == SGE_SUPERUSER_UID) {
      saved_euid = geteuid();
      seteuid(SGE_SUPERUSER_UID);
   }

   /* File was closed (e.g. by an exec()) but fp was not set to NULL */
   if (shepherd_exit_status_fp != NULL
       && fstat(fileno(shepherd_exit_status_fp), &file_info) == -1
       && errno == EBADF) {
      shepherd_exit_status_fp = NULL;
   }

   if (shepherd_exit_status_fp == NULL) {
      shepherd_exit_status_fp = shepherd_trace_init_intern(st_exit_status);
   }

   if (shepherd_exit_status_fp == NULL) {
      shepherd_trace("could not write exit_status file\n");
   } else {
      sh_str2file(exit_status, NULL, shepherd_exit_status_fp);
   }

   /* switch back only if the effective uid had been changed above */
   if (saved_euid != SGE_SUPERUSER_UID) {
      seteuid(saved_euid);
   }

   /* There are cases where we have to open and close the files 
    * for every write.
    */
   if (!g_keep_files_open) {
      shepherd_error_exit();
   }
}
コード例 #18
0
/****** shepherd_binding/bind_process_to_mask() *************************************
*  NAME
*     bind_process_to_mask() -- Binds current process to a given cpuset (mask).
*
*  SYNOPSIS
*     static bool bind_process_to_mask(const hwloc_bitmap_t cpuset)
*
*  FUNCTION
*     Calls hwloc_set_cpubind() with HWLOC_CPUBIND_STRICT and, if that
*     call does not return 0, once more without flags. When one of the
*     calls returned 0, create_binding_env() is called additionally for
*     the same cpuset.
*
*  INPUTS
*     const hwloc_bitmap_t cpuset - Processors to bind processes to
*
*  RESULT
*     static bool - true if one of the binding calls returned 0,
*                   false otherwise or if has_core_binding() is false
*
*  NOTES
*     MT-NOTE: bind_process_to_mask() is not MT safe 
*
*******************************************************************************/
static bool bind_process_to_mask(const hwloc_bitmap_t cpuset)
{
   /* we only need core binding capabilites, no topology is required */
   if (!has_core_binding()) {
      return false;
   }

   /* Strict binding is tried first; the non-strict call is only made
      when the strict one did not return 0.  */
   if (hwloc_set_cpubind(sge_hwloc_topology, cpuset, HWLOC_CPUBIND_STRICT) != 0 &&
       hwloc_set_cpubind(sge_hwloc_topology, cpuset, 0) != 0) {
      return false;
   }

   /* Set the environment variable as for the env type.  Done for
      convenience, e.g. with runtimes like GCC's libgomp which
      require an environment variable to be set for thread affinity
      rather than using the core binding in effect.  */
   /* This does not show up in "environment" file!  */
   if (create_binding_env(cpuset) == true) {
      shepherd_trace("bind_process_to_mask: SGE_BINDING env var created");
   }
   return true;
}
コード例 #19
0
/****** shepherd/qrsh/get_exit_code_of_qrsh_starter() *************************
*  NAME
*     get_exit_code_of_qrsh_starter -- read exit code written by qrsh_starter
*
*  SYNOPSIS
*     #include "qlogin_starter.h"
*     int get_exit_code_of_qrsh_starter(int* exit_code);
*
*  FUNCTION
*     If the configuration value "rsh_daemon" is set, the exit code is read
*     from the file <qrsh_tmpdir>/qrsh_exit_code.<pe_task_id> (or
*     <qrsh_tmpdir>/qrsh_exit_code when no "pe_task_id" is configured) and
*     the file is removed afterwards.
*
*  INPUTS
*     exit_code - receives the exit code; preset to 1 and only changed
*                 when a number could be read from the file
*
*  RESULT
*     0, if the exit code file could be opened
*     1, if "rsh_daemon" or "qrsh_tmpdir" is not configured, the file name
*        does not fit into the internal buffer, or the file could not be
*        opened
*
*  NOTE
*     If closing the file fails the file is not removed.
******************************************************************************/
int get_exit_code_of_qrsh_starter(int* exit_code)
{
    char buffer[1024];
    int ret = 1;
    int path_len;
    char *tmpdir;
    char *taskid;
    FILE *errorfile;

    *exit_code = 1;
    *buffer = 0;

    /* rshd exited with OK: try to get returncode from qrsh_starter file */

    /* we only have an error file in TMPDIR in case of rsh,
     * otherwise pass exit_code */
    if (search_conf_val("rsh_daemon") == NULL) {
        return ret;
    }

    tmpdir = search_conf_val("qrsh_tmpdir");
    taskid = search_conf_val("pe_task_id");
    shepherd_trace("get_exit_code_of_qrsh_starter - TMPDIR = %s, pe_task_id = %s",
                   tmpdir ? tmpdir : "0", taskid ? taskid : "0");
    if (tmpdir == NULL) {
        shepherd_trace("unable to get qrsh_tmpdir");
        return ret;
    }

    /* bounded formatting: tmpdir and taskid come from the configuration */
    if (taskid != NULL) {
        path_len = snprintf(buffer, sizeof(buffer),
                            "%s/qrsh_exit_code.%s", tmpdir, taskid);
    } else {
        path_len = snprintf(buffer, sizeof(buffer),
                            "%s/qrsh_exit_code", tmpdir);
    }
    if (path_len < 0 || (size_t)path_len >= sizeof(buffer)) {
        /* never open or unlink a truncated path */
        shepherd_trace("path of qrsh exit code file is too long");
        return ret;
    }

    errorfile = fopen(buffer, "r");
    if (errorfile == NULL) {
        shepherd_trace("can't open file %s: %s", buffer, strerror(errno));
        return ret;
    }

    ret = 0;
    if (fscanf(errorfile, "%d", exit_code) == 1) {
        shepherd_trace("error code from remote command is %d", *exit_code);
    }
    FCLOSE(errorfile);
    if (unlink(buffer) != 0) {
        shepherd_trace("can't delete %s", buffer);
    }
    return ret;

FCLOSE_ERROR:
    shepherd_trace(MSG_FILE_NOCLOSE_SS, buffer, strerror(errno));
    return ret;
}
コード例 #20
0
ファイル: err_trace.c プロジェクト: BlueBolt/BB_GridEngine
/****** shepherd_error ********************************************************
*  NAME
*     shepherd_error() -- Write a line to the error file and exit program.
*
*  SYNOPSIS
*     void shepherd_error(int do_exit, const char *format, ...)
*
*  FUNCTION
*     Formats the message, writes it to the trace, writes it to the error
*     file preceded by a date, time, uid and pid stamp, prints it to stderr
*     when running in foreground, and writes shepherd_state to the
*     exit_status file. A running coshepherd is sent SIGTERM and, without
*     the new interactive job support, the message is also passed to qrsh
*     if "qrsh_control_port" is configured. Finally the program is exited
*     with shepherd_state if requested.
*
*  INPUTS
*     do_exit: If non-zero, this function closes the trace files and
*              calls exit(3) with shepherd_state.
*     format: The format string of the line to be written to the error file.
*             May be NULL, which results in an empty message.
*     ...: The parameters to the format string. See printf(3c).
*
*  RESULT
*     void - none
*******************************************************************************/
void shepherd_error(int do_exit, const char *format, ...)
{
   dstring     ds;
   dstring     message = DSTRING_INIT;
   char        buffer[128];
   char        header_str[256];
   char        state_str[32];
   struct stat statbuf;
   const char *msg_text;

   if (format != NULL)
   {
      va_list     ap;

      va_start(ap, format);
      sge_dstring_vsprintf(&message, format, ap);
      va_end(ap);
   }

   /*
    * Printable view of the message for the printf-style consumers below.
    * NOTE(review): sge_dstring_get_string() presumably yields NULL for a
    * dstring that was never written (format == NULL) - hence the fallback.
    */
   msg_text = sge_dstring_get_string(&message);
   if (msg_text == NULL)
   {
      msg_text = "";
   }

   /* the finished message is data, never a format string */
   shepherd_trace("%s", msg_text);

   /*
    * Build the header unconditionally: it is needed for the error file
    * and for the foreground output, and the latter also happens when the
    * error file could not be opened.
    */
   sge_dstring_init(&ds, buffer, sizeof(buffer));
   snprintf(header_str, sizeof(header_str), "%s [" uid_t_fmt ":" pid_t_fmt "]: ",
            sge_ctime(0, &ds), geteuid(), getpid());

   /* File was closed (e.g. by an exec()) but fp was not set to NULL */
   if (shepherd_error_fp && fstat(fileno(shepherd_error_fp), &statbuf) == -1 && errno==EBADF)
   {
      shepherd_error_fp = NULL;
   }

   if (shepherd_error_fp == NULL)
   {
      shepherd_error_fp = shepherd_trace_init_intern(st_error);
   }

   if (shepherd_error_fp != NULL)
   {
      sh_str2file(header_str, sge_dstring_get_string(&message), shepherd_error_fp);
   }

   if (foreground)
   {
      fprintf(stderr, "%s%s\n", header_str, msg_text);
   }

   /* File was closed (e.g. by an exec()) but fp was not set to NULL */
   if (shepherd_exit_status_fp && fstat(fileno(shepherd_exit_status_fp), &statbuf) == -1 && errno==EBADF )
   {
      shepherd_exit_status_fp = NULL;
   }

   if (shepherd_exit_status_fp == NULL)
   {
      shepherd_exit_status_fp = shepherd_trace_init_intern(st_exit_status);
   }

   if (shepherd_exit_status_fp != NULL)
   {
      snprintf(state_str, sizeof(state_str), "%d", shepherd_state);
      sh_str2file(state_str, NULL, shepherd_exit_status_fp);
   }

   if (coshepherd_pid > 0)
   {
      sge_switch2start_user();
      kill(coshepherd_pid, SIGTERM);
      sge_switch2admin_user();
   }   
     
   if (g_new_interactive_job_support == false && 
      search_conf_val("qrsh_control_port") != NULL)
   {
      char qrsh_msg[1024];
      snprintf(qrsh_msg, sizeof(qrsh_msg), "1:%s", msg_text);
      write_to_qrsh(qrsh_msg);  
   }

   /* msg_text points into message and must not be used past this point */
   sge_dstring_free(&message);

   if (do_exit)
   {
      /* close all trace files before exit */
      shepherd_trace_exit();
      exit(shepherd_state);
   }

   /* There are cases where we have to open and close the files 
    * for every write.
    */
   if (!g_keep_files_open)
   {
      shepherd_error_exit();
   }
}
コード例 #21
0
ファイル: shepherd_binding.c プロジェクト: HPCKP/gridengine
/****** shepherd_binding/do_core_binding() *************************************
*  NAME
*     do_core_binding() -- Applies the configured core binding (Linux variant).
*
*  SYNOPSIS
*     int do_core_binding(void) 
*
*  FUNCTION
*     Looks up the "binding" value of the config file and, depending on the 
*     strategy keyword contained in it, hands the parsed parameters to one 
*     of the binding routines:
*
*        "linear"   -> binding_set_linear_linux()
*        "striding" -> binding_set_striding_linux()
*        "explicit" -> binding_explicit()
*
*     The outcome of the binding call itself is only written to the trace 
*     file; it does not influence the return value.
*
*     DG TODO change return value to bool
* 
*  RESULT
*     int - -1 if "binding" is missing, "NULL" or "no_job_binding", or if a 
*           parameter of a linear/striding request could not be parsed.
*           0 in all other cases (this includes an unknown strategy keyword 
*           and a binding call that reported a failure).
*
*  NOTES
*     MT-NOTE: do_core_binding() is not MT safe 
*
*******************************************************************************/
int do_core_binding(void) 
{
   /* raw "binding" entry as found in the config file */
   char *binding = get_conf_val("binding");
   binding_type_t type;

   /* no binding entry at all (or the literal "NULL"): nothing to do */
   if (binding == NULL || strcasecmp("NULL", binding) == 0) {
      shepherd_trace("do_core_binding: \"binding\" parameter not found in config file");
      return -1;
   }

   /* binding explicitly switched off: nothing to do either */
   if (strcasecmp("no_job_binding", binding) == 0) {
      shepherd_trace("do_core_binding: skip binding - no core binding configured");
      return -1;
   }

   /* binding type (set = 0 | env = 1 | pe = 2), default is 0 */
   type = binding_parse_type(binding); 

   if (strstr(binding, "linear") != NULL) {
      /* ---- linear strategy ---- */
      int amount;
      int socket;
      int core;
      bool bound;

      shepherd_trace("do_core_binding: do linear");

      /* number of cores to bind to */
      amount = binding_linear_parse_amount(binding);
      if (amount < 0) {
         shepherd_trace("do_core_binding: couldn't parse the amount of cores from config file");
         return -1;
      }

      /* socket the binding starts on */
      socket = binding_linear_parse_socket_offset(binding);
      if (socket < 0) {
         shepherd_trace("do_core_binding: couldn't get the socket number from config file");
         return -1;
      }

      /* core the binding starts on */
      core = binding_linear_parse_core_offset(binding);
      if (core < 0) {
         shepherd_trace("do_core_binding: couldn't get the core number from config file");
         return -1;
      }

      /* bind the current process; the result only selects the trace text.
       * NOTE(review): the offset argument is hard-coded to 1 here while the 
       * striding call below passes 0 - confirm this is intended. */
      bound = (binding_set_linear_linux(socket, core, amount, 1, type) != false);

      if (type == BINDING_TYPE_SET) {
         if (bound) {
            shepherd_trace("do_core_binding: job successfully bound");
         } else {
            shepherd_trace("do_core_binding: linear binding was not successful");
         }
      } else if (type == BINDING_TYPE_ENV) {
         if (bound) {
            shepherd_trace("do_core_binding: SGE_BINDING environment variable created");
         } else {
            shepherd_trace("do_core_binding: couldn't set SGE_BINDING environment variable");
         }
      } else if (type == BINDING_TYPE_PE) {
         if (bound) {
            shepherd_trace("do_core_binding: rankefile produced");
         } else {
            shepherd_trace("do_core_binding: couldn't produce rankfile");
         }
      }

   } else if (strstr(binding, "striding") != NULL) {
      /* ---- striding strategy ---- */
      int amount = binding_striding_parse_amount(binding);
      int stepsize = binding_striding_parse_step_size(binding);
      int first_socket = 0;
      int first_core = 0;

      shepherd_trace("do_core_binding: striding");

      if (amount <= 0) {
         shepherd_trace("do_core_binding: error parsing <amount>");
         return -1;
      }

      if (stepsize < 0) {
         shepherd_trace("do_core_binding: error parsing <stepsize>");
         return -1;
      }

      first_socket = binding_striding_parse_first_socket(binding);
      if (first_socket < 0) {
         shepherd_trace("do_core_binding: error parsing <socket>");
         return -1;
      }

      first_core = binding_striding_parse_first_core(binding);
      if (first_core < 0) {
         shepherd_trace("do_core_binding: error parsing <core>");
         return -1;
      }

      /* a step size of 0 is silently raised to the minimum of 1 */
      if (stepsize == 0) {
         stepsize = 1;
      }

      shepherd_trace("do_core_binding: striding set binding: first_core: %d first_socket %d amount %d stepsize %d", 
         first_core, first_socket, amount, stepsize);

      /* bind the current process; the result is traced only */
      if (!binding_set_striding_linux(first_socket, first_core, amount, 0, stepsize, type)) {
         shepherd_trace("do_core_binding: striding: binding not done");
      } else {
         shepherd_trace("do_core_binding: striding: binding done");
      }

   } else if (strstr(binding, "explicit") != NULL) {
      /* ---- explicit strategy: list of <socket>,<core> tuples ---- */
      int* sockets = NULL;      /* socket part of every tuple */
      int nr_of_sockets = 0;    /* number of entries in 'sockets' */
      int* cores = NULL;        /* core part of every tuple */
      int nr_of_cores = 0;      /* number of entries in 'cores' */

      shepherd_trace("do_core_binding: explicit");

      if (binding_explicit_extract_sockets_cores(binding, &sockets, &nr_of_sockets,
            &cores, &nr_of_cores) != true) {
         /* the tuples could not be extracted from the binding string */
         sge_free(&sockets);
         sge_free(&cores);    
         shepherd_trace("do_core_binding: explicit: couldn't extract <socket>,<core> pair");
      } else {
         if (nr_of_sockets == 0 && nr_of_cores == 0) {
            /* both lists are empty */
            shepherd_trace("do_core_binding: explicit: no socket or no core was specified");
         } else if (nr_of_sockets != nr_of_cores) {
            /* the lists do not pair up */
            shepherd_trace("do_core_binding: explicit: unequal amount of specified sockets and cores");
         } else if (binding_explicit(sockets, nr_of_sockets, cores, nr_of_cores, type) == true) {
            shepherd_trace("do_core_binding: explicit: binding done");
         } else {
            shepherd_trace("do_core_binding: explicit: no core binding done");
         }

         sge_free(&sockets);
         sge_free(&cores);
      }

   } else {
      /* none of the known strategy keywords is contained in the value */
      if (binding == NULL) {
         shepherd_trace("do_core_binding: WARNING: binding was null!");
      } else {
         shepherd_trace("do_core_binding: WARNING: unknown \"binding\" parameter: %s", 
            binding);
      }   
   }
   
   shepherd_trace("do_core_binding: finishing");

   return 0;
}
コード例 #22
0
ファイル: shepherd_binding.c プロジェクト: HPCKP/gridengine
/****** shepherd_binding/do_core_binding() *************************************
*  NAME
*     do_core_binding() -- Performs the core binding task for the Solaris OS. 
*
*  SYNOPSIS
*     int do_core_binding(void) 
*
*  FUNCTION
*     Performs core binding on shepherd side. All information required for  
*     the binding is communicated from execd to shepherd in the config 
*     file value "binding". If there is "NULL" no core binding is done. 
*
*     This function is Solaris specific. It expects the value to contain 
*     "psrset:<id>" and attaches the shepherd to that processor set via 
*     bind_shepherd_to_pset(). An id of -1 means that no processor set was 
*     created and nothing has to be done.
*
*     The id is parsed with strtol(); a value without any leading digits is 
*     rejected instead of being silently treated as processor set 0.
*
*     DG TODO change return value to bool
*
*  RESULT
*     int - -1 if "binding" is missing, "NULL" or "no_job_binding", if it 
*           contains no "psrset:" part, or if the processor set id is 
*           missing or not a number. 0 otherwise; a failing 
*           bind_shepherd_to_pset() call is traced only.
*
*  NOTES
*     MT-NOTE: do_core_binding() is not MT safe 
*
*******************************************************************************/
int do_core_binding(void)
{
   int retval = 0; 

   /* just read out what is in "config" file and attach to the given psrset if 
      it is specified */
   char *binding = get_conf_val("binding");
   
   if (binding == NULL) {
      shepherd_trace("do_core_binding: \"binding\" parameter not found in config file");
      retval = -1;
   } else if (strcasecmp("no_job_binding", binding) == 0 || strcasecmp("NULL", binding) == 0) {
      shepherd_trace("do_core_binding: skip binding - no core binding configured");
      retval = -1;
   }

   /* 'binding' is only dereferenced when retval is still 0, i.e. non-NULL */
   if (retval == 0 && strstr(binding, "psrset:") != NULL) {
      int processor_set_id = 0;
      shepherd_trace("do_core_binding: psrset found - attaching to it!");

      /* parse the psrset number right after "psrset:" */
      if (sge_strtok(binding, ":") != NULL) {
         /* parse the rest of the line */
         char* pset_id;
         if ((pset_id = sge_strtok(NULL, ":")) != NULL) {
            /* finally get the processor set id; unlike atoi() this detects 
               a token that does not start with a number */
            char *end = NULL;
            long parsed = strtol(pset_id, &end, 10);

            if (end == pset_id || (long)(int)parsed != parsed) {
               shepherd_trace("do_core_binding: invalid psrset id \"%s\" in config file (binding)", pset_id);
               retval = -1;
            } else {
               processor_set_id = (int)parsed;
            }
         } else {
            shepherd_trace("do_core_binding: couldn't find the psrset id after \"psrset:\" in config file (binding)");
            retval = -1;
         }
      } else {
         shepherd_trace("do_core_binding: found string \"psrset:\" but no \":\" - almost impossible");
         retval = -1;
      }

      if (retval == 0) {
         if (processor_set_id == -1) {            
            /* processor_set_id == -1: special id which shows that no binding 
               is needed since this processor set would require (exactly) all 
               of the remaining cores. Creating such a processor set is not 
               possible because one processor must be left for the OS. But 
               the job is implicitly bound to the processors since it can 
               not use any other processor from the other processor sets. */
            shepherd_trace("do_core_binding: psrset not created since all remaining processors would be used");
            shepherd_trace("do_core_binding: binding is done implicitly");
         } else {
            /* start user rights (root) are required for creating processor sets */
            sge_switch2start_user();
            
            if (bind_shepherd_to_pset(processor_set_id) == false) {
               shepherd_trace("do_core_binding: couldn't bind to existing processor set!");
            } else {
               shepherd_trace("do_core_binding: successfully bound to existing processor set!");
            }
   
            /* switch back to admin user */
            sge_switch2admin_user();
         }
      }

   } else {  /* "psrset" is not in config file defined (or binding disabled) */
      shepherd_trace("do_core_binding: no processor set found in config file! do nothing");
      retval = -1;
   }

   shepherd_trace("do_core_binding: finishing");

   return retval;
}
コード例 #23
0
ファイル: sge_fileio.c プロジェクト: BlueBolt/BB_GridEngine
/****** shepherd/fileio/shepherd_write_usage_file() ***************************
*  NAME
*     shepherd_write_usage_file() -- write the job's usage record
*
*  SYNOPSIS
*     bool shepherd_write_usage_file(u_long32 wait_status, int exit_status,
*                                    int child_signal, u_long32 start_time,
*                                    u_long32 end_time, struct rusage *rusage)
*
*  FUNCTION
*     Creates (or truncates) the file "usage" in the current working 
*     directory and writes one "name=value" line per item: wait status, 
*     exit status, signal, start/end time, wallclock time and the members 
*     of the passed rusage structure.
*
*  INPUTS
*     u_long32 wait_status   - written as "wait_status"
*     int exit_status        - written as "exit_status"
*     int child_signal       - written as "signal"
*     u_long32 start_time    - written as "start_time"
*     u_long32 end_time      - written as "end_time"; end_time - start_time 
*                              is written as "ru_wallclock"
*     struct rusage *rusage  - resource usage to dump; must not be NULL
*
*  RESULT
*     bool - true if the file was written and closed successfully.
*            false if it could not be opened, written or closed; each of 
*            these failures is reported through shepherd_error(1, ...).
******************************************************************************/
bool
shepherd_write_usage_file(u_long32 wait_status, int exit_status,
                          int child_signal, u_long32 start_time,
                          u_long32 end_time, struct rusage *rusage)
{
   bool ret = true;
   const char *const filename = "usage";
   FILE *fp = NULL;

   shepherd_trace("writing usage file to \"usage\"");

   fp = fopen(filename, "w");
   if (fp != NULL) {
      /*
       * the wait status is returned by japi_wait()
       * see sge_reportL.h for bitmask and makro definition
       */
      FPRINTF((fp, "wait_status="sge_u32"\n", wait_status));
      FPRINTF((fp, "exit_status=%d\n", exit_status));
      FPRINTF((fp, "signal=%d\n", child_signal));

      FPRINTF((fp, "start_time=%d\n", (int) start_time));
      FPRINTF((fp, "end_time=%d\n", (int) end_time));
      FPRINTF((fp, "ru_wallclock="sge_u32"\n", (u_long32) end_time-start_time));
#if defined(NEC_ACCOUNTING_ENTRIES)
      /* Additional accounting information for NEC SX-4 SX-5 */
#if defined(NECSX4) || defined(NECSX5)
#if defined(NECSX4)
      FPRINTF((fp, "necsx_necsx4="sge_u32"\n", 1));
#elif defined(NECSX5)
      FPRINTF((fp, "necsx_necsx5="sge_u32"\n", 1));
#endif
      FPRINTF((fp, "necsx_base_prty="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_time_slice="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_num_procs="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_kcore_min="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_mean_size="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_maxmem_size="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_chars_trnsfd="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_blocks_rw="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_inst="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_vector_inst="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_vector_elmt="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_vec_exe="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_flops="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_conc_flops="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_fpec="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_cmcc="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_bccc="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_mt_open="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_io_blocks="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_multi_single="sge_u32"\n", 0));
      FPRINTF((fp, "necsx_max_nproc="sge_u32"\n", 0));
#endif
#endif

      /* user/system time are written as seconds with a fractional part */
      FPRINTF((fp, "ru_utime=%f\n", (double)rusage->ru_utime.tv_sec + (double)rusage->ru_utime.tv_usec / 1000000.0));
      FPRINTF((fp, "ru_stime=%f\n", (double)rusage->ru_stime.tv_sec + (double)rusage->ru_stime.tv_usec / 1000000.0));
      FPRINTF((fp, "ru_maxrss=%ld\n", rusage->ru_maxrss));
      FPRINTF((fp, "ru_ixrss=%ld\n", rusage->ru_ixrss));
#if defined(ultrix)
      FPRINTF((fp, "ru_ismrss=%ld\n", rusage->ru_ismrss));
#endif
      FPRINTF((fp, "ru_idrss=%ld\n", rusage->ru_idrss));
      FPRINTF((fp, "ru_isrss=%ld\n", rusage->ru_isrss));
      FPRINTF((fp, "ru_minflt=%ld\n", rusage->ru_minflt));
      FPRINTF((fp, "ru_majflt=%ld\n", rusage->ru_majflt));
      FPRINTF((fp, "ru_nswap=%ld\n", rusage->ru_nswap));
      FPRINTF((fp, "ru_inblock=%ld\n", rusage->ru_inblock));
      FPRINTF((fp, "ru_oublock=%ld\n", rusage->ru_oublock));
      FPRINTF((fp, "ru_msgsnd=%ld\n", rusage->ru_msgsnd));
      FPRINTF((fp, "ru_msgrcv=%ld\n", rusage->ru_msgrcv));
      FPRINTF((fp, "ru_nsignals=%ld\n", rusage->ru_nsignals));
      FPRINTF((fp, "ru_nvcsw=%ld\n", rusage->ru_nvcsw));
      FPRINTF((fp, "ru_nivcsw=%ld\n", rusage->ru_nivcsw));

      FCLOSE(fp);

   } else {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      ret = false;
   }
   return ret;
FPRINTF_ERROR:
   {
      /* a write failed while the stream is still open: release it, but 
       * keep the errno of the failed write for the message below */
      int write_errno = errno;

      fclose(fp);
      errno = write_errno;
   }
   /* fall through to the common error report */
FCLOSE_ERROR:
   /* reached directly only after a failed FCLOSE(); fp must not be 
    * closed a second time on that path */
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
コード例 #24
0
/****** shepherd/qrsh/write_to_qrsh() *****************************************
*  NAME
*     write_to_qrsh -- send a string to the qrsh control port
*
*  SYNOPSIS
*    int write_to_qrsh(const char *data);
*
*  FUNCTION
*     Writes the contents of <data> (including the terminating 0 byte) to 
*     an other (remote) process over a socket connection.
*     Host and port of the communication partner are read from the
*     configuration entry "qrsh_control_port" ("host:port").
*     A socket client connection is opened to the named host and port,
*     and the data is written.
*
*     The configuration value is not modified; the host part is copied 
*     into a local buffer.
*
*  INPUTS
*     data - null terminated string with data to write
*
*  RESULT
*     0, if function finishes correctly
*     1, if the config entry qrsh_control_port does not exist
*     2, if qrsh_control_port contains illegal data (no ':', host part too
*        long, or port not a number in the range 1..65535)
*     3, if opening the socket failed
*     4, if the hostname cannot be resolved
*     5, if connecting to the socket fails
*     6, if writing the data fails
******************************************************************************/
int write_to_qrsh(const char *data)
{
    /* local copy of the host part; 1025 bytes is far above the length of
     * any valid host name */
    char host[1025];
    const char *address = NULL;
    const char *colon = NULL;
    char *port_end = NULL;
    long port = 0;
    size_t host_len = 0;
    int sock = -1;
    const char *pending = NULL;
    size_t remaining = 0;
    struct sockaddr_in server;
    struct hostent *hp = NULL;

    shepherd_trace("write_to_qrsh - data = %s", data);

    /* read destination host and port from config */
    address = get_conf_val("qrsh_control_port");

    if (address == NULL) {
        shepherd_trace("config does not contain entry for qrsh_control_port");
        return 1;
    }

    shepherd_trace("write_to_qrsh - address = %s", address);

    colon = strchr(address, ':');
    if (colon == NULL) {
        shepherd_trace("illegal value for qrsh_control_port: \"%s\". "
                       "Should be host:port", address);
        return 2;
    }

    /* split into host and port without touching the config string, so
     * that a later lookup of qrsh_control_port still sees "host:port" */
    host_len = (size_t)(colon - address);
    port = strtol(colon + 1, &port_end, 10);
    if (host_len >= sizeof(host) || port_end == colon + 1 ||
        port < 1 || port > 65535) {
        shepherd_trace("illegal value for qrsh_control_port: \"%s\". "
                       "Should be host:port", address);
        return 2;
    }
    memcpy(host, address, host_len);
    host[host_len] = '\0';

    shepherd_trace("write_to_qrsh - host = %s, port = %d", host, (int)port);

    /* create socket. */
    sock = socket(AF_INET, SOCK_STREAM, 0);
    if (sock == -1) {
        shepherd_trace("error opening stream socket: %s", strerror(errno));
        return 3;
    }

    /*
    * gethostbyname returns a structure including the network address
    * of the specified host. Only an address that fits into sin_addr
    * can be used for the AF_INET socket created above.
    */
    hp = gethostbyname(host);
    if (hp == NULL || hp->h_addr_list[0] == NULL ||
        hp->h_length <= 0 ||
        (size_t)hp->h_length > sizeof(server.sin_addr)) {
        shepherd_trace("%s: unknown host", host);
        close(sock);
        return 4;
    }

    /* start from an all-zero address structure (clears the padding) */
    memset(&server, 0, sizeof(server));
    server.sin_family = AF_INET;
    memcpy(&server.sin_addr, hp->h_addr_list[0], (size_t)hp->h_length);
    server.sin_port = htons((unsigned short)port);

    if (connect(sock, (struct sockaddr *) &server, sizeof server) == -1) {
        shepherd_trace("error connecting stream socket: %s", strerror(errno));
        close(sock);
        return 5;
    }

    /* write data including the terminating 0 byte; write() may transfer
     * less than requested or be interrupted, so loop until all is sent */
    pending = data;
    remaining = strlen(data) + 1;
    while (remaining > 0) {
        ssize_t written = write(sock, pending, remaining);

        if (written < 0 && errno == EINTR) {
            continue;
        }
        if (written <= 0) {
            shepherd_trace("error writing data to qrsh_control_port");
            close(sock);
            return 6;
        }
        pending += written;
        remaining -= (size_t)written;
    }

    /* close connection */
    close(sock);
    return 0;
}
コード例 #25
0
ファイル: sge_pset.c プロジェクト: BlueBolt/BB_GridEngine
/****** shepherd/pset/set_processor_range() ***********************************
*  NAME
*     set_processor_range() -- sets processor range according to string 
*
*  SYNOPSIS
*     int set_processor_range(char *crange, 
*                             int proc_set_num, 
*                             char *err_str) 
*
*  FUNCTION
*     Sets processor range according to string specification.
*     The processor set number finally used will be stored in the file
*     "processor_set_number" located in the current working directory.
*
*     Format:
*        n|[n][-[m]],...  , n,m  being int >= 0.
*        no blanks are allowed in between (this is supposed to be 
*        handled by the queue configuration parsing routine)
*
*     On SOLARIS64/SOLARISAMD64 no processor set is created; the first 
*     token of crange is taken as the id of an existing processor set and 
*     the calling process is bound to it with pset_bind().
*
*  INPUTS
*     char *crange     - String specifier of the range. Will be 
*                        modified via strtok internally.
*     int proc_set_num - The base for a unique processor set number.
*                        This number is already supposed to be unique.
*                        for the job (currently the job_id).
*                        On ALPHA and Solaris it is replaced by the id of 
*                        the processor set that is actually used.
*     char *err_str    - The error message string to be used by the
*                        calling routine if the return value is != 
*                        PROC_SET_OK. Also used for trace messages 
*                        internally. Passed to invoked subroutines.
*                        Its size is not known here; the messages written 
*                        by this function are short and of bounded length.
*
*  RESULT
*     int - error state
*        PROC_SET_OK      - Ok
*        PROC_SET_ERROR   - A critical error occurred (processor set could 
*                           not be set up or bound, or the file 
*                           "processor_set_number" could not be written).
*        other values     - whatever range2proc_vec() returned (__sgi and 
*                           ALPHA only), presumably including 
*                           PROC_SET_WARNING - TODO confirm
******************************************************************************/
static int set_processor_range(char *crange, int proc_set_num, char *err_str) 
{
#if defined(__sgi) || defined(ALPHA) 
   int ret;
   sbv_t proc_vec;
#endif
   FILE *fp;

#if defined(__sgi) || defined(ALPHA)
   if ((ret=range2proc_vec(crange, &proc_vec, err_str)))
      return ret;
#endif

#if defined(ALPHA)
   /* It is not possible to bind processor #0 to other psets than pset #0
    * So if we get a pset with #0 contained in the range we do nothing. 
    * The process gets not bound to a processor but it is guaranteed
    * that no other job will get processor #0 exclusively. It is upon 
    * the administrator to prevent overlapping of the psets in different
    * queues 
    */
   if (!(proc_vec & 1)) { /* processor #0 not contained */
      if ((proc_set_num = create_pset())<0) {
         print_pset_error(proc_set_num); /* prints error to stdout */
         shepherd_trace("MPPS_CREATE: failed to setup a new processor set");
         return PROC_SET_ERROR;
      }

      if (assign_cpu_to_pset(proc_vec, proc_set_num, 0)<0) {
         print_pset_error(proc_set_num); /* prints error to stdout */
         shepherd_trace("MPPS_CREATE: failed assigning processors to processor set");
         return PROC_SET_ERROR;
      }
   } else {
      /* use default pset (id #0) */
      proc_set_num = 0;
   }
#elif defined(SOLARIS64) || defined(SOLARISAMD64)
   /*
    * We do not create a processor set here
    * The system administrator is responsible to do this
    * We read one id from crange. This is the processor-set id we should use.
    */
   if (crange) {
      char *tok, *next;

      if ((tok=strtok(crange, " \t\n"))) {
         proc_set_num = (int) strtol(tok, &next, 10);
         if (next == tok) {
            sprintf(err_str, "wrong processor set id format: %20.20s", crange);
            /* err_str contains configuration text: never use it as the 
             * format string itself */
            shepherd_trace("%s", err_str);
            return PROC_SET_ERROR;
         }
      } 
   }
#endif

   /* dump to file for later use */
   if ((fp = fopen("processor_set_number","w"))) {
      if (fprintf(fp,"%d\n",proc_set_num) < 0) {
         /* the number could not be written: do not pretend success */
         fclose(fp);
         shepherd_trace("MPPS_CREATE: failed creating file processor_set_number");
         return PROC_SET_ERROR;
      }
      FCLOSE(fp);
   } else {
      shepherd_trace("MPPS_CREATE: failed creating file processor_set_number");
      return PROC_SET_ERROR;
   }

#if defined(ALPHA)
   /* Now let's assign ourselves to the previously created processor set */
   if (proc_set_num) {
      pid_t pid_list[1];
      pid_list[0] = getpid();
      if (assign_pid_to_pset(pid_list, 1, proc_set_num, PSET_EXCLUSIVE)<0) {
         print_pset_error(proc_set_num); /* prints error to stdout */
         shepherd_trace("MPPS_CREATE: failed assigning processors to processor set");
         return PROC_SET_ERROR;
      }
   }
#elif defined(SOLARIS64) || defined(SOLARISAMD64)
   if (proc_set_num) {
      sprintf(err_str,"pset_bind: try to use processorset %d", proc_set_num);
      shepherd_trace("%s", err_str);
      if (pset_bind(proc_set_num, P_PID, P_MYID, NULL)) {
         /* every failure is critical; errno only selects the trace text */
         switch (errno) {
         case EFAULT:
            shepherd_trace("pset_bind: The location pointed to by opset was not"
               " NULL and not writable by the user");
            break;
         case EINVAL:
            shepherd_trace("pset_bind: invalid processor set was specified");
            break;
         case EPERM:
            shepherd_trace("pset_bind: The effective user of the calling "
               "process is not super-user");
            break;
         default:
            sprintf(err_str,"pset_bind: unexpected error - errno=%d", errno);
            shepherd_trace("%s", err_str);
            break;
         }
         return PROC_SET_ERROR;
      }
   }
#endif

   return PROC_SET_OK;
FCLOSE_ERROR:
   shepherd_trace("MPPS_CREATE: failed creating file processor_set_number");
   return PROC_SET_ERROR;
}
コード例 #26
0
ファイル: shepherd_binding.c プロジェクト: HPCKP/gridengine
/****** shepherd_binding/binding_set_linear_linux() ***************************************
*  NAME
*     binding_set_linear_linux() -- Bind current process linear to chunk of cores. 
*
*  SYNOPSIS
*     bool binding_set_linear(int first_socket, int first_core, int 
*     amount_of_cores, int offset) 
*
*  FUNCTION
*     Binds current process (shepherd) to a set of cores. All processes 
*     started by the current process are inheriting the core binding (Linux).
*     
*     The core binding is done in a linear manner, that means that 
*     the process is bound to 'amount_of_cores' cores using one core 
*     after another starting at socket 'first_socket' (usually 0) and 
*     core = 'first_core' (usually 0) + 'offset'. If the core number 
*     is higher than the number of cores which are provided by socket 
*     'first_socket' then the next socket is taken (the core number 
*      defines how many cores are skiped).
*
*  INPUTS
*     int first_socket    - The first socket (starting at 0) to bind to. 
*     int first_core      - The first core to bind. 
*     int amount_of_cores - The amount of cores to bind to. 
*     int offset          - The user specified core number offset. 
*     binding_type_t type - The type of binding ONLY FOR EXECD ( set | env | pe )
*                           
*  RESULT
*     bool - true if binding for current process was done, false if not
*
*  NOTES
*     MT-NOTE: binding_set_linear() is not MT safe 
*
*******************************************************************************/
static bool binding_set_linear_linux(int first_socket, int first_core, 
               int amount_of_cores, int offset, const binding_type_t type)
{

   /* sets bitmask in a linear manner        */ 
   /* first core is on exclusive host 0      */ 
   /* first core could be set from scheduler */ 
   /* offset is the first core to start with (make sense only with exclusive host) */
   dstring error = DSTRING_INIT;

   if (_has_core_binding(&error) == true) {

      sge_dstring_clear(&error);
      
      /* bitmask for processors to turn on and off */
      plpa_cpu_set_t cpuset;
      /* turn off all processors */
      PLPA_CPU_ZERO(&cpuset);
         
      sge_dstring_free(&error);
         
      if (_has_topology_information()) {
         /* amount of cores set in processor binding mask */ 
         int cores_set;
         /* next socket to use */
         int next_socket = first_socket;
         /* the amount of cores of the next socket */
         int socket_amount_of_cores;
         /* next core to use */
         int next_core = first_core + offset;
         /* all the processor ids selected for the mask */
         int* proc_id = NULL; 
         /* size of proc_id array */
         int proc_id_size = 0;

         /* maximal amount of sockets on this system */
         int max_amount_of_sockets = get_amount_of_plpa_sockets();

         /* strategy: go to the first_socket and the first_core + offset and 
            fill up socket and go to the next one. */ 
               
         /* TODO maybe better to search for using a core exclusively? */
            
         while (get_amount_of_plpa_cores(next_socket) <= next_core) {
            /* TODO which kind of warning when first socket does not offer this? */
            /* move on to next socket - could be that we have to deal only with cores 
               instead of <socket><core> tuples */
            next_core -= get_amount_of_plpa_cores(next_socket); 
            next_socket++;
            if (next_socket >= max_amount_of_sockets) {
               /* we are out of sockets - we do nothing */
               return false;
            }
         }  
         
         add_proc_ids_linux(next_socket, next_core, &proc_id, &proc_id_size);

         /* collect the other processor ids with the strategy */
         for (cores_set = 1; cores_set < amount_of_cores; cores_set++) {
            next_core++;
            /* jump to next socket when it is needed */
            /* maybe the next socket could offer 0 cores (I can' see when, 
               but just to be sure) */
            while ((socket_amount_of_cores = get_amount_of_plpa_cores(next_socket)) 
                        <= next_core) {
               next_socket++;
               next_core = next_core - socket_amount_of_cores;
               if (next_socket >= max_amount_of_sockets) {
                  /* we are out of sockets - we do nothing */
                  sge_free(&proc_id);
                  return false;
               }
            }
            /* get processor ids */
            add_proc_ids_linux(next_socket, next_core, &proc_id, &proc_id_size);
         }
            
         /* set the mask for all processor ids */
         set_processor_binding_mask(&cpuset, proc_id, proc_id_size);
            
         /* check what to do with the processor ids (set, env or pe) */
         if (type == BINDING_TYPE_PE) {
               
            /* is done outside */

         } else if (type == BINDING_TYPE_ENV) {
               
            /* set the environment variable                    */
            /* this does not show up in "environment" file !!! */
            if (create_binding_env_linux(proc_id, proc_id_size) == true) {
               shepherd_trace("binding_set_linear_linux: SGE_BINDING env var created");
            } else {
               shepherd_trace("binding_set_linear_linux: problems while creating SGE_BINDING env");
            }
             
         } else {

             /* bind SET process to mask */ 
            if (bind_process_to_mask((pid_t) 0, cpuset) == false) {
               /* there was an error while binding */ 
               sge_free(&proc_id);
               return false;
            }
         }

         sge_free(&proc_id);

      } else {
            
         /* TODO DG strategy without topology information but with 
            working library? */
         shepherd_trace("binding_set_linear_linux: no information about topology");
         return false;
      }
         

   } else {

      shepherd_trace("binding_set_linear_linux: PLPA binding not supported: %s", 
                        sge_dstring_get_string(&error));

      sge_dstring_free(&error);
   }

   return true;
}
コード例 #27
0
ファイル: shepherd_binding.c プロジェクト: HPCKP/gridengine
/****** shepherd_binding/binding_set_striding_linux() *************************************
*  NAME
*     binding_set_striding_linux() -- Binds current process to cores.  
*
*  SYNOPSIS
*     bool binding_set_striding_linux(int first_socket, int first_core, int 
*     amount_of_cores, int offset, int stepsize, const binding_type_t type) 
*
*  FUNCTION
*     Performs a core binding for the calling process according to the 
*     'striding' strategy. The first core used is specified by first_socket
*     (beginning with 0) and first_core (beginning with 0). If first_core is 
*     greater than available cores on first_socket, the next socket is examined 
*     and first_core is reduced by the skipped cores. If the first_core could 
*     not be found on system (because it was too high) no binding will be done.
*     
*     Once the first core is chosen the next one is found by adding the step 
*     size 'n' to the current core number. If the socket does not have that 
*     core (because it was the last core of the socket for example) the next 
*     socket is examined.
*
*     If the system is out of cores and there are still some cores to select 
*     (because of the amount_of_cores parameter) no core binding will be performed.
*
*     The first core is always selected, even when amount_of_cores is smaller 
*     than 1.
*    
*  INPUTS
*     int first_socket    - first socket to begin with  
*     int first_core      - first core to start with  
*     int amount_of_cores - total amount of cores to be used 
*     int offset          - core offset for first core (increments first core used) 
*     int stepsize        - step size
*     int type            - type of binding (set or env or pe)
*
*  RESULT
*     bool - Returns true only if the process was actually bound to the mask 
*            (i.e. type is neither 'pe' nor 'env' and the bind succeeded), 
*            otherwise false.
*
*  NOTES
*     MT-NOTE: binding_set_striding() is MT safe 
*
*******************************************************************************/
bool binding_set_striding_linux(int first_socket, int first_core, int amount_of_cores,
                          int offset, int stepsize, const binding_type_t type)
{
   /* n := take every n-th core */ 
   bool bound = false;

   dstring error = DSTRING_INIT;

   if (_has_core_binding(&error) == true) {

      sge_dstring_free(&error);

         /* bitmask for processors to turn on and off */
         plpa_cpu_set_t cpuset;  
         /* turn off all processors */
         PLPA_CPU_ZERO(&cpuset);

         /* when library offers architecture: 
            - get virtual processor ids in the following manner:
              * on socket "first_socket" choose core number "first_core + offset"
              * then add n: if core is not available go to next socket
              * ...
         */
         if (_has_topology_information()) {
            /* amount of cores set in processor binding mask */ 
            int cores_set = 0;
            /* next socket to use */
            int next_socket = first_socket;
            /* next core to use */
            int next_core = first_core + offset;
            /* all the processor ids selected for the mask */
            int* proc_id = NULL; 
            int proc_id_size = 0;
            /* maximal amount of sockets on this system */
            int max_amount_of_sockets = get_amount_of_plpa_sockets();
            
            /* check if we are already out of range */
            if (next_socket >= max_amount_of_sockets) {
               shepherd_trace("binding_set_striding_linux: already out of sockets");
               return false;
            }   

            while (get_amount_of_plpa_cores(next_socket) <= next_core) {
               /* move on to next socket - could be that we have to deal only with cores 
                  instead of <socket><core> tuples */
               next_core -= get_amount_of_plpa_cores(next_socket); 
               next_socket++;
               if (next_socket >= max_amount_of_sockets) {
                  /* we are out of sockets - we do nothing */
                  shepherd_trace("binding_set_striding_linux: first core: out of sockets");
                  return false;
               }
            }  
            
            /* remember the processor ids of the first core; a failure here must 
               not lead to a binding with an incomplete mask */
            if (add_proc_ids_linux(next_socket, next_core, &proc_id, &proc_id_size) != true) {
               shepherd_trace("binding_set_striding_linux: could not get processor ids of first core");
               sge_free(&proc_id);
               return false;
            }
            
            /* collect the rest of the processor ids 
               (next_socket is always < max_amount_of_sockets at this point: the 
               socket search above and below returns as soon as it runs out) */ 
            for (cores_set = 1; cores_set < amount_of_cores; cores_set++) {
               /* calculate next_core number */ 
               next_core += stepsize;

               while (get_amount_of_plpa_cores(next_socket) <= next_core) {
                  /* move on to next socket - could be that we have to deal only with cores 
                     instead of <socket><core> tuples */
                  next_core -= get_amount_of_plpa_cores(next_socket); 
                  next_socket++;
                  if (next_socket >= max_amount_of_sockets) {
                     /* we are out of sockets - we do nothing */
                     shepherd_trace("binding_set_striding_linux: out of sockets!");
                     sge_free(&proc_id);
                     return false;
                  }
               }    

               /* add processor ids for core */
               if (add_proc_ids_linux(next_socket, next_core, &proc_id, &proc_id_size) != true) {
                  shepherd_trace("binding_set_striding_linux: could not get processor ids of core");
                  sge_free(&proc_id);
                  return false;
               }
                
            } /* collecting processor ids */

            /* set the mask for all processor ids */ 
            set_processor_binding_mask(&cpuset, proc_id, proc_id_size);
           
            if (type == BINDING_TYPE_PE) {
            
               /* rankfile is created: do nothing */

            } else if (type == BINDING_TYPE_ENV) {

               /* set the environment variable */
               if (create_binding_env_linux(proc_id, proc_id_size) == true) {
                  shepherd_trace("binding_set_striding_linux: SGE_BINDING env var created");
               } else {
                  shepherd_trace("binding_set_striding_linux: problems while creating SGE_BINDING env");
               }

            } else {
               
               /* bind process to mask */ 
               if (bind_process_to_mask((pid_t) 0, cpuset) == true) {
                  /* the calling process is now bound to the selected cores */ 
                  bound = true;
               } else {
                  /* couldn't be bound: report it, false is returned */
                  shepherd_trace("binding_set_striding_linux: bind_process_to_mask was not successful");
               }
            }
         
            sge_free(&proc_id);
            
         } else {
            /* setting bitmask without topology information which could 
               not be right? */
            shepherd_trace("binding_set_striding_linux: bitmask without topology information");
            return false;
         }

   } else {
      /* has no core binding feature */
      sge_dstring_free(&error);
      
      return false;
   }
   
   
   return bound;
}
コード例 #28
0
ファイル: shepherd_binding.c プロジェクト: HPCKP/gridengine
/****** shepherd_binding/binding_explicit() *****************************************
*  NAME
*     binding_explicit() -- Binds current process to specified CPU cores. 
*
*  SYNOPSIS
*     static bool binding_explicit(const int* list_of_sockets, const int samount, 
*     const int* list_of_cores, const int camount, const binding_type_t type) 
*
*  FUNCTION
*     Binds the current process to the cores specified by a <socket>,<core>
*     tuple. The tuple is given by a list of sockets and a list of cores. 
*     The elements on the same position of these lists are reflecting 
*     a tuple. Therefore the length of the lists must be the same.
*
*     Binding is currently done on Linux hosts only where the machine topology 
*     can be retrieved with PLPA library. It also does require this library.
*
*  INPUTS
*     int* list_of_sockets - List of sockets in the same order as list of cores. 
*     int samount          - Length of the list of sockets. 
*     int* list_of_cores   - List of cores in the same order as list of sockets. 
*     int camount          - Length of the list of cores. 
*     int type             - Type of binding ( set | env | pe ).
*
*  RESULT
*     bool - true when the current process was bound like specified with the 
*            input parameter. false is returned when the lists differ in 
*            length, when one of the lists is NULL, when a processor id could 
*            not be determined, when the bind failed, and also for the binding 
*            types 'env' and 'pe' (no bind is performed for them).
*
*  NOTES
*     MT-NOTE: binding_explicit() is not MT safe 
*
*******************************************************************************/
static bool binding_explicit(const int* list_of_sockets, const int samount, 
   const int* list_of_cores, const int camount, const binding_type_t type)
{
   /* return value: successful bound or not */ 
   bool bound = false;

   /* check if we have exactly the same amount of sockets as cores */
   if (camount != samount) {
      shepherd_trace("binding_explicit: bug: amount of sockets != amount of cores");
      return false;
   }

   /* both lists are dereferenced below: refuse to continue without them */
   if (list_of_sockets == NULL || list_of_cores == NULL) {
      shepherd_trace("binding_explicit: wrong input values");
      return false;
   }   
   
   /* do only on linux when we have core binding feature in kernel */
   if (has_core_binding() == true) {
      
      if (_has_topology_information()) {
         /* bitmask for processors to turn on and off */
         plpa_cpu_set_t cpuset;  
         /* turn off all processors */
         PLPA_CPU_ZERO(&cpuset);
         /* the internal processor ids selected for the binding mask */
         int* proc_id = NULL;
         int proc_id_size = 0;

         /* processor id counter */
         int pr_id_ctr;

         /* Fetch for each socket,core tuple the processor id. 
            If this is not possible for one do nothing and return false. */ 

         /* go through all socket,core tuples and get the processor id */
         for (pr_id_ctr = 0; pr_id_ctr < camount; pr_id_ctr++) { 

            /* get the OS internal processor ids */ 
            if (add_proc_ids_linux(list_of_sockets[pr_id_ctr], list_of_cores[pr_id_ctr], 
                                    &proc_id, &proc_id_size) != true) {
               sge_free(&proc_id);
               return false;
            }                       

         }
         /* generate the core binding mask out of the processor id array */
         set_processor_binding_mask(&cpuset, proc_id, proc_id_size); 

         if (type == BINDING_TYPE_PE) {
            
            /* rankfile is created */

         } else if (type == BINDING_TYPE_ENV) {
            /* set the environment variable */
            if (create_binding_env_linux(proc_id, proc_id_size) == true) {
               shepherd_trace("binding_explicit: SGE_BINDING env var created");
            } else {
               shepherd_trace("binding_explicit: problems while creating SGE_BINDING env");
            }
         } else {
            /* do the core binding for the current process with the mask */
            if (bind_process_to_mask((pid_t) 0, cpuset) == true) {
               /* the calling process is now bound to the selected cores */ 
               bound = true;
            } else {
               /* couldn't be bound return false */
               shepherd_trace("binding_explicit: bind_process_to_mask was not successful");
            }   
         }

         sge_free(&proc_id);
          
      } else {
         /* has no topology information */
         shepherd_trace("binding_explicit: Linux does not offer topology information");
      }  

   } else {
      /* has no core binding ability */
      shepherd_trace("binding_explicit: host does not support core binding");
   }   

   return bound;
}
コード例 #29
0
/****** shepherd/qrsh/qlogin_starter() ****************************************
*
*  NAME
*     qlogin_starter -- start a protocol daemon on an accepted connection
*
*  SYNOPSIS
*     #include "qlogin_starter.h"
*     int qlogin_starter(const char *cwd, char *daemon, char** env);
*
*  FUNCTION
*     The function is called from shepherd to start a protocol daemon
*     like telnetd, rshd or rlogind.
*     The mechanism used to call these daemons is that of inetd:
*        - a socket is created (server side, any free port is assigned
*          by the operating system)
*        - qlogin_starter waits for someone to connect to this socket
*        - the socket file handles are redirected to stdin, stdout
*          and stderr
*        - the daemon process is started
*     Additionally to the inetd mechanism, the port number and some
*     other information is sent to the qrsh process that initiated
*     (over qmaster, schedd, execd, shepherd) the qlogin_starter call.
*
*  INPUTS
*     cwd    - the current working directory (the active_jobs directory)
*     daemon - name and path of the daemon to start, followed by its
*              blank separated arguments. The string is modified (split
*              in place with strtok()). At most 19 blank separated
*              tokens (daemon path + arguments) are supported.
*     env    - environment passed to execve() for the daemon
*
*  RESULT
*     on success, the function will not return (it exec's)
*      4, if there is a problem with permissions
*      5, if a socket cannot be allocated
*      6, if a socket bind fails
*      7, if socket name (port) cannot be determined
*      8, if listening on the socket fails
*      9, if environment (to be passed to qrsh) cannot be read
*     10, if sending information to qrsh fails
*     11, if nobody connects to the socket within one minute
*     12, if the acception of a connecting client fails
*     13, if the execution of the daemon fails (this includes a daemon
*         command line that is empty or has too many arguments)
******************************************************************************/
int qlogin_starter(const char *cwd, char *daemon, char** env)
{
    /* capacity of the argument vector including the terminating NULL */
    enum { QLOGIN_MAX_ARGS = 20 };

    int ret;
    int port;
    int fd;
    int maxfd;
    int sockfd;
    int on = 1;
    int sso = 1;
    int newsfd;
    fd_set fds;
    struct sockaddr_in serv_addr;
    struct timeval timeout;
    char buffer[2048];
    char *args[QLOGIN_MAX_ARGS]; /* JG: TODO: should be dynamically allocated */
    int argc = 0;
    const char *sge_root = NULL;
    const char *arch = NULL;

#if defined(IRIX65) || defined(INTERIX) || defined(DARWIN6) || defined(ALPHA5) || defined(HP1164)
    int length;
    int len;
#else
    socklen_t length;
    socklen_t len;
#endif

    len = sizeof(serv_addr);

    /* must be root because we must access /dev/something */
    if( setgid(SGE_SUPERUSER_GID) ||
            setuid(SGE_SUPERUSER_UID) ||
            setegid(SGE_SUPERUSER_GID) ||
            seteuid(SGE_SUPERUSER_UID)) {
        shepherd_trace("cannot change uid/gid\n");
        return 4;
    }
    shepherd_trace("uid = "uid_t_fmt", euid = "uid_t_fmt", gid = "gid_t_fmt
                   ", egid = "gid_t_fmt, getuid(), geteuid(), getgid(), getegid());

    /* socket stuff from here */
    sockfd = socket(AF_INET, SOCK_STREAM, 0);

    if (sockfd == -1) {
        shepherd_trace("cannot open socket.");
        return 5;
    }
    shepherd_trace("using sfd %d", sockfd);

    /* best effort: a failure here is not fatal */
    setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, (char *) &on, sizeof(on));

    /* bind an address to any socket */
    memset((char *) &serv_addr, 0, sizeof(serv_addr));
    serv_addr.sin_port = 0;
    serv_addr.sin_family = AF_INET;
    serv_addr.sin_addr.s_addr = INADDR_ANY;
    ret = bind(sockfd, (struct sockaddr *) &serv_addr, sizeof(serv_addr));
    if (ret != 0) {
        shepherd_trace("cannot bind socket: %s", strerror(errno));
        shutdown(sockfd, 2);
        close(sockfd);
        return 6;
    }

    /* find out assigned port number and pass it to caller */
    length = sizeof(serv_addr);
    if (getsockname(sockfd,(struct sockaddr *) &serv_addr, &length) == -1) {
        shepherd_trace("getting socket name failed: %s", strerror(errno));
        shutdown(sockfd, 2);
        close(sockfd);
        return 7;
    }

    /* listen on socket - make connections be accepted */
    if (listen(sockfd, 1) != 0) {
        shepherd_trace("listen failed: %s", strerror(errno));
        shutdown(sockfd, 2);
        close(sockfd);
        return 8;
    }

    /* send necessary info to qrsh: port + utilbin directory + active job
     * directory
     */
    port = ntohs(serv_addr.sin_port);
    shepherd_trace("bound to port %d", port);

    sge_root = sge_get_root_dir(0, NULL, 0, 1);
    arch = sge_get_arch();

    if (sge_root == NULL || arch == NULL) {
        shepherd_trace("reading environment SGE_ROOT and ARC failed");
        shutdown(sockfd, 2);
        close(sockfd);
        return 9;
    }

    snprintf(buffer, sizeof(buffer), "0:%d:%s/utilbin/%s:%s:%s",
             port, sge_root, arch, cwd, get_conf_val("host"));

    if (write_to_qrsh(buffer) != 0) {
        shepherd_trace("communication with qrsh failed");
        shutdown(sockfd, 2);
        close(sockfd);
        return 10;
    }

    /* wait for connection */
    shepherd_trace("waiting for connection.");
    /* use a reasonable timeout (60 seconds) to prevent hanging here forever */
    FD_ZERO(&fds);
    FD_SET(sockfd, &fds);
    timeout.tv_sec = 60;
    timeout.tv_usec = 0;
    if (select(sockfd+1, &fds, NULL, NULL, &timeout) < 1) {
        shepherd_trace("nobody connected to the socket");
        shutdown(sockfd, 2);
        close(sockfd);
        return 11;
    }

    /* accept connection */
    newsfd = accept(sockfd, (struct sockaddr *)(&serv_addr), &len);
    if (newsfd == -1) {
        shepherd_trace("error when accepting socket conection");
        shutdown(sockfd, 2);
        close(sockfd);
        return 12;
    }
    shepherd_trace("accepted connection on fd %d", newsfd);

    /* now we have a connection and do no longer need the "well known" port
     * free this resource.
     */
    shutdown(sockfd, 2);
    close(sockfd);

    /* don't close on exec */
    fcntl( newsfd, F_SETFD, 0 );

    /* speed up ;-) */
    setsockopt(newsfd, IPPROTO_TCP, TCP_NODELAY, (const char *) &sso, sizeof(int));

    /* use this fd as stdin,out,err */
    dup2( newsfd, 0 );
    dup2( newsfd, 1 );
    dup2( newsfd, 2 );

    /* close all the rest */
#ifndef WIN32NATIVE
    maxfd = sysconf(_SC_OPEN_MAX);
#else /* WIN32NATIVE */
    maxfd = FD_SETSIZE;
    /* detect maximal number of fds under NT/W2000 (env: Files)*/
#endif /* WIN32NATIVE */

    /* we do not use any FD_SET call it is ok to use _SC_OPEN_MAX */
    for (fd=3; fd<maxfd; fd++) {
        close(fd);
    }

    shepherd_trace("daemon to start: |%s|", daemon);

    /* split daemon commandline into single arguments */
    /* JG: TODO: might contain quoted arguments containing spaces
     *           make function to split or use an already existing one
     */
    args[argc] = strtok(daemon, " ");
    while (args[argc] != NULL) {
        if (argc + 1 >= QLOGIN_MAX_ARGS) {
            /* No room left for another token plus the terminating NULL.
             * Starting the daemon with a silently truncated command line
             * would be wrong, so this is handled like a failed exec.
             */
            shutdown(newsfd, 2);
            close(newsfd);
            return 13;
        }
        args[++argc] = strtok(NULL, " ");
    }

    /* that's it. (an empty command line cannot be executed at all) */
    if (args[0] != NULL) {
        execve(args[0], args, env);
    }

    /* oh oh, exec failed */
    /* no way to tell anyone, because all FDs are closed */
    /* last chance -> tell parent process */
    shutdown(newsfd, 2);
    close(newsfd);
    return 13;
}
コード例 #30
0
ファイル: sge_pset.c プロジェクト: BlueBolt/BB_GridEngine
/****** shepherd/pset/range2proc_vec() ****************************************
*  NAME
*     range2proc_vec() -- Computes bit vector (processor set spec.)
*
*  SYNOPSIS
*     static int range2proc_vec(char *crange, sbv_t *proc_vec, char *err_str) 
*
*  FUNCTION
*     Computes bit vector with bits set corresponding to string 
*     specification of processor range. 
*
*  INPUTS
*     char *crange    - String specifier of the range. Will be modified
*                       internally (it is tokenized with strtok()). Format:
*                          n|[n][-[m]],...  , n,m  being int >= 0.
*                          no blanks are allowed in between 
*     sbv_t *proc_vec - Output: a bit vector of type sbv_t with all bits set
*                       contained in the range description and all
*                       other bits zero. 
*     char *err_str   - The error message string to be used by the
*                       calling routine if return value != PROC_SET_OK.
*                       Also used for trace messages internally. 
*
*  RESULT
*     static int - Error state
*        PROC_SET_OK    - Ok
*        PROC_SET_ERROR - Invalid range value in range description. This
*                         includes processor numbers that are not smaller
*                         than sge_nprocs() or that do not fit into sbv_t.
******************************************************************************/
static int range2proc_vec(char *crange, sbv_t *proc_vec, char *err_str) 
{
   char *tok, *next, *p=crange;
   int min, max, i;
   int dash_used;
   int sbvlen;
   /* highest bit index that can be represented in sbv_t (8 bits per byte) */
   const int sbv_max_bit = (int) (sizeof(sbv_t) * 8) - 1;

   *proc_vec = (sbv_t) 0;

   /* compute max number of processors and thus significant length of
    * proc_vec
    */
   sbvlen = sge_nprocs() - 1; /* LSB corresponds to proc. 0 */

   /* never accept a processor number that would shift beyond the width of
    * sbv_t - such a shift is undefined behavior
    */
   if (sbvlen > sbv_max_bit) {
      sbvlen = sbv_max_bit;
   }

   /* loop through range string with "," as token delimiter
    * Set processor vector for each token = range definition element.
    */
   while ((tok=strtok(p,","))) {
      /* all further strtok() calls continue on the same string */
      if (p) p=NULL;
      
      /* for each token parse range, i.e. find
       * whether min or max value is set and whether a "-" sign
       * was used
       */
      min = -1;
      max = -1;
      dash_used = 0;
      while (*tok) {
         next = NULL;
         if (*tok == '-') {
            dash_used = 1;
            if (min == -1)
               min = 0;
         } else { /* should be a number */
            if (min == -1 && !dash_used ) {
               min = (int) strtol(tok, &next, 10);
               if (next == tok || min < 0 || min > sbvlen) {
                  sprintf(err_str, "range2proc_vec: wrong processor range format: %20.20s", crange);
                  /* err_str contains configured text: never use it as format */
                  shepherd_trace("%s", err_str);
                  return PROC_SET_ERROR;
               }
            } else if (max == -1 && dash_used ) {
               max = (int) strtol(tok, &next, 10);
               if (next == tok || max < 0 || max > sbvlen) {
                  sprintf(err_str, "range2proc_vec: wrong processor range format: %20.20s", crange);
                  /* err_str contains configured text: never use it as format */
                  shepherd_trace("%s", err_str);
                  return PROC_SET_ERROR;
               }
            }
         }

         /* proceed either by one char in case of a "-" or by the
          * width of the number field
          */
         if (next)
            tok = next;
         else
            tok++;
      }

      /* fill out full range specification "n-m" according to findings */
      if (!dash_used )
         max = min;
      else {
         if (min == -1) min = 0;
         if (max == -1) max = sbvlen;
      }

      /* set processor vector as defined by range specification */
      for(i=min; i<=max; i++)
         *proc_vec |= ((sbv_t) 1) << i;
   }

   return PROC_SET_OK;
}