Example #1
0
/*
 * Read the first exit status from the file "exit_status" in the current
 * working directory.
 *
 * return_code - receives the parsed integer. If the file can be opened but
 *               no integer can be parsed, it is set to
 *               ESSTATE_NO_EXITSTATUS.
 *
 * Returns true when exactly one integer was parsed, false when the file
 * could not be opened, parsed or closed. Open and close failures are
 * reported through shepherd_error(), parse failures through
 * shepherd_trace().
 */
bool 
shepherd_read_exit_status_file(int *return_code)
{
   bool ret = true;
   FILE *fp = NULL;
   const char *const filename = "exit_status";

   fp = fopen(filename, "r");
   if (fp != NULL) {
      /* retrieve first exit status from exit status file */
      int arguments = fscanf(fp, "%d\n", return_code);

      if (arguments != 1) {
         shepherd_trace("could not read exit_status file");
         *return_code = ESSTATE_NO_EXITSTATUS;
         ret = false;
      }
      /*
       * Close only a stream that was really opened: passing a NULL stream
       * to fclose() is undefined behavior.
       * NOTE(review): FCLOSE presumably jumps to FCLOSE_ERROR on failure.
       */
      FCLOSE(fp);
   } else {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      ret = false;
   }
   return ret;
FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
Example #2
0
/*
 * Read the processor set number from the file "processor_set_number" in
 * the current working directory.
 *
 * proc_set - receives the parsed integer. If the file can be opened but no
 *            integer can be parsed, it is set to 0.
 *
 * Returns true when exactly one integer was parsed, false when the file
 * could not be opened, parsed or closed. Open and close failures are
 * reported through shepherd_error(), parse failures through
 * shepherd_trace().
 */
bool 
shepherd_read_processor_set_number_file(int *proc_set)
{
   bool ret = true;
   FILE *fp = NULL;
   const char *const filename = "processor_set_number";

   fp = fopen(filename, "r");
   if (fp != NULL) {
      int arguments = fscanf(fp, "%d", proc_set);

      if (arguments != 1) {
         shepherd_trace("could not read processor_set_number file");
         *proc_set = 0;
         ret = false;
      } 
      /*
       * Close only a stream that was really opened: passing a NULL stream
       * to fclose() is undefined behavior.
       * NOTE(review): FCLOSE presumably jumps to FCLOSE_ERROR on failure.
       */
      FCLOSE(fp);
   } else {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      ret = false;
   }
   return ret;
FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
Example #3
0
/*
 * Set up the processor set for the job on platforms that support it
 * (__sgi, ALPHA, SOLARIS64, SOLARISAMD64); on all other platforms this
 * function does nothing.
 *
 * Nothing is done either when the "processors" configuration value equals
 * "UNDEFINED" (compared case-insensitively). Otherwise the function
 * switches to the start user, calls set_processor_range() with the
 * "processors" value and the numeric "job_id", and switches back to the
 * admin user. A PROC_SET_WARNING result is only traced; any other failure
 * sets shepherd_state to SSTATE_PROCSET_NOTSET and is reported through
 * shepherd_error().
 */
void sge_pset_create_processor_set(void) 
{
#if defined(__sgi) || defined(ALPHA) || defined(SOLARIS64) || defined(SOLARISAMD64)
   char err_str[2*SGE_PATH_MAX+128];

   /* SGI IRIX processor set stuff */
   if (strcasecmp("UNDEFINED",get_conf_val("processors"))) {
      int ret;

      errno = 0;
      if (sge_switch2start_user()) {
         shepherd_trace("can't switch user in sge_pset_create_processor_set");
         shepherd_state = SSTATE_PROCSET_NOTSET;
         /*
          * shepherd_error() is called printf-style elsewhere in this file,
          * so text that is not a literal must go through "%s" to keep a
          * stray '%' from being interpreted as a conversion.
          */
         shepherd_error(1, "%s", strerror(errno));
         return;
      }
      if ((ret=set_processor_range(get_conf_val("processors"),
                 (int) strtol(get_conf_val("job_id"), NULL, 10),
                 err_str)) != PROC_SET_OK) {
         sge_switch2admin_user();
         if (ret == PROC_SET_WARNING) /* not critical - e.g. not root */
            shepherd_trace("warning: processor set not set in set_processor_range");
         else { /* critical --> use err_str to indicate error */
            shepherd_trace("critical error in set_processor_range - bailing out");
            shepherd_state = SSTATE_PROCSET_NOTSET;
            /* err_str is filled by set_processor_range(): never use it as format */
            shepherd_error(1, "%s", err_str);
         }
      } else {
         sge_switch2admin_user();
      }
   }
#endif

}
Example #4
0
/*
 * Release the processor set of the job on platforms that support it
 * (__sgi, ALPHA, SOLARIS64, SOLARISAMD64); on all other platforms this
 * function does nothing.
 *
 * Nothing is done either when the "processors" configuration value equals
 * "UNDEFINED" (compared case-insensitively). Otherwise the function
 * switches to the start user, calls free_processor_set() and switches back
 * to the admin user. A PROC_SET_WARNING result is only traced; every other
 * failure sets shepherd_state to SSTATE_PROCSET_NOTFREED and is reported
 * through shepherd_error().
 */
void sge_pset_free_processor_set(void)
{
#if defined(__sgi) || defined(ALPHA) || defined(SOLARIS64) || defined(SOLARISAMD64)
   /* SGI IRIX processor set stuff */
   if (strcasecmp("UNDEFINED",get_conf_val("processors"))) {
      char err_str[2*SGE_PATH_MAX+128];
      int ret;

      errno = 0;
      if (sge_switch2start_user()) {
         shepherd_trace("failed to switch user in free_processor_set: %s",
                        strerror(errno));
         shepherd_state = SSTATE_PROCSET_NOTFREED;
         /*
          * shepherd_error() is called printf-style elsewhere in this file,
          * so text that is not a literal must go through "%s" to keep a
          * stray '%' from being interpreted as a conversion.
          */
         shepherd_error(1, "%s", strerror(errno));
         return;
      }
      if ((ret=free_processor_set(err_str)) != PROC_SET_OK) {
         sge_switch2admin_user();
         switch (ret) {
         case PROC_SET_WARNING: /* not critical - e.g. not root */
            shepherd_trace("warning: processor set not freed in free_processor_set - "
                           "did no exist, probably");
            break;
         case PROC_SET_ERROR: /* critical - err_str indicates error */
            shepherd_trace("critical error in free_processor_set - bailing out");
            shepherd_state = SSTATE_PROCSET_NOTFREED;
            shepherd_error(1, "%s", err_str);
            break;
         case PROC_SET_BUSY: /* still processes running in processor set */
            shepherd_trace("error in releasing processor set");
            shepherd_state = SSTATE_PROCSET_NOTFREED;
            shepherd_error(1, "%s", err_str);
            break;
         default: /* should not occur */
            sprintf(err_str,
               "internal error after free_processor_set - ret=%d", ret);
            shepherd_state = SSTATE_PROCSET_NOTFREED;
            shepherd_error(1, "%s", err_str);
            break;
         }
      } else {
         sge_switch2admin_user();
      }
   }
#endif
}
Example #5
0
/*
 * Read the OS job id from the file "osjobid" in the current working
 * directory.
 *
 * return_code - receives the parsed id; its type depends on the platform
 *               (ash_t on IRIX, id_t on NECSX4/NECSX5, int on CRAY). It is
 *               set to 0 when the file can be opened but no value can be
 *               parsed.
 * is_error    - when true a failed fopen() is reported through
 *               shepherd_error(), otherwise only through shepherd_trace().
 *
 * Returns true when exactly one value was parsed, false when the file
 * could not be opened, parsed or closed.
 *
 * NOTE(review): on any other platform no return_code parameter is declared
 * although the body uses it - presumably this function is only compiled
 * for the platforms listed above; confirm against the build.
 */
bool 
shepherd_read_osjobid_file(
/*
 * Use the same defined() test as the body below, so that the parameter
 * type and the fscanf() conversion are always chosen together.
 */
#if defined(IRIX)
   ash_t *return_code,
#elif defined(NECSX4) || defined(NECSX5)
   id_t *return_code,
#elif defined(CRAY)
   int *return_code,
#endif
   bool is_error
)
{
   bool ret = true;
   FILE *fp = NULL;
   const char *const filename = "osjobid";

   fp = fopen(filename, "r");
   if (fp != NULL) {
      int arguments = 0;

#if defined(IRIX)
      arguments = fscanf(fp, "%lld\n", return_code);
#else
      arguments = fscanf(fp, "%d\n", return_code);
#endif

      if (arguments != 1) {
         shepherd_trace("could not read osjobid file");
         *return_code = 0;
         ret = false;
      }
      FCLOSE(fp);
   } else {
      if (is_error == true) {
         shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      } else {
         shepherd_trace(MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      }
      ret = false;
   }
   return ret;
FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
Example #6
0
/*
 * Write the OS job id string, followed by a newline, to the file "osjobid"
 * in the current working directory. The file is opened in mode "w", so
 * previous content is discarded.
 *
 * Returns true on success and false when the file cannot be opened,
 * written or closed; every failure is reported through shepherd_error().
 *
 * NOTE(review): FPRINTF and FCLOSE presumably jump to FPRINTF_ERROR and
 * FCLOSE_ERROR when the underlying call fails.
 */
bool shepherd_write_osjobid_file(const char *osjobid)
{
   bool ret = true;
   const char *const filename = "osjobid";
   FILE *fp = NULL;

   fp = fopen(filename, "w");
   if (fp != NULL) {
      FPRINTF((fp, "%s\n", osjobid));
      FCLOSE(fp);
   } else {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      ret = false;
   }
   return ret;
FPRINTF_ERROR:
   {
      /*
       * The write failed while the stream is still open: close it so it is
       * not leaked, but keep the errno of the failed write for the message.
       */
      int write_errno = errno;

      fclose(fp);
      errno = write_errno;
   }
FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
Example #7
0
/*
 * Create (or truncate) the file "checkpointed" in the current working
 * directory.
 *
 * ckpt_is_in_arena - when non-zero the line "1" is written to the file,
 *                    otherwise the file is left empty.
 *
 * Open, write and close failures are reported through shepherd_error().
 *
 * NOTE(review): FPRINTF and FCLOSE presumably jump to FPRINTF_ERROR and
 * FCLOSE_ERROR when the underlying call fails.
 */
void 
create_checkpointed_file(int ckpt_is_in_arena)
{
   const char *const filename = "checkpointed";
   FILE *fp = NULL;

   fp = fopen(filename, "w");
   if (fp != NULL) {
      if (ckpt_is_in_arena) {
         FPRINTF((fp, "1\n"));
      }
      FCLOSE(fp);
   } else {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
   }
   return;
FPRINTF_ERROR:
   {
      /*
       * The write failed while the stream is still open: close it so it is
       * not leaked, but keep the errno of the failed write for the message.
       */
      int write_errno = errno;

      fclose(fp);
      errno = write_errno;
   }
FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return;
}
Example #8
0
/*
 * Write the processor set number, followed by a newline, to the file
 * "processor_set_number" in the current working directory. The file is
 * opened in mode "w", so previous content is discarded.
 *
 * Returns true on success and false when the file cannot be opened,
 * written or closed; every failure is reported through shepherd_error().
 *
 * NOTE(review): FPRINTF and FCLOSE presumably jump to FPRINTF_ERROR and
 * FCLOSE_ERROR when the underlying call fails.
 */
bool 
shepherd_write_processor_set_number_file(int proc_set)
{
   bool ret = true;
   const char *const filename = "processor_set_number";
   FILE *fp = NULL;

   fp = fopen(filename, "w");
   if (fp != NULL) {
      FPRINTF((fp, "%d\n", proc_set));
      FCLOSE(fp);
   } else {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      ret = false;
   }
   return ret;
FPRINTF_ERROR:
   {
      /*
       * The write failed while the stream is still open: close it so it is
       * not leaked, but keep the errno of the failed write for the message.
       */
      int write_errno = errno;

      fclose(fp);
      errno = write_errno;
   }
FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
Example #9
0
/*
 * Append one line of the form "<task_id> <exit_status>" to the given file.
 *
 * filename    - path of the file; it is opened in append mode ("a")
 * task_id     - text written as the first field
 * exit_status - number written as the second field
 *
 * Returns true on success and false when the file cannot be opened,
 * written or closed; every failure is reported through shepherd_error().
 *
 * NOTE(review): FPRINTF and FCLOSE presumably jump to FPRINTF_ERROR and
 * FCLOSE_ERROR when the underlying call fails.
 */
bool
shepherd_write_sig_info_file(const char *filename, const char *task_id,
                             u_long32 exit_status)
{
   bool ret = true;
   FILE *fp = NULL;

   fp = fopen(filename, "a");
   if (fp != NULL) {
      FPRINTF((fp, "%s "sge_u32"\n", task_id, exit_status));
      FCLOSE(fp);
   } else {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      ret = false;
   }
   return ret;
FPRINTF_ERROR:
   {
      /*
       * The write failed while the stream is still open: close it so it is
       * not leaked, but keep the errno of the failed write for the message.
       */
      int write_errno = errno;

      fclose(fp);
      errno = write_errno;
   }
FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
Example #10
0
/*
 * Create (or truncate) the empty marker file "shepherd_about_to_exit" in
 * the current working directory.
 *
 * Returns true when the file was opened and closed again, false otherwise;
 * both kinds of failure are reported through shepherd_error().
 */
bool 
shepherd_write_shepherd_about_to_exit_file(void)
{
   const char *const filename = "shepherd_about_to_exit";
   FILE *marker = fopen(filename, "w");

   /* nothing is written - opening the file in "w" mode is all that is needed */
   if (marker == NULL) {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      return false;
   }

   FCLOSE(marker);
   return true;

FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
Example #11
0
/*
 * Write the accounting data of a finished job as "name=value" lines to the
 * file "usage" in the current working directory. The file is opened in
 * mode "w", so previous content is discarded.
 *
 * wait_status  - written as "wait_status"
 * exit_status  - written as "exit_status"
 * child_signal - written as "signal"
 * start_time   - written as "start_time"
 * end_time     - written as "end_time"; end_time - start_time is written
 *                as "ru_wallclock"
 * rusage       - source of all remaining "ru_*" entries; must not be NULL
 *
 * Returns true on success and false when the file cannot be opened,
 * written or closed; every failure is reported through shepherd_error().
 *
 * NOTE(review): FPRINTF and FCLOSE presumably jump to FPRINTF_ERROR and
 * FCLOSE_ERROR when the underlying call fails.
 */
bool
shepherd_write_usage_file(u_long32 wait_status, int exit_status,
                          int child_signal, u_long32 start_time,
                          u_long32 end_time, struct rusage *rusage)
{
   bool ret = true;
   const char *const filename = "usage";
   FILE *fp = NULL;

   shepherd_trace("writing usage file to \"usage\"");

   fp = fopen(filename, "w");
   if (fp != NULL) {
      /*
       * the wait status is returned by japi_wait()
       * see sge_reportL.h for bitmask and macro definition
       */
      FPRINTF((fp, "wait_status="sge_u32"\n", wait_status));
      FPRINTF((fp, "exit_status=%d\n", exit_status));
      FPRINTF((fp, "signal=%d\n", child_signal));

      FPRINTF((fp, "start_time=%d\n", (int) start_time));
      FPRINTF((fp, "end_time=%d\n", (int) end_time));
      FPRINTF((fp, "ru_wallclock="sge_u32"\n", (u_long32) end_time-start_time));
#if defined(NEC_ACCOUNTING_ENTRIES)
      /* Additional accounting information for NEC SX-4 SX-5 */
#if defined(NECSX4) || defined(NECSX5)
      /*
       * The constants are cast to u_long32 so that the argument type
       * matches the sge_u32 conversion, as for every other sge_u32 value
       * written by this function.
       */
#if defined(NECSX4)
      FPRINTF((fp, "necsx_necsx4="sge_u32"\n", (u_long32) 1));
#elif defined(NECSX5)
      FPRINTF((fp, "necsx_necsx5="sge_u32"\n", (u_long32) 1));
#endif
      FPRINTF((fp, "necsx_base_prty="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_time_slice="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_num_procs="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_kcore_min="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_mean_size="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_maxmem_size="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_chars_trnsfd="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_blocks_rw="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_inst="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_vector_inst="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_vector_elmt="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_vec_exe="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_flops="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_conc_flops="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_fpec="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_cmcc="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_bccc="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_mt_open="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_io_blocks="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_multi_single="sge_u32"\n", (u_long32) 0));
      FPRINTF((fp, "necsx_max_nproc="sge_u32"\n", (u_long32) 0));
#endif
#endif

      /* user and system time as seconds with a fractional microsecond part */
      FPRINTF((fp, "ru_utime=%f\n", (double)rusage->ru_utime.tv_sec + (double)rusage->ru_utime.tv_usec / 1000000.0));
      FPRINTF((fp, "ru_stime=%f\n", (double)rusage->ru_stime.tv_sec + (double)rusage->ru_stime.tv_usec / 1000000.0));
      FPRINTF((fp, "ru_maxrss=%ld\n", rusage->ru_maxrss));
      FPRINTF((fp, "ru_ixrss=%ld\n", rusage->ru_ixrss));
#if defined(ultrix)
      FPRINTF((fp, "ru_ismrss=%ld\n", rusage->ru_ismrss));
#endif
      FPRINTF((fp, "ru_idrss=%ld\n", rusage->ru_idrss));
      FPRINTF((fp, "ru_isrss=%ld\n", rusage->ru_isrss));
      FPRINTF((fp, "ru_minflt=%ld\n", rusage->ru_minflt));
      FPRINTF((fp, "ru_majflt=%ld\n", rusage->ru_majflt));
      FPRINTF((fp, "ru_nswap=%ld\n", rusage->ru_nswap));
      FPRINTF((fp, "ru_inblock=%ld\n", rusage->ru_inblock));
      FPRINTF((fp, "ru_oublock=%ld\n", rusage->ru_oublock));
      FPRINTF((fp, "ru_msgsnd=%ld\n", rusage->ru_msgsnd));
      FPRINTF((fp, "ru_msgrcv=%ld\n", rusage->ru_msgrcv));
      FPRINTF((fp, "ru_nsignals=%ld\n", rusage->ru_nsignals));
      FPRINTF((fp, "ru_nvcsw=%ld\n", rusage->ru_nvcsw));
      FPRINTF((fp, "ru_nivcsw=%ld\n", rusage->ru_nivcsw));

      FCLOSE(fp);

   } else {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      ret = false;
   }
   return ret;
FPRINTF_ERROR:
   {
      /*
       * A write failed while the stream is still open: close it so it is
       * not leaked, but keep the errno of the failed write for the message.
       */
      int write_errno = errno;

      fclose(fp);
      errno = write_errno;
   }
FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
Example #12
0
/*
 * Report the given text through shepherd_error() with 1 as first argument.
 *
 * text - message to report. It is passed as data of a "%s" format, because
 *        shepherd_error() is called printf-style elsewhere in this file
 *        and a '%' inside the text must not be interpreted as a
 *        conversion.
 */
void shepherd_error_ptr(const char *text)
{
   shepherd_error(1, "%s", text); 
}
Example #13
0
/*
 * Create the platform specific job identification for the job and store
 * it in a file in the current working directory.
 *
 * - SOLARIS, ALPHA, LINUX, FREEBSD, DARWIN: the "add_grp_id" configuration
 *   value (0 if not configured) is stored in *add_grp_id_ptr and written
 *   to the file "addgrpid".
 * - HP1164, AIX: the result of getpgrp() is written to "addgrpid".
 * - all others: an id string is written to the file "osjobid". If the
 *   switch to the start user succeeds, the string is built by the IRIX,
 *   CRAY or NECSX4/NECSX5 specific code, or from sid on other platforms;
 *   if the switch fails, "0" is written.
 *
 * sid            - used as default os job id
 * add_grp_id_ptr - receives the additional group id (first platform group)
 * pw             - passwd entry; its pw_uid is used by the CRAY code
 *
 * Failures are reported through shepherd_error() or shepherd_trace().
 */
void setosjobid(pid_t sid, gid_t *add_grp_id_ptr, struct passwd *pw)
{
   FILE *fp=NULL;

   shepherd_trace("setosjobid: uid = "pid_t_fmt", euid = "pid_t_fmt, getuid(), geteuid());

#  if defined(SOLARIS) || defined(ALPHA) || defined(LINUX) || defined(FREEBSD) || defined(DARWIN)
      /* Read SgeId from config-File and create Addgrpid-File */
      {  
         char *cp;
         if ((cp = search_conf_val("add_grp_id")))
            *add_grp_id_ptr = atol(cp);
         else
            *add_grp_id_ptr = 0;
      }
      if ((fp = fopen("addgrpid", "w")) == NULL) {
         shepherd_error(1, "can't open \"addgrpid\" file");   
         return; /* never write to a stream that could not be opened */
      }
      fprintf(fp, gid_t_fmt"\n", *add_grp_id_ptr);
      FCLOSE(fp);   
# elif defined(HP1164) || defined(AIX)
    {
      if ((fp = fopen("addgrpid", "w")) == NULL) {
         shepherd_error(1, "can't open \"addgrpid\" file");
         return; /* never write to a stream that could not be opened */
      }
      fprintf(fp, pid_t_fmt"\n", getpgrp());
      FCLOSE(fp);
    }
#  else
   {
      char osjobid[100];
      if ((fp = fopen("osjobid", "w")) == NULL) {
         shepherd_error(1, "can't open \"osjobid\" file");
         return; /* never write to a stream that could not be opened */
      }

      if(sge_switch2start_user() == 0) {
#     if defined(IRIX)
      {
         /* The following block contains the operations necessary for
          * IRIX6.2 (and later) to set array session handles (ASHs) and
          * service provider info (SPI) records
          */
         struct acct_spi spi;
         int ret;
         char *cp;

         shepherd_trace("in irix code");
         /* get _local_ array session id */
         if ((ret=newarraysess())) {
            shepherd_error(1, "error: can't create ASH; errno=%d", ret);
         }

         /* retrieve array session id we just assigned to the process and
          * write it to the os-jobid file
          */
         sprintf(osjobid, "%lld", getash());
         /* trace the id as data, not as format string */
         shepherd_trace("%s", osjobid); 
         /* set service provider information (spi) record */
         strncpy(spi.spi_company, "SGE", 8);
         strncpy(spi.spi_initiator, get_conf_val("spi_initiator"), 8);
         strncpy(spi.spi_origin, get_conf_val("queue"),16);
         strcpy(spi.spi_spi, "Job ");
         strncat(spi.spi_spi, get_conf_val("job_id"),11);
         if ((ret=setspinfo(&spi))) {
            shepherd_error(1, "error: can't set SPI; errno=%d", ret);
         }
         
         if ((cp = search_conf_val("acct_project"))) {
            prid_t proj; 
            if (strcasecmp(cp, "none") && ((proj = projid(cp)) >= 0)) {
               shepherd_trace("setting project \"%s\" to id %lld", cp, proj);
               if (setprid(proj) == -1)
                  shepherd_trace("failed setting project id");
            }
            else {   
               shepherd_trace("can't get id for project \"%s\"", cp);
            }
         } else {
            shepherd_trace("can't get configuration entry for projects");
         }
      }
#     elif defined(CRAY)
      {
         char *cp;
         {
            int jobid;

            if ((jobid=setjob(pw->pw_uid, 0)) < 0) {
               shepherd_error(1, "error: can't set job ID; errno = %d", errno);
            }

            if (sesscntl(jobid, S_ADDFL, S_BATCH) == -1) {
               /* report the id that was actually passed to sesscntl() */
               shepherd_error(1, "error: sesscntl(%d, S_ADDFL, S_BATCH) failed,"
                                 " errno = %d", jobid, errno);
            } 
            sprintf(osjobid, "%d", jobid);
         }

         if ((cp = search_conf_val("acct_project"))) {
            int proj; 
            if (strcasecmp(cp, "none") && ((proj = nam2acid(cp)) >= 0)) {
               shepherd_trace("setting project \"%s\" to acid %d", cp, proj);
               if (acctid(0, proj) == -1) {
                  shepherd_trace("failed setting project id (acctid)");
               }
            } else {   
               shepherd_trace("can't get id for project \"%s\"", cp);
            }
         } else {
            shepherd_trace("can't get configuration entry for projects");
         }
      }
#     elif defined(NECSX4) || defined(NECSX5)
      {
         id_t jobid = 0;
         dispset2_t attr;	
         int value;

         /*
          * Create new Super-UX job
          */
         if (setjid() == -1) {
            shepherd_trace("ERROR: can't set jobid: %s[%d]", strerror(errno), errno);
         } else {
            jobid = getjid(0);
            shepherd_trace("Created job with id: "sge_u32, (u_long32) jobid);
         }  
         sprintf(osjobid, sge_u32, (u_long32) jobid); 

         /*
          * We will use limits for the whole job
          */
         set_rlimits_os_job_id(jobid);

         /*
          * The job will use the resources of the configured 
          * Resource Sharing Group (rsg)
          */ 
         {
            char *rsg_id_string;
            int rsg_id;
            char fsg_dev_string[256];

            rsg_id_string  = get_conf_val("processors");
            rsg_id = atoi(rsg_id_string);
            if (rsg_id) {
               int fd;

               sprintf(fsg_dev_string, "/dev/rsg/%d", rsg_id);
               fd = open(fsg_dev_string, O_RDONLY);
               /* open() signals failure with -1; descriptor 0 is valid */
               if (fd < 0) {
                  shepherd_trace("ERROR: can't switch to rsg%d because can't open "
                                 "device: %s[%d]", rsg_id, strerror(errno), errno);
               } else {
                  if (ioctl(fd, RSG_JUMP, NULL) == -1) {
                     close(fd);
                     shepherd_trace("ERROR: can't switch to rsg%d: %s[%d]", 
                                    rsg_id, strerror(errno), errno);
                     /*
                      * NOTE(review): this return leaves the "osjobid" stream
                      * open and does not call sge_switch2admin_user() -
                      * confirm whether that is intended.
                      */
                     return;
                  } else {
                     close(fd);
                     shepherd_trace("switched to rsg%d", rsg_id);
                  }
               }
            } else {
               shepherd_trace("using default rsg");
            }
         } 

         /*
          * Set scheduling parameter for job
          */
         if (((attr.basepri = atoi(get_conf_val("nec_basepriority"))) != NEC_UNDEF_VALUE)
            && ((attr.modcpu = atoi(get_conf_val("nec_modcpu"))) != NEC_UNDEF_VALUE)
            && ((attr.tickcnt = atoi(get_conf_val("nec_tickcnt"))) != NEC_UNDEF_VALUE)
            && ((attr.dcyfctr = atoi(get_conf_val("nec_dcyfctr"))) != NEC_UNDEF_VALUE)
            && ((attr.dcyintvl = atoi(get_conf_val("nec_dcyintvl"))) != NEC_UNDEF_VALUE)
            && ((attr.tmslice = atoi(get_conf_val("nec_timeslice"))) != NEC_UNDEF_VALUE)
            && ((attr.mempri = atoi(get_conf_val("nec_memorypriority"))) != NEC_UNDEF_VALUE)
            && ((attr.szefctmrt = atoi(get_conf_val("nec_mrt_size_effct"))) != NEC_UNDEF_VALUE)
            && ((attr.priefctmrt = atoi(get_conf_val("nec_mrt_pri_effct"))) != NEC_UNDEF_VALUE)
            && ((attr.minmrt = atoi(get_conf_val("nec_mrt_minimum"))) != NEC_UNDEF_VALUE)
            && ((attr.agrange = atoi(get_conf_val("nec_aging_range"))) != NEC_UNDEF_VALUE)
            && ((attr.spinherit = atoi(get_conf_val("nec_slavepriority"))) != NEC_UNDEF_VALUE)
            && ((attr.concpu = atoi(get_conf_val("nec_cpu_count"))) != NEC_UNDEF_VALUE)) {
            if (dispcntl(SG_JID, getjid(0), DCNTL_SET2, &attr) == -1) {
               shepherd_trace("ERROR: can't set scheduling parameter: %s[%d]",
                              strerror(errno), errno);
            } else {
               shepherd_trace("control parameters for active process scheduling modified");
               print_scheduling_parameters(attr);
            }
         } else {
            shepherd_trace("we do not control active process scheduling");
         }
      }               
#     else
         /* write a default os-jobid to file */
         sprintf(osjobid, pid_t_fmt, sid);
#     endif
         sge_switch2admin_user();
      } 
      else /* not running as super user --> we want a default os-jobid */
         sprintf(osjobid, "0");
      
      if(fprintf(fp, "%s\n", osjobid) < 0)
         shepherd_trace("error writing osjobid file");
         
      FCLOSE(fp); /* Close os-jobid file */   
   }
#  endif
   return;
FCLOSE_ERROR:
   shepherd_error(1, "can't close file"); 
}