/*
 * Read the first exit status stored in the file "exit_status" (opened
 * relative to the current working directory).
 *
 * return_code - receives the parsed integer. When the file can be opened
 *               but no integer can be parsed, it is set to
 *               ESSTATE_NO_EXITSTATUS. It is left untouched when the file
 *               cannot be opened.
 *
 * Returns true if exactly one integer was read and the file was closed
 * successfully, false otherwise. Open and close failures are reported
 * through shepherd_error(1, ...).
 */
bool shepherd_read_exit_status_file(int *return_code)
{
   bool ret = true;
   const char *const filename = "exit_status";
   FILE *fp = fopen(filename, "r");

   if (fp == NULL) {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      /*
       * Leave here in case shepherd_error() returns: the stream was never
       * opened, so it must not be handed to FCLOSE().
       */
      return false;
   }

   /* retrieve first exit status from exit status file */
   if (fscanf(fp, "%d\n", return_code) != 1) {
      shepherd_trace("could not read exit_status file");
      *return_code = ESSTATE_NO_EXITSTATUS;
      ret = false;
   }

   FCLOSE(fp);
   return ret;

FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
/*
 * Read the processor set number stored in the file "processor_set_number"
 * (opened relative to the current working directory).
 *
 * proc_set - receives the parsed integer. When the file can be opened but
 *            no integer can be parsed, it is set to 0. It is left untouched
 *            when the file cannot be opened.
 *
 * Returns true if exactly one integer was read and the file was closed
 * successfully, false otherwise. Open and close failures are reported
 * through shepherd_error(1, ...).
 */
bool shepherd_read_processor_set_number_file(int *proc_set)
{
   bool ret = true;
   const char *const filename = "processor_set_number";
   FILE *fp = fopen(filename, "r");

   if (fp == NULL) {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      /*
       * Leave here in case shepherd_error() returns: the stream was never
       * opened, so it must not be handed to FCLOSE().
       */
      return false;
   }

   if (fscanf(fp, "%d", proc_set) != 1) {
      shepherd_trace("could not read processor_set_number file");
      *proc_set = 0;
      ret = false;
   }

   FCLOSE(fp);
   return ret;

FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
/*
 * Set up the processor set configured for the job.
 *
 * Only compiled in on the platforms listed in the #if below; elsewhere the
 * function is empty. Nothing is done when the "processors" configuration
 * value equals "UNDEFINED" (case-insensitive).
 *
 * The call to set_processor_range() is made after sge_switch2start_user()
 * and is followed by sge_switch2admin_user() on every outcome. A
 * PROC_SET_WARNING result is only traced; any other non-OK result sets
 * shepherd_state to SSTATE_PROCSET_NOTSET and is reported through
 * shepherd_error(1, ...).
 */
void sge_pset_create_processor_set(void)
{
#if defined(__sgi) || defined(ALPHA) || defined(SOLARIS64) || defined(SOLARISAMD64)
   char err_str[2*SGE_PATH_MAX+128];

   /* SGI IRIX processor set stuff */
   if (strcasecmp("UNDEFINED", get_conf_val("processors"))) {
      int ret;

      /* make sure a defined string is reported even if the callee leaves
       * the buffer untouched */
      err_str[0] = '\0';

      errno = 0;
      if (sge_switch2start_user()) {
         /* remember errno before tracing, which may overwrite it */
         const int saved_errno = errno;

         shepherd_trace("can't switch user in sge_pset_create_processor_set");
         shepherd_state = SSTATE_PROCSET_NOTSET;
         /* the message is data, not a format string */
         shepherd_error(1, "%s", strerror(saved_errno));
         return;
      }

      ret = set_processor_range(get_conf_val("processors"),
                                (int) strtol(get_conf_val("job_id"), NULL, 10),
                                err_str);
      sge_switch2admin_user();

      if (ret != PROC_SET_OK) {
         if (ret == PROC_SET_WARNING) {
            /* not critical - e.g. not root */
            shepherd_trace("warning: processor set not set in set_processor_range");
         } else {
            /* critical --> use err_str to indicate error */
            shepherd_trace("critical error in set_processor_range - bailing out");
            shepherd_state = SSTATE_PROCSET_NOTSET;
            shepherd_error(1, "%s", err_str);
         }
      }
   }
#endif
}
/*
 * Release the processor set that was configured for the job.
 *
 * Only compiled in on the platforms listed in the #if below; elsewhere the
 * function is empty. Nothing is done when the "processors" configuration
 * value equals "UNDEFINED" (case-insensitive).
 *
 * The call to free_processor_set() is made after sge_switch2start_user()
 * and is followed by sge_switch2admin_user() on every outcome. A
 * PROC_SET_WARNING result is only traced; every other non-OK result sets
 * shepherd_state to SSTATE_PROCSET_NOTFREED and is reported through
 * shepherd_error(1, ...).
 */
void sge_pset_free_processor_set(void)
{
#if defined(__sgi) || defined(ALPHA) || defined(SOLARIS64) || defined(SOLARISAMD64)
   /* SGI IRIX processor set stuff */
   if (strcasecmp("UNDEFINED", get_conf_val("processors"))) {
      char err_str[2*SGE_PATH_MAX+128];
      int ret;

      /* make sure a defined string is reported even if the callee leaves
       * the buffer untouched */
      err_str[0] = '\0';

      errno = 0;
      if (sge_switch2start_user()) {
         /* remember errno: the trace call below may overwrite it */
         const int saved_errno = errno;

         shepherd_trace("failed to switch user in free_processor_set: %s",
                        strerror(saved_errno));
         shepherd_state = SSTATE_PROCSET_NOTFREED;
         /* the message is data, not a format string */
         shepherd_error(1, "%s", strerror(saved_errno));
         return;
      }

      ret = free_processor_set(err_str);
      sge_switch2admin_user();

      if (ret != PROC_SET_OK) {
         switch (ret) {
         case PROC_SET_WARNING: /* not critical - e.g. not root */
            shepherd_trace("warning: processor set not freed in free_processor_set - "
                           "did no exist, probably");
            break;
         case PROC_SET_ERROR: /* critical - err_str indicates error */
            shepherd_trace("critical error in free_processor_set - bailing out");
            shepherd_state = SSTATE_PROCSET_NOTFREED;
            shepherd_error(1, "%s", err_str);
            break;
         case PROC_SET_BUSY: /* still processes running in processor set */
            shepherd_trace("error in releasing processor set");
            shepherd_state = SSTATE_PROCSET_NOTFREED;
            shepherd_error(1, "%s", err_str);
            break;
         default: /* should not occur */
            sprintf(err_str, "internal error after free_processor_set - ret=%d", ret);
            shepherd_state = SSTATE_PROCSET_NOTFREED;
            shepherd_error(1, "%s", err_str);
            break;
         }
      }
   }
#endif
}
bool shepherd_read_osjobid_file( #if (IRIX) ash_t *return_code, #elif defined(NECSX4) || defined(NECSX5) id_t *return_code, #elif defined(CRAY) int *return_code, #endif bool is_error ) { bool ret = true; FILE *fp = NULL; const char *const filename = "osjobid"; fp = fopen(filename, "r"); if (fp != NULL) { int arguments = 0; #if defined(IRIX) arguments = fscanf(fp, "%lld\n", return_code); #else arguments = fscanf(fp, "%d\n", return_code); #endif if (arguments != 1) { shepherd_trace("could not read osjobid file"); *return_code = 0; ret = false; } FCLOSE(fp); } else { if (is_error == true) { shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno)); } else { shepherd_trace(MSG_FILE_NOOPEN_SS, filename, strerror(errno)); } ret = false; } return ret; FCLOSE_ERROR: shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno)); return false; }
/*
 * Write the given id, followed by a newline, to the file "osjobid"
 * (created or truncated relative to the current working directory).
 *
 * osjobid - string that is written to the file.
 *
 * Returns true when the file was opened, written and closed without error.
 * A failure to open the file is reported with MSG_FILE_NOOPEN_SS; a failed
 * write or close is reported with MSG_FILE_NOCLOSE_SS. Both cases return
 * false.
 */
bool shepherd_write_osjobid_file(const char *osjobid)
{
   const char *const path = "osjobid";
   FILE *out = fopen(path, "w");

   if (out == NULL) {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, path, strerror(errno));
      return false;
   }

   FPRINTF((out, "%s\n", osjobid));
   FCLOSE(out);
   return true;

FPRINTF_ERROR:
FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, path, strerror(errno));
   return false;
}
/*
 * Create (or truncate) the file "checkpointed" relative to the current
 * working directory.
 *
 * ckpt_is_in_arena - when non-zero, the line "1" is written to the file;
 *                    otherwise the file is left empty.
 *
 * A failure to open the file is reported with MSG_FILE_NOOPEN_SS; a failed
 * write or close is reported with MSG_FILE_NOCLOSE_SS.
 */
void create_checkpointed_file(int ckpt_is_in_arena)
{
   const char *const path = "checkpointed";
   FILE *out = fopen(path, "w");

   if (out == NULL) {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, path, strerror(errno));
      return;
   }

   if (ckpt_is_in_arena != 0) {
      FPRINTF((out, "1\n"));
   }
   FCLOSE(out);
   return;

FPRINTF_ERROR:
FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, path, strerror(errno));
   return;
}
/*
 * Write the given processor set number, followed by a newline, to the file
 * "processor_set_number" (created or truncated relative to the current
 * working directory).
 *
 * proc_set - number that is written to the file.
 *
 * Returns true when the file was opened, written and closed without error.
 * A failure to open the file is reported with MSG_FILE_NOOPEN_SS; a failed
 * write or close is reported with MSG_FILE_NOCLOSE_SS. Both cases return
 * false.
 */
bool shepherd_write_processor_set_number_file(int proc_set)
{
   const char *const path = "processor_set_number";
   FILE *out = fopen(path, "w");

   if (out == NULL) {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, path, strerror(errno));
      return false;
   }

   FPRINTF((out, "%d\n", proc_set));
   FCLOSE(out);
   return true;

FPRINTF_ERROR:
FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, path, strerror(errno));
   return false;
}
/*
 * Append one line of the form "<task_id> <exit_status>" to the given file.
 *
 * filename    - file that is opened in append mode.
 * task_id     - string written as the first field of the line.
 * exit_status - value written as the second field, formatted with sge_u32.
 *
 * Returns true when the file was opened, written and closed without error.
 * A failure to open the file is reported with MSG_FILE_NOOPEN_SS; a failed
 * write or close is reported with MSG_FILE_NOCLOSE_SS. Both cases return
 * false.
 */
bool shepherd_write_sig_info_file(const char *filename, const char *task_id,
                                  u_long32 exit_status)
{
   FILE *out = fopen(filename, "a");

   if (out == NULL) {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      return false;
   }

   FPRINTF((out, "%s " sge_u32 "\n", task_id, exit_status));
   FCLOSE(out);
   return true;

FPRINTF_ERROR:
FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
/*
 * Create (or truncate) the empty marker file "shepherd_about_to_exit"
 * relative to the current working directory.
 *
 * Returns true when the file was opened and closed without error. A failure
 * to open the file is reported with MSG_FILE_NOOPEN_SS, a failed close with
 * MSG_FILE_NOCLOSE_SS. Both cases return false.
 */
bool shepherd_write_shepherd_about_to_exit_file(void)
{
   const char *const path = "shepherd_about_to_exit";
   FILE *marker = fopen(path, "w");

   if (marker == NULL) {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, path, strerror(errno));
      return false;
   }

   /* nothing is written: the existence of the file is the message */
   FCLOSE(marker);
   return true;

FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, path, strerror(errno));
   return false;
}
/*
 * Write the job's usage record to the file "usage" (created or truncated
 * relative to the current working directory) as "key=value" lines.
 *
 * wait_status  - written as "wait_status".
 * exit_status  - written as "exit_status".
 * child_signal - written as "signal".
 * start_time   - written as "start_time".
 * end_time     - written as "end_time"; end_time - start_time is written as
 *                "ru_wallclock".
 * rusage       - resource usage whose fields are written as "ru_*" entries;
 *                must not be NULL (it is dereferenced unconditionally).
 *
 * Returns true when the file was opened, written and closed without error.
 * A failure to open the file is reported with MSG_FILE_NOOPEN_SS; a failed
 * write or close is reported with MSG_FILE_NOCLOSE_SS. Both cases return
 * false.
 */
bool shepherd_write_usage_file(u_long32 wait_status, int exit_status,
                               int child_signal, u_long32 start_time,
                               u_long32 end_time, struct rusage *rusage)
{
   bool ret = true;
   const char *const filename = "usage";
   FILE *fp = NULL;

   shepherd_trace("writing usage file to \"usage\"");

   fp = fopen(filename, "w");
   if (fp != NULL) {
      /*
       * the wait status is returned by japi_wait()
       * see sge_reportL.h for bitmask and macro definition
       */
      FPRINTF((fp, "wait_status=" sge_u32 "\n", wait_status));
      FPRINTF((fp, "exit_status=%d\n", exit_status));
      FPRINTF((fp, "signal=%d\n", child_signal));

      /*
       * Print the time stamps with the conversion that belongs to u_long32
       * instead of truncating them to int: values above INT_MAX would
       * otherwise show up as negative numbers.
       */
      FPRINTF((fp, "start_time=" sge_u32 "\n", start_time));
      FPRINTF((fp, "end_time=" sge_u32 "\n", end_time));
      FPRINTF((fp, "ru_wallclock=" sge_u32 "\n", (u_long32) (end_time - start_time)));

#if defined(NEC_ACCOUNTING_ENTRIES)
      /* Additional accounting information for NEC SX-4 SX-5 */
#if defined(NECSX4) || defined(NECSX5)
      /*
       * The constants are cast to u_long32 so that the argument type
       * matches the sge_u32 conversion.
       */
#if defined(NECSX4)
      FPRINTF((fp, "necsx_necsx4=" sge_u32 "\n", (u_long32) 1));
#elif defined(NECSX5)
      FPRINTF((fp, "necsx_necsx5=" sge_u32 "\n", (u_long32) 1));
#endif
      FPRINTF((fp, "necsx_base_prty=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_time_slice=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_num_procs=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_kcore_min=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_mean_size=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_maxmem_size=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_chars_trnsfd=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_blocks_rw=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_inst=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_vector_inst=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_vector_elmt=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_vec_exe=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_flops=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_conc_flops=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_fpec=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_cmcc=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_bccc=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_mt_open=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_io_blocks=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_multi_single=" sge_u32 "\n", (u_long32) 0));
      FPRINTF((fp, "necsx_max_nproc=" sge_u32 "\n", (u_long32) 0));
#endif
#endif

      /* CPU times are written as seconds with a fractional part */
      FPRINTF((fp, "ru_utime=%f\n",
               (double)rusage->ru_utime.tv_sec +
               (double)rusage->ru_utime.tv_usec / 1000000.0));
      FPRINTF((fp, "ru_stime=%f\n",
               (double)rusage->ru_stime.tv_sec +
               (double)rusage->ru_stime.tv_usec / 1000000.0));
      FPRINTF((fp, "ru_maxrss=%ld\n", rusage->ru_maxrss));
      FPRINTF((fp, "ru_ixrss=%ld\n", rusage->ru_ixrss));
#if defined(ultrix)
      FPRINTF((fp, "ru_ismrss=%ld\n", rusage->ru_ismrss));
#endif
      FPRINTF((fp, "ru_idrss=%ld\n", rusage->ru_idrss));
      FPRINTF((fp, "ru_isrss=%ld\n", rusage->ru_isrss));
      FPRINTF((fp, "ru_minflt=%ld\n", rusage->ru_minflt));
      FPRINTF((fp, "ru_majflt=%ld\n", rusage->ru_majflt));
      FPRINTF((fp, "ru_nswap=%ld\n", rusage->ru_nswap));
      FPRINTF((fp, "ru_inblock=%ld\n", rusage->ru_inblock));
      FPRINTF((fp, "ru_oublock=%ld\n", rusage->ru_oublock));
      FPRINTF((fp, "ru_msgsnd=%ld\n", rusage->ru_msgsnd));
      FPRINTF((fp, "ru_msgrcv=%ld\n", rusage->ru_msgrcv));
      FPRINTF((fp, "ru_nsignals=%ld\n", rusage->ru_nsignals));
      FPRINTF((fp, "ru_nvcsw=%ld\n", rusage->ru_nvcsw));
      FPRINTF((fp, "ru_nivcsw=%ld\n", rusage->ru_nivcsw));

      FCLOSE(fp);
   } else {
      shepherd_error(1, MSG_FILE_NOOPEN_SS, filename, strerror(errno));
      ret = false;
   }
   return ret;

FPRINTF_ERROR:
FCLOSE_ERROR:
   shepherd_error(1, MSG_FILE_NOCLOSE_SS, filename, strerror(errno));
   return false;
}
/*
 * Report a ready-made error message through shepherd_error(1, ...).
 *
 * text - message to report. It is passed as data for a "%s" conversion, so
 *        any '%' characters it contains are reported literally instead of
 *        being interpreted as conversion specifications.
 */
void shepherd_error_ptr(const char *text)
{
   shepherd_error(1, "%s", text);
}
void setosjobid(pid_t sid, gid_t *add_grp_id_ptr, struct passwd *pw) { FILE *fp=NULL; shepherd_trace("setosjobid: uid = "pid_t_fmt", euid = "pid_t_fmt, getuid(), geteuid()); # if defined(SOLARIS) || defined(ALPHA) || defined(LINUX) || defined(FREEBSD) || defined(DARWIN) /* Read SgeId from config-File and create Addgrpid-File */ { char *cp; if ((cp = search_conf_val("add_grp_id"))) *add_grp_id_ptr = atol(cp); else *add_grp_id_ptr = 0; } if ((fp = fopen("addgrpid", "w")) == NULL) { shepherd_error(1, "can't open \"addgrpid\" file"); } fprintf(fp, gid_t_fmt"\n", *add_grp_id_ptr); FCLOSE(fp); # elif defined(HP1164) || defined(AIX) { if ((fp = fopen("addgrpid", "w")) == NULL) { shepherd_error(1, "can't open \"addgrpid\" file"); } fprintf(fp, pid_t_fmt"\n", getpgrp()); FCLOSE(fp); } # else { char osjobid[100]; if ((fp = fopen("osjobid", "w")) == NULL) { shepherd_error(1, "can't open \"osjobid\" file"); } if(sge_switch2start_user() == 0) { # if defined(IRIX) { /* The following block contains the operations necessary for * IRIX6.2 (and later) to set array session handles (ASHs) and * service provider info (SPI) records */ struct acct_spi spi; int ret; char *cp; shepherd_trace("in irix code"); /* get _local_ array session id */ if ((ret=newarraysess())) { shepherd_error(1, "error: can't create ASH; errno=%d", ret); } /* retrieve array session id we just assigned to the process and * write it to the os-jobid file */ sprintf(osjobid, "%lld", getash()); shepherd_trace(osjobid); /* set service provider information (spi) record */ strncpy(spi.spi_company, "SGE", 8); strncpy(spi.spi_initiator, get_conf_val("spi_initiator"), 8); strncpy(spi.spi_origin, get_conf_val("queue"),16); strcpy(spi.spi_spi, "Job "); strncat(spi.spi_spi, get_conf_val("job_id"),11); if ((ret=setspinfo(&spi))) { shepherd_error(1, "error: can't set SPI; errno=%d", ret); } if ((cp = search_conf_val("acct_project"))) { prid_t proj; if (strcasecmp(cp, "none") && ((proj = projid(cp)) >= 0)) { 
shepherd_trace("setting project \"%s\" to id %lld", cp, proj); if (setprid(proj) == -1) shepherd_trace("failed setting project id"); } else { shepherd_trace("can't get id for project \"%s\"", cp); } } else { shepherd_trace("can't get configuration entry for projects"); } } # elif defined(CRAY) { char *cp; { int jobid; if ((jobid=setjob(pw->pw_uid, 0)) < 0) { shepherd_error(1, "error: can't set job ID; errno = %d", errno); } if (sesscntl(jobid, S_ADDFL, S_BATCH) == -1) { shepherd_error(1, "error: sesscntl(%d, S_ADDFL, S_BATCH) failed," " errno = %d", sid, errno); } sprintf(osjobid, "%d", jobid); } if ((cp = search_conf_val("acct_project"))) { int proj; if (strcasecmp(cp, "none") && ((proj = nam2acid(cp)) >= 0)) { shephed_trace("setting project \"%s\" to acid %d", cp, proj); if (acctid(0, proj) == -1) { shepherd_trace("failed setting project id (acctid)"); } } else { shepherd_trace("can't get id for project \"%s\"", cp); } } else { shepherd_trace("can't get configuration entry for projects"); } } # elif defined(NECSX4) || defined(NECSX5) { id_t jobid = 0; dispset2_t attr; int value; /* * Create new Super-UX job */ if (setjid() == -1) { shepherd_trace("ERROR: can't set jobid: %s[%d]", strerror(errno), errno); } else { jobid = getjid(0); shepherd_trace("Created job with id: "sge_u32, (u_long32) jobid); } sprintf(osjobid, sge_u32, (u_long32) jobid); /* * We will use limits for the whole job */ set_rlimits_os_job_id(jobid); /* * The job will use the resources of the configured * Resource Sharing Group (rsg) */ { char *rsg_id_string; int rsg_id; char fsg_dev_string[256]; rsg_id_string = get_conf_val("processors"); rsg_id = atoi(rsg_id_string); if (rsg_id) { int fd; sprintf(fsg_dev_string, "/dev/rsg/%d", rsg_id); fd = open(fsg_dev_string, O_RDONLY); if (fd <= 0) { shepherd_trace("ERROR: can't switch to rsg%d because can't open" "device: %s[%d]", rsg_id, strerror(errno), errno); } else { if (ioctl(fd, RSG_JUMP, NULL) == -1) { close(fd); shepherd_trace("ERROR: can't switch 
to rsg%d: %s[%d]", rsg_id, strerror(errno), errno); return; } else { close(fd); shepherd_trace("switched to rsg%d", rsg_id); } } } else { shepherd_trace("using default rsg", rsg_id); } } /* * Set scheduling parameter for job */ if (((attr.basepri = atoi(get_conf_val("nec_basepriority"))) != NEC_UNDEF_VALUE) && ((attr.modcpu = atoi(get_conf_val("nec_modcpu"))) != NEC_UNDEF_VALUE) && ((attr.tickcnt = atoi(get_conf_val("nec_tickcnt"))) != NEC_UNDEF_VALUE) && ((attr.dcyfctr = atoi(get_conf_val("nec_dcyfctr"))) != NEC_UNDEF_VALUE) && ((attr.dcyintvl = atoi(get_conf_val("nec_dcyintvl"))) != NEC_UNDEF_VALUE) && ((attr.tmslice = atoi(get_conf_val("nec_timeslice"))) != NEC_UNDEF_VALUE) && ((attr.mempri = atoi(get_conf_val("nec_memorypriority"))) != NEC_UNDEF_VALUE) && ((attr.szefctmrt = atoi(get_conf_val("nec_mrt_size_effct"))) != NEC_UNDEF_VALUE) && ((attr.priefctmrt = atoi(get_conf_val("nec_mrt_pri_effct"))) != NEC_UNDEF_VALUE) && ((attr.minmrt = atoi(get_conf_val("nec_mrt_minimum"))) != NEC_UNDEF_VALUE) && ((attr.agrange = atoi(get_conf_val("nec_aging_range"))) != NEC_UNDEF_VALUE) && ((attr.spinherit = atoi(get_conf_val("nec_slavepriority"))) != NEC_UNDEF_VALUE) && ((attr.concpu = atoi(get_conf_val("nec_cpu_count"))) != NEC_UNDEF_VALUE)) { if (dispcntl(SG_JID, getjid(0), DCNTL_SET2, &attr) == -1) { shepherd_trace("ERROR: can't set scheduling parameter: %s[%d]", strerror(errno), errno); } else { shepherd_trace("control parameters for active process scheduling modified"); print_scheduling_parameters(attr); } } else { shepherd_trace("we do not control active process scheduling"); } } # else /* write a default os-jobid to file */ sprintf(osjobid, pid_t_fmt, sid); # endif sge_switch2admin_user(); } else /* not running as super user --> we want a default os-jobid */ sprintf(osjobid, "0"); if(fprintf(fp, "%s\n", osjobid) < 0) shepherd_trace("error writing osjobid file"); FCLOSE(fp); /* Close os-jobid file */ } # endif return; FCLOSE_ERROR: shepherd_error(1, "can't close file"); 
}