/*
 * inIdxList - test whether the array index carried by jobId is selected
 * by a list of index ranges.
 *
 * jobId:   job identifier; only LSB_ARRAY_IDX(jobId) is examined.
 * idxList: linked list of ranges (start, end, step, next), or a null
 *          pointer meaning "no restriction".
 *
 * Returns TRUE when idxList is null, or when the index lies inside
 * [start, end] of some node and is a whole number of steps away from that
 * node's start.  Returns FALSE when the list is non-empty and no node
 * matches.
 */
int
inIdxList(LS_LONG_INT jobId, struct idxList *idxList)
{
    struct idxList *node;

    if (!idxList)
        return(TRUE);

    for (node = idxList; node; node = node->next) {
        if (LSB_ARRAY_IDX(jobId) < node->start
            || LSB_ARRAY_IDX(jobId) > node->end)
            continue;

        /*
         * The modulo below is undefined for a zero divisor.  A range with
         * step 0 never advances past its start, so it can only select the
         * start index itself.
         */
        if (node->step == 0) {
            if (LSB_ARRAY_IDX(jobId) == node->start)
                return(TRUE);
            continue;
        }

        if (((LSB_ARRAY_IDX(jobId) - node->start) % node->step) == 0)
            return(TRUE);
    }

    return(FALSE);
}
/*
 * lsb_jobid2str - format a job identifier as text.
 *
 * A job whose array index is 0 is printed as "<jobid>"; any other job is
 * printed as "<jobid>[<index>]".
 *
 * Returns a pointer to a function-local static buffer, so every call
 * overwrites the text produced by the previous one.
 */
char *
lsb_jobid2str(LS_LONG_INT jobId)
{
    static char text[32];

    if (LSB_ARRAY_IDX(jobId) != 0) {
        /* Array element: base id followed by the element index. */
        sprintf(text, "%d[%d]", LSB_ARRAY_JOBID(jobId), LSB_ARRAY_IDX(jobId));
        return(text);
    }

    sprintf(text, "%d", LSB_ARRAY_JOBID(jobId));
    return(text);
}
/*
 * Apply a DRMAA control action to the job by sending it a signal through
 * lsb_signaljob().
 *
 *   DRMAA_CONTROL_SUSPEND, DRMAA_CONTROL_HOLD     -> SIGSTOP
 *   DRMAA_CONTROL_RESUME,  DRMAA_CONTROL_RELEASE  -> SIGCONT
 *   DRMAA_CONTROL_TERMINATE                       -> SIGKILL
 *
 * Raises FSD_ERRNO_INVALID_ARGUMENT for any other action and
 * FSD_ERRNO_INTERNAL_ERROR when lsb_signaljob() returns a negative value.
 * The session's drm_connection_mutex is held around the LSF call and is
 * released in the FINALLY clause.
 */
static void
lsfdrmaa_job_control( fsd_job_t *self, int action )
{
	/*
	 * XXX: waiting for job state change was removed
	 * since it is not required for drmaa_control
	 * to return after change completes.
	 */
	lsfdrmaa_job_t *lsf_self = (lsfdrmaa_job_t*)self;
	/* Named signo so the local does not shadow the libc signal() function. */
	int signo;

	fsd_log_enter(( "({job_id=%s}, action=%d)", self->job_id, action ));

	switch( action )
	 {
		case DRMAA_CONTROL_SUSPEND:
		case DRMAA_CONTROL_HOLD:
			signo = SIGSTOP;
			break;
		case DRMAA_CONTROL_RESUME:
		case DRMAA_CONTROL_RELEASE:
			signo = SIGCONT;
			break;
		case DRMAA_CONTROL_TERMINATE:
			/* TODO: sending SIGTERM (configurable)? */
			signo = SIGKILL;
			break;
		default:
			/*
			 * NOTE(review): presumably fsd_exc_raise_fmt() does not return;
			 * otherwise signo would be used uninitialised below -- confirm.
			 */
			fsd_exc_raise_fmt(
					FSD_ERRNO_INVALID_ARGUMENT,
					"job::control: unknown action %d", action );
	 }

	fsd_mutex_lock( &self->session->drm_connection_mutex );
	TRY
	 {
		int rc = lsb_signaljob( lsf_self->int_job_id, signo );
		fsd_log_debug(( "lsb_signaljob( %d[%d], %d ) = %d",
					LSB_ARRAY_JOBID(lsf_self->int_job_id),
					LSB_ARRAY_IDX(lsf_self->int_job_id),
					signo, rc ));
		if( rc < 0 )
			fsd_exc_raise_fmt(
					FSD_ERRNO_INTERNAL_ERROR,
					"job::control: could not send %s to job %s",
					fsd_strsignal( signo ), self->job_id );
	 }
	FINALLY
	 {
		fsd_mutex_unlock( &self->session->drm_connection_mutex );
	 }
	END_TRY

	fsd_log_return(( "" ));
}
/*
 * Refresh the job's state from LSF.
 *
 * Each round takes the session's drm_connection_mutex and opens a job-info
 * query for lsf_self->int_job_id:
 *   - if at least one record is reported, one record is read and passed to
 *     lsf_self->read_job_info();
 *   - otherwise FSD_DRMAA_ERRNO_INVALID_JOB is raised for jobs without the
 *     FSD_JOB_CURRENT_SESSION flag, and self->on_missing() is called for
 *     jobs that have it.
 * The FINALLY clause always closes the query and releases the mutex.
 * The round is repeated for as long as the job was not found.
 */
static void
lsfdrmaa_job_update_status( fsd_job_t *self )
{
	lsfdrmaa_job_t *lsf_self = (lsfdrmaa_job_t*)self;
	struct jobInfoEnt *volatile job_info = NULL;
	bool job_in_queue;

	/* The time difference is cast so the argument really matches "%d". */
	fsd_log_enter(( "({job_id=%s, time_delta=%d})", self->job_id,
				(int)( time(NULL) - self->submit_time ) ));

	do {
		fsd_mutex_lock( &self->session->drm_connection_mutex );
		TRY
		 {
			int n_records;
			/* Initialised because it is logged even when the read fails. */
			int more = 0;
			char * username = (lsf_self->int_job_id>0)?"all":NULL;

			fsd_log_debug(( "drm connection locked" ));

			n_records = lsb_openjobinfo( lsf_self->int_job_id,
						NULL, username, NULL, NULL, ALL_JOB );
			fsd_log_debug((
						"lsb_openjobinfo( %d[%d], NULL, %s, NULL, NULL, ALL_JOB ) =%d",
						LSB_ARRAY_JOBID(lsf_self->int_job_id),
						LSB_ARRAY_IDX(lsf_self->int_job_id),
						username?username:"******",
						n_records ));

			/* A zero or negative record count is treated as "job not found". */
			job_in_queue = n_records > 0;

			if(!job_in_queue){
				if(!(self->flags & FSD_JOB_CURRENT_SESSION)){
					fsd_exc_raise_code( FSD_DRMAA_ERRNO_INVALID_JOB );
				}else{/*handling missing job*/
					/*
					 * NOTE(review): the loop runs again after this call;
					 * presumably on_missing() raises or otherwise ends the
					 * wait -- confirm, or this retries without any delay.
					 */
					self->on_missing(self);
				}
			}else{
				job_info = lsb_readjobinfo( &more );
				fsd_log_debug(( "lsb_readjobinfo(...) =%p: more=%d",
							(void*)job_info, more ));
				if( job_info == NULL )
					fsd_exc_raise_lsf( "lsb_readjobinfo" );
				lsf_self->read_job_info( self, job_info );
			}
		 }
		FINALLY
		 {
			/* lsfdrmaa_free_job_info( job_info ); */
			lsb_closejobinfo();
			fsd_log_debug(( "lsb_closejobinfo()" ));
			fsd_mutex_unlock( &self->session->drm_connection_mutex );
		 }
		END_TRY
	} while( !job_in_queue );

	fsd_log_return(( "" ));
}
/*
 * Copy the contents of an LSF job record into the generic job object.
 *
 * self:     job object to update.
 * job_info: record to read; it is not modified or freed here.
 *
 * Updates self->flags (state bits and FSD_JOB_ABORTED), self->state,
 * exit_status, start/end time, cpu/mem/vmem usage, walltime, the execution
 * host count and -- only while it is still NULL -- the execution host list.
 * last_update_time is set to the current time.  When the resulting state
 * is DRMAA_PS_DONE or greater, status_cond is broadcast.
 */
static void
lsfdrmaa_job_read_job_info( fsd_job_t *self, struct jobInfoEnt *job_info )
{
	int status, flags;

	fsd_log_enter(( "" ));

	/* Dump the record to the debug log. */
	 {
		int i;
		fsd_log_debug(( "job status of %s updated from %d[%d]",
					self->job_id,
					LSB_ARRAY_JOBID(job_info->jobId),
					LSB_ARRAY_IDX(job_info->jobId) ));
		fsd_log_debug(( "\n status: 0x%x", job_info->status ));
		/* Time values are cast so the arguments really match "%ld". */
		fsd_log_debug(( "\n submitTime: %ld", (long)job_info->submitTime ));
		fsd_log_debug(( "\n startTime: %ld", (long)job_info->startTime ));
		/* This line used to print startTime under the endTime label. */
		fsd_log_debug(( "\n endTime: %ld", (long)job_info->endTime ));
		fsd_log_debug(( "\n duration: %d", job_info->duration ));
		fsd_log_debug(( "\n cpuTime: %f", job_info->cpuTime ));
		fsd_log_debug(( "\n cwd: %s", job_info->cwd ));
		fsd_log_debug(( "\n fromHost: %s", job_info->fromHost ));
		fsd_log_debug(( "\n numExHosts: %d", job_info->numExHosts ));
		for( i = 0; i < job_info->numExHosts; i++ )
			fsd_log_debug(( "\n exHosts[%d]: %s", i, job_info->exHosts[i] ));
		fsd_log_debug(( "\n exitStatus: %d", job_info->exitStatus ));
		fsd_log_debug(( "\n execCwd: %s", job_info->execCwd ));
		fsd_log_debug(( "\n runRusage.mem: %d", job_info->runRusage.mem ));
		fsd_log_debug(( "\n runRusage.swap: %d", job_info->runRusage.swap ));
		fsd_log_debug(( "\n runRusage.utime: %d", job_info->runRusage.utime ));
		fsd_log_debug(( "\n runRusage.stime: %d", job_info->runRusage.stime ));
		fsd_log_debug(( "\n jName: %s", job_info->jName ));
		/* fsd_log_debug(( "\n execRusage: %s", job_info->execRusage )); */
	 }

	status = job_info->status;

	/* Translate the LSF status bits into job flags. */
	flags = 0;
	if( status & (JOB_STAT_PEND | JOB_STAT_PSUSP) )
		flags |= FSD_JOB_QUEUED;
	if( status & JOB_STAT_PSUSP )
		flags |= FSD_JOB_HOLD;
	if( status & (JOB_STAT_RUN | JOB_STAT_USUSP | JOB_STAT_SSUSP) )
		flags |= FSD_JOB_RUNNING;
	if( status & (JOB_STAT_USUSP | JOB_STAT_SSUSP) )
		flags |= FSD_JOB_SUSPENDED;
	if( status & (JOB_STAT_DONE | JOB_STAT_EXIT) )
		flags |= FSD_JOB_TERMINATED;
	if( status & (JOB_STAT_EXIT | JOB_STAT_PERR) )
		flags |= FSD_JOB_ABORTED;
	self->flags &= ~(FSD_JOB_STATE_MASK | FSD_JOB_ABORTED);
	self->flags |= flags;

	/*
	 * Pick the DRMAA state.  The tests are ordered: when several status
	 * bits are set, the first matching branch wins.
	 */
	if( status & (JOB_STAT_WAIT | JOB_STAT_PEND) )
		self->state = DRMAA_PS_QUEUED_ACTIVE;
	else if( status & JOB_STAT_PSUSP )
		self->state = DRMAA_PS_USER_ON_HOLD;
	else if( status & JOB_STAT_RUN )
		self->state = DRMAA_PS_RUNNING;
	else if( status & JOB_STAT_SSUSP )
		self->state = DRMAA_PS_SYSTEM_SUSPENDED;
	else if( status & JOB_STAT_USUSP )
		self->state = DRMAA_PS_USER_SUSPENDED;
	else if( status & JOB_STAT_DONE )
		self->state = DRMAA_PS_DONE;
	else if( status & JOB_STAT_EXIT )
		self->state = DRMAA_PS_FAILED;
	else if( status & JOB_STAT_PDONE )
		self->state = DRMAA_PS_DONE;
	else if( status & JOB_STAT_PERR )
		self->state = DRMAA_PS_FAILED;
	else if( status & JOB_STAT_UNKWN )
		self->state = DRMAA_PS_UNDETERMINED;
	else
		self->state = DRMAA_PS_FAILED;

	/*
	 * Keep everything above the low 8 bits of the LSF exit status.  If
	 * that leaves zero although the job carries JOB_STAT_EXIT, set bit 0
	 * so the stored status is still non-zero.
	 */
	self->exit_status = job_info->exitStatus & ~0xff;
	if( (self->exit_status >> 8) == 0
			&& (job_info->status & JOB_STAT_EXIT) )
		self->exit_status |= 0x01;

	self->start_time = job_info->startTime;
	self->end_time = job_info->endTime;
	self->cpu_usage = job_info->cpuTime;

	/*
	 * Usage values only ever grow.  The scaling is done in long: the
	 * values are logged with "%d" above, so an int product would overflow
	 * once the raw value reaches 2^21.
	 */
	self->mem_usage = max( self->mem_usage, 1024L*job_info->runRusage.mem );
	self->vmem_usage = max( self->vmem_usage, 1024L*job_info->runRusage.swap );

	/* NOTE(review): presumably converts minutes to seconds -- confirm. */
	self->walltime = 60*job_info->duration;

	self->n_execution_hosts = job_info->numExHosts;
	/* The host list is built once and kept afterwards. */
	if( self->execution_hosts == NULL && job_info->exHosts != NULL )
		self->execution_hosts = fsd_explode(
				(const char*const*)job_info->exHosts, ' ',
				job_info->numExHosts );

	self->last_update_time = time(NULL);

	/* Wake up waiters once the state is DRMAA_PS_DONE or beyond. */
	if( self->state >= DRMAA_PS_DONE )
		fsd_cond_broadcast( &self->status_cond );

	fsd_log_return(( "" ));
}
void prtJobStart(struct jobInfoEnt *job, int prtFlag, int jobPid, int tFormat) { char prline[MAXLINELEN], tBuff[20]; time_t startTime; int i = 0; struct nameList *hostList = NULL; if (lsbParams[LSB_SHORT_HOSTLIST].paramValue && job->numExHosts > 1 && strcmp(lsbParams[LSB_SHORT_HOSTLIST].paramValue, "1") == 0) { hostList = lsb_compressStrList(job->exHosts, job->numExHosts); if (!hostList) { exit(99); } } if (tFormat) { sprintf (tBuff, "%s <%s>", I18N_Job, lsb_jobid2str(job->jobId)); } else if (LSB_ARRAY_IDX(job->jobId) > 0 ) sprintf (tBuff, " [%d]", LSB_ARRAY_IDX(job->jobId)); else tBuff[0] = '\0'; if (job->startTime && job->numExHosts) { if (job->startTime < job->submitTime) startTime = job->submitTime; else startTime = job->startTime; if ((job->submit.options & SUB_PRE_EXEC) && (prtFlag != BJOBS_PRINT)) { if (prtFlag == BHIST_PRINT_PRE_EXEC) { if (tBuff[0] == '\0') sprintf(prline, "%s: %s", _i18n_ctime(ls_catd, CTIME_FORMAT_a_b_d_T, &startTime), I18N(604, "The pre-exec command is started on")); /* catgets 604 */ else sprintf(prline, "%s:%s, %s", _i18n_ctime(ls_catd, CTIME_FORMAT_a_b_d_T, &startTime), tBuff, I18N(605, "the pre-exec command is started on")); /* catgets 605 */ } else { if (tBuff[0] == '\0') sprintf(prline, "%s: %s", _i18n_ctime(ls_catd, CTIME_FORMAT_a_b_d_T, &startTime), I18N(606, "The batch job command is started on")); /*catgets 606 */ else sprintf(prline, "%s:%s, %s", _i18n_ctime(ls_catd, CTIME_FORMAT_a_b_d_T, &startTime), tBuff, I18N(607, "the batch job command is started on")); /*catgets 607 */ } } else { if (jobPid > 0) { if (tBuff[0] == '\0') sprintf(prline, "%s: %s", _i18n_ctime(ls_catd, CTIME_FORMAT_a_b_d_T, &startTime), I18N(608, "Started on")); /* catgets 608 */ else sprintf(prline, "%s:%s %s", _i18n_ctime(ls_catd, CTIME_FORMAT_a_b_d_T, &startTime), tBuff, I18N(609, "started on")); /* catgets 609 */ } else { if (tBuff[0] == '\0') sprintf(prline, "%s: %s", _i18n_ctime(ls_catd, CTIME_FORMAT_a_b_d_T, &startTime), I18N(610, "Dispatched to")); 
/* catgets 610 */ else sprintf(prline, "%s: %s %s", _i18n_ctime(ls_catd, CTIME_FORMAT_a_b_d_T, &startTime), tBuff, I18N(611, "dispatched to")); /* catgets 611 */ } } prtLineWUF(prline); if (job->numExHosts > 1) { sprintf(prline, " %d %s", job->numExHosts, I18N(612, "Hosts/Processors")); /* catgets 612 */ prtLineWUF(prline); } if (lsbParams[LSB_SHORT_HOSTLIST].paramValue && job->numExHosts > 1 && strcmp(lsbParams[LSB_SHORT_HOSTLIST].paramValue, "1") == 0) { for (i = 0; i < hostList->listSize; i++) { sprintf(prline, " <%d*%s>", hostList->counter[i], hostList->names[i]); prtLineWUF(prline); } } else { for (i = 0; i < job->numExHosts; i++) { sprintf(prline, " <%s>", job->exHosts[i]); prtLineWUF(prline); } } if (job->execHome && strcmp (job->execHome, "")) { sprintf(prline, ", %s <%s>", I18N(615, "Execution Home"), /* catgets 615 */ job->execHome); prtLineWUF(prline); } if (job->execCwd && strcmp (job->execCwd, "")) { sprintf(prline, ", %s <%s>", I18N(616, "Execution CWD"), /* catgets 616 */ job->execCwd); prtLineWUF(prline); } if (job->execUsername && strcmp(job->execUsername, "") && strcmp(job->user, job->execUsername)) { sprintf(prline, ", %s <%s>", I18N(617, "Execution user name"), /* catgets 617 */ job->execUsername); prtLineWUF(prline); } sprintf(prline, ";\n"); prtLineWUF(prline); } }
void prtHeader(struct jobInfoEnt *job, int prt_q, int tFormat) { char prline[MAXLINELEN]; if (!tFormat) { sprintf(prline, "\nJob%s <%s>,", uf_format?"":" Id", lsb_jobid2str(job->jobId)); prtLineWUF(prline); if (job->submit.options & SUB_JOB_NAME) { char *jobName, *pos; jobName = job->submit.jobName; if ((pos = strchr(jobName, '[')) && LSB_ARRAY_IDX(job->jobId)) { *pos = '\0'; sprintf(jobName, "%s[%d]", jobName, LSB_ARRAY_IDX(job->jobId)); } sprintf(prline, " Job Name <%s>,", jobName); prtLineWUF(prline); } } if (tFormat) { sprintf(prline, ","); prtLine(prline); } sprintf(prline, " User <%s>,", job->user); prtLineWUF(prline); if (lsbMode_ & LSB_MODE_BATCH) { sprintf(prline, " Project <%s>,", job->submit.projectName); prtLineWUF(prline); } if (job->submit.userGroup && job->submit.userGroup[0] != '\0') { sprintf(prline, " User Group <%s>,", job->submit.userGroup); prtLineWUF(prline); } if (job->submit.options & SUB_MAIL_USER) { sprintf(prline, " Mail <%s>,", job->submit.mailUser); prtLineWUF(prline); } if (prt_q) { sprintf(prline, " Status <%s>, Queue <%s>,", get_status(job), job->submit.queue); prtLineWUF(prline); } /* Interactive job */ if (job->submit.options & SUB_INTERACTIVE) { sprintf(prline, " Interactive"); if (job->submit.options & SUB_PTY) { strcat(prline, " pseudo-terminal"); if (job->submit.options & (SUB_PTY_SHELL)) strcat(prline, " shell"); } strcat(prline, " mode,"); prtLineWUF(prline); } if (job->jobPriority > 0) { sprintf(prline, " Job Priority <%d>,", job->jobPriority); prtLineWUF(prline); } if (job->submit.options2 & (SUB2_JOB_CMD_SPOOL)) { if (tFormat) sprintf(prline, " Command(Spooled) <%s>", job->submit.command); else sprintf(prline, " Command(Spooled) <%s>", job->submit.command); } else { if (tFormat) sprintf(prline, " Command <%s>", job->submit.command); else sprintf(prline, " Command <%s>", job->submit.command); } prtLineWUF(prline); if (job->submit.options2 & SUB2_JOB_GROUP) { sprintf(prline, ", Job Group <%s>", job->submit.job_group); 
prtLineWUF(prline); } sprintf(prline, "\n"); prtLineWUF(prline); }
void prtHeader(struct jobInfoEnt *job, int prt_q, int tFormat) { char prline[MAXLINELEN]; if (!tFormat) { sprintf(prline, "\n%s <%s>,", I18N_Job, lsb_jobid2str(job->jobId)); prtLine(prline); if (job->submit.options & SUB_JOB_NAME) { char *jobName, *pos; jobName = job->submit.jobName; if ((pos = strchr(jobName, '[')) && LSB_ARRAY_IDX(job->jobId)) { *pos = '\0'; sprintf(jobName, "%s[%d]", jobName, LSB_ARRAY_IDX(job->jobId)); } sprintf(prline, (_i18n_msg_get(ls_catd,NL_SETN,552, " Job Name <%s>,")), jobName); /* catgets 552 */ prtLine(prline); } } if (tFormat) { sprintf(prline, ","); prtLine(prline); } sprintf(prline, " %s <%s>,", I18N_User, job->user); prtLine(prline); if (lsbMode_ & LSB_MODE_BATCH) { sprintf(prline, (_i18n_msg_get(ls_catd,NL_SETN,554, " Project <%s>,")), job->submit.projectName); /* catgets 554 */ prtLine(prline); } if (job->submit.options & SUB_MAIL_USER) { sprintf(prline, (_i18n_msg_get(ls_catd,NL_SETN,556, " Mail <%s>,")), job->submit.mailUser); /* catgets 556 */ prtLine(prline); } if (prt_q) { sprintf(prline, (_i18n_msg_get(ls_catd,NL_SETN,557, " Status <%s>, Queue <%s>,")), /* catgets 557 */ get_status(job), job->submit.queue); prtLine(prline); } if (job->submit.options & SUB_INTERACTIVE) { sprintf(prline, (_i18n_msg_get(ls_catd,NL_SETN,558, " Interactive"))); /* catgets 558 */ if (job->submit.options & SUB_PTY) { strcat(prline, (_i18n_msg_get(ls_catd,NL_SETN,559, " pseudo-terminal"))); /* catgets 559 */ if (job->submit.options & (SUB_PTY_SHELL)) strcat(prline, (_i18n_msg_get(ls_catd,NL_SETN,560, " shell"))); /* catgets 560 */ } strcat(prline, (_i18n_msg_get(ls_catd,NL_SETN,561, " mode,"))); /* catgets 561 */ prtLine(prline); } if ( job->jobPriority > 0 ) { sprintf(prline, " %s <%d>,", I18N_Job_Priority, job->jobPriority); prtLine(prline); } if (job->submit.options2 & (SUB2_JOB_CMD_SPOOL)) { if (tFormat) sprintf(prline, " %s(Spooled) <%s>", I18N_Command, job->submit.command); else sprintf(prline, " %s(Spooled) <%s>\n", I18N_Command, 
job->submit.command); } else { if (tFormat) sprintf(prline, " %s <%s>", I18N_Command, job->submit.command); else sprintf(prline, " %s <%s>\n", I18N_Command, job->submit.command); } prtLine(prline); }