static void post_chkpt(struct work_task *ptask)
  {
  job                  *pjob;
  struct batch_request *preq;

  preq = (struct batch_request *)ptask->wt_parm1;

  /* the request must be checked before it is dereferenced to find the job */

  if (preq == NULL)
    return;

  pjob = find_job(preq->rq_ind.rq_hold.rq_orig.rq_objname);

  if (pjob == NULL)
    return;

  if (preq->rq_reply.brp_code == 0)
    {
    /* checkpointed ok */

    if (preq->rq_reply.brp_auxcode)
      {
      /* chkpt can be moved */

      pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_CHKPT;
      pjob->ji_qs.ji_svrflags |= JOB_SVFLG_ChkptMig;
      pjob->ji_modified = 1;

      (void)job_save(pjob, SAVEJOB_QUICK);
      }

    account_record(PBS_ACCT_CHKPNT, pjob, (char *)0);
    }
  else
    {
    /* need to try rerun if possible or just abort the job */

    if (preq->rq_reply.brp_code != PBSE_CKPBSY)
      {
      pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_CHKPT;
      pjob->ji_qs.ji_substate = JOB_SUBSTATE_RUNNING;
      pjob->ji_modified = 1;

      (void)job_save(pjob, SAVEJOB_QUICK);

      if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING)
        rerun_or_kill(pjob, msg_on_shutdown);
      }
    }

  release_req(ptask);
  }
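/*
 * Illustrative sketch (not part of the original source): the deferred-reply
 * pattern post_chkpt() relies on.  When a request is relayed to MOM it is
 * stashed in the work task's wt_parm1, and the completion callback recovers
 * and validates it before touching the job.  The names below are simplified
 * stand-ins, not the real TORQUE/PBS structures.
 */
struct demo_work_task
  {
  void  *wt_parm1;                          /* opaque payload: the request */
  void (*wt_func)(struct demo_work_task *); /* completion callback         */
  };

static void demo_dispatch(struct demo_work_task *t)
  {
  /* validate the payload here, once, so every callback can assume it */
  if ((t != NULL) && (t->wt_parm1 != NULL) && (t->wt_func != NULL))
    t->wt_func(t);                          /* e.g. post_chkpt */
  }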
job *job_recov(

  char *filename)  /* I */   /* pathname to job save file */

  {
  job  *pj;
  char  namebuf[MAXPATHLEN];
  char  log_buf[LOCAL_LOG_BUF_SIZE];
  int   rc;

  pj = job_alloc();  /* allocate & initialize job structure space */

  if (pj == NULL)
    {
    /* FAILURE - cannot alloc memory */

    return(NULL);
    }

  snprintf(namebuf, MAXPATHLEN, "%s%s", path_jobs, filename);  /* job directory path, filename */

  size_t logBufLen = sizeof(log_buf);

  /* try the XML format first; fall back to the old binary format only
   * when the file isn't valid XML */

  if ((rc = job_recov_xml(namebuf, &pj, log_buf, logBufLen)) == PBSE_INVALID_SYNTAX)
    rc = job_recov_binary(namebuf, &pj, log_buf, logBufLen);

  if (rc == PBSE_NONE)
    rc = set_array_job_ids(&pj, log_buf, logBufLen);

  if (rc != PBSE_NONE)
    {
    if (rc == -1)
      {
      log_err(errno, __func__, log_buf);

#ifndef PBS_MOM
      unlock_ji_mutex(pj, __func__, "1", LOGLEVEL);
      free(pj->ji_mutex);
#endif

      free((char *)pj);
      }

    /* sometimes pjob is freed by abt_job() */

    return(NULL);
    }

  pj->ji_commit_done = 1;

  /* all done recovering the job */

#ifdef PBS_MOM
  job_save(pj, SAVEJOB_FULL, (multi_mom == 0) ? 0 : pbs_rm_port);
#else
  job_save(pj, SAVEJOB_FULL, 0);
#endif

  return(pj);
  }  /* END job_recov() */
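/*
 * Minimal sketch of the path construction used above.  snprintf() reports
 * the length it wanted to write, so truncation of "<path_jobs><filename>"
 * is detectable; the MAXPATHLEN fallback value is an assumption for the
 * sake of a self-contained example.
 */
#include <stdio.h>

#ifndef MAXPATHLEN
#define MAXPATHLEN 4096
#endif

static int demo_job_path(char *buf, const char *dir, const char *file)
  {
  int n = snprintf(buf, MAXPATHLEN, "%s%s", dir, file);

  if ((n < 0) || (n >= MAXPATHLEN))
    return(-1);  /* would not fit; caller should treat as failure */

  return(0);
  }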
void req_checkpointjob(

  struct batch_request *preq)

  {
  job       *pjob;
  int        rc;
  attribute *pattr;

  if ((pjob = chk_job_request(preq->rq_ind.rq_manager.rq_objname, preq)) == NULL)
    {
    return;
    }

  if (is_cloud_job(pjob))
    {
    rc = PBSE_CLOUD_REQUEST;

    req_reject(rc, 0, preq, NULL, "cloud jobs cannot be checkpointed");

    /* the request has been rejected and must not fall through */

    return;
    }

  pattr = &pjob->ji_wattr[(int)JOB_ATR_checkpoint];

  if ((pjob->ji_qs.ji_state == JOB_STATE_RUNNING) &&
      ((pattr->at_flags & ATR_VFLAG_SET) &&
       ((csv_find_string(pattr->at_val.at_str, "s") != NULL) ||
        (csv_find_string(pattr->at_val.at_str, "c") != NULL) ||
        (csv_find_string(pattr->at_val.at_str, "enabled") != NULL))))
    {
    /* have MOM attempt checkpointing */

    if ((rc = relay_to_mom(pjob->ji_qs.ji_un.ji_exect.ji_momaddr, preq, process_checkpoint_reply)) != 0)
      {
      req_reject(rc, 0, preq, NULL, NULL);
      }
    else
      {
      pjob->ji_qs.ji_svrflags |= JOB_SVFLG_CHECKPOINT_FILE;

      job_save(pjob, SAVEJOB_QUICK);

      /* give the log a definite message instead of whatever was left
       * in the global buffer */

      sprintf(log_buffer, "checkpoint requested for job %s",
              pjob->ji_qs.ji_jobid);

      LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer);
      }
    }
  else
    {
    /* Job does not have checkpointing enabled, so reject the request */

    sprintf(log_buffer, "checkpoint request rejected: job %s is not checkpointable",
            pjob->ji_qs.ji_jobid);

    LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer);

    req_reject(PBSE_IVALREQ, 0, preq, NULL, "job is not checkpointable");
    }
  }  /* END req_checkpointjob() */
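/*
 * Sketch of the checkpoint-enable test above, using a simplified stand-in
 * for csv_find_string(): checkpointing is attempted only when the job's
 * checkpoint attribute contains "s", "c", or "enabled" as a comma-separated
 * token.  demo_csv_has() is illustrative, not the real parser (it ignores
 * whitespace and "key=value" forms).
 */
#include <string.h>

static int demo_csv_has(const char *csv, const char *tok)
  {
  size_t      len = strlen(tok);
  const char *p   = csv;

  while (p != NULL)
    {
    if ((strncmp(p, tok, len) == 0) &&
        ((p[len] == ',') || (p[len] == '\0')))
      return(1);

    p = strchr(p, ',');

    if (p != NULL)
      p++;  /* step past the comma to the next token */
    }

  return(0);
  }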
/**
 * @brief
 *  chk_array_doneness - check if all subjobs are expired and if so,
 *  purge the Array Job itself
 *
 * @param[in,out] parent - pointer to parent job.
 *
 * @return void
 */
void chk_array_doneness(job *parent)
  {
  char acctbuf[40];
  int  e;
  int  i;
  struct ajtrkhd *ptbl = parent->ji_ajtrk;

  if (ptbl == NULL)
    return;

  if (ptbl->tkm_flags & TKMFLG_NO_DELETE)
    return;  /* delete of subjobs in progress, don't purge the array yet */

  if (ptbl->tkm_subjsct[JOB_STATE_QUEUED] + ptbl->tkm_subjsct[JOB_STATE_RUNNING] + ptbl->tkm_subjsct[JOB_STATE_EXITING] == 0)
    {
    /* Array Job all done, do simple eoj processing */

    for (e = i = 0; i < ptbl->tkm_ct; ++i)
      {
      if (ptbl->tkm_tbl[i].trk_error > 0)
        e = 1;
      else if (ptbl->tkm_tbl[i].trk_error < 0)
        {
        e = 2;
        break;
        }
      }

    parent->ji_qs.ji_un_type = JOB_UNION_TYPE_EXEC;
    parent->ji_qs.ji_un.ji_exect.ji_momaddr = 0;
    parent->ji_qs.ji_un.ji_exect.ji_momport = 0;
    parent->ji_qs.ji_un.ji_exect.ji_exitstat = e;

    check_block(parent, "");

    if (parent->ji_qs.ji_state == JOB_STATE_BEGUN)
      {
      /* if BEGUN, issue 'E' account record */

      sprintf(acctbuf, msg_job_end_stat, e);

      account_job_update(parent, PBS_ACCT_LAST);
      account_jobend(parent, acctbuf, PBS_ACCT_END);

      svr_mailowner(parent, MAIL_END, MAIL_NORMAL, acctbuf);
      }

    if (parent->ji_wattr[(int)JOB_ATR_depend].at_flags & ATR_VFLAG_SET)
      (void)depend_on_term(parent);

    /* check if the history of the finished job can be saved or it needs to be purged */

    svr_saveorpurge_finjobhist(parent);
    }
  else
    {
    (void)job_save(parent, SAVEJOB_FULL);
    }
  }
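/*
 * Sketch of the exit-status folding loop in chk_array_doneness(): the
 * array's recorded exit status is 0 when every subjob succeeded, 1 when
 * any subjob returned nonzero, and 2 (which dominates, hence the break)
 * when any subjob was aborted, i.e. has a negative trk_error.
 */
static int demo_fold_exitstat(const int *trk_error, int ct)
  {
  int e = 0;
  int i;

  for (i = 0; i < ct; ++i)
    {
    if (trk_error[i] < 0)
      return(2);  /* an aborted subjob settles the result */

    if (trk_error[i] > 0)
      e = 1;      /* remember the failure, keep scanning */
    }

  return(e);
  }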
int shutdown_preempt_chkpt(job *pjob)
  {
  struct batch_request *phold;
  attribute             temp;
  void                (*func)(struct work_task *);

  long *hold_val = NULL;
  long  old_hold = 0;

  phold = alloc_br(PBS_BATCH_HoldJob);

  if (phold == NULL)
    return (PBSE_SYSTEM);

  temp.at_flags = ATR_VFLAG_SET;
  temp.at_type = job_attr_def[(int)JOB_ATR_hold].at_type;
  temp.at_user_encoded = NULL;
  temp.at_priv_encoded = NULL;
  temp.at_val.at_long = HOLD_s;

  phold->rq_perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR;
  (void)strcpy(phold->rq_ind.rq_hold.rq_orig.rq_objname, pjob->ji_qs.ji_jobid);

  CLEAR_HEAD(phold->rq_ind.rq_hold.rq_orig.rq_attr);

  if (job_attr_def[(int)JOB_ATR_hold].at_encode(&temp,
      &phold->rq_ind.rq_hold.rq_orig.rq_attr,
      job_attr_def[(int)JOB_ATR_hold].at_name,
      NULL,
      ATR_ENCODE_CLIENT, NULL) < 0)
    {
    free_br(phold);  /* don't leak the request on the error path */
    return (PBSE_SYSTEM);
    }

  /* capture the current hold value so the failure path below restores a
   * defined value; hold_val was previously dereferenced while still NULL */

  hold_val = &pjob->ji_wattr[(int)JOB_ATR_hold].at_val.at_long;
  old_hold = *hold_val;

  phold->rq_extra = pjob;

  func = post_chkpt;

  if (relay_to_mom(pjob, phold, func) == 0)
    {
    if (pjob->ji_qs.ji_state == JOB_STATE_TRANSIT)
      svr_setjobstate(pjob, JOB_STATE_RUNNING, JOB_SUBSTATE_RUNNING);

    pjob->ji_qs.ji_svrflags |= (JOB_SVFLG_HASRUN | JOB_SVFLG_CHKPT | JOB_SVFLG_HASHOLD);
    pjob->ji_modified = 1;
    (void)job_save(pjob, SAVEJOB_QUICK);

    return (0);
    }
  else
    {
    *hold_val = old_hold;  /* reset to the old value */
    return (-1);
    }
  }
int attempt_to_queue_job_on_mom( char *job_id, int con, char *job_destin, bool &change_substate_on_attempt_to_queue, tlist_head &attrl, bool &timeout, bool need_to_send_job_script, bool job_has_run, unsigned long job_momaddr, const char *script_name, char *stdout_path, char *stderr_path, char *chkpt_path, int type, int *my_err) { job *pjob = NULL; int rc; if (update_substate_if_needed(job_id, change_substate_on_attempt_to_queue) != PBSE_NONE) return(LOCUTION_FAIL); if ((rc = queue_job_on_mom(con, my_err, job_id, job_destin, attrl, timeout, type)) != PBSE_NONE) return(rc); if ((rc = send_job_script_if_needed(con, need_to_send_job_script, script_name, job_id)) != PBSE_NONE) return(rc); if ((rc = send_files_if_needed(con, job_id, type, job_has_run, job_momaddr, stdout_path, stderr_path, chkpt_path)) != PBSE_NONE) return(rc); if ((pjob = svr_find_job(job_id, TRUE)) != NULL) { pjob->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUTCM; job_save(pjob, SAVEJOB_QUICK, 0); unlock_ji_mutex(pjob, __func__, "5", LOGLEVEL); } else return(LOCUTION_FAIL); return(LOCUTION_SUCCESS); } /* END attempt_to_queue_job_on_mom() */
static int shutdown_chkpt(job *pjob)
  {
  struct batch_request *phold;
  attribute             temp;

  phold = alloc_br(PBS_BATCH_HoldJob);

  if (phold == (struct batch_request *)0)
    return (PBSE_SYSTEM);

  temp.at_flags = ATR_VFLAG_SET;
  temp.at_type = job_attr_def[(int)JOB_ATR_hold].at_type;
  temp.at_user_encoded = NULL;
  temp.at_priv_encoded = NULL;
  temp.at_val.at_long = HOLD_s;

  phold->rq_perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR;
  (void)strcpy(phold->rq_ind.rq_hold.rq_orig.rq_objname, pjob->ji_qs.ji_jobid);

  CLEAR_HEAD(phold->rq_ind.rq_hold.rq_orig.rq_attr);

  if (job_attr_def[(int)JOB_ATR_hold].at_encode(&temp,
      &phold->rq_ind.rq_hold.rq_orig.rq_attr,
      job_attr_def[(int)JOB_ATR_hold].at_name,
      (char *)0,
      ATR_ENCODE_CLIENT, NULL) < 0)
    {
    free_br(phold);  /* don't leak the request on the error path */
    return (PBSE_SYSTEM);
    }

  if (relay_to_mom(pjob, phold, post_chkpt) == 0)
    {
    if (pjob->ji_qs.ji_state == JOB_STATE_TRANSIT)
      svr_setjobstate(pjob, JOB_STATE_RUNNING, JOB_SUBSTATE_RUNNING);

    pjob->ji_qs.ji_substate = JOB_SUBSTATE_RERUN;
    pjob->ji_qs.ji_svrflags |= JOB_SVFLG_HASRUN;
    pjob->ji_qs.ji_svrflags |= JOB_SVFLG_CHKPT;
    pjob->ji_modified = 1;
    (void)job_save(pjob, SAVEJOB_QUICK);

    return (0);
    }
  else
    return (-1);
  }
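/*
 * Sketch: the ji_svrflags bookkeeping shared by shutdown_chkpt(),
 * shutdown_preempt_chkpt() and post_chkpt() is plain bit manipulation.
 * The flag values here are illustrative, not the real JOB_SVFLG_*
 * constants.
 */
#define DEMO_SVFLG_HASRUN   0x01
#define DEMO_SVFLG_CHKPT    0x02
#define DEMO_SVFLG_ChkptMig 0x04

static void demo_mark_checkpointed(unsigned int *svrflags, int migratable)
  {
  *svrflags |= (DEMO_SVFLG_HASRUN | DEMO_SVFLG_CHKPT);

  if (migratable)
    {
    /* post_chkpt() trades CHKPT for ChkptMig when MOM reports that the
     * checkpoint file can be moved with the job */
    *svrflags &= ~DEMO_SVFLG_CHKPT;
    *svrflags |=  DEMO_SVFLG_ChkptMig;
    }
  }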
int update_substate_if_needed( char *job_id, bool &change_substate_on_attempt_to_queue) { if (change_substate_on_attempt_to_queue == true) { job *pjob = svr_find_job(job_id, TRUE); if (pjob != NULL) { mutex_mgr job_mutex(pjob->ji_mutex, true); pjob->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUT; job_save(pjob, SAVEJOB_QUICK, 0); } else { return(PBSE_JOB_RECYCLED); } } return(PBSE_NONE); } /* END update_substate_if_needed() */
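/*
 * Sketch of the RAII idiom behind mutex_mgr in update_substate_if_needed():
 * the job mutex is released automatically on every return path, so early
 * returns cannot leak the lock.  This is a simplified C++ stand-in, not the
 * real TORQUE class.
 */
#include <pthread.h>

class demo_mutex_mgr
  {
  pthread_mutex_t *m;

public:

  /* 'locked' mirrors the mutex_mgr(pjob->ji_mutex, true) usage above:
   * true means the caller already holds the lock */
  demo_mutex_mgr(pthread_mutex_t *mtx, bool locked) : m(mtx)
    {
    if (!locked)
      pthread_mutex_lock(m);
    }

  ~demo_mutex_mgr()
    {
    pthread_mutex_unlock(m);  /* runs on every return path */
    }
  };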
void stat_update( struct batch_request *preq, struct stat_cntl *cntl) { job *pjob; struct batch_reply *preply; struct brp_status *pstatus; svrattrl *sattrl; int oldsid; int bad = 0; time_t time_now = time(NULL); char *msg_ptr = NULL; char log_buf[LOCAL_LOG_BUF_SIZE]; preply = &preq->rq_reply; if (preply->brp_un.brp_txt.brp_str != NULL) { msg_ptr = strstr(preply->brp_un.brp_txt.brp_str, PBS_MSG_EQUAL); if (msg_ptr != NULL) msg_ptr += strlen(PBS_MSG_EQUAL); } if (preply->brp_choice == BATCH_REPLY_CHOICE_Status) { pstatus = (struct brp_status *)GET_NEXT(preply->brp_un.brp_status); while (pstatus != NULL) { if ((pjob = svr_find_job(pstatus->brp_objname, FALSE)) != NULL) { mutex_mgr job_mutex(pjob->ji_mutex, true); sattrl = (svrattrl *)GET_NEXT(pstatus->brp_attr); oldsid = pjob->ji_wattr[JOB_ATR_session_id].at_val.at_long; modify_job_attr( pjob, sattrl, ATR_DFLAG_MGWR | ATR_DFLAG_SvWR, &bad); if (oldsid != pjob->ji_wattr[JOB_ATR_session_id].at_val.at_long) { /* first save since running job (or the sid has changed), */ /* must save session id */ job_save(pjob, SAVEJOB_FULL, 0); } #ifdef USESAVEDRESOURCES else { /* save so we can recover resources used */ job_save(pjob, SAVEJOB_FULL, 0); } #endif /* USESAVEDRESOURCES */ pjob->ji_momstat = time_now; } pstatus = (struct brp_status *)GET_NEXT(pstatus->brp_stlink); } /* END while (pstatus != NULL) */ } /* END if (preply->brp_choice == BATCH_REPLY_CHOICE_Status) */ else if ((preply->brp_choice == BATCH_REPLY_CHOICE_Text) && (preply->brp_code == PBSE_UNKJOBID) && (msg_ptr != NULL) && (!strcmp(msg_ptr, preq->rq_ind.rq_status.rq_id))) { /* we sent a stat request, but mom says it doesn't know anything about the job */ if ((pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE)) != NULL) { /* job really isn't running any more - mom doesn't know anything about it this can happen if a diskless node reboots and the mom_priv/jobs directory is cleared, set its state to queued so job_abt doesn't think it is still running */ mutex_mgr job_mutex(pjob->ji_mutex, true); snprintf(log_buf, sizeof(log_buf), "mother superior no longer recognizes %s as a valid job, aborting. Last reported time was %ld", preq->rq_ind.rq_status.rq_id, pjob->ji_last_reported_time); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf); svr_setjobstate(pjob, JOB_STATE_QUEUED, JOB_SUBSTATE_ABORT, FALSE); rel_resc(pjob); job_mutex.set_unlock_on_exit(false); job_abt(&pjob, "Job does not exist on node"); /* TODO, if the job is rerunnable we should set its state back to queued */ } } else { snprintf(log_buf, sizeof(log_buf), "Poll job request failed for job %s", preq->rq_ind.rq_status.rq_id); log_err(preply->brp_code, __func__, log_buf); } cntl->sc_conn = -1; if (cntl->sc_post) cntl->sc_post(cntl); /* continue where we left off */ /* If sc_post has a value it is: * req_stat_job_step2 * if so, it expects cntl to be free'd after the call */ free(cntl); /* a bit of a kludge but its saves an extra func */ return; } /* END stat_update() */
int modify_job( void **j, /* O */ svrattrl *plist, /* I */ struct batch_request *preq, /* I */ int checkpoint_req, /* I */ int flag) /* I */ { int bad = 0; int i; int newstate; int newsubstate; resource_def *prsd; int rc; int sendmom = 0; int copy_checkpoint_files = FALSE; char log_buf[LOCAL_LOG_BUF_SIZE]; struct batch_request *dup_req = NULL; job *pjob = (job *)*j; if (pjob == NULL) { sprintf(log_buf, "job structure is NULL"); log_err(PBSE_IVALREQ, __func__, log_buf); return(PBSE_IVALREQ); } /* cannot be in exiting or transit, exiting has already been checked */ if (pjob->ji_qs.ji_state == JOB_STATE_TRANSIT) { /* FAILURE */ snprintf(log_buf,sizeof(log_buf), "Cannot modify job '%s' in transit\n", pjob->ji_qs.ji_jobid); log_err(PBSE_BADSTATE, __func__, log_buf); return(PBSE_BADSTATE); } if (((checkpoint_req == CHK_HOLD) || (checkpoint_req == CHK_CONT)) && (pjob->ji_qs.ji_substate == JOB_SUBSTATE_RUNNING)) { /* May need to request copy of the checkpoint file from mom */ copy_checkpoint_files = TRUE; if (checkpoint_req == CHK_HOLD) { sprintf(log_buf,"setting jobsubstate for %s to RERUN\n", pjob->ji_qs.ji_jobid); pjob->ji_qs.ji_substate = JOB_SUBSTATE_RERUN; job_save(pjob, SAVEJOB_QUICK, 0); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf); /* remove checkpoint restart file if there is one */ if (pjob->ji_wattr[JOB_ATR_restart_name].at_flags & ATR_VFLAG_SET) { cleanup_restart_file(pjob); } } } /* if job is running, special checks must be made */ /* NOTE: must determine if job exists down at MOM - this will occur if job is running, job is held, or job was held and just barely released (ie qhold/qrls) */ /* COMMENTED OUT BY JOSH B IN 2.3 DUE TO MAJOR PROBLEMS w/ CUSTOMERS * --FIX and uncomment once we know what is really going on. * * We now know that ji_destin gets set on a qmove and that the mom does not * have the job at that point. * if ((pjob->ji_qs.ji_state == JOB_STATE_RUNNING) || ((pjob->ji_qs.ji_state == JOB_STATE_HELD) && (pjob->ji_qs.ji_destin[0] != '\0')) || ((pjob->ji_qs.ji_state == JOB_STATE_QUEUED) && (pjob->ji_qs.ji_destin[0] != '\0'))) */ if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) { while (plist != NULL) { /* is the pbs_attribute modifiable in RUN state ? 
*/ i = find_attr(job_attr_def, plist->al_name, JOB_ATR_LAST); if ((i < 0) || ((job_attr_def[i].at_flags & ATR_DFLAG_ALTRUN) == 0)) { /* FAILURE */ snprintf(log_buf,sizeof(log_buf), "Cannot modify attribute '%s' while running\n", plist->al_name); log_err(PBSE_MODATRRUN, __func__, log_buf); return PBSE_MODATRRUN; } /* NOTE: only explicitly specified job attributes are routed down to MOM */ if (i == JOB_ATR_resource) { /* is the specified resource modifiable while */ /* the job is running */ prsd = find_resc_def(svr_resc_def, plist->al_resc, svr_resc_size); if (prsd == NULL) { /* FAILURE */ snprintf(log_buf,sizeof(log_buf), "Unknown attribute '%s'\n", plist->al_name); log_err(PBSE_UNKRESC, __func__, log_buf); return(PBSE_UNKRESC); } if ((prsd->rs_flags & ATR_DFLAG_ALTRUN) == 0) { /* FAILURE */ snprintf(log_buf,sizeof(log_buf), "Cannot modify attribute '%s' while running\n", plist->al_name); log_err(PBSE_MODATRRUN, __func__, log_buf); return(PBSE_MODATRRUN); } sendmom = 1; } /* else if ((i == JOB_ATR_checkpoint_name) || (i == JOB_ATR_variables)) { sendmom = 1; } */ plist = (svrattrl *)GET_NEXT(plist->al_link); } } /* END if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) */ /* modify the job's attributes */ bad = 0; plist = (svrattrl *)GET_NEXT(preq->rq_ind.rq_modify.rq_attr); rc = modify_job_attr(pjob, plist, preq->rq_perm, &bad); if (rc) { /* FAILURE */ snprintf(log_buf,sizeof(log_buf), "Cannot set attributes for job '%s'\n", pjob->ji_qs.ji_jobid); log_err(rc, __func__, log_buf); if (rc == PBSE_JOBNOTFOUND) *j = NULL; return(rc); } /* Reset any defaults resource limit which might have been unset */ set_resc_deflt(pjob, NULL, FALSE); /* if job is not running, may need to change its state */ if (pjob->ji_qs.ji_state != JOB_STATE_RUNNING) { svr_evaljobstate(pjob, &newstate, &newsubstate, 0); svr_setjobstate(pjob, newstate, newsubstate, FALSE); } else { job_save(pjob, SAVEJOB_FULL, 0); } sprintf(log_buf, msg_manager, msg_jobmod, preq->rq_user, preq->rq_host); log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buf); /* if a resource limit changed for a running job, send to MOM */ if (sendmom) { /* if the NO_MOM_RELAY flag is set the calling function will call relay_to_mom so we do not need to do it here */ if (flag != NO_MOM_RELAY) { /* The last number is unused unless this is an array */ if ((rc = copy_batchrequest(&dup_req, preq, 0, -1)) != 0) { } /* The dup_req is freed in relay_to_mom (failure) * or in issue_Drequest (success) */ else if ((rc = relay_to_mom(&pjob, dup_req, post_modify_req))) { if (pjob != NULL) { snprintf(log_buf,sizeof(log_buf), "Unable to relay information to mom for job '%s'\n", pjob->ji_qs.ji_jobid); log_err(rc, __func__, log_buf); } return(rc); /* unable to get to MOM */ } } return(PBSE_RELAYED_TO_MOM); } if (copy_checkpoint_files) { struct batch_request *momreq = 0; momreq = cpy_checkpoint(momreq, pjob, JOB_ATR_checkpoint_name, CKPT_DIR_OUT); if (momreq != NULL) { /* have files to copy */ momreq->rq_extra = strdup(pjob->ji_qs.ji_jobid); /* The momreq is freed in relay_to_mom (failure) * or in issue_Drequest (success) */ if (checkpoint_req == CHK_HOLD) { rc = relay_to_mom(&pjob, momreq, chkpt_xfr_hold); } else { rc = relay_to_mom(&pjob, momreq, chkpt_xfr_done); } if (rc != 0) { if (pjob != NULL) { snprintf(log_buf,sizeof(log_buf), "Unable to relay information to mom for job '%s'\n", pjob->ji_qs.ji_jobid); log_err(rc, __func__, log_buf); } return(PBSE_NONE); /* come back when mom replies */ } } else { log_err(-1, __func__, "Failed to get batch request"); } } 
return(PBSE_NONE); } /* END modify_job() */
void stat_update( struct batch_request *preq, struct stat_cntl *cntl) { job *pjob; struct batch_reply *preply; struct brp_status *pstatus; svrattrl *sattrl; int oldsid; int bad = 0; time_t time_now = time(NULL); preply = &preq->rq_reply; if (preply->brp_choice == BATCH_REPLY_CHOICE_Status) { pstatus = (struct brp_status *)GET_NEXT(preply->brp_un.brp_status); while (pstatus != NULL) { if ((pjob = svr_find_job(pstatus->brp_objname, FALSE)) != NULL) { sattrl = (svrattrl *)GET_NEXT(pstatus->brp_attr); oldsid = pjob->ji_wattr[JOB_ATR_session_id].at_val.at_long; modify_job_attr( pjob, sattrl, ATR_DFLAG_MGWR | ATR_DFLAG_SvWR, &bad); if (oldsid != pjob->ji_wattr[JOB_ATR_session_id].at_val.at_long) { /* first save since running job (or the sid has changed), */ /* must save session id */ job_save(pjob, SAVEJOB_FULL, 0); } #ifdef USESAVEDRESOURCES else { /* save so we can recover resources used */ job_save(pjob, SAVEJOB_FULL, 0); } #endif /* USESAVEDRESOURCES */ pjob->ji_momstat = time_now; unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); } pstatus = (struct brp_status *)GET_NEXT(pstatus->brp_stlink); } /* END while (pstatus != NULL) */ } /* END if (preply->brp_choice == BATCH_REPLY_CHOICE_Status) */ else { if (preply->brp_code == PBSE_UNKJOBID) { /* we sent a stat request, but mom says it doesn't know anything about the job */ if ((pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE)) != NULL) { /* job really isn't running any more - mom doesn't know anything about it this can happen if a diskless node reboots and the mom_priv/jobs directory is cleared, set its state to queued so job_abt doesn't think it is still running */ svr_setjobstate(pjob, JOB_STATE_QUEUED, JOB_SUBSTATE_ABORT, FALSE); rel_resc(pjob); job_abt(&pjob, "Job does not exist on node"); /* TODO, if the job is rerunnable we should set its state back to queued */ } } } cntl->sc_conn = -1; /* MUTSU - Unlock job here? */ if (cntl->sc_post) cntl->sc_post(cntl); /* continue where we left off */ /* If sc_post has a value it is: * req_stat_job_step2 * if so, it expects cntl to be free'd after the call */ free(cntl); /* a bit of a kludge but its saves an extra func */ return; } /* END stat_update() */
void scan_for_terminated(void) { static char id[] = "scan_for_terminated"; int exiteval = 0; pid_t pid; job *pjob; task *ptask = NULL; int statloc; unsigned int momport = 0; int tcount; if (LOGLEVEL >= 7) { log_record( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, id, "entered"); } /* update the latest intelligence about the running jobs; */ /* must be done before we reap the zombies, else we lose the info */ termin_child = 0; if (mom_get_sample() == PBSE_NONE) { pjob = (job *)GET_PRIOR(svr_alljobs); while (pjob != NULL) { mom_set_use(pjob); pjob = (job *)GET_PRIOR(pjob->ji_alljobs); } } /* Now figure out which task(s) have terminated (are zombies) */ /* NOTE: does a job's tasks include its epilog? */ while ((pid = waitpid(-1, &statloc, WNOHANG)) > 0) { pjob = (job *)GET_PRIOR(svr_alljobs); while (pjob != NULL) { /* * see if process was a child doing a special * function for MOM */ if (LOGLEVEL >= 7) { snprintf(log_buffer, 1024, "checking job w/subtask pid=%d (child pid=%d)", pjob->ji_momsubt, pid); LOG_EVENT( PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); } if (pid == pjob->ji_momsubt) { if (LOGLEVEL >= 7) { snprintf(log_buffer, 1024, "found match with job subtask for pid=%d", pid); LOG_EVENT( PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); } break; } /* look for task */ ptask = (task *)GET_NEXT(pjob->ji_tasks); /* locate task with associated process id */ tcount = 0; while (ptask != NULL) { if (ptask->ti_qs.ti_sid == pid) { if (LOGLEVEL >= 7) { snprintf(log_buffer, 1024, "found match with job task %d for pid=%d", tcount, pid); LOG_EVENT( PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); } break; } ptask = (task *)GET_NEXT(ptask->ti_jobtask); tcount++; } /* END while (ptask) */ if (ptask != NULL) { /* pid match located - break out of job loop */ break; } pjob = (job *)GET_PRIOR(pjob->ji_alljobs); } /* END while (pjob != NULL) */ if (pjob == NULL) { if (LOGLEVEL >= 1) { sprintf(log_buffer, "pid %d not tracked, exitcode=%d", pid, statloc); log_record( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, id, log_buffer); } continue; } /* END if (pjob == NULL) */ if (WIFEXITED(statloc)) exiteval = WEXITSTATUS(statloc); else if (WIFSIGNALED(statloc)) exiteval = WTERMSIG(statloc) + 0x100; else exiteval = 1; if (pid == pjob->ji_momsubt) { /* PID matches job mom subtask */ /* NOTE: both ji_momsubt and ji_mompost normally set in routine preobit_reply() after epilog child is successfully forked */ if (pjob->ji_mompost != NULL) { if (pjob->ji_mompost(pjob, exiteval) == 0) { /* success */ pjob->ji_mompost = NULL; } } /* END if (pjob->ji_mompost != NULL) */ else { log_record( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, "job has no postprocessing routine registered"); } /* clear mom sub-task */ pjob->ji_momsubt = 0; if(multi_mom) { momport = pbs_rm_port; } job_save(pjob, SAVEJOB_QUICK, momport); continue; } /* END if (pid == pjob->ji_momsubt) */ /* what happens if mom PID is reaped before subtask? */ if (LOGLEVEL >= 2) { sprintf(log_buffer, "pid %d harvested for job %s, task %d, exitcode=%d", pid, pjob->ji_qs.ji_jobid, ptask->ti_qs.ti_task, exiteval); log_record( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, id, log_buffer); } /* where is job purged? How do we keep job from progressing in state until the obit is sent? 
*/ kill_task(ptask, SIGKILL, 0); ptask->ti_qs.ti_exitstat = exiteval; ptask->ti_qs.ti_status = TI_STATE_EXITED; task_save(ptask); sprintf(log_buffer, "%s: job %s task %d terminated, sid=%d", id, pjob->ji_qs.ji_jobid, ptask->ti_qs.ti_task, ptask->ti_qs.ti_sid); LOG_EVENT( PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); exiting_tasks = 1; } /* END while ((pid = waitpid(-1,&statloc,WNOHANG)) > 0) */ return; } /* END scan_for_terminated() */
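/*
 * Sketch of the exit-code convention used by both versions of
 * scan_for_terminated(): a normal exit reports WEXITSTATUS, death by
 * signal reports the signal number plus 0x100 so the two ranges cannot
 * collide, and anything else is folded to a generic failure of 1.
 */
#include <sys/wait.h>

static int demo_exiteval(int statloc)
  {
  if (WIFEXITED(statloc))
    return(WEXITSTATUS(statloc));

  if (WIFSIGNALED(statloc))
    return(WTERMSIG(statloc) + 0x100);

  return(1);
  }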
void req_modifyjob(struct batch_request *preq) { int add_to_am_list = 0; /* if altered during sched cycle */ int bad = 0; int jt; /* job type */ int newstate; int newsubstate; resource_def *outsideselect = NULL; job *pjob; svrattrl *plist; resource *presc; resource_def *prsd; int rc; int running = 0; int sendmom = 0; char hook_msg[HOOK_MSG_SIZE]; int mod_project = 0; pbs_sched *psched; switch (process_hooks(preq, hook_msg, sizeof(hook_msg), pbs_python_set_interrupt)) { case 0: /* explicit reject */ reply_text(preq, PBSE_HOOKERROR, hook_msg); return; case 1: /* explicit accept */ if (recreate_request(preq) == -1) { /* error */ /* we have to reject the request, as 'preq' */ /* may have been partly modified */ strcpy(hook_msg, "modifyjob event: rejected request"); log_event(PBSEVENT_ERROR, PBS_EVENTCLASS_HOOK, LOG_ERR, "", hook_msg); reply_text(preq, PBSE_HOOKERROR, hook_msg); return; } break; case 2: /* no hook script executed - go ahead and accept event*/ break; default: log_event(PBSEVENT_DEBUG2, PBS_EVENTCLASS_HOOK, LOG_INFO, "", "modifyjob event: accept req by default"); } if (pseldef == NULL) /* do one time to keep handy */ pseldef = find_resc_def(svr_resc_def, "select", svr_resc_size); pjob = chk_job_request(preq->rq_ind.rq_modify.rq_objname, preq, &jt); if (pjob == NULL) return; if ((jt == IS_ARRAY_Single) || (jt == IS_ARRAY_Range)) { req_reject(PBSE_IVALREQ, 0, preq); return; } psched = find_sched_from_sock(preq->rq_conn); /* allow scheduler to modify job */ if (psched == NULL) { /* provisioning job is not allowed to be modified */ if ((pjob->ji_qs.ji_state == JOB_STATE_RUNNING) && (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PROVISION)) { req_reject(PBSE_BADSTATE, 0, preq); return; } } /* cannot be in exiting or transit, exiting has already be checked */ if (pjob->ji_qs.ji_state == JOB_STATE_TRANSIT) { req_reject(PBSE_BADSTATE, 0, preq); return; } plist = (svrattrl *)GET_NEXT(preq->rq_ind.rq_modify.rq_attr); if (plist == NULL) { /* nothing to do */ reply_ack(preq); return; } /* * Special checks must be made: * if during a scheduling cycle and certain attributes are altered, * make a note of the job to prevent it from being run now; * if job is running, only certain attributes/resources can be * altered. */ if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) { running = 1; } while (plist) { int i; i = find_attr(job_attr_def, plist->al_name, JOB_ATR_LAST); /* * Is the attribute being altered one which could change * scheduling (ATR_DFLAG_SCGALT set) and if a scheduling * cycle is in progress, then set flag to add the job to list * of jobs which cannot be run in this cycle. * If the scheduler itself sends a modify job request, * no need to delay the job until next cycle. */ if ((psched == NULL) && (scheduler_jobs_stat) && (job_attr_def[i].at_flags & ATR_DFLAG_SCGALT)) add_to_am_list = 1; /* Is the attribute modifiable in RUN state ? 
*/ if (i < 0) { reply_badattr(PBSE_NOATTR, 1, plist, preq); return; } if ((running == 1) && ((job_attr_def[i].at_flags & ATR_DFLAG_ALTRUN) == 0)) { reply_badattr(PBSE_MODATRRUN, 1, plist, preq); return; } if (i == (int)JOB_ATR_resource) { prsd = find_resc_def(svr_resc_def, plist->al_resc, svr_resc_size); if (prsd == 0) { reply_badattr(PBSE_UNKRESC, 1, plist, preq); return; } /* is the specified resource modifiable while */ /* the job is running */ if (running) { if ((prsd->rs_flags & ATR_DFLAG_ALTRUN) == 0) { reply_badattr(PBSE_MODATRRUN, 1, plist, preq); return; } sendmom = 1; } /* should the resource be only in a select spec */ if (prsd->rs_flags & ATR_DFLAG_CVTSLT && !outsideselect && plist->al_atopl.value && plist->al_atopl.value[0]) { /* if "-lresource" is set and has non-NULL value, ** remember as potential bad resource ** if this appears along "select". */ outsideselect = prsd; } } if (strcmp(plist->al_name, ATTR_project) == 0) { mod_project = 1; } else if ((strcmp(plist->al_name, ATTR_runcount) == 0) && ((plist->al_flags & ATR_VFLAG_HOOK) == 0) && (plist->al_value != NULL) && (plist->al_value[0] != '\0') && ((preq->rq_perm & (ATR_DFLAG_MGWR | ATR_DFLAG_OPWR)) == 0) && (atol(plist->al_value) < \ pjob->ji_wattr[(int)JOB_ATR_runcount].at_val.at_long)) { sprintf(log_buffer, "regular user %s@%s cannot decrease '%s' attribute value from %ld to %ld", preq->rq_user, preq->rq_host, ATTR_runcount, pjob->ji_wattr[(int)JOB_ATR_runcount].at_val.at_long, atol(plist->al_value)); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_ERR, pjob->ji_qs.ji_jobid, log_buffer); req_reject(PBSE_PERM, 0, preq); return; } plist = (svrattrl *)GET_NEXT(plist->al_link); } if (outsideselect) { presc = find_resc_entry(&pjob->ji_wattr[(int)JOB_ATR_resource], pseldef); if (presc && ((presc->rs_value.at_flags & ATR_VFLAG_DEFLT) == 0)) { /* select is not a default, so reject qalter */ resc_in_err = strdup(outsideselect->rs_name); req_reject(PBSE_INVALJOBRESC, 0, preq); return; } } /* modify the jobs attributes */ bad = 0; plist = (svrattrl *)GET_NEXT(preq->rq_ind.rq_modify.rq_attr); rc = modify_job_attr(pjob, plist, preq->rq_perm, &bad); if (rc) { if (pjob->ji_clterrmsg) reply_text(preq, rc, pjob->ji_clterrmsg); else reply_badattr(rc, bad, plist, preq); return; } /* If certain attributes modified and if in scheduling cycle */ /* then add to list of jobs which cannot be run in this cycle */ if (add_to_am_list) am_jobs_add(pjob); /* see req_runjob() */ /* check if project attribute was requested to be modified to */ /* be the default project value */ if (mod_project && (pjob->ji_wattr[(int)JOB_ATR_project].at_flags & \ ATR_VFLAG_SET)) { if (strcmp(pjob->ji_wattr[(int)JOB_ATR_project].at_val.at_str, PBS_DEFAULT_PROJECT) == 0) { sprintf(log_buffer, msg_defproject, ATTR_project, PBS_DEFAULT_PROJECT); #ifdef NAS /* localmod 107 */ log_event(PBSEVENT_DEBUG4, PBS_EVENTCLASS_JOB, LOG_INFO, pjob->ji_qs.ji_jobid, log_buffer); #else log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, pjob->ji_qs.ji_jobid, log_buffer); #endif /* localmod 107 */ } } if (pjob->ji_wattr[(int)JOB_ATR_resource].at_flags & ATR_VFLAG_MODIFY) { presc = find_resc_entry(&pjob->ji_wattr[(int)JOB_ATR_resource], pseldef); if (presc && (presc->rs_value.at_flags & ATR_VFLAG_DEFLT)) { /* changing Resource_List and select is a default */ /* clear "select" so it is rebuilt inset_resc_deflt */ pseldef->rs_free(&presc->rs_value); } } /* Reset any defaults resource limit which might have been unset */ if ((rc = set_resc_deflt((void *)pjob, JOB_OBJECT, NULL)) != 0) { 
req_reject(rc, 0, preq); return; } /* if job is not running, may need to change its state */ if (pjob->ji_qs.ji_state != JOB_STATE_RUNNING) { svr_evaljobstate(pjob, &newstate, &newsubstate, 0); (void)svr_setjobstate(pjob, newstate, newsubstate); } else { (void)job_save(pjob, SAVEJOB_FULL); } (void)sprintf(log_buffer, msg_manager, msg_jobmod, preq->rq_user, preq->rq_host); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, pjob->ji_qs.ji_jobid, log_buffer); /* if a resource limit changed for a running job, send to MOM */ if (sendmom) { rc = relay_to_mom(pjob, preq, post_modify_req); if (rc) req_reject(rc, 0, preq); /* unable to get to MOM */ return; } reply_ack(preq); }
void post_signal_req( batch_request *preq) { char *jobid; job *pjob; char log_buf[LOCAL_LOG_BUF_SIZE]; /* request has been handled elsewhere */ if (preq == NULL) return; preq->rq_conn = preq->rq_orgconn; /* restore client socket */ if (preq->rq_reply.brp_code) { log_event( PBSEVENT_DEBUG, PBS_EVENTCLASS_REQUEST, preq->rq_ind.rq_signal.rq_jid, pbse_to_txt(PBSE_MOMREJECT)); errno = 0; req_reject(preq->rq_reply.brp_code, 0, preq, NULL, NULL); } else { if ((jobid = preq->rq_extra) == NULL) { log_err(ENOMEM, __func__, (char *)"Cannot allocate memory! FAILURE"); return; } if ((pjob = svr_find_job(jobid, FALSE)) != NULL) { if (strcmp(preq->rq_ind.rq_signal.rq_signame, SIG_SUSPEND) == 0) { if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_Suspend) == 0) { pjob->ji_qs.ji_svrflags |= JOB_SVFLG_Suspend; set_statechar(pjob); job_save(pjob, SAVEJOB_QUICK, 0); /* release resources allocated to suspended job - NORWAY */ free_nodes(pjob); } } else if (strcmp(preq->rq_ind.rq_signal.rq_signame, SIG_RESUME) == 0) { if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_Suspend) { /* re-allocate assigned node to resumed job - NORWAY */ set_old_nodes(pjob); pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_Suspend; set_statechar(pjob); job_save(pjob, SAVEJOB_QUICK, 0); } } unlock_ji_mutex(pjob, __func__, (char *)"5", LOGLEVEL); } else { /* job is gone */ snprintf(log_buf,sizeof(log_buf), "Cannot find job '%s', assuming success", jobid); log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, __func__, log_buf); } free(jobid); reply_ack(preq); } return; } /* END post_signal_req() */
void purge_completed_jobs( struct batch_request *preq) /* I */ { job *pjob; char *time_str; time_t purge_time = 0; int iter; char log_buf[LOCAL_LOG_BUF_SIZE]; /* get the time to purge the jobs that completed before */ time_str = preq->rq_extend; time_str += strlen(PURGECOMP); purge_time = strtol(time_str,NULL,10); /* * Clean unreported capability is only for operators and managers. * Check if request is authorized */ if ((preq->rq_perm & (ATR_DFLAG_OPRD|ATR_DFLAG_OPWR| ATR_DFLAG_MGRD|ATR_DFLAG_MGWR)) == 0) { req_reject(PBSE_PERM,0,preq,NULL, "must have operator or manager privilege to use -c parameter"); return; } reply_ack(preq); if (LOGLEVEL >= 4) { sprintf(log_buf,"Received purge completed jobs command, purge time is %ld (%s)", (long)purge_time, preq->rq_extend); log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_REQUEST, __func__, log_buf); } iter = -1; while ((pjob = next_job(&alljobs,&iter)) != NULL) { if ((pjob->ji_qs.ji_substate == JOB_SUBSTATE_COMPLETE) && (pjob->ji_wattr[JOB_ATR_comp_time].at_val.at_long <= purge_time) && ((pjob->ji_wattr[JOB_ATR_reported].at_flags & ATR_VFLAG_SET) != 0) && (pjob->ji_wattr[JOB_ATR_reported].at_val.at_long == 0)) { if (LOGLEVEL >= 4) { sprintf(log_buf,"Reported job is COMPLETED (%ld), setting reported to TRUE", pjob->ji_wattr[JOB_ATR_comp_time].at_val.at_long); log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buf); } pjob->ji_wattr[JOB_ATR_reported].at_val.at_long = 1; pjob->ji_wattr[JOB_ATR_reported].at_flags = ATR_VFLAG_SET | ATR_VFLAG_MODIFY; job_save(pjob, SAVEJOB_FULL, 0); } unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); } return; } /* END purge_completed_jobs() */
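/*
 * Sketch of the rq_extend parsing at the top of purge_completed_jobs().
 * The code above skips strlen(PURGECOMP) bytes unconditionally; the
 * variant below also verifies the prefix first.  DEMO_PURGECOMP is a
 * hypothetical stand-in for the real PURGECOMP string.
 */
#include <stdlib.h>
#include <string.h>

#define DEMO_PURGECOMP "purgecomplete="

static long demo_parse_purge_time(const char *rq_extend)
  {
  size_t plen = strlen(DEMO_PURGECOMP);

  if ((rq_extend == NULL) || (strncmp(rq_extend, DEMO_PURGECOMP, plen) != 0))
    return(-1);  /* malformed extension string */

  return(strtol(rq_extend + plen, NULL, 10));
  }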
void scan_for_terminated(void) /* linux */ { int exiteval = 0; pid_t pid; job *pjob = NULL; task *ptask = NULL; int statloc; unsigned int momport = 0; #ifdef USESAVEDRESOURCES int update_stats = TRUE; #endif /* USESAVEDRESOURCES */ int tcount; if (LOGLEVEL >= 9) { log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, "entered"); } /* update the latest intelligence about the running jobs; */ /* must be done before we reap the zombies, else we lose the info */ termin_child = 0; if (mom_get_sample() == PBSE_NONE) { std::list<job *>::reverse_iterator iter; // get a list of jobs in start time order, first to last for (iter = alljobs_list.rbegin(); iter != alljobs_list.rend(); iter++) { pjob = *iter; if ((pjob->ji_stats_done == true) || (pjob->ji_qs.ji_state < JOB_STATE_RUNNING)) continue; #ifdef USESAVEDRESOURCES ptask = (task *)GET_NEXT(pjob->ji_tasks); /* ** check task with associated process id to see if we are recovering ** after a mom restart where process completed while we were gone */ while (ptask != NULL) { if (ptask->ti_flags & TI_FLAGS_RECOVERY) { if (LOGLEVEL >= 7) { snprintf(log_buffer, sizeof(log_buffer), "Found match for recovering job task for sid=%d", ptask->ti_qs.ti_sid); log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); } update_stats = FALSE; break; } ptask = (task *)GET_NEXT(ptask->ti_jobtask); } /* END while (ptask) */ if (update_stats) { mom_set_use(pjob); } #else mom_set_use(pjob); #endif /* USESAVEDRESOURCES */ } } /* Now figure out which task(s) have terminated (are zombies) */ /* NOTE: does a job's tasks include its epilog? */ while ((pid = waitpid(-1, &statloc, WNOHANG)) > 0) { std::list<job *>::reverse_iterator iter; if (LOGLEVEL >= 8) { sprintf(log_buffer, "Child exited with pid: %d", pid); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buffer); } // get a list of jobs in start time order, first to last for (iter = alljobs_list.rbegin(); iter != alljobs_list.rend(); iter++) { pjob = *iter; /* * see if process was a child doing a special * function for MOM */ if (pjob->ji_momsubt != 0) { if (LOGLEVEL >= 9) { snprintf(log_buffer, sizeof(log_buffer), "Checking to see if exiting child pid '%d' is a match for special mom task with pid=%d", pid, pjob->ji_momsubt); log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); } if (pid == pjob->ji_momsubt) { if (LOGLEVEL >= 9) { snprintf(log_buffer, sizeof(log_buffer), "The exiting child is a match of special subtask with pid=%d for job %s", pid, pjob->ji_qs.ji_jobid); log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); } break; } } /* look for task */ ptask = (task *)GET_NEXT(pjob->ji_tasks); /* locate task with associated process id */ tcount = 0; while (ptask != NULL) { if ((ptask->ti_qs.ti_sid == pid) && (ptask->ti_qs.ti_status != TI_STATE_EXITED)) { if (LOGLEVEL >= 7) { snprintf(log_buffer, sizeof(log_buffer), "Exiting child matches job task %d for pid=%d", tcount, pid); log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); } break; } ptask = (task *)GET_NEXT(ptask->ti_jobtask); tcount++; } /* END while (ptask) */ // make sure the task is the top level task for the job to mark the job done if ((ptask != NULL) && (ptask->ti_qs.ti_parenttask == TM_NULL_TASK)) { /* pid match located - break out of job loop */ pjob->ji_stats_done = true; break; } } /* END while (pjob != NULL) */ if (WIFEXITED(statloc)) exiteval = WEXITSTATUS(statloc); else if (WIFSIGNALED(statloc)) exiteval = WTERMSIG(statloc) + 0x100; 
else exiteval = 1; if (pjob == NULL) { if (LOGLEVEL >= 1) { sprintf(log_buffer, "Child pid %d is not part of a job, statloc=%d, exitval=%d", pid, statloc, exiteval); log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buffer); } continue; } /* END if (pjob == NULL) */ if (pid == pjob->ji_momsubt) { /* PID matches job mom subtask */ /* NOTE: both ji_momsubt and ji_mompost normally set in routine preobit_reply() after epilog child is successfully forked */ if (pjob->ji_mompost != NULL) { if (pjob->ji_mompost(pjob, exiteval) == 0) { /* success */ pjob->ji_mompost = NULL; } } /* END if (pjob->ji_mompost != NULL) */ else if (LOGLEVEL >= 8) // This is a debug statement { log_record( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, "Job has no postprocessing routine registered"); } /* clear mom sub-task */ pjob->ji_momsubt = 0; if (multi_mom) { momport = pbs_rm_port; } job_save(pjob, SAVEJOB_QUICK, momport); continue; } /* END if (pid == pjob->ji_momsubt) */ if (ptask == NULL) continue; /* what happens if mom PID is reaped before subtask? */ if (LOGLEVEL >= 2) { sprintf(log_buffer, "pid %d harvested for job %s, task %d, exitcode=%d", pid, pjob->ji_qs.ji_jobid, ptask->ti_qs.ti_task, exiteval); log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buffer); } /* where is job purged? How do we keep job from progressing in state until the obit is sent? */ kill_task(pjob, ptask, SIGKILL, 0); ptask->ti_qs.ti_exitstat = exiteval; ptask->ti_qs.ti_status = TI_STATE_EXITED; task_save(ptask); sprintf(log_buffer, "%s: job %s task %d terminated, sid=%d", __func__, pjob->ji_qs.ji_jobid, ptask->ti_qs.ti_task, ptask->ti_qs.ti_sid); log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer); exiting_tasks = 1; } /* END while ((pid = waitpid(-1,&statloc,WNOHANG)) > 0) */ return; } /* END scan_for_terminated() */
/** * @brief wait_action * Wait for a task that has terminated or a socket that is ready to read. * Mark any terminated task as Exiting and do network processing on * any ready socket. * * @return void */ void wait_action(void) { static char id[] = "wait_action"; int rc = 0; int hNum = 0; HANDLE hArray[MAXIMUM_WAIT_OBJECTS+1] = {INVALID_HANDLE_VALUE}; HANDLE hProc = INVALID_HANDLE_VALUE; extern HANDLE hStop; /* mutex: quit when released */ int ecode = -1; job *pjob = NULL; task *ptask = NULL; int waittime = 500; extern int mom_run_state; struct work_task *p_wtask = NULL; HANDLE pid = INVALID_HANDLE_VALUE; /* Check for non job-related tasks like periodic hook tasks */ while (1) { if ((pid = waitpid((HANDLE)-1, &ecode, WNOHANG)) == (HANDLE)-1) { if (errno == EINTR) { continue; } else { break; } } else if (pid == 0) { break; } p_wtask = (struct work_task *)GET_NEXT(task_list_event); while (p_wtask) { if ((p_wtask->wt_type == WORK_Deferred_Child) && ((HANDLE)p_wtask->wt_event == pid)) { p_wtask->wt_type = WORK_Deferred_Cmp; p_wtask->wt_aux = (int)ecode; /* exit status */ svr_delay_entry++; /* see next_task() */ } p_wtask = (struct work_task *)GET_NEXT(p_wtask->wt_linkall); } } for (;;) { hNum = 0; if (mom_run_state && hStop != NULL) /* add mutex to array */ hArray[hNum++] = hStop; pjob = (job *)GET_NEXT(svr_alljobs); while (pjob) { /* * see if process was a child doing a special * function for MOM */ if ((pjob->ji_momsubt != NULL) && (pjob->ji_momsubt != INVALID_HANDLE_VALUE) && (pjob->ji_mompost != NULL)) { hArray[hNum++] = pjob->ji_momsubt; } /* * process tasks */ ptask = (task *)GET_NEXT(pjob->ji_tasks); while (ptask) { if ((ptask->ti_hProc != NULL) && (ptask->ti_hProc != INVALID_HANDLE_VALUE)) hArray[hNum++] = ptask->ti_hProc; if (hNum > MAXIMUM_WAIT_OBJECTS) break; ptask = (task *)GET_NEXT(ptask->ti_jobtask); } if (hNum > MAXIMUM_WAIT_OBJECTS) { DBPRT(("%s: %d more than MAX\n", id, hNum)) hNum = MAXIMUM_WAIT_OBJECTS; break; } pjob = (job *)GET_NEXT(pjob->ji_alljobs); } if (hNum == 0) /* nothing to wait for */ break; rc = WaitForMultipleObjects(hNum, hArray, FALSE, waittime); if (rc == WAIT_TIMEOUT) /* nobody is done */ break; else if (rc == WAIT_FAILED) { log_err(-1, id, "WaitForMultipleObjects"); break; } waittime = 0; /* only wait the first time */ rc -= WAIT_OBJECT_0; /* which object was it? */ assert(0 <= rc && rc < hNum); if (rc == 0 && mom_run_state && hStop != NULL) { /* got mutex */ mom_run_state = 0; /* shutdown */ continue; } /* ** It was a process finishing. Find which one. 
*/

    hProc = hArray[rc];

    rc = GetExitCodeProcess(hProc, &ecode);

    if (rc == 0)
      {
      log_err(-1, id, "GetExitCodeProcess");
      ecode = 99;
      }
    else if (ecode == STILL_ACTIVE)  /* shouldn't happen; STILL_ACTIVE comes
                                        back in the exit code, not in the
                                        BOOL return value */
      break;

    CloseHandle(hProc);

    /* find which process finished */

    pjob = (job *)GET_NEXT(svr_alljobs);

    while (pjob)
      {
      if (pjob->ji_momsubt == hProc)
        break;

      ptask = (task *)GET_NEXT(pjob->ji_tasks);

      while (ptask)
        {
        if (ptask->ti_hProc == hProc)
          break;

        ptask = (task *)GET_NEXT(ptask->ti_jobtask);
        }

      if (ptask)
        break;

      pjob = (job *)GET_NEXT(pjob->ji_alljobs);
      }

    assert(pjob != NULL);

    if (pjob->ji_momsubt == hProc)
      {
      pjob->ji_momsubt = NULL;

      if (pjob->ji_mompost)
        {
        pjob->ji_mompost(pjob, ecode);

        /* After epilogue, get rid of any HOSTFILE */

        if (pjob->ji_mompost == send_obit)
          {
          char file[MAXPATHLEN+1];

          (void)sprintf(file, "%s/aux/%s", pbs_conf.pbs_home_path, pjob->ji_qs.ji_jobid);
          (void)unlink(file);
          }

        pjob->ji_mompost = 0;
        }

      (void)job_save(pjob, SAVEJOB_QUICK);

      continue;
      }

    DBPRT(("%s: task %d pid %d exit value %d\n", id, ptask->ti_qs.ti_task, ptask->ti_qs.ti_sid, ecode))

    ptask->ti_hProc = NULL;
    ptask->ti_qs.ti_exitstat = ecode;
    ptask->ti_qs.ti_status = TI_STATE_EXITED;
    ptask->ti_qs.ti_sid = 0;
    (void)task_save(ptask);

    sprintf(log_buffer, "task %d terminated", ptask->ti_qs.ti_task);
    log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_DEBUG, pjob->ji_qs.ji_jobid, log_buffer);

    exiting_tasks = 1;
    }

  connection_idlecheck();
  }
/** * update_array_values() * * updates internal bookeeping values for job arrays * @param pa - array to update * @param pjob - the pjob that an event happened on * @param event - code for what event just happened */ void update_array_values( job_array *pa, /* I */ void *j, /* I */ int old_state, /* I */ enum ArrayEventsEnum event) /* I */ { job *pjob = (job *)j; int exit_status; switch (event) { case aeQueue: /* NYI, nothing needs to be done for this yet */ break; case aeRun: if (old_state != JOB_STATE_RUNNING) { pa->ai_qs.jobs_running++; pa->ai_qs.num_started++; } break; case aeTerminate: exit_status = pjob->ji_qs.ji_un.ji_exect.ji_exitstat; if (old_state == JOB_STATE_RUNNING) { if (pa->ai_qs.jobs_running > 0) pa->ai_qs.jobs_running--; } if (exit_status == 0) { pa->ai_qs.num_successful++; pa->ai_qs.jobs_done++; } else { pa->ai_qs.num_failed++; pa->ai_qs.jobs_done++; } array_save(pa); /* update slot limit hold if necessary */ if (server.sv_attr[SRV_ATR_MoabArrayCompatible].at_val.at_long != FALSE) { /* only need to update if the job wasn't previously held */ if ((pjob->ji_wattr[JOB_ATR_hold].at_val.at_long & HOLD_l) == FALSE) { int i; int newstate; int newsub; job *pj; /* find the first held job and release its hold */ for (i = 0; i < pa->ai_qs.array_size; i++) { if (pa->jobs[i] == NULL) continue; pj = (job *)pa->jobs[i]; if (pj->ji_wattr[JOB_ATR_hold].at_val.at_long & HOLD_l) { pj->ji_wattr[JOB_ATR_hold].at_val.at_long &= ~HOLD_l; if (pj->ji_wattr[JOB_ATR_hold].at_val.at_long == 0) { pj->ji_wattr[JOB_ATR_hold].at_flags &= ~ATR_VFLAG_SET; } svr_evaljobstate(pj, &newstate, &newsub, 1); svr_setjobstate(pj, newstate, newsub); job_save(pj, SAVEJOB_FULL, 0); break; } } } } break; default: /* log error? */ break; } set_array_depend_holds(pa); array_save(pa); } /* END update_array_values() */
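/*
 * Sketch of the slot-limit release in update_array_values(): clear the
 * 'l' hold bit on the first held subjob, and drop ATR_VFLAG_SET once no
 * hold bits remain.  DEMO_HOLD_L and DEMO_VFLAG_SET are illustrative
 * values, not the real constants.
 */
#define DEMO_HOLD_L    0x08
#define DEMO_VFLAG_SET 0x01

static void demo_release_slot_hold(long *hold, unsigned int *at_flags)
  {
  *hold &= ~DEMO_HOLD_L;

  if (*hold == 0)
    *at_flags &= ~DEMO_VFLAG_SET;  /* attribute is now effectively unset */
  }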
void req_orderjob(struct batch_request *req) { int jt1, jt2; /* job type */ job *pjob; job *pjob1; job *pjob2; long rank; int rc; char tmpqn[PBS_MAXQUEUENAME+1]; if ((pjob1=chk_job_request(req->rq_ind.rq_move.rq_jid, req, &jt1)) == NULL) return; if ((pjob2=chk_job_request(req->rq_ind.rq_move.rq_destin, req, &jt2)) == NULL) return; if ((jt1 == IS_ARRAY_Single) || (jt2 == IS_ARRAY_Single) || (jt1 == IS_ARRAY_Range) || (jt2 == IS_ARRAY_Range)) { /* can only move regular or Array Job, not Subjobs */ req_reject(PBSE_IVALREQ, 0, req); return; } if (((pjob = pjob1)->ji_qs.ji_state == JOB_STATE_RUNNING) || ((pjob = pjob2)->ji_qs.ji_state == JOB_STATE_RUNNING) || ((pjob = pjob1)->ji_qs.ji_state == JOB_STATE_BEGUN) || ((pjob = pjob2)->ji_qs.ji_state == JOB_STATE_BEGUN)) { #ifndef NDEBUG (void)sprintf(log_buffer, "(%s) %s, state=%d", __func__, msg_badstate, pjob->ji_qs.ji_state); log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_DEBUG, pjob->ji_qs.ji_jobid, log_buffer); #endif /* NDEBUG */ req_reject(PBSE_BADSTATE, 0, req); return; } else if (pjob1->ji_qhdr != pjob2->ji_qhdr) { /* Jobs are in different queues */ if ((rc = svr_chkque(pjob1, pjob2->ji_qhdr, get_hostPart(pjob1->ji_wattr[(int)JOB_ATR_job_owner].at_val.at_str), MOVE_TYPE_Order)) || (rc = svr_chkque(pjob2, pjob1->ji_qhdr, get_hostPart(pjob2->ji_wattr[(int)JOB_ATR_job_owner].at_val.at_str), MOVE_TYPE_Order))) { req_reject(rc, 0, req); return; } } /* now swap the order of the two jobs in the queue lists */ rank = pjob1->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long; pjob1->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long = pjob2->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long; pjob1->ji_wattr[(int)JOB_ATR_qrank].at_flags |= ATR_VFLAG_MODCACHE; pjob2->ji_wattr[(int)JOB_ATR_qrank].at_val.at_long = rank; pjob2->ji_wattr[(int)JOB_ATR_qrank].at_flags |= ATR_VFLAG_MODCACHE; if (pjob1->ji_qhdr != pjob2->ji_qhdr) { (void)strcpy(tmpqn, pjob1->ji_qs.ji_queue); (void)strcpy(pjob1->ji_qs.ji_queue, pjob2->ji_qs.ji_queue); (void)strcpy(pjob2->ji_qs.ji_queue, tmpqn); svr_dequejob(pjob1); svr_dequejob(pjob2); (void)svr_enquejob(pjob1); (void)svr_enquejob(pjob2); } else { swap_link(&pjob1->ji_jobque, &pjob2->ji_jobque); swap_link(&pjob1->ji_alljobs, &pjob2->ji_alljobs); } /* need to update disk copy of both jobs to save new order */ (void)job_save(pjob1, SAVEJOB_FULL); (void)job_save(pjob2, SAVEJOB_FULL); reply_ack(req); }
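/*
 * Sketch of the core of req_orderjob(): swapping the two jobs' qrank
 * values is what actually reorders them; dequeue/enqueue is only needed
 * when the jobs live in different queues.
 */
static void demo_swap_qrank(long *rank1, long *rank2)
  {
  long tmp = *rank1;

  *rank1 = *rank2;
  *rank2 = tmp;
  }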
void *req_checkpointjob(

  batch_request *preq)  /* I */

  {
  job           *pjob;
  int            rc;
  pbs_attribute *pattr;
  char           log_buf[LOCAL_LOG_BUF_SIZE];
  batch_request *dup_req = NULL;

  if ((pjob = chk_job_request(preq->rq_ind.rq_manager.rq_objname, preq)) == NULL)
    {
    return(NULL);
    }

  mutex_mgr job_mutex(pjob->ji_mutex, true);

  pattr = &pjob->ji_wattr[JOB_ATR_checkpoint];

  if ((pjob->ji_qs.ji_state == JOB_STATE_RUNNING) &&
      ((pattr->at_flags & ATR_VFLAG_SET) &&
       ((csv_find_string(pattr->at_val.at_str, "s") != NULL) ||
        (csv_find_string(pattr->at_val.at_str, "c") != NULL) ||
        (csv_find_string(pattr->at_val.at_str, "enabled") != NULL))))
    {
    /* have MOM attempt checkpointing */

    if ((dup_req = duplicate_request(preq)) == NULL)
      {
      req_reject(PBSE_SYSTEM, 0, preq, NULL, "failure to allocate memory");
      }

    /* The dup_req is freed in relay_to_mom (failure)
     * or in issue_Drequest (success) */
    else if ((rc = relay_to_mom(&pjob, dup_req, NULL)) != PBSE_NONE)
      {
      req_reject(rc, 0, preq, NULL, NULL);
      free_br(dup_req);

      if (pjob == NULL)
        job_mutex.set_unlock_on_exit(false);
      }
    else
      {
      if (pjob != NULL)
        {
        pjob->ji_qs.ji_svrflags |= JOB_SVFLG_CHECKPOINT_FILE;

        job_save(pjob, SAVEJOB_QUICK, 0);

        /* log_buf was previously logged without ever being written */

        snprintf(log_buf, sizeof(log_buf), "checkpoint requested for job %s",
                 pjob->ji_qs.ji_jobid);

        log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);

        unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
        pjob = NULL;
        }
      else
        job_mutex.set_unlock_on_exit(false);

      process_checkpoint_reply(dup_req);
      }
    }
  else
    {
    /* Job does not have checkpointing enabled, so reject the request */

    snprintf(log_buf, sizeof(log_buf), "checkpoint request rejected: job %s is not checkpointable",
             pjob->ji_qs.ji_jobid);

    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);

    req_reject(PBSE_IVALREQ, 0, preq, NULL, "job is not checkpointable");
    }

  return(NULL);
  }  /* END req_checkpointjob() */
int req_holdjob( batch_request *vp) /* I */ { long *hold_val; int newstate; int newsub; long old_hold; job *pjob; char *pset; int rc; pbs_attribute temphold; pbs_attribute *pattr; batch_request *preq = (struct batch_request *)vp; char log_buf[LOCAL_LOG_BUF_SIZE]; batch_request *dup_req = NULL; pjob = chk_job_request(preq->rq_ind.rq_hold.rq_orig.rq_objname, preq); if (pjob == NULL) { return(PBSE_NONE); } mutex_mgr job_mutex(pjob->ji_mutex, true); /* cannot do anything until we decode the holds to be set */ if ((rc = get_hold(&preq->rq_ind.rq_hold.rq_orig.rq_attr, (const char **)&pset, &temphold)) != 0) { req_reject(rc, 0, preq, NULL, NULL); return(PBSE_NONE); } /* if other than HOLD_u is being set, must have privil */ if ((rc = chk_hold_priv(temphold.at_val.at_long, preq->rq_perm)) != 0) { req_reject(rc, 0, preq, NULL, NULL); return(PBSE_NONE); } hold_val = &pjob->ji_wattr[JOB_ATR_hold].at_val.at_long; old_hold = *hold_val; *hold_val |= temphold.at_val.at_long; pjob->ji_wattr[JOB_ATR_hold].at_flags |= ATR_VFLAG_SET; sprintf(log_buf, msg_jobholdset, pset, preq->rq_user, preq->rq_host); pattr = &pjob->ji_wattr[JOB_ATR_checkpoint]; if ((pjob->ji_qs.ji_state == JOB_STATE_RUNNING) && ((pattr->at_flags & ATR_VFLAG_SET) && ((csv_find_string(pattr->at_val.at_str, "s") != NULL) || (csv_find_string(pattr->at_val.at_str, "c") != NULL) || (csv_find_string(pattr->at_val.at_str, "enabled") != NULL)))) { /* have MOM attempt checkpointing */ /* ** The jobid in the request always have the server suffix attached ** which is dropped when the server attribute ** 'display_job_server_suffix' is FALSE and so will in the MOM's. ** Therefore, it must be passed as the server to the MOM so she can ** find it to hold. */ if (strncmp(pjob->ji_qs.ji_jobid, preq->rq_ind.rq_hold.rq_orig.rq_objname, PBS_MAXSVRJOBID)) snprintf(preq->rq_ind.rq_hold.rq_orig.rq_objname, sizeof(preq->rq_ind.rq_hold.rq_orig.rq_objname), "%s", pjob->ji_qs.ji_jobid); if ((dup_req = duplicate_request(preq)) == NULL) { req_reject(rc, 0, preq, NULL, "memory allocation failure"); } /* The dup_req is freed in relay_to_mom (failure) * or in issue_Drequest (success) */ else if ((rc = relay_to_mom(&pjob, dup_req, NULL)) != PBSE_NONE) { free_br(dup_req); *hold_val = old_hold; /* reset to the old value */ req_reject(rc, 0, preq, NULL, "relay to mom failed"); if (pjob == NULL) job_mutex.set_unlock_on_exit(false); } else { if (pjob != NULL) { pjob->ji_qs.ji_svrflags |= JOB_SVFLG_HASRUN | JOB_SVFLG_CHECKPOINT_FILE; job_save(pjob, SAVEJOB_QUICK, 0); /* fill in log_buf again, since relay_to_mom changed it */ sprintf(log_buf, msg_jobholdset, pset, preq->rq_user, preq->rq_host); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf); unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL); pjob = NULL; reply_ack(preq); } else job_mutex.set_unlock_on_exit(false); process_hold_reply(dup_req); } } #ifdef ENABLE_BLCR else if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) { /* * This system is configured with BLCR checkpointing to be used, * but this Running job does not have checkpointing enabled, * so we reject the request */ log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buf); req_reject(PBSE_IVALREQ, 0, preq, NULL, "job not held since checkpointing is expected but not enabled for job"); } #endif else { /* everything went well, may need to update the job state */ log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buf); if (old_hold != *hold_val) { /* indicate attributes changed */ pjob->ji_modified = 1; 
svr_evaljobstate(*pjob, newstate, newsub, 0); svr_setjobstate(pjob, newstate, newsub, FALSE); } reply_ack(preq); } return(PBSE_NONE); } /* END req_holdjob() */
job *job_recov( char *filename) /* I */ /* pathname to job save file */ { int fds; job *pj; char *pn; char namebuf[MAXPATHLEN]; int qs_upgrade; #ifndef PBS_MOM char parent_id[PBS_MAXSVRJOBID + 1]; job_array *pa; #endif qs_upgrade = FALSE; pj = job_alloc(); /* allocate & initialize job structure space */ if (pj == NULL) { /* FAILURE - cannot alloc memory */ return(NULL); } strcpy(namebuf, path_jobs); /* job directory path */ strcat(namebuf, filename); fds = open(namebuf, O_RDONLY, 0); if (fds < 0) { sprintf(log_buffer, "unable to open %s", namebuf); log_err(errno, "job_recov", log_buffer); free((char *)pj); /* FAILURE - cannot open job file */ return(NULL); } /* read in job quick save sub-structure */ if (read(fds, (char *)&pj->ji_qs, quicksize) != (ssize_t)quicksize && pj->ji_qs.qs_version == PBS_QS_VERSION) { sprintf(log_buffer, "Unable to read %s", namebuf); log_err(errno, "job_recov", log_buffer); free((char *)pj); close(fds); return(NULL); } /* is ji_qs the version we expect? */ if (pj->ji_qs.qs_version != PBS_QS_VERSION) { /* ji_qs is older version */ sprintf(log_buffer, "%s appears to be from an old version. Attempting to convert.\n", namebuf); log_err(-1, "job_recov", log_buffer); if (job_qs_upgrade(pj, fds, namebuf, pj->ji_qs.qs_version) != 0) { sprintf(log_buffer, "unable to upgrade %s\n", namebuf); log_err(-1, "job_recov", log_buffer); free((char *)pj); close(fds); return(NULL); } qs_upgrade = TRUE; } /* END if (pj->ji_qs.qs_version != PBS_QS_VERSION) */ /* Does file name match the internal name? */ /* This detects ghost files */ pn = strrchr(namebuf, (int)'/') + 1; if (strncmp(pn, pj->ji_qs.ji_fileprefix, strlen(pj->ji_qs.ji_fileprefix)) != 0) { /* mismatch, discard job */ sprintf(log_buffer, "Job Id %s does not match file name for %s", pj->ji_qs.ji_jobid, namebuf); log_err(-1, "job_recov", log_buffer); free((char *)pj); close(fds); return(NULL); } /* read in working attributes */ if (recov_attr( fds, pj, job_attr_def, pj->ji_wattr, (int)JOB_ATR_LAST, (int)JOB_ATR_UNKN, TRUE) != 0) { sprintf(log_buffer, "unable to recover %s (file is likely corrupted)", namebuf); log_err(-1, "job_recov", log_buffer); job_free(pj); close(fds); return(NULL); } #ifdef PBS_MOM /* read in tm sockets and ips */ if (recov_tmsock(fds, pj) != 0) { sprintf(log_buffer, "warning: tmsockets not recovered from %s (written by an older pbs_mom?)", namebuf); log_err(-1, "job_recov", log_buffer); } if (recov_roottask(fds, pj) != 0) { sprintf(log_buffer, "warning: root task not recovered from %s (written by an older pbs_mom?)", namebuf); log_err(-1, "job_recov", log_buffer); } if (recov_jobflags(fds, pj) != 0) { sprintf(log_buffer, "warning: job flags not recovered from %s (written by an older pbs_mom?)", namebuf); log_err(-1, "job_recov", log_buffer); } #else /* PBS_MOM */ if (pj->ji_wattr[(int)JOB_ATR_job_array_request].at_flags & ATR_VFLAG_SET) { /* job is part of an array. We need to put a link back to the server job array struct for this array. We also have to link this job into the linked list of jobs belonging to the array. */ array_get_parent_id(pj->ji_qs.ji_jobid, parent_id); pa = get_array(parent_id); if (strcmp(parent_id, pj->ji_qs.ji_jobid) == 0) { pj->ji_isparent = TRUE; } else { if (pa == NULL) { /* couldn't find array struct, it must not have been recovered, treat job as indepentent job? 
perhaps we should delete the job XXX_JOB_ARRAY: should I unset this?*/ pj->ji_wattr[(int)JOB_ATR_job_array_request].at_flags &= ~ATR_VFLAG_SET; } else { CLEAR_LINK(pj->ji_arrayjobs); append_link(&pa->array_alljobs, &pj->ji_arrayjobs, (void*)pj); pj->ji_arraystruct = pa; pa->jobs_recovered++; } } } #endif close(fds); /* all done recovering the job */ if (qs_upgrade == TRUE) { job_save(pj, SAVEJOB_FULL); } return(pj); } /* END job_recov() */
int setup_array_struct(job *pjob)
  {
  job_array *pa;

  /* struct work_task *wt; */
  array_request_node *rn;

  int bad_token_count;
  int array_size;
  int rc;

  /* setup a link to this job array in the server's all_arrays list */

  pa = (job_array *)calloc(1, sizeof(job_array));

  if (pa == NULL)
    return 1;  /* cannot alloc memory */

  pa->ai_qs.struct_version = ARRAY_QS_STRUCT_VERSION;
  pa->template_job = pjob;

  /*pa->ai_qs.array_size = pjob->ji_wattr[(int)JOB_ATR_job_array_size].at_val.at_long;*/

  strcpy(pa->ai_qs.parent_id, pjob->ji_qs.ji_jobid);
  strcpy(pa->ai_qs.fileprefix, pjob->ji_qs.ji_fileprefix);
  strncpy(pa->ai_qs.owner, pjob->ji_wattr[JOB_ATR_job_owner].at_val.at_str, PBS_MAXUSER + PBS_MAXSERVERNAME + 2);
  strncpy(pa->ai_qs.submit_host, get_variable(pjob, pbs_o_host), PBS_MAXSERVERNAME);

  pa->ai_qs.num_cloned = 0;

  CLEAR_LINK(pa->all_arrays);
  CLEAR_HEAD(pa->request_tokens);

  append_link(&svr_jobarrays, &pa->all_arrays, (void *)pa);

  if (job_save(pjob, SAVEJOB_FULL, 0) != 0)
    {
    /* log before job_purge(); the job is freed by the purge and must not
     * be dereferenced afterwards */

    if (LOGLEVEL >= 6)
      {
      log_record(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        (pjob != NULL) ? pjob->ji_qs.ji_jobid : "NULL",
        "cannot save job");
      }

    job_purge(pjob);

    return 1;
    }

  if ((rc = set_slot_limit(pjob->ji_wattr[JOB_ATR_job_array_request].at_val.at_str, pa)))
    {
    /* again, log before array_delete() frees the structure */

    snprintf(log_buffer, sizeof(log_buffer),
      "Array %s requested a slot limit above the max limit %ld, rejecting\n",
      pa->ai_qs.parent_id,
      server.sv_attr[SRV_ATR_MaxSlotLimit].at_val.at_long);

    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, pa->ai_qs.parent_id, log_buffer);

    array_delete(pa);

    return(INVALID_SLOT_LIMIT);
    }

  pa->ai_qs.jobs_running   = 0;
  pa->ai_qs.num_started    = 0;
  pa->ai_qs.num_failed     = 0;
  pa->ai_qs.num_successful = 0;

  bad_token_count = parse_array_request(pjob->ji_wattr[JOB_ATR_job_array_request].at_val.at_str, &(pa->request_tokens));

  /* get the number of elements that should be allocated in the array */

  rn = (array_request_node *)GET_NEXT(pa->request_tokens);
  array_size = 0;
  pa->ai_qs.num_jobs = 0;

  while (rn != NULL)
    {
    if (rn->end > array_size)
      array_size = rn->end;

    /* calculate the actual number of jobs (different from array size) */
    pa->ai_qs.num_jobs += rn->end - rn->start + 1;

    rn = (array_request_node *)GET_NEXT(rn->request_tokens_link);
    }

  /* size of array is the biggest index + 1 */

  array_size++;

  if (server.sv_attr[SRV_ATR_MaxArraySize].at_flags & ATR_VFLAG_SET)
    {
    int max_array_size = server.sv_attr[SRV_ATR_MaxArraySize].at_val.at_long;

    if (max_array_size < pa->ai_qs.num_jobs)
      {
      array_delete(pa);

      return(ARRAY_TOO_LARGE);
      }
    }

  /* initialize the array, checking the allocation this time */

  pa->jobs = (job **)calloc(array_size, sizeof(job *));

  if (pa->jobs == NULL)
    {
    array_delete(pa);

    return 1;
    }

  /* remember array_size */
  pa->ai_qs.array_size = array_size;

  CLEAR_HEAD(pa->ai_qs.deps);

  array_save(pa);

  if (bad_token_count > 0)
    {
    array_delete(pa);
    return 2;
    }

  return 0;
  }
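/*
 * Sketch of the sizing pass in setup_array_struct(): the jobs table is
 * sized by the largest requested index plus one, while num_jobs counts
 * only the indices actually requested, since ranges such as "0-2,7" can
 * leave gaps.  demo_range is a stand-in for array_request_node.
 */
struct demo_range
  {
  int start;
  int end;    /* inclusive */
  struct demo_range *next;
  };

static int demo_array_dims(const struct demo_range *rn, int *num_jobs)
  {
  int array_size = 0;

  *num_jobs = 0;

  for (; rn != NULL; rn = rn->next)
    {
    if (rn->end > array_size)
      array_size = rn->end;

    *num_jobs += rn->end - rn->start + 1;
    }

  return(array_size + 1);  /* table size is the biggest index + 1 */
  }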
int send_job( job *jobp, pbs_net_t hostaddr, /* host address, host byte order */ int port, /* service port, host byte order */ int move_type, /* move, route, or execute */ void (*post_func)(struct work_task *), /* after move */ void *data) /* ptr to optional batch_request to be put */ /* in the work task structure */ { tlist_head attrl; enum conn_type cntype = ToServerDIS; int con; char *destin = jobp->ji_qs.ji_destin; int encode_type; int i; int NumRetries; char *id = "send_job"; attribute *pattr; pid_t pid; struct attropl *pqjatr; /* list (single) of attropl for quejob */ char *safail = "sigaction failed\n"; char *spfail = "sigprocmask failed\n"; char script_name[MAXPATHLEN + 1]; sigset_t child_set, all_set; struct sigaction child_action; struct work_task *ptask; mbool_t Timeout = FALSE; char *pc; sigemptyset(&child_set); sigaddset(&child_set, SIGCHLD); sigfillset(&all_set); /* block SIGCHLD until work task is established */ if (sigprocmask(SIG_BLOCK, &child_set, NULL) == -1) { log_err(errno,id,spfail); pbs_errno = PBSE_SYSTEM; log_event( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, jobp->ji_qs.ji_jobid, "cannot set signal mask"); return(ROUTE_PERM_FAILURE); } if (LOGLEVEL >= 6) { sprintf(log_buffer, "about to fork in send_job - type=%d", move_type); log_event( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, jobp->ji_qs.ji_jobid, log_buffer); } pid = fork(); if (pid == -1) { /* error on fork */ log_err(errno, id, "fork failed\n"); if (sigprocmask(SIG_UNBLOCK, &child_set, NULL) == -1) log_err(errno, id, spfail); pbs_errno = PBSE_SYSTEM; return(ROUTE_PERM_FAILURE); } if (pid != 0) { /* The parent (main server) */ /* create task to monitor job startup */ /* CRI: need way to report to scheduler job is starting, not started */ ptask = set_task(WORK_Deferred_Child, pid, post_func, jobp); if (ptask == NULL) { log_err(errno, id, msg_err_malloc); return(ROUTE_PERM_FAILURE); } ptask->wt_parm2 = data; append_link( &((job *)jobp)->ji_svrtask, &ptask->wt_linkobj, ptask); /* now can unblock SIGCHLD */ if (sigprocmask(SIG_UNBLOCK, &child_set, NULL) == -1) log_err(errno, id, spfail); if (LOGLEVEL >= 1) { extern long DispatchTime[]; extern job *DispatchJob[]; extern char *DispatchNode[]; extern time_t time_now; struct pbsnode *NP; /* record job dispatch time */ int jindex; for (jindex = 0;jindex < 20;jindex++) { if (DispatchJob[jindex] == NULL) { DispatchTime[jindex] = time_now; DispatchJob[jindex] = jobp; if ((NP = PGetNodeFromAddr(hostaddr)) != NULL) DispatchNode[jindex] = NP->nd_name; else DispatchNode[jindex] = NULL; break; } } } /* SUCCESS */ return(ROUTE_DEFERRED); } /* END if (pid != 0) */ /* * the child process * * set up signal catcher for error return */ rpp_terminate(); child_action.sa_handler = net_move_die; sigfillset(&child_action.sa_mask); child_action.sa_flags = 0; if (sigaction(SIGHUP, &child_action, NULL)) log_err(errno, id, safail); if (sigaction(SIGINT, &child_action, NULL)) log_err(errno, id, safail); if (sigaction(SIGQUIT, &child_action, NULL)) log_err(errno, id, safail); /* signal handling is set, now unblock */ if (sigprocmask(SIG_UNBLOCK, &child_set, NULL) == -1) log_err(errno, id, spfail); /* encode job attributes to be moved */ CLEAR_HEAD(attrl); /* select attributes/resources to send based on move type */ if (move_type == MOVE_TYPE_Exec) { /* moving job to MOM - i.e. job start */ resc_access_perm = ATR_DFLAG_MOM; encode_type = ATR_ENCODE_MOM; cntype = ToServerDIS; } else { /* moving job to alternate server? 
*/ resc_access_perm = ATR_DFLAG_USWR | ATR_DFLAG_OPWR | ATR_DFLAG_MGWR | ATR_DFLAG_SvRD; encode_type = ATR_ENCODE_SVR; /* clear default resource settings */ svr_dequejob(jobp); } pattr = jobp->ji_wattr; for (i = 0;i < JOB_ATR_LAST;i++) { if (((job_attr_def + i)->at_flags & resc_access_perm) || ((strncmp((job_attr_def + i)->at_name,"session_id",10) == 0) && (jobp->ji_wattr[JOB_ATR_checkpoint_name].at_flags & ATR_VFLAG_SET))) { (job_attr_def + i)->at_encode( pattr + i, &attrl, (job_attr_def + i)->at_name, NULL, encode_type); } } /* END for (i) */ attrl_fixlink(&attrl); /* put together the job script file name */ strcpy(script_name, path_jobs); if (jobp->ji_wattr[JOB_ATR_job_array_request].at_flags & ATR_VFLAG_SET) { strcat(script_name, jobp->ji_arraystruct->ai_qs.fileprefix); } else { strcat(script_name, jobp->ji_qs.ji_fileprefix); } strcat(script_name, JOB_SCRIPT_SUFFIX); pbs_errno = 0; con = -1; for (NumRetries = 0;NumRetries < RETRY;NumRetries++) { int rc; /* connect to receiving server with retries */ if (NumRetries > 0) { /* recycle after an error */ if (con >= 0) svr_disconnect(con); /* check pbs_errno from previous attempt */ if (should_retry_route(pbs_errno) == -1) { sprintf(log_buffer, "child failed in previous commit request for job %s", jobp->ji_qs.ji_jobid); log_err(pbs_errno, id, log_buffer); exit(1); /* fatal error, don't retry */ } sleep(1 << NumRetries); } /* NOTE: on node hangs, svr_connect is successful */ if ((con = svr_connect(hostaddr, port, 0, cntype)) == PBS_NET_RC_FATAL) { sprintf(log_buffer, "send_job failed to %lx port %d", hostaddr, port); log_err(pbs_errno, id, log_buffer); exit(1); } if (con == PBS_NET_RC_RETRY) { pbs_errno = 0; /* should retry */ continue; } /* * if the job is substate JOB_SUBSTATE_TRNOUTCM which means * we are recovering after being down or a late failure, we * just want to send the "ready-to-commit/commit" */ if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUTCM) { if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUT) { jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUT; job_save(jobp, SAVEJOB_QUICK); } pqjatr = &((svrattrl *)GET_NEXT(attrl))->al_atopl; if ((pc = PBSD_queuejob( con, jobp->ji_qs.ji_jobid, destin, pqjatr, NULL)) == NULL) { if ((pbs_errno == PBSE_EXPIRED) || (pbs_errno == PBSE_READ_REPLY_TIMEOUT)) { /* queue job timeout based on pbs_tcp_timeout */ Timeout = TRUE; } if ((pbs_errno == PBSE_JOBEXIST) && (move_type == MOVE_TYPE_Exec)) { /* already running, mark it so */ log_event( PBSEVENT_ERROR, PBS_EVENTCLASS_JOB, jobp->ji_qs.ji_jobid, "MOM reports job already running"); exit(0); } sprintf(log_buffer, "send of job to %s failed error = %d", destin, pbs_errno); log_event( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, jobp->ji_qs.ji_jobid, log_buffer); continue; } /* END if ((pc = PBSD_queuejob() == NULL) */ free(pc); if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) { if (PBSD_jscript(con, script_name, jobp->ji_qs.ji_jobid) != 0) continue; } /* XXX may need to change the logic below, if we are sending the job to a mom on the same host and the mom and server are not sharing the same spool directory, then we still need to move the file */ if ((move_type == MOVE_TYPE_Exec) && (jobp->ji_qs.ji_svrflags & JOB_SVFLG_HASRUN) && (hostaddr != pbs_server_addr)) { /* send files created on prior run */ if ((move_job_file(con,jobp,StdOut) != 0) || (move_job_file(con,jobp,StdErr) != 0) || (move_job_file(con,jobp,Checkpoint) != 0)) { continue; } } /* ignore signals */ if (sigprocmask(SIG_BLOCK, &all_set, NULL) == -1) log_err(errno, id, "sigprocmask\n"); 
jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUTCM; job_save(jobp, SAVEJOB_QUICK); } else { /* ignore signals */ if (sigprocmask(SIG_BLOCK, &all_set, NULL) == -1) log_err(errno, id, "sigprocmask\n"); } if (PBSD_rdytocmt(con, jobp->ji_qs.ji_jobid) != 0) { if (sigprocmask(SIG_UNBLOCK, &all_set, NULL) == -1) log_err(errno, id, "sigprocmask\n"); continue; } if ((rc = PBSD_commit(con, jobp->ji_qs.ji_jobid)) != 0) { int errno2; /* NOTE: errno is modified by log_err */ errno2 = errno; sprintf(log_buffer, "send_job commit failed, rc=%d (%s)", rc, (connection[con].ch_errtxt != NULL) ? connection[con].ch_errtxt : "N/A"); log_ext(errno2, id, log_buffer, LOG_WARNING); /* if failure occurs, pbs_mom should purge job and pbs_server should set * job state to idle w/error msg */ if (errno2 == EINPROGRESS) { /* request is still being processed */ /* increase tcp_timeout in qmgr? */ Timeout = TRUE; /* do we need a continue here? */ sprintf(log_buffer, "child commit request timed-out for job %s, increase tcp_timeout?", jobp->ji_qs.ji_jobid); log_ext(errno2, id, log_buffer, LOG_WARNING); /* don't retry on timeout--break out and report error! */ break; } else { sprintf(log_buffer, "child failed in commit request for job %s", jobp->ji_qs.ji_jobid); log_ext(errno2, id, log_buffer, LOG_CRIT); /* FAILURE */ exit(1); } } /* END if ((rc = PBSD_commit(con,jobp->ji_qs.ji_jobid)) != 0) */ svr_disconnect(con); /* child process is done */ /* SUCCESS */ exit(0); } /* END for (NumRetries) */ if (con >= 0) svr_disconnect(con); if (Timeout == TRUE) { /* 10 indicates that job migrate timed out, server will mark node down * and abort the job - see post_sendmom() */ sprintf(log_buffer, "child timed-out attempting to start job %s", jobp->ji_qs.ji_jobid); log_ext(pbs_errno, id, log_buffer, LOG_WARNING); exit(10); } if (should_retry_route(pbs_errno) == -1) { sprintf(log_buffer, "child failed and will not retry job %s", jobp->ji_qs.ji_jobid); log_err(pbs_errno, id, log_buffer); exit(1); } exit(2); /*NOTREACHED*/ return(ROUTE_SUCCESS); } /* END send_job() */
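The child in send_job() retries the transfer with an exponential back-off (sleep(1 << NumRetries)) and reports its outcome through exit codes that the parent's post_func inspects. A stripped-down sketch of that retry skeleton, with the transfer itself stubbed out (try_transfer() is a placeholder, not a TORQUE call, and the RETRY value here is illustrative):

#include <unistd.h>

#define RETRY 4  /* illustrative; the server defines its own RETRY */

/* placeholder for one queuejob/script/commit attempt: 0 on success */
static int try_transfer(void)
  {
  return(-1);  /* stub: always fails, so every retry is exercised */
  }

int transfer_with_backoff(void)
  {
  int attempt;

  for (attempt = 0; attempt < RETRY; attempt++)
    {
    if (attempt > 0)
      sleep(1 << attempt);  /* back off 2, 4, 8, ... seconds between tries */

    if (try_transfer() == 0)
      return(0);  /* SUCCESS */
    }

  return(2);  /* retries exhausted - send_job exits 2 at the analogous point */
  }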
static int local_move( job *jobp, struct batch_request *req) { char *id = "local_move"; pbs_queue *qp; char *destination = jobp->ji_qs.ji_destin; int mtype; /* search for destination queue */ if ((qp = find_queuebyname(destination)) == NULL) { sprintf(log_buffer, "queue %s does not exist\n", destination); log_err(-1, id, log_buffer); pbs_errno = PBSE_UNKQUE; return(ROUTE_PERM_FAILURE); } /* * if being moved at specific request of administrator, then * checks on queue availability, etc. are skipped; * otherwise all checks are enforced. */ if (req == 0) { mtype = MOVE_TYPE_Route; /* route */ } else if (req->rq_perm & (ATR_DFLAG_MGRD | ATR_DFLAG_MGWR)) { mtype = MOVE_TYPE_MgrMv; /* privileged move */ } else { mtype = MOVE_TYPE_Move; /* non-privileged move */ } if ((pbs_errno = svr_chkque( jobp, qp, get_variable(jobp, pbs_o_host), mtype, NULL))) { /* should this queue be retried? */ return(should_retry_route(pbs_errno)); } /* dequeue job from present queue, update destination and */ /* queue_rank for new queue and enqueue into destination */ svr_dequejob(jobp); strcpy(jobp->ji_qs.ji_queue, destination); jobp->ji_wattr[JOB_ATR_qrank].at_val.at_long = ++queue_rank; pbs_errno = svr_enquejob(jobp); if (pbs_errno != 0) { return(ROUTE_PERM_FAILURE); /* should never ever get here */ } jobp->ji_lastdest = 0; /* reset in case of another route */ job_save(jobp, SAVEJOB_FULL); return(ROUTE_SUCCESS); } /* END local_move() */
void req_rdytocommit( struct batch_request *preq) /* I */ { job *pj; int sock = preq->rq_conn; int OrigState; int OrigSState; char OrigSChar; long OrigFlags; pj = locate_new_job(sock, preq->rq_ind.rq_rdytocommit); if (LOGLEVEL >= 6) { log_record( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, (pj != NULL) ? pj->ji_qs.ji_jobid : "NULL", "ready to commit job"); } if (pj == NULL) { log_err(errno, "req_rdytocommit", "unknown job id"); req_reject(PBSE_UNKJOBID, 0, preq, NULL, NULL); /* FAILURE */ return; } if (pj->ji_qs.ji_substate != JOB_SUBSTATE_TRANSIN) { log_err(errno, "req_rdytocommit", "cannot commit job in unexpected state"); req_reject(PBSE_IVALREQ, 0, preq, NULL, NULL); /* FAILURE */ return; } OrigState = pj->ji_qs.ji_state; OrigSState = pj->ji_qs.ji_substate; OrigSChar = pj->ji_wattr[(int)JOB_ATR_state].at_val.at_char; OrigFlags = pj->ji_wattr[(int)JOB_ATR_state].at_flags; pj->ji_qs.ji_state = JOB_STATE_TRANSIT; pj->ji_qs.ji_substate = JOB_SUBSTATE_TRANSICM; pj->ji_wattr[(int)JOB_ATR_state].at_val.at_char = 'T'; pj->ji_wattr[(int)JOB_ATR_state].at_flags |= ATR_VFLAG_SET; if (job_save(pj, SAVEJOB_NEW) == -1) { char tmpLine[1024]; sprintf(tmpLine, "cannot save job - errno=%d - %s", errno, strerror(errno)); log_err(errno, "req_rdytocommit", tmpLine); /* save failed, back off the state changes */ pj->ji_qs.ji_state = OrigState; pj->ji_qs.ji_substate = OrigSState; pj->ji_wattr[(int)JOB_ATR_state].at_val.at_char = OrigSChar; pj->ji_wattr[(int)JOB_ATR_state].at_flags = OrigFlags; req_reject(PBSE_SYSTEM, 0, preq, NULL, tmpLine); /* FAILURE */ return; } /* acknowledge the request with the job id */ if (reply_jobid(preq, pj->ji_qs.ji_jobid, BATCH_REPLY_CHOICE_RdytoCom) != 0) { /* reply failed, close the connection and purge the job */ sprintf(log_buffer, "cannot report jobid - errno=%d - %s", errno, strerror(errno)); log_err(errno, "req_rdytocommit", log_buffer); close_conn(sock); job_purge(pj); /* FAILURE */ return; } if (LOGLEVEL >= 6) { log_record( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, (pj != NULL) ? pj->ji_qs.ji_jobid : "NULL", "ready to commit job completed"); } return; } /* END req_rdytocommit() */
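The pattern above, snapshot the state fields, mutate, attempt the durable save, restore the snapshot on failure, keeps the in-memory job consistent with its disk image. Reduced to a sketch with stand-in types (nothing here is a TORQUE structure; persist() stands in for job_save()):

struct jstate { int state; int substate; char schar; long flags; };

/* stand-in for job_save(): returns 0 on success, -1 on failure */
static int persist(const struct jstate *js)
  {
  (void)js;
  return(0);
  }

int transition_with_rollback(struct jstate *js)
  {
  struct jstate saved = *js;  /* snapshot before mutating */

  js->state    = 1;           /* e.g. TRANSIT  */
  js->substate = 2;           /* e.g. TRANSICM */
  js->schar    = 'T';

  if (persist(js) != 0)
    {
    *js = saved;              /* save failed: roll the fields back */
    return(-1);
    }

  return(0);
  }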
int req_orderjob( struct batch_request *vp) /* I */ { job *pjob; job *pjob1; job *pjob2; int rank; int rc = 0; char tmpqn[PBS_MAXQUEUENAME+1]; struct batch_request *req = (struct batch_request *)vp; char log_buf[LOCAL_LOG_BUF_SIZE]; pbs_queue *pque1; pbs_queue *pque2; if ((pjob1 = chk_job_request(req->rq_ind.rq_move.rq_jid, req)) == NULL) { return(PBSE_NONE); } mutex_mgr job1_mutex(pjob1->ji_mutex, true); if ((pjob2 = chk_job_request(req->rq_ind.rq_move.rq_destin, req)) == NULL) { return(PBSE_NONE); } mutex_mgr job2_mutex(pjob2->ji_mutex, true); if (((pjob = pjob1)->ji_qs.ji_state == JOB_STATE_RUNNING) || ((pjob = pjob2)->ji_qs.ji_state == JOB_STATE_RUNNING)) { #ifndef NDEBUG snprintf(log_buf, sizeof(log_buf), "%s %d in %s", pbse_to_txt(PBSE_BADSTATE), pjob->ji_qs.ji_state, __func__); log_event( PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf); #endif /* NDEBUG */ req_reject(PBSE_BADSTATE, 0, req, NULL, NULL); return(PBSE_NONE); } else if ((pjob1->ji_qhdr == NULL) || (pjob2->ji_qhdr == NULL)) { req_reject(PBSE_BADSTATE, 0, req, NULL, "One of the jobs does not have a queue"); return(PBSE_NONE); } else if (pjob1->ji_qhdr != pjob2->ji_qhdr) { /* jobs are in different queues */ int ok = FALSE; if ((pque2 = get_jobs_queue(&pjob2)) == NULL) { rc = PBSE_BADSTATE; job2_mutex.set_lock_on_exit(false); } else { mutex_mgr pque2_mutex = mutex_mgr(pque2->qu_mutex, true); if ((rc = svr_chkque(pjob1, pque2, get_variable(pjob1, pbs_o_host), MOVE_TYPE_Order, NULL)) == PBSE_NONE) { pque2_mutex.unlock(); if ((pque1 = get_jobs_queue(&pjob1)) == NULL) { rc = PBSE_BADSTATE; job1_mutex.set_lock_on_exit(false); } else if (pjob1 != NULL) { mutex_mgr pque1_mutex = mutex_mgr(pque1->qu_mutex, true); if ((rc = svr_chkque(pjob2, pque1, get_variable(pjob2, pbs_o_host), MOVE_TYPE_Order, NULL)) == PBSE_NONE) { ok = TRUE; } } } } if (ok == FALSE) { req_reject(rc, 0, req, NULL, NULL); return(PBSE_NONE); } } /* now swap the order of the two jobs in the queue lists */ rank = pjob1->ji_wattr[JOB_ATR_qrank].at_val.at_long; pjob1->ji_wattr[JOB_ATR_qrank].at_val.at_long = pjob2->ji_wattr[JOB_ATR_qrank].at_val.at_long; pjob2->ji_wattr[JOB_ATR_qrank].at_val.at_long = rank; if (pjob1->ji_qhdr != pjob2->ji_qhdr) { strcpy(tmpqn, pjob1->ji_qs.ji_queue); strcpy(pjob1->ji_qs.ji_queue, pjob2->ji_qs.ji_queue); strcpy(pjob2->ji_qs.ji_queue, tmpqn); svr_dequejob(pjob1, FALSE); svr_dequejob(pjob2, FALSE); if (svr_enquejob(pjob1, FALSE, -1) == PBSE_JOB_RECYCLED) { pjob1 = NULL; job1_mutex.set_lock_on_exit(false); } if (svr_enquejob(pjob2, FALSE, -1) == PBSE_JOB_RECYCLED) { pjob2 = NULL; job2_mutex.set_lock_on_exit(false); } } else { if ((pque1 = get_jobs_queue(&pjob1)) != NULL) { mutex_mgr pque1_mutex = mutex_mgr(pque1->qu_mutex, true); swap_jobs(pque1->qu_jobs,pjob1,pjob2); swap_jobs(NULL,pjob1,pjob2); } } /* need to update disk copy of both jobs to save new order */ if (pjob1 != NULL) { job_save(pjob1, SAVEJOB_FULL, 0); } if (pjob2 != NULL) { job_save(pjob2, SAVEJOB_FULL, 0); } /* SUCCESS */ reply_ack(req); return(PBSE_NONE); } /* END req_orderjob() */
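The swap above works because queue position is derived from the qrank attribute: exchanging the two rank values (plus, across queues, the queue names) is all the reordering that is needed. A toy model (job_t and by_rank are illustrative, not server types):

#include <stdio.h>
#include <stdlib.h>

typedef struct { const char *id; long qrank; } job_t;

static int by_rank(const void *a, const void *b)
  {
  const job_t *ja = (const job_t *)a;
  const job_t *jb = (const job_t *)b;

  return (ja->qrank > jb->qrank) - (ja->qrank < jb->qrank);
  }

int main(void)
  {
  job_t q[] = { {"1.svr", 10}, {"2.svr", 20}, {"3.svr", 30} };
  long  tmp = q[0].qrank;

  /* swap the ranks of 1.svr and 3.svr, as req_orderjob() does */
  q[0].qrank = q[2].qrank;
  q[2].qrank = tmp;

  qsort(q, 3, sizeof(job_t), by_rank);  /* queue order follows qrank */

  printf("head of queue is now %s\n", q[0].id);  /* prints 3.svr */

  return 0;
  }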
void req_commit( struct batch_request *preq) /* I */ { job *pj; pj = locate_new_job(preq->rq_conn, preq->rq_ind.rq_commit); if (LOGLEVEL >= 6) { log_record( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, (pj != NULL) ? pj->ji_qs.ji_jobid : "NULL", "committing job"); } if (pj == NULL) { req_reject(PBSE_UNKJOBID, 0, preq, NULL, NULL); return; } if (pj->ji_qs.ji_substate != JOB_SUBSTATE_TRANSICM) { log_err(errno, "req_commit", "cannot commit job in unexpected state"); req_reject(PBSE_IVALREQ, 0, preq, NULL, NULL); return; } /* move job from new job list to "all" job list, set to running state */ delete_link(&pj->ji_alljobs); append_link(&svr_alljobs, &pj->ji_alljobs, pj); /* ** Set JOB_SVFLG_HERE to indicate that this is Mother Superior. */ pj->ji_qs.ji_svrflags |= JOB_SVFLG_HERE; pj->ji_qs.ji_state = JOB_STATE_RUNNING; pj->ji_qs.ji_substate = JOB_SUBSTATE_PRERUN; pj->ji_qs.ji_un_type = JOB_UNION_TYPE_MOM; pj->ji_qs.ji_un.ji_momt.ji_svraddr = get_connectaddr(preq->rq_conn); pj->ji_qs.ji_un.ji_momt.ji_exitstat = 0; /* For MOM - start up the job (blocks) */ if (LOGLEVEL >= 6) { log_record( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, (pj != NULL) ? pj->ji_qs.ji_jobid : "NULL", "starting job execution"); } start_exec(pj); if (LOGLEVEL >= 6) { log_record( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, (pj != NULL) ? pj->ji_qs.ji_jobid : "NULL", "job execution started"); } /* if start request fails, reply with failure string */ if (pj->ji_qs.ji_substate == JOB_SUBSTATE_EXITING) { char tmpLine[1024]; if ((pj->ji_hosts != NULL) && (pj->ji_nodekill >= 0) && (pj->ji_hosts[pj->ji_nodekill].hn_host != NULL)) { sprintf(tmpLine, "start failed on node %s", pj->ji_hosts[pj->ji_nodekill].hn_host); } else { sprintf(tmpLine, "start failed on unknown node"); } if (LOGLEVEL >= 6) { log_record( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, (pj != NULL) ? pj->ji_qs.ji_jobid : "NULL", tmpLine); } reply_text(preq, 0, tmpLine); } else { reply_jobid(preq, pj->ji_qs.ji_jobid, BATCH_REPLY_CHOICE_Commit); } job_save(pj, SAVEJOB_FULL); /* NOTE: we used to flag JOB_ATR_errpath, JOB_ATR_outpath, * JOB_ATR_session_id, and JOB_ATR_altid as modified at this point to make sure * pbs_server got these attr values. This worked fine before TORQUE modified * job launched into an async process. At 2.0.0p6, a new attribute "SEND" flag * was added to handle this process. */ return; } /* END req_commit() */
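req_rdytocommit() and req_commit() above are the receiving half of the staging handshake driven by the send_job() child earlier in this section. Condensed to its happy path, reusing the PBSD_* client calls exactly as the send_job() child uses them (error handling, retries, and prior-run file moves omitted; this sketch assumes the server tree's headers and is not a standalone program):

/* sender side, condensed from send_job():                        */
/*   queuejob -> receiver creates the job in SUBSTATE_TRANSIN     */
/*   jscript  -> receiver stores the job script                   */
/*   rdytocmt -> req_rdytocommit(): TRANSIN -> TRANSICM, saved    */
/*   commit   -> req_commit(): enqueue/start, reply with job id   */
static int stage_job(int con, char *jobid, char *destin, struct attropl *pqjatr, char *script)
  {
  char *pc;

  if ((pc = PBSD_queuejob(con, jobid, destin, pqjatr, NULL)) == NULL)
    return(-1);

  free(pc);

  if (PBSD_jscript(con, script, jobid) != 0)
    return(-1);

  if (PBSD_rdytocmt(con, jobid) != 0)
    return(-1);

  if (PBSD_commit(con, jobid) != 0)
    return(-1);

  return(0);
  }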
int execute_job_delete( job *pjob, /* M */ char *Msg, /* I */ struct batch_request *preq) /* I */ { struct work_task *pwtnew; int rc; char *sigt = "SIGTERM"; int has_mutex = TRUE; char log_buf[LOCAL_LOG_BUF_SIZE]; time_t time_now = time(NULL); long force_cancel = FALSE; long array_compatible = FALSE; chk_job_req_permissions(&pjob,preq); if (pjob == NULL) { /* preq is rejected in chk_job_req_permissions here */ return(-1); } if (pjob->ji_qs.ji_state == JOB_STATE_TRANSIT) { /* see note in req_delete - not sure this is possible still, * but the deleted code is irrelevant now. I will leave this * part --dbeer */ unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); return(-1); } if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN || pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN || pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN1 || pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN2 || pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN3 ) { /* If JOB_SUBSTATE_PRERUN being sent to MOM, wait till she gets it going */ /* retry in one second */ /* If JOB_SUBSTATE_RERUN, RERUN1, RERUN2 or RERUN3 the job is being requeued. Wait until finished */ static time_t cycle_check_when = 0; static char cycle_check_jid[PBS_MAXSVRJOBID + 1]; if (cycle_check_when != 0) { if (!strcmp(pjob->ji_qs.ji_jobid, cycle_check_jid) && (time_now - cycle_check_when > 10)) { /* state not updated after 10 seconds */ /* did the mom ever get it? delete it anyway... */ cycle_check_jid[0] = '\0'; cycle_check_when = 0; goto jump; } if (time_now - cycle_check_when > 20) { /* give up after 20 seconds */ cycle_check_jid[0] = '\0'; cycle_check_when = 0; } } /* END if (cycle_check_when != 0) */ if (cycle_check_when == 0) { /* new PRERUN job located */ cycle_check_when = time_now; strcpy(cycle_check_jid, pjob->ji_qs.ji_jobid); } sprintf(log_buf, "job cannot be deleted, state=PRERUN, requeuing delete request"); log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buf); pwtnew = set_task(WORK_Timed,time_now + 1,post_delete_route,preq,FALSE); unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL); if (pwtnew == NULL) { req_reject(PBSE_SYSTEM, 0, preq, NULL, NULL); return(-1); } else { return(ROUTE_DELETE); } } /* END if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN) */ jump: /* * Log delete and if requesting client is not job owner, send mail. 
*/ sprintf(log_buf, "requestor=%s@%s", preq->rq_user, preq->rq_host); /* NOTE: should annotate accounting record with extend message (NYI) */ account_record(PBS_ACCT_DEL, pjob, log_buf); sprintf(log_buf, msg_manager, msg_deletejob, preq->rq_user, preq->rq_host); log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buf); /* NOTE: should incorporate job delete message */ if (Msg != NULL) { /* have text message in request extension, add it */ strcat(log_buf, "\n"); strcat(log_buf, Msg); } if ((svr_chk_owner(preq, pjob) != 0) && (pjob->ji_has_delete_nanny == FALSE)) { /* only send email if the owner did not delete the job and a job delete has not previously been attempted */ svr_mailowner(pjob, MAIL_DEL, MAIL_FORCE, log_buf); /* * If we sent mail and already sent the extra message * then reset message so we don't trigger a redundant email * in job_abt() */ if (Msg != NULL) { Msg = NULL; } } if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0) { /* job has restart file at mom, change restart comment if failed */ change_restart_comment_if_needed(pjob); } if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) { /* * setup a nanny task to make sure the job is actually deleted (see the * comments at job_delete_nanny()). */ if (pjob->ji_has_delete_nanny == TRUE) { unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL); req_reject(PBSE_IVALREQ, 0, preq, NULL, "job cancel in progress"); return(-1); } apply_job_delete_nanny(pjob, time_now + 60); /* * Send signal request to MOM. The server will automagically * pick up and "finish" off the client request when MOM replies. */ get_batch_request_id(preq); if ((rc = issue_signal(&pjob, sigt, post_delete_mom1, strdup(preq->rq_id)))) { /* can't send to MOM */ req_reject(rc, 0, preq, NULL, NULL); } /* normally will ack reply when mom responds */ if (pjob != NULL) { sprintf(log_buf, msg_delrunjobsig, sigt); log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buf); unlock_ji_mutex(pjob, __func__, "4", LOGLEVEL); } return(-1); } /* END if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) */ /* make a cleanup task if set */ get_svr_attr_l(SRV_ATR_JobForceCancelTime, &force_cancel); if (force_cancel > 0) { char *dup_jobid = strdup(pjob->ji_qs.ji_jobid); set_task(WORK_Timed, time_now + force_cancel, ensure_deleted, dup_jobid, FALSE); } /* if configured, and this job didn't have a slot limit hold, free a job * held with the slot limit hold */ get_svr_attr_l(SRV_ATR_MoabArrayCompatible, &array_compatible); if ((array_compatible != FALSE) && ((pjob->ji_wattr[JOB_ATR_hold].at_val.at_long & HOLD_l) == FALSE)) { if ((pjob->ji_arraystruct != NULL) && (pjob->ji_is_array_template == FALSE)) { int i; int newstate; int newsub; job *tmp; job_array *pa = get_jobs_array(&pjob); if (pjob == NULL) return(-1); for (i = 0; i < pa->ai_qs.array_size; i++) { if (pa->job_ids[i] == NULL) continue; if (!strcmp(pa->job_ids[i], pjob->ji_qs.ji_jobid)) continue; if ((tmp = svr_find_job(pa->job_ids[i], FALSE)) == NULL) { free(pa->job_ids[i]); pa->job_ids[i] = NULL; } else { if (tmp->ji_wattr[JOB_ATR_hold].at_val.at_long & HOLD_l) { tmp->ji_wattr[JOB_ATR_hold].at_val.at_long &= ~HOLD_l; if (tmp->ji_wattr[JOB_ATR_hold].at_val.at_long == 0) { tmp->ji_wattr[JOB_ATR_hold].at_flags &= ~ATR_VFLAG_SET; } svr_evaljobstate(tmp, &newstate, &newsub, 1); svr_setjobstate(tmp, newstate, newsub, FALSE); job_save(tmp, SAVEJOB_FULL, 0); unlock_ji_mutex(tmp, __func__, "5", LOGLEVEL); break; } unlock_ji_mutex(tmp, __func__, "6", LOGLEVEL); } } if (LOGLEVEL >= 7) { sprintf(log_buf, "%s: unlocking ai_mutex", 
__func__); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf); } pthread_mutex_unlock(pa->ai_mutex); } } /* END MoabArrayCompatible check */ if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0) { /* job has restart file at mom, do end job processing */ svr_setjobstate(pjob, JOB_STATE_EXITING, JOB_SUBSTATE_EXITING, FALSE); /* force new connection */ pjob->ji_momhandle = -1; if (LOGLEVEL >= 7) { sprintf(log_buf, "calling on_job_exit from %s", __func__); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf); } set_task(WORK_Immed, 0, on_job_exit, strdup(pjob->ji_qs.ji_jobid), FALSE); } else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn) != 0) { /* job has staged-in files, so remove them */ remove_stagein(&pjob); if (pjob != NULL) job_abt(&pjob, Msg); has_mutex = FALSE; } else { /* * the job is not transiting (though it may have been) and * is not running, so put it into a complete state. */ struct pbs_queue *pque; int KeepSeconds = 0; svr_setjobstate(pjob, JOB_STATE_COMPLETE, JOB_SUBSTATE_COMPLETE, FALSE); if ((pque = get_jobs_queue(&pjob)) != NULL) { pque->qu_numcompleted++; unlock_queue(pque, __func__, NULL, LOGLEVEL); if (LOGLEVEL >= 7) { sprintf(log_buf, "calling on_job_exit from %s", __func__); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf); } pthread_mutex_lock(server.sv_attr_mutex); KeepSeconds = attr_ifelse_long( &pque->qu_attr[QE_ATR_KeepCompleted], &server.sv_attr[SRV_ATR_KeepCompleted], 0); pthread_mutex_unlock(server.sv_attr_mutex); } else KeepSeconds = 0; if (pjob != NULL) { set_task(WORK_Timed, time_now + KeepSeconds, on_job_exit, strdup(pjob->ji_qs.ji_jobid), FALSE); } else has_mutex = FALSE; } /* END else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0) */ if (has_mutex == TRUE) unlock_ji_mutex(pjob, __func__, "7", LOGLEVEL); return(PBSE_NONE); } /* END execute_job_delete() */
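The KeepSeconds computation above layers a queue-level keep_completed value over the server-wide default. The selection logic, isolated into a sketch (ifelse_long is a stand-in for attr_ifelse_long, working on plain values instead of attribute structs):

/* prefer the queue's value when set, else the server's, else dflt */
static long ifelse_long(int que_set, long que_val, int svr_set, long svr_val, long dflt)
  {
  if (que_set)
    return(que_val);

  if (svr_set)
    return(svr_val);

  return(dflt);
  }

Completed jobs are then purged by the on_job_exit task scheduled KeepSeconds after the delete, mirroring the set_task() call above.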
void req_register(struct batch_request *preq) { int made; attribute *pattr; struct depend *pdep; struct depend_job *pdj; job *pjob; char *ps; struct work_task *ptask; int rc = 0; int revtype; int type; int savetype = SAVEJOB_FULL; /* make sure request is from a server */ if (!preq->rq_fromsvr) { #ifdef NAS /* localmod 109 */ sprintf(log_buffer, "Dependency request not from server"); log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO, preq->rq_ind.rq_register.rq_parent, log_buffer); #endif /* localmod 109 */ req_reject(PBSE_IVALREQ, 0, preq); return; } /* find the "parent" job specified in the request */ if ((pjob = find_job(preq->rq_ind.rq_register.rq_parent)) == NULL) { /* * job not found... if server is initializing, it may not * yet be recovered; that is not an error. */ if (server.sv_attr[(int)SRV_ATR_State].at_val.at_long != SV_STATE_INIT) { log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO, preq->rq_ind.rq_register.rq_parent, msg_unkjobid); req_reject(PBSE_UNKJOBID, 0, preq); } else { reply_ack(preq); } return; } pattr = &pjob->ji_wattr[(int)JOB_ATR_depend]; type = preq->rq_ind.rq_register.rq_dependtype; pjob->ji_modified = 1; /* more of the server:port fix kludge */ ps = strchr(preq->rq_ind.rq_register.rq_child, (int)'@'); if (ps != NULL) { (void)strcpy(preq->rq_ind.rq_register.rq_svr, ps+1); *ps = '\0'; } else { (void)strcpy(preq->rq_ind.rq_register.rq_svr, preq->rq_host); } if (pjob->ji_qs.ji_state == JOB_STATE_MOVED) { snprintf(log_buffer, sizeof(log_buffer), "Parent %s%s", msg_movejob, pjob->ji_qs.ji_destin); log_event(PBSEVENT_DEBUG|PBSEVENT_SYSTEM|PBSEVENT_ERROR, PBS_EVENTCLASS_REQUEST, LOG_INFO, preq->rq_ind.rq_register.rq_child, log_buffer); req_reject(PBSE_JOB_MOVED, 0, preq); return; } switch (preq->rq_ind.rq_register.rq_op) { /* * Register a dependency */ case JOB_DEPEND_OP_REGISTER: switch (type) { case JOB_DEPEND_TYPE_AFTERSTART: if (pjob->ji_qs.ji_substate >= JOB_SUBSTATE_RUNNING) { /* job already running, setup task to send */ /* release back to child and continue with */ /* registration process */ ptask = set_task(WORK_Immed, 0, post_run_depend, (void *)pjob); if (ptask) append_link(&pjob->ji_svrtask, &ptask->wt_linkobj, ptask); } /* fall through to complete registration */ case JOB_DEPEND_TYPE_AFTERANY: case JOB_DEPEND_TYPE_AFTEROK: case JOB_DEPEND_TYPE_AFTERNOTOK: rc = register_dep(pattr, preq, type, &made); break; case JOB_DEPEND_TYPE_BEFORESTART: case JOB_DEPEND_TYPE_BEFOREANY: case JOB_DEPEND_TYPE_BEFOREOK: case JOB_DEPEND_TYPE_BEFORENOTOK: /* * Check job owner for permission, use the real * job owner, not the sending server's name. */ (void)strcpy(preq->rq_user, preq->rq_ind.rq_register.rq_owner); if (svr_chk_owner(preq, pjob)) { rc = PBSE_PERM; /* not same user */ } else { /* ok owner, see if job has "on" */ pdep = find_depend(JOB_DEPEND_TYPE_ON, pattr); if (pdep == 0) { /* no "on", see if child already registered */ revtype = type ^ (JOB_DEPEND_TYPE_BEFORESTART - JOB_DEPEND_TYPE_AFTERSTART); pdep = find_depend(revtype, pattr); if (pdep == 0) { /* no "on" and no prior - return error */ rc = PBSE_BADDEPEND; } else { pdj = find_dependjob(pdep, preq->rq_ind.rq_register.rq_child); if (pdj) { /* has prior register, update it */ (void)strcpy(pdj->dc_svr, preq->rq_ind.rq_register.rq_svr); } } } else if ((rc=register_dep(pattr, preq, type, &made)) == 0) { if (made) { /* first time registered */ if (--pdep->dp_numexp <= 0) del_depend(pdep); } } } break; default: #ifdef NAS /* localmod 109 */ sprintf(log_buffer, "Unknown dep. 
op: %d", preq->rq_ind.rq_register.rq_op); log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO, preq->rq_ind.rq_register.rq_parent, log_buffer); #endif /* localmod 109 */ rc = PBSE_IVALREQ; break; } break; /* * Release a dependency so job might run */ case JOB_DEPEND_OP_RELEASE: switch (type) { case JOB_DEPEND_TYPE_BEFORESTART: case JOB_DEPEND_TYPE_BEFOREANY: case JOB_DEPEND_TYPE_BEFOREOK: case JOB_DEPEND_TYPE_BEFORENOTOK: /* predecessor sent release-reduce "on", */ /* see if this job can now run */ type ^= (JOB_DEPEND_TYPE_BEFORESTART - JOB_DEPEND_TYPE_AFTERSTART); if ((pdep = find_depend(type, pattr)) != NULL) { pdj = find_dependjob(pdep, preq->rq_ind.rq_register.rq_child); if (pdj) { del_depend_job(pdj); pattr->at_flags |= ATR_VFLAG_MODIFY | ATR_VFLAG_MODCACHE; savetype = SAVEJOB_FULLFORCE; (void)sprintf(log_buffer, msg_registerrel, preq->rq_ind.rq_register.rq_child); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, pjob->ji_qs.ji_jobid, log_buffer); if (GET_NEXT(pdep->dp_jobs) == 0) { /* no more dependencies of this type */ del_depend(pdep); set_depend_hold(pjob, pattr); } break; } #ifdef NAS /* localmod 109 */ sprintf(log_buffer, "Dep.rls. job not found: %d/%s", type, preq->rq_ind.rq_register.rq_child); } else { sprintf(log_buffer, "Dep.rls. type not found: %d", type); #endif /* localmod 109 */ } #ifdef NAS /* localmod 109 */ log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, LOG_INFO, preq->rq_ind.rq_register.rq_parent, log_buffer); #endif /* localmod 109 */ rc = PBSE_IVALREQ; break; } break; case JOB_DEPEND_OP_READY: rc = PBSE_NOSYNCMSTR; break; case JOB_DEPEND_OP_DELETE: (void)sprintf(log_buffer, msg_registerdel, preq->rq_ind.rq_register.rq_child); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, LOG_INFO, pjob->ji_qs.ji_jobid, log_buffer); job_abt(pjob, log_buffer); break; case JOB_DEPEND_OP_UNREG: unregister_dep(pattr, preq); set_depend_hold(pjob, pattr); break; default: sprintf(log_buffer, msg_illregister, preq->rq_ind.rq_register.rq_parent); log_event(PBSEVENT_DEBUG|PBSEVENT_SYSTEM|PBSEVENT_ERROR, PBS_EVENTCLASS_REQUEST, LOG_INFO, preq->rq_host, log_buffer); rc = PBSE_IVALREQ; break; } if (rc) { pjob->ji_modified = 0; req_reject(rc, 0, preq); } else { /* If this is an array job, forcibly save it to ensure * dependencies are recorded. */ if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_ArrayJob) savetype = SAVEJOB_FULLFORCE; if (pjob->ji_modified) (void)job_save(pjob, savetype); reply_ack(preq); } return; }