int release_whole_array(

  job_array            *pa,   /* I/O */
  struct batch_request *preq) /* I */

  {
  int   i;
  int   rc;
  job  *pjob;

  for (i = 0; i < pa->ai_qs.array_size; i++)
    {
    if (pa->job_ids[i] == NULL)
      continue;

    if ((pjob = svr_find_job(pa->job_ids[i], FALSE)) == NULL)
      {
      free(pa->job_ids[i]);
      pa->job_ids[i] = NULL;
      }
    else
      {
      mutex_mgr job_mutex(pjob->ji_mutex, true);

      if ((rc = release_job(preq, pjob, pa)) != 0)
        return(rc);
      }
    }

  /* SUCCESS */

  return(PBSE_NONE);
  }  /* END release_whole_array */
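/*
 * NOTE (illustrative sketch, not the project's actual mutex_mgr class):
 * the mutex_mgr guards used throughout this file follow the usual RAII
 * pattern - the guard adopts an already-locked (or locks an unlocked)
 * pthread mutex and releases it when it goes out of scope unless told
 * otherwise.  A minimal stand-in offering the calls used here might look
 * like the following; names such as scoped_mutex are hypothetical.
 *
 *   class scoped_mutex
 *     {
 *     pthread_mutex_t *m;
 *     bool             unlock_on_exit;
 *
 *   public:
 *     // already_locked == true means the caller holds the mutex and is
 *     // handing responsibility for releasing it to the guard
 *     scoped_mutex(pthread_mutex_t *mut, bool already_locked) :
 *       m(mut), unlock_on_exit(true)
 *       {
 *       if (!already_locked)
 *         pthread_mutex_lock(m);
 *       }
 *
 *     // release early; the destructor then does nothing
 *     void unlock() { pthread_mutex_unlock(m); unlock_on_exit = false; }
 *
 *     // used when the protected object was freed or recycled elsewhere
 *     void set_unlock_on_exit(bool v) { unlock_on_exit = v; }
 *
 *     ~scoped_mutex()
 *       {
 *       if (unlock_on_exit)
 *         pthread_mutex_unlock(m);
 *       }
 *     };
 */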
void post_rerun(

  batch_request *preq)

  {
  int   newstate;
  int   newsub;
  job  *pjob;
  char  log_buf[LOCAL_LOG_BUF_SIZE];

  if (preq == NULL)
    return;

  if (preq->rq_reply.brp_code != 0)
    {
    sprintf(log_buf, "rerun signal reject by mom: %s - %d",
      preq->rq_ind.rq_signal.rq_jid,
      preq->rq_reply.brp_code);

    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);

    if ((pjob = svr_find_job(preq->rq_ind.rq_signal.rq_jid, FALSE)))
      {
      mutex_mgr job_mutex(pjob->ji_mutex, true);

      svr_evaljobstate(*pjob, newstate, newsub, 1);
      svr_setjobstate(pjob, newstate, newsub, FALSE);
      }
    }

  return;
  }  /* END post_rerun() */
int req_releasejob(

  batch_request *vp) /* I */

  {
  job           *pjob;
  int            rc;
  batch_request *preq = (batch_request *)vp;

  pjob = chk_job_request(preq->rq_ind.rq_release.rq_objname, preq);

  if (pjob == NULL)
    {
    return(PBSE_NONE);
    }

  mutex_mgr job_mutex(pjob->ji_mutex, true);

  if ((rc = release_job(preq, pjob, NULL)) != 0)
    {
    req_reject(rc, 0, preq, NULL, NULL);
    }
  else
    {
    reply_ack(preq);
    }

  return(PBSE_NONE);
  }  /* END req_releasejob() */
int handle_requeue_all(

  batch_request *preq)

  {
  int                rc;
  job               *pjob;
  all_jobs_iterator *iter;

  if ((preq->rq_perm & (ATR_DFLAG_MGWR)) == 0)
    {
    rc = PBSE_PERM;
    req_reject(rc, 0, preq, NULL, "You must be a manager to requeue all jobs");
    return(rc);
    }

  alljobs.lock();
  iter = alljobs.get_iterator();
  alljobs.unlock();

  while ((pjob = next_job(&alljobs, iter)) != NULL)
    {
    mutex_mgr job_mutex(pjob->ji_mutex, true);

    requeue_job_without_contacting_mom(*pjob);
    }

  delete iter;

  reply_ack(preq);

  return(PBSE_NONE);
  }  /* END handle_requeue_all() */
void job_delete_nanny(

  struct work_task *pwt)

  {
  job                  *pjob;
  const char           *sigk = "SIGKILL";
  char                 *jobid;
  struct batch_request *newreq;
  char                  log_buf[LOCAL_LOG_BUF_SIZE];
  time_t                time_now = time(NULL);
  long                  nanny = FALSE;

  /* short-circuit if nanny isn't enabled */
  get_svr_attr_l(SRV_ATR_JobNanny, &nanny);

  if (!nanny)
    {
    jobid = (char *)pwt->wt_parm1;

    if (jobid != NULL)
      {
      pjob = svr_find_job(jobid, FALSE);

      if (pjob != NULL)
        {
        mutex_mgr job_mutex(pjob->ji_mutex, true);

        sprintf(log_buf, "exiting job '%s' still exists, sending a SIGKILL",
          pjob->ji_qs.ji_jobid);
        log_err(-1, "job nanny", log_buf);

        /* build up a Signal Job batch request */
        if ((newreq = alloc_br(PBS_BATCH_SignalJob)) != NULL)
          {
          strcpy(newreq->rq_ind.rq_signal.rq_jid, pjob->ji_qs.ji_jobid);
          snprintf(newreq->rq_ind.rq_signal.rq_signame,
            sizeof(newreq->rq_ind.rq_signal.rq_signame), "%s", sigk);
          }

        issue_signal(&pjob, sigk, post_job_delete_nanny, newreq);

        if (pjob != NULL)
          apply_job_delete_nanny(pjob, time_now + 60);
        else
          job_mutex.set_lock_on_exit(false);
        }
      }
    else
      {
      log_err(ENOMEM, __func__, "Cannot allocate memory");
      }
    }

  if (pwt->wt_parm1 != NULL)
    free(pwt->wt_parm1);

  free(pwt->wt_mutex);
  free(pwt);
  }  /* END job_delete_nanny() */
void *req_modifyjob(

  batch_request *preq) /* I */

  {
  job      *pjob;
  svrattrl *plist;
  char      log_buf[LOCAL_LOG_BUF_SIZE];

  pjob = chk_job_request(preq->rq_ind.rq_modify.rq_objname, preq);

  if (pjob == NULL)
    {
    return(NULL);
    }

  mutex_mgr job_mutex(pjob->ji_mutex, true);

  plist = (svrattrl *)GET_NEXT(preq->rq_ind.rq_modify.rq_attr);

  if (plist == NULL)
    {
    /* nothing to do */
    reply_ack(preq);

    /* SUCCESS */
    return(NULL);
    }

  job_mutex.unlock();

  /* If async modify, reply now; otherwise reply is handled later */
  if (preq->rq_type == PBS_BATCH_AsyModifyJob)
    {
    /* reply_ack will free preq. We need to copy it before we call reply_ack */
    batch_request *new_preq;

    new_preq = duplicate_request(preq, -1);

    if (new_preq == NULL)
      {
      sprintf(log_buf, "failed to duplicate batch request");
      log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
      return(NULL);
      }

    get_batch_request_id(new_preq);
    reply_ack(preq);
    new_preq->rq_noreply = TRUE; /* set for no more replies */

    enqueue_threadpool_request((void *(*)(void *))modify_job_work, new_preq);
    }
  else
    modify_job_work(preq);

  return(NULL);
  }  /* END req_modifyjob() */
void post_modify_req(

  batch_request *preq)

  {
  job  *pjob;
  char  log_buf[LOCAL_LOG_BUF_SIZE];

  if (preq == NULL)
    return;

  preq->rq_conn = preq->rq_orgconn;  /* restore socket to client */

  if ((preq->rq_reply.brp_code) &&
      (preq->rq_reply.brp_code != PBSE_UNKJOBID))
    {
    sprintf(log_buf, msg_mombadmodify, preq->rq_reply.brp_code);

    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB,
      preq->rq_ind.rq_modify.rq_objname, log_buf);

    req_reject(preq->rq_reply.brp_code, 0, preq, NULL, NULL);
    }
  else
    {
    if (preq->rq_reply.brp_code == PBSE_UNKJOBID)
      {
      if ((pjob = svr_find_job(preq->rq_ind.rq_modify.rq_objname, FALSE)) == NULL)
        {
        req_reject(preq->rq_reply.brp_code, 0, preq, NULL, NULL);
        return;
        }
      else
        {
        mutex_mgr job_mutex(pjob->ji_mutex, true);

        if (LOGLEVEL >= 0)
          {
          sprintf(log_buf,
            "post_modify_req: PBSE_UNKJOBID for job %s in state %s-%s, dest = %s",
            pjob->ji_qs.ji_jobid,
            PJobState[pjob->ji_qs.ji_state],
            PJobSubState[pjob->ji_qs.ji_substate],
            pjob->ji_qs.ji_destin);

          log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);
          }
        }
      }

    reply_ack(preq);
    }

  return;
  }  /* END post_modify_req() */
int modify_whole_array(

  job_array            *pa,             /* I/O */
  svrattrl             *plist,          /* I */
  struct batch_request *preq,           /* I */
  int                   checkpoint_req) /* I */

  {
  int   i;
  int   rc = PBSE_NONE;
  int   modify_job_rc = PBSE_NONE;
  job  *pjob;

  for (i = 0; i < pa->ai_qs.array_size; i++)
    {
    if (pa->job_ids[i] == NULL)
      continue;

    if ((pjob = svr_find_job(pa->job_ids[i], FALSE)) == NULL)
      {
      free(pa->job_ids[i]);
      pa->job_ids[i] = NULL;
      }
    else
      {
      /* NO_MOM_RELAY will prevent modify_job from calling relay_to_mom */
      batch_request *array_req = duplicate_request(preq, i);
      mutex_mgr      job_mutex(pjob->ji_mutex, true);

      pthread_mutex_unlock(pa->ai_mutex);
      array_req->rq_noreply = TRUE;

      rc = modify_job((void **)&pjob, plist, array_req, checkpoint_req, NO_MOM_RELAY);

      if (rc != PBSE_NONE)
        {
        modify_job_rc = rc;
        }

      pa = get_jobs_array(&pjob);

      if (pa == NULL)
        {
        if (pjob == NULL)
          job_mutex.set_lock_on_exit(false);

        return(PBSE_JOB_RECYCLED);
        }

      if (pjob == NULL)
        {
        pa->job_ids[i] = NULL;
        job_mutex.set_lock_on_exit(false);
        continue;
        }
      }
    } /* END foreach job in array */

  return(modify_job_rc);
  }  /* END modify_whole_array() */
void finish_move_process(

  char          *job_id,
  batch_request *preq,
  long           time,
  const char    *node_name,
  int            status,
  int            type,
  int            mom_err)

  {
  char  log_buf[LOCAL_LOG_BUF_SIZE + 1];
  job  *pjob = svr_find_job(job_id, TRUE);

  if (pjob == NULL)
    {
    /* somehow the job has been deleted mid-runjob */
    snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
      "Job %s was deleted while servicing move request", job_id);

    if (preq != NULL)
      {
      if (mom_err != PBSE_NONE)
        req_reject(mom_err, 0, preq, node_name, log_buf);
      else
        req_reject(PBSE_JOBNOTFOUND, 0, preq, node_name, log_buf);
      }
    }
  else
    {
    mutex_mgr job_mutex(pjob->ji_mutex, true);

    switch (type)
      {
      case MOVE_TYPE_Move:

        finish_moving_processing(pjob, preq, status);
        break;

      case MOVE_TYPE_Route:

        finish_routing_processing(pjob, status);
        break;

      case MOVE_TYPE_Exec:

        job_mutex.unlock();
        finish_sendmom(job_id, preq, time, node_name, status, mom_err);
        break;
      } /* END switch (type) */
    }
  }  /* END finish_move_process() */
/*
 * record_reservation()
 *
 * @pre-cond: pnode and rsv_id must be valid pointers
 * @post-cond: the reservation will be recorded in pbs_server's tracking mechanism
 * and on the job which has the node reserved, or -1 is returned and the reservation
 * is not recorded.
 * @param - pnode the node which is reporting the reservation
 * @param - rsv_id the id of the reservation being reported
 * @return - PBSE_NONE if the reservation was successfully recorded, -1 otherwise
 */

int record_reservation(

  struct pbsnode *pnode,
  const char     *rsv_id)

  {
  job  *pjob;
  bool  found_job = false;
  char  jobid[PBS_MAXSVRJOBID + 1];

  for (unsigned int i = 0; i < pnode->nd_job_usages.size(); i++)
    {
    /* cray only allows one job per node, so any valid job will be the job that
     * is reserving this node. */
    job_usage_info *jui = pnode->nd_job_usages[i];

    strcpy(jobid, jui->jobid);

    unlock_node(pnode, __func__, NULL, LOGLEVEL);

    if ((pjob = svr_find_job(jobid, TRUE)) != NULL)
      {
      mutex_mgr job_mutex(pjob->ji_mutex, true);

      pjob->ji_wattr[JOB_ATR_reservation_id].at_val.at_str = strdup(rsv_id);
      pjob->ji_wattr[JOB_ATR_reservation_id].at_flags = ATR_VFLAG_SET;

      /* add environment variable BATCH_PARTITION_ID */
      char buf[1024];
      snprintf(buf, sizeof(buf), "BATCH_PARTITION_ID=%s", rsv_id);

      pbs_attribute tempattr;
      clear_attr(&tempattr, &job_attr_def[JOB_ATR_variables]);
      job_attr_def[JOB_ATR_variables].at_decode(&tempattr, NULL, NULL, buf, 0);

      job_attr_def[JOB_ATR_variables].at_set(
        &pjob->ji_wattr[JOB_ATR_variables], &tempattr, INCR);

      job_attr_def[JOB_ATR_variables].at_free(&tempattr);

      track_alps_reservation(pjob);
      found_job = true;

      job_mutex.unlock();
      lock_node(pnode, __func__, NULL, LOGLEVEL);
      break;
      }
    else
      lock_node(pnode, __func__, NULL, LOGLEVEL);
    }

  if (found_job == false)
    return(-1);

  return(PBSE_NONE);
  }  /* END record_reservation() */
/**
 * poll_job_task
 *
 * The invocation of this routine is triggered from
 * the pbs_server main_loop code. The check of
 * SRV_ATR_PollJobs appears to be redundant.
 */

void poll_job_task(

  struct work_task *ptask)

  {
  char   *job_id = (char *)ptask->wt_parm1;
  job    *pjob;
  time_t  time_now = time(NULL);
  long    poll_jobs = 0;
  int     job_state = -1;

  if (job_id != NULL)
    {
    pjob = svr_find_job(job_id, FALSE);

    if (pjob != NULL)
      {
      mutex_mgr job_mutex(pjob->ji_mutex, true);

      job_state = pjob->ji_qs.ji_state;
      job_mutex.unlock();

      get_svr_attr_l(SRV_ATR_PollJobs, &poll_jobs);

      if ((poll_jobs) &&
          (job_state == JOB_STATE_RUNNING))
        {
        /* throttle the number of outstanding threads doing job polling.
           This prevents a problem where pbs_server gets hung waiting on
           I/O from the mom */
        pthread_mutex_lock(poll_job_task_mutex);

        if (current_poll_job_tasks < max_poll_job_tasks)
          {
          current_poll_job_tasks++;
          pthread_mutex_unlock(poll_job_task_mutex);

          stat_mom_job(job_id);

          pthread_mutex_lock(poll_job_task_mutex);
          current_poll_job_tasks--;
          }

        pthread_mutex_unlock(poll_job_task_mutex);

        /* add another task */
        set_task(WORK_Timed, time_now + JobStatRate, poll_job_task, strdup(job_id), FALSE);
        }
      }

    free(job_id);
    }

  free(ptask->wt_mutex);
  free(ptask);
  }  /* END poll_job_task() */
int gpu_has_job(

  struct pbsnode *pnode,
  int             gpuid)

  {
  job  *pjob;
  char *gpu_str;
  char *found_str;
  /* increased so that really high gpu indexes don't bother us */
  char  tmp_str[PBS_MAXHOSTNAME + 10];

  /* check each subnode for a job using a gpuid */
  for (unsigned int i = 0; i < pnode->nd_job_usages.size(); i++)
    {
    // make a copy because we're going to lose the lock below
    job_usage_info jui = pnode->nd_job_usages[i];

    if ((pjob = get_job_from_job_usage_info(&jui, pnode)) != NULL)
      {
      mutex_mgr job_mutex(pjob->ji_mutex, true);

      /* Does this job have this gpuid assigned? */
      if ((pjob->ji_qs.ji_state == JOB_STATE_RUNNING) &&
          (pjob->ji_wattr[JOB_ATR_exec_gpus].at_flags & ATR_VFLAG_SET) != 0)
        {
        gpu_str = pjob->ji_wattr[JOB_ATR_exec_gpus].at_val.at_str;

        if (gpu_str != NULL)
          {
          snprintf(tmp_str, sizeof(tmp_str), "%s-gpu/%d",
            pnode->get_name(), gpuid);

          /* look thru the string and see if it has this host and gpuid.
           * exec_gpus string should be in format of
           * <hostname>-gpu/<index>[+<hostname>-gpu/<index>...] */
          found_str = strstr(gpu_str, tmp_str);

          if (found_str != NULL)
            {
            return(TRUE);
            }
          }
        }
      }
    } /* END for each job on node */

  return(FALSE);
  }  /* END gpu_has_job() */
void *req_messagejob(

  batch_request *preq) /* I */

  {
  job           *pjob;
  int            rc;
  batch_request *dup_req = NULL;

  if ((pjob = chk_job_request(preq->rq_ind.rq_message.rq_jid, preq)) == NULL)
    return(NULL);

  mutex_mgr job_mutex(pjob->ji_mutex, true);

  /* the job must be running */
  if (pjob->ji_qs.ji_state != JOB_STATE_RUNNING)
    {
    req_reject(PBSE_BADSTATE, 0, preq, NULL, NULL);
    return(NULL);
    }

  if ((rc = copy_batchrequest(&dup_req, preq, 0, -1)) != 0)
    {
    req_reject(PBSE_MEM_MALLOC, 0, preq, NULL, NULL);
    }
  /* pass the request on to MOM */
  /* The dup_req is freed in relay_to_mom (failure)
   * or in issue_Drequest (success) */
  else if ((rc = relay_to_mom(&pjob, dup_req, NULL)) != PBSE_NONE)
    {
    req_reject(rc, 0, preq, NULL, NULL); /* unable to get to MOM */
    free_br(dup_req);
    }
  else
    {
    post_message_req(dup_req);
    free_br(preq);
    }

  /* After MOM acts and replies to us, we pick up in post_message_req() */

  if (pjob == NULL)
    job_mutex.set_lock_on_exit(false);

  return(NULL);
  }  /* END req_messagejob() */
/**
 * poll_job_task
 *
 * The invocation of this routine is triggered from
 * the pbs_server main_loop code.
 */

void poll_job_task(

  struct work_task *ptask)

  {
  char   *job_id = (char *)ptask->wt_parm1;
  job    *pjob;
  time_t  time_now = time(NULL);
  long    poll_jobs = 0;
  long    job_stat_rate;

  free(ptask->wt_mutex);
  free(ptask);

  if (job_id != NULL)
    {
    pjob = svr_find_job(job_id, FALSE);

    if (pjob != NULL)
      {
      mutex_mgr job_mutex(pjob->ji_mutex, true);
      int       job_state = -1;

      job_state = pjob->ji_qs.ji_state;

      // only do things for running jobs
      if (job_state == JOB_STATE_RUNNING)
        {
        job_mutex.unlock();

        get_svr_attr_l(SRV_ATR_JobStatRate, &job_stat_rate);

        if (time(NULL) - pjob->ji_last_reported_time > job_stat_rate)
          {
          get_svr_attr_l(SRV_ATR_PollJobs, &poll_jobs);

          if (poll_jobs)
            stat_mom_job(job_id);
          }

        /* add another task */
        set_task(WORK_Timed, time_now + (job_stat_rate / 3), poll_job_task, strdup(job_id), FALSE);
        }
      }

    free(job_id);
    }
  }  /* END poll_job_task() */
void *req_modifyjob(

  batch_request *preq) /* I */

  {
  job      *pjob;
  svrattrl *plist;

  pjob = chk_job_request(preq->rq_ind.rq_modify.rq_objname, preq);

  if (pjob == NULL)
    {
    return(NULL);
    }

  mutex_mgr job_mutex(pjob->ji_mutex, true);

  plist = (svrattrl *)GET_NEXT(preq->rq_ind.rq_modify.rq_attr);

  if (plist == NULL)
    {
    /* nothing to do */
    reply_ack(preq);

    /* SUCCESS */
    return(NULL);
    }

  job_mutex.unlock();

  /* If async modify, reply now; otherwise reply is handled later */
  if (preq->rq_type == PBS_BATCH_AsyModifyJob)
    {
    reply_ack(preq);

    preq->rq_noreply = TRUE; /* set for no more replies */

    enqueue_threadpool_request((void *(*)(void *))modify_job_work, preq);
    }
  else
    modify_job_work(preq);

  return(NULL);
  }  /* END req_modifyjob() */
void post_delete_mom2(

  struct work_task *pwt)

  {
  char       *jobid;
  const char *sigk = "SIGKILL";
  char        log_buf[LOCAL_LOG_BUF_SIZE];
  job        *pjob;

  jobid = (char *)pwt->wt_parm1;
  free(pwt->wt_mutex);
  free(pwt);

  if (jobid == NULL)
    {
    log_err(ENOMEM, __func__, "Cannot allocate memory");
    return;
    }

  pjob = svr_find_job(jobid, FALSE);
  free(jobid);

  if (pjob != NULL)
    {
    mutex_mgr job_mutex(pjob->ji_mutex, true);

    if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING)
      {
      issue_signal(&pjob, sigk, free_br, NULL);

      if (pjob != NULL)
        {
        sprintf(log_buf, msg_delrunjobsig, sigk);
        log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);
        }
      }

    if (pjob == NULL)
      job_mutex.set_lock_on_exit(false);
    }
  }  /* END post_delete_mom2() */
/*
 * record_reservation()
 *
 * @pre-cond: pnode and rsv_id must be valid pointers
 * @post-cond: the reservation will be recorded in pbs_server's tracking mechanism
 * and on the job which has the node reserved, or -1 is returned and the reservation
 * is not recorded.
 * @param - pnode the node which is reporting the reservation
 * @param - rsv_id the id of the reservation being reported
 * @return - PBSE_NONE if the reservation was successfully recorded, -1 otherwise
 */

int record_reservation(

  struct pbsnode *pnode,
  const char     *rsv_id)

  {
  job  *pjob;
  bool  found_job = false;
  char  jobid[PBS_MAXSVRJOBID + 1];

  for (unsigned int i = 0; i < pnode->nd_job_usages.size(); i++)
    {
    /* cray only allows one job per node, so any valid job will be the job that
     * is reserving this node. */
    job_usage_info *jui = pnode->nd_job_usages[i];

    strcpy(jobid, jui->jobid);

    unlock_node(pnode, __func__, NULL, LOGLEVEL);

    if ((pjob = svr_find_job(jobid, TRUE)) != NULL)
      {
      mutex_mgr job_mutex(pjob->ji_mutex, true);

      pjob->ji_wattr[JOB_ATR_reservation_id].at_val.at_str = strdup(rsv_id);
      pjob->ji_wattr[JOB_ATR_reservation_id].at_flags = ATR_VFLAG_SET;

      track_alps_reservation(pjob);
      found_job = true;

      job_mutex.unlock();
      lock_node(pnode, __func__, NULL, LOGLEVEL);
      break;
      }
    else
      lock_node(pnode, __func__, NULL, LOGLEVEL);
    }

  if (found_job == false)
    return(-1);

  return(PBSE_NONE);
  }  /* END record_reservation() */
void *modify_job_work(

  batch_request *vp) /* I */

  {
  job           *pjob;
  svrattrl      *plist;
  int            checkpoint_req = FALSE;
  batch_request *preq = (struct batch_request *)vp;

  pjob = svr_find_job(preq->rq_ind.rq_modify.rq_objname, FALSE);

  if (pjob == NULL)
    {
    req_reject(PBSE_JOBNOTFOUND, 0, preq, NULL, "Job unexpectedly deleted");
    return(NULL);
    }

  mutex_mgr job_mutex(pjob->ji_mutex, true);

  /* pbs_mom sets the extend string to trigger copying of checkpoint files */
  if (preq->rq_extend != NULL)
    {
    if (strcmp(preq->rq_extend, CHECKPOINTHOLD) == 0)
      {
      checkpoint_req = CHK_HOLD;
      }
    else if (strcmp(preq->rq_extend, CHECKPOINTCONT) == 0)
      {
      checkpoint_req = CHK_CONT;
      }
    }

  plist = (svrattrl *)GET_NEXT(preq->rq_ind.rq_modify.rq_attr);

  /* modify_job will free preq and respond to it */
  modify_job((void **)&pjob, plist, preq, checkpoint_req, 0);

  return(NULL);
  }  /* END modify_job_work() */
int update_substate_if_needed(

  char *job_id,
  bool &change_substate_on_attempt_to_queue)

  {
  if (change_substate_on_attempt_to_queue == true)
    {
    job *pjob = svr_find_job(job_id, TRUE);

    if (pjob != NULL)
      {
      mutex_mgr job_mutex(pjob->ji_mutex, true);

      pjob->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUT;
      job_save(pjob, SAVEJOB_QUICK, 0);
      }
    else
      {
      return(PBSE_JOB_RECYCLED);
      }
    }

  return(PBSE_NONE);
  }  /* END update_substate_if_needed() */
int get_mom_node_version(

  const char *job_id,
  int        &version)

  {
  job     *pjob;
  pbsnode *pnode;

  pjob = svr_find_job(job_id, TRUE);

  if (pjob == NULL)
    return(PBSE_UNKJOBID);

  mutex_mgr job_mutex(pjob->ji_mutex, true);

  pnode = find_nodebyname(pjob->ji_qs.ji_destin);

  if (pnode == NULL)
    return(PBSE_UNKNODE);

  mutex_mgr node_mutex(&pnode->nd_mutex, true);

  version = pnode->get_version();

  return(PBSE_NONE);
  }  /* END get_mom_node_version() */
int req_movejob(

  batch_request *req) /* I */

  {
  job  *jobp;
  char  log_buf[LOCAL_LOG_BUF_SIZE];
  int   local_errno = 0;

  jobp = chk_job_request(req->rq_ind.rq_move.rq_jid, req);

  if (jobp == NULL)
    {
    return(PBSE_NONE);
    }

  mutex_mgr job_mutex(jobp->ji_mutex, true);

  if (LOGLEVEL >= 7)
    {
    sprintf(log_buf, "%s", jobp->ji_qs.ji_jobid);
    LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
    }

  if ((jobp->ji_qs.ji_state != JOB_STATE_QUEUED) &&
      (jobp->ji_qs.ji_state != JOB_STATE_HELD) &&
      (jobp->ji_qs.ji_state != JOB_STATE_WAITING))
    {
#ifndef NDEBUG
    sprintf(log_buf, "%s %d %s",
      pbse_to_txt(PBSE_BADSTATE), jobp->ji_qs.ji_state, __func__);

    log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, jobp->ji_qs.ji_jobid, log_buf);
#endif /* NDEBUG */

    req_reject(PBSE_BADSTATE, 0, req, NULL, NULL);

    return(PBSE_NONE);
    }

  /*
   * svr_movejob() does the real work, handles both local and
   * network moves
   */

  /* We have found that sometimes the destination queue and the parent queue
     are the same. If so we do not need to do anything else */
  if (strcmp(jobp->ji_qs.ji_queue, req->rq_ind.rq_move.rq_destin) == 0)
    {
    sprintf(log_buf, "Job %s already in queue %s",
      jobp->ji_qs.ji_jobid, jobp->ji_qs.ji_queue);

    if (LOGLEVEL >= 7)
      {
      log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
      }

    req_reject(PBSE_JOB_ALREADY_IN_QUEUE, 0, req, NULL, log_buf);

    return(PBSE_NONE);
    }

  switch (svr_movejob(jobp, req->rq_ind.rq_move.rq_destin, &local_errno, req))
    {
    case 0:

      /* success */
      snprintf(log_buf, sizeof(log_buf), "%s", msg_movejob);
      snprintf(log_buf + strlen(log_buf), sizeof(log_buf) - strlen(log_buf),
        msg_manager, req->rq_ind.rq_move.rq_destin, req->rq_user, req->rq_host);

      log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, jobp->ji_qs.ji_jobid, log_buf);

      reply_ack(req);
      break;

    case -1:
    case 1:

      /* fail */
      /* NOTE: can pass detailed response to requestor (NYI) */
      req_reject(local_errno, 0, req, NULL, NULL);
      break;

    case 2:

      /* deferred, will be handled by post_movejob() when the child completes */
      /* NO-OP */
      break;
    } /* END switch (svr_movejob(jobp, req->rq_ind.rq_move.rq_destin, req)) */

  return(PBSE_NONE);
  }  /* END req_movejob() */
void stat_update(

  struct batch_request *preq,
  struct stat_cntl     *cntl)

  {
  job                *pjob;
  struct batch_reply *preply;
  struct brp_status  *pstatus;
  svrattrl           *sattrl;
  int                 oldsid;
  int                 bad = 0;
  time_t              time_now = time(NULL);
  char               *msg_ptr = NULL;
  char                log_buf[LOCAL_LOG_BUF_SIZE];

  preply = &preq->rq_reply;

  if (preply->brp_un.brp_txt.brp_str != NULL)
    {
    msg_ptr = strstr(preply->brp_un.brp_txt.brp_str, PBS_MSG_EQUAL);

    if (msg_ptr != NULL)
      msg_ptr += strlen(PBS_MSG_EQUAL);
    }

  if (preply->brp_choice == BATCH_REPLY_CHOICE_Status)
    {
    pstatus = (struct brp_status *)GET_NEXT(preply->brp_un.brp_status);

    while (pstatus != NULL)
      {
      if ((pjob = svr_find_job(pstatus->brp_objname, FALSE)) != NULL)
        {
        mutex_mgr job_mutex(pjob->ji_mutex, true);

        sattrl = (svrattrl *)GET_NEXT(pstatus->brp_attr);

        oldsid = pjob->ji_wattr[JOB_ATR_session_id].at_val.at_long;

        modify_job_attr(pjob, sattrl, ATR_DFLAG_MGWR | ATR_DFLAG_SvWR, &bad);

        if (oldsid != pjob->ji_wattr[JOB_ATR_session_id].at_val.at_long)
          {
          /* first save since running job (or the sid has changed),
             must save session id */
          job_save(pjob, SAVEJOB_FULL, 0);
          }
#ifdef USESAVEDRESOURCES
        else
          {
          /* save so we can recover resources used */
          job_save(pjob, SAVEJOB_FULL, 0);
          }
#endif /* USESAVEDRESOURCES */

        pjob->ji_momstat = time_now;
        }

      pstatus = (struct brp_status *)GET_NEXT(pstatus->brp_stlink);
      } /* END while (pstatus != NULL) */
    } /* END if (preply->brp_choice == BATCH_REPLY_CHOICE_Status) */
  else if ((preply->brp_choice == BATCH_REPLY_CHOICE_Text) &&
           (preply->brp_code == PBSE_UNKJOBID) &&
           (msg_ptr != NULL) &&
           (!strcmp(msg_ptr, preq->rq_ind.rq_status.rq_id)))
    {
    /* we sent a stat request, but mom says it doesn't know anything about
       the job */
    if ((pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE)) != NULL)
      {
      /* job really isn't running any more - mom doesn't know anything about it.
         This can happen if a diskless node reboots and the mom_priv/jobs
         directory is cleared. Set its state to queued so job_abt doesn't
         think it is still running */
      mutex_mgr job_mutex(pjob->ji_mutex, true);

      snprintf(log_buf, sizeof(log_buf),
        "mother superior no longer recognizes %s as a valid job, aborting. Last reported time was %ld",
        preq->rq_ind.rq_status.rq_id, pjob->ji_last_reported_time);

      log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);

      svr_setjobstate(pjob, JOB_STATE_QUEUED, JOB_SUBSTATE_ABORT, FALSE);
      rel_resc(pjob);

      job_mutex.set_unlock_on_exit(false);

      job_abt(&pjob, "Job does not exist on node");

      /* TODO: if the job is rerunnable we should set its state back to queued */
      }
    }
  else
    {
    snprintf(log_buf, sizeof(log_buf),
      "Poll job request failed for job %s", preq->rq_ind.rq_status.rq_id);
    log_err(preply->brp_code, __func__, log_buf);
    }

  cntl->sc_conn = -1;

  if (cntl->sc_post)
    cntl->sc_post(cntl);  /* continue where we left off */

  /* If sc_post has a value it is req_stat_job_step2; if so, it expects
     cntl to be free'd after the call */
  free(cntl);  /* a bit of a kludge, but it saves an extra function call */

  return;
  }  /* END stat_update() */
int stat_to_mom(

  char             *job_id,
  struct stat_cntl *cntl) /* M */

  {
  struct batch_request *newrq;
  int                   rc = PBSE_NONE;
  unsigned long         addr;
  char                  log_buf[LOCAL_LOG_BUF_SIZE + 1];
  struct pbsnode       *node;
  int                   handle = -1;
  unsigned long         job_momaddr = -1;
  unsigned short        job_momport = -1;
  char                 *job_momname = NULL;
  job                  *pjob = NULL;

  if ((pjob = svr_find_job(job_id, FALSE)) == NULL)
    return(PBSE_JOBNOTFOUND);

  mutex_mgr job_mutex(pjob->ji_mutex, true);

  if ((pjob->ji_qs.ji_un.ji_exect.ji_momaddr == 0) ||
      (!pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str))
    {
    job_mutex.unlock();

    snprintf(log_buf, sizeof(log_buf),
      "Job %s missing MOM's information. Skipping statting on this job",
      pjob->ji_qs.ji_jobid);
    log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);

    return(PBSE_BAD_PARAMETER);
    }

  job_momaddr = pjob->ji_qs.ji_un.ji_exect.ji_momaddr;
  job_momport = pjob->ji_qs.ji_un.ji_exect.ji_momport;
  job_momname = strdup(pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str);
  job_mutex.unlock();

  if (job_momname == NULL)
    return(PBSE_MEM_MALLOC);

  if ((newrq = alloc_br(PBS_BATCH_StatusJob)) == NULL)
    {
    free(job_momname);
    return(PBSE_MEM_MALLOC);
    }

  if (cntl->sc_type == 1)
    snprintf(newrq->rq_ind.rq_status.rq_id,
      sizeof(newrq->rq_ind.rq_status.rq_id), "%s", job_id);
  else
    newrq->rq_ind.rq_status.rq_id[0] = '\0';  /* get stat of all */

  CLEAR_HEAD(newrq->rq_ind.rq_status.rq_attr);

  /* if MOM is down just return stale information */
  addr = job_momaddr;

  node = tfind_addr(addr, job_momport, job_momname);
  free(job_momname);

  if (node == NULL)
    return(PBSE_UNKNODE);

  if ((node->nd_state & INUSE_DOWN) ||
      (node->nd_power_state != POWER_STATE_RUNNING))
    {
    if (LOGLEVEL >= 6)
      {
      snprintf(log_buf, LOCAL_LOG_BUF_SIZE,
        "node '%s' is allocated to job but in state 'down'", node->nd_name);

      log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, job_id, log_buf);
      }

    unlock_node(node, __func__, "no rely mom", LOGLEVEL);
    free_br(newrq);

    return(PBSE_NORELYMOM);
    }

  /* get connection to MOM */
  unlock_node(node, __func__, "before svr_connect", LOGLEVEL);
  handle = svr_connect(job_momaddr, job_momport, &rc, NULL, NULL);

  if (handle >= 0)
    {
    if ((rc = issue_Drequest(handle, newrq, true)) == PBSE_NONE)
      {
      stat_update(newrq, cntl);
      }
    }
  else
    rc = PBSE_CONNECT;

  if (rc == PBSE_SYSTEM)
    rc = PBSE_MEM_MALLOC;

  free_br(newrq);

  return(rc);
  }  /* END stat_to_mom() */
int req_holdjob(

  batch_request *vp) /* I */

  {
  long          *hold_val;
  int            newstate;
  int            newsub;
  long           old_hold;
  job           *pjob;
  char          *pset;
  int            rc;
  pbs_attribute  temphold;
  pbs_attribute *pattr;
  batch_request *preq = (struct batch_request *)vp;
  char           log_buf[LOCAL_LOG_BUF_SIZE];
  batch_request *dup_req = NULL;

  pjob = chk_job_request(preq->rq_ind.rq_hold.rq_orig.rq_objname, preq);

  if (pjob == NULL)
    {
    return(PBSE_NONE);
    }

  mutex_mgr job_mutex(pjob->ji_mutex, true);

  /* cannot do anything until we decode the holds to be set */
  if ((rc = get_hold(&preq->rq_ind.rq_hold.rq_orig.rq_attr, (const char **)&pset,
                     &temphold)) != 0)
    {
    req_reject(rc, 0, preq, NULL, NULL);

    return(PBSE_NONE);
    }

  /* if other than HOLD_u is being set, must have privilege */
  if ((rc = chk_hold_priv(temphold.at_val.at_long, preq->rq_perm)) != 0)
    {
    req_reject(rc, 0, preq, NULL, NULL);

    return(PBSE_NONE);
    }

  hold_val = &pjob->ji_wattr[JOB_ATR_hold].at_val.at_long;
  old_hold = *hold_val;
  *hold_val |= temphold.at_val.at_long;
  pjob->ji_wattr[JOB_ATR_hold].at_flags |= ATR_VFLAG_SET;

  sprintf(log_buf, msg_jobholdset, pset, preq->rq_user, preq->rq_host);

  pattr = &pjob->ji_wattr[JOB_ATR_checkpoint];

  if ((pjob->ji_qs.ji_state == JOB_STATE_RUNNING) &&
      ((pattr->at_flags & ATR_VFLAG_SET) &&
       ((csv_find_string(pattr->at_val.at_str, "s") != NULL) ||
        (csv_find_string(pattr->at_val.at_str, "c") != NULL) ||
        (csv_find_string(pattr->at_val.at_str, "enabled") != NULL))))
    {
    /* have MOM attempt checkpointing */

    /*
    ** The jobid in the request always has the server suffix attached.
    ** That suffix is dropped when the server attribute
    ** 'display_job_server_suffix' is FALSE, and the MOM stores the job
    ** under the server's form of the id.  Therefore the id must be passed
    ** to the MOM as the server knows it so she can find the job to hold.
    */
    if (strncmp(pjob->ji_qs.ji_jobid, preq->rq_ind.rq_hold.rq_orig.rq_objname,
                PBS_MAXSVRJOBID))
      snprintf(preq->rq_ind.rq_hold.rq_orig.rq_objname,
        sizeof(preq->rq_ind.rq_hold.rq_orig.rq_objname), "%s",
        pjob->ji_qs.ji_jobid);

    if ((dup_req = duplicate_request(preq)) == NULL)
      {
      req_reject(PBSE_SYSTEM, 0, preq, NULL, "memory allocation failure");
      }
    /* The dup_req is freed in relay_to_mom (failure)
     * or in issue_Drequest (success) */
    else if ((rc = relay_to_mom(&pjob, dup_req, NULL)) != PBSE_NONE)
      {
      free_br(dup_req);
      *hold_val = old_hold;  /* reset to the old value */
      req_reject(rc, 0, preq, NULL, "relay to mom failed");

      if (pjob == NULL)
        job_mutex.set_unlock_on_exit(false);
      }
    else
      {
      if (pjob != NULL)
        {
        pjob->ji_qs.ji_svrflags |= JOB_SVFLG_HASRUN | JOB_SVFLG_CHECKPOINT_FILE;

        job_save(pjob, SAVEJOB_QUICK, 0);

        /* fill in log_buf again, since relay_to_mom changed it */
        sprintf(log_buf, msg_jobholdset, pset, preq->rq_user, preq->rq_host);
        log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);

        unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL);
        pjob = NULL;
        reply_ack(preq);
        }
      else
        job_mutex.set_unlock_on_exit(false);

      process_hold_reply(dup_req);
      }
    }
#ifdef ENABLE_BLCR
  else if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING)
    {
    /*
     * This system is configured with BLCR checkpointing to be used,
     * but this Running job does not have checkpointing enabled,
     * so we reject the request
     */
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);

    req_reject(PBSE_IVALREQ, 0, preq, NULL,
      "job not held since checkpointing is expected but not enabled for job");
    }
#endif
  else
    {
    /* everything went well, may need to update the job state */
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);

    if (old_hold != *hold_val)
      {
      /* indicate attributes changed */
      pjob->ji_modified = 1;

      svr_evaljobstate(*pjob, newstate, newsub, 0);
      svr_setjobstate(pjob, newstate, newsub, FALSE);
      }

    reply_ack(preq);
    }

  return(PBSE_NONE);
  }  /* END req_holdjob() */
void mom_cleanup_checkpoint_hold(

  struct work_task *ptask)

  {
  int            rc = 0;
  job           *pjob;
  char          *jobid;
  batch_request *preq;
  char           log_buf[LOCAL_LOG_BUF_SIZE];
  time_t         time_now = time(NULL);

  jobid = (char *)ptask->wt_parm1;
  free(ptask->wt_mutex);
  free(ptask);

  if (jobid == NULL)
    {
    log_err(ENOMEM, __func__, "Cannot allocate memory");
    return;
    }

  pjob = svr_find_job(jobid, FALSE);

  if (pjob == NULL)
    {
    if (LOGLEVEL >= 3)
      {
      sprintf(log_buf, "%s:failed to find job\n", __func__);
      log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, jobid, log_buf);
      }

    free(jobid);
    return;
    }

  free(jobid);

  mutex_mgr job_mutex(pjob->ji_mutex, true);

  if (LOGLEVEL >= 7)
    {
    sprintf(log_buf, "checking mom cleanup job state is %s-%s\n",
      PJobState[pjob->ji_qs.ji_state], PJobSubState[pjob->ji_qs.ji_substate]);

    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);
    }

  /*
   * if the job is no longer running then we have received the job obit
   * and need to request the mom to clean up after the job
   */

  if (pjob->ji_qs.ji_state != JOB_STATE_RUNNING)
    {
    if ((preq = alloc_br(PBS_BATCH_DeleteJob)) == NULL)
      {
      log_err(-1, __func__, "unable to allocate DeleteJob request - big trouble!");
      }
    else
      {
      strcpy(preq->rq_ind.rq_delete.rq_objname, pjob->ji_qs.ji_jobid);

      /* The preq is freed in relay_to_mom (failure)
       * or in issue_Drequest (success) */
      if ((rc = relay_to_mom(&pjob, preq, NULL)) != PBSE_NONE)
        {
        if (pjob != NULL)
          {
          snprintf(log_buf, sizeof(log_buf),
            "Unable to relay information to mom for job '%s'\n",
            pjob->ji_qs.ji_jobid);

          log_err(rc, __func__, log_buf);
          }
        else
          job_mutex.set_lock_on_exit(false);

        free_br(preq);

        return;
        }
      else
        free_br(preq);

      if ((LOGLEVEL >= 7) &&
          (pjob != NULL))
        {
        log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid,
          "requested mom cleanup");
        }
      }
    }
  else
    {
    set_task(WORK_Timed, time_now + 1, mom_cleanup_checkpoint_hold,
      strdup(pjob->ji_qs.ji_jobid), FALSE);
    }

  if (pjob == NULL)
    job_mutex.set_lock_on_exit(false);
  }  /* END mom_cleanup_checkpoint_hold() */
int send_job_work(

  char          *job_id,
  const char    *node_name, /* I */
  int            type,      /* I */
  int           *my_err,    /* O */
  batch_request *preq)      /* M */

  {
  int             rc = LOCUTION_FAIL;
  int             ret = PBSE_NONE;
  int             local_errno = 0;
  tlist_head      attrl;
  int             encode_type;
  int             mom_err = PBSE_NONE;
  int             resc_access_perm;
  std::string     script_name;
  char           *pc;
  char            stdout_path[MAXPATHLEN + 1];
  char            stderr_path[MAXPATHLEN + 1];
  char            chkpt_path[MAXPATHLEN + 1];
  char            log_buf[LOCAL_LOG_BUF_SIZE];
  long            start_time = time(NULL);
  bool            attempt_to_queue_job = false;
  bool            change_substate_on_attempt_to_queue = false;
  bool            need_to_send_job_script = false;
  bool            job_has_run = false;
  job            *pjob = NULL;
  char            job_destin[PBS_MAXROUTEDEST + 1];
  bool            Timeout = false;
  unsigned long   job_momaddr = -1;
  unsigned short  job_momport = -1;

  if ((pjob = svr_find_job(job_id, TRUE)) == NULL)
    {
    *my_err = PBSE_JOBNOTFOUND;
    req_reject(-1, 0, preq, NULL, NULL);
    return(PBSE_JOBNOTFOUND);
    }

  mutex_mgr job_mutex(pjob->ji_mutex, true);

  if (strlen(pjob->ji_qs.ji_destin) != 0)
    strcpy(job_destin, pjob->ji_qs.ji_destin);
  else
    job_destin[0] = '\0';

  job_momaddr = pjob->ji_qs.ji_un.ji_exect.ji_momaddr;
  job_momport = pjob->ji_qs.ji_un.ji_exect.ji_momport;

  if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT)
    need_to_send_job_script = TRUE;

  if (pjob->ji_qs.ji_svrflags & JOB_SVFLG_HASRUN)
    job_has_run = TRUE;

  if ((job_destin[0] != '\0') &&
      (type != MOVE_TYPE_Exec))
    {
    if ((pc = strchr(job_destin, '@')) != NULL)
      {
      job_momaddr = get_hostaddr(&local_errno, pc + 1);
      job_momport = pbs_server_port_dis;
      }
    }

  /* encode job attributes to be moved */
  CLEAR_HEAD(attrl);

  /* select attributes/resources to send based on move type */
  if (type == MOVE_TYPE_Exec)
    {
    /* moving job to MOM - ie job start */
    resc_access_perm = ATR_DFLAG_MOM;
    encode_type = ATR_ENCODE_MOM;
    }
  else
    {
    /* moving job to alternate server? */
    resc_access_perm = ATR_DFLAG_USWR | ATR_DFLAG_OPWR | ATR_DFLAG_MGWR | ATR_DFLAG_SvRD;
    encode_type = ATR_ENCODE_SVR;

    /* clear default resource settings */
    ret = svr_dequejob(pjob, FALSE);

    if (ret)
      {
      job_mutex.set_unlock_on_exit(false);
      return(ret);
      }
    }

  encode_attributes(attrl, pjob, resc_access_perm, encode_type);

  rc = get_job_script_path(pjob, script_name);

  if (rc != PBSE_NONE)
    {
    if (rc == PBSE_JOB_RECYCLED)
      job_mutex.set_unlock_on_exit(false);

    free_server_attrs(&attrl);

    return(rc);
    }

  if (job_has_run)
    {
    if ((get_job_file_path(pjob, StdOut, stdout_path, sizeof(stdout_path)) != 0) ||
        (get_job_file_path(pjob, StdErr, stderr_path, sizeof(stderr_path)) != 0) ||
        (get_job_file_path(pjob, Checkpoint, chkpt_path, sizeof(chkpt_path)) != 0))
      {
      job_mutex.unlock();
      goto send_job_work_end;
      }
    }

  /* if the job is substate JOB_SUBSTATE_TRNOUTCM it means we are
   * recovering after being down or a late failure so we just want
   * to send the "ready-to-commit/commit" */
  if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUTCM)
    {
    attempt_to_queue_job = true;

    if (pjob->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUT)
      change_substate_on_attempt_to_queue = true;
    }

  job_mutex.unlock();

  rc = send_job_over_network_with_retries(job_id,
         job_destin,
         attrl,
         attempt_to_queue_job,
         change_substate_on_attempt_to_queue,
         Timeout,
         script_name.c_str(),
         need_to_send_job_script,
         job_has_run,
         job_momaddr,
         job_momport,
         stdout_path,
         stderr_path,
         chkpt_path,
         type,
         my_err,
         &mom_err);

  if (Timeout == TRUE)
    {
    /* 10 indicates that job migrate timed out, server will mark node down
     * and abort the job - see post_sendmom() */
    sprintf(log_buf, "child timed-out attempting to start job %s", job_id);
    log_ext(*my_err, __func__, log_buf, LOG_WARNING);

    rc = LOCUTION_REQUEUE;
    }
  else if (rc != LOCUTION_SUCCESS)
    {
    if (should_retry_route(*my_err) == -1)
      {
      sprintf(log_buf, "child failed and will not retry job %s", job_id);
      log_err(*my_err, __func__, log_buf);

      rc = LOCUTION_FAIL;
      }
    else
      rc = LOCUTION_REQUEUE;
    }

  if (type == MOVE_TYPE_Exec)
    {
    if (node_name != NULL)
      update_failure_counts(node_name, rc);
    else
      update_failure_counts(job_destin, rc);
    }

send_job_work_end:
  finish_move_process(job_id, preq, start_time, node_name, rc, type, mom_err);
  free_server_attrs(&attrl);

  return(rc);
  }  /* END send_job_work() */
void req_stat_job_step2(

  struct stat_cntl *cntl) /* I/O (free'd on return) */

  {
  batch_request         *preq = cntl->sc_origrq;
  svrattrl              *pal = (svrattrl *)GET_NEXT(preq->rq_ind.rq_status.rq_attr);
  job                   *pjob = NULL;
  struct batch_reply    *preply = &preq->rq_reply;
  int                    rc = 0;
  enum TJobStatTypeEnum  type = (enum TJobStatTypeEnum)cntl->sc_type;
  bool                   exec_only = false;
  int                    bad = 0;
  /* delta time - only report full pbs_attribute list if J->MTime > DTime */
  int                    job_array_index = -1;
  job_array             *pa = NULL;
  all_jobs_iterator     *iter;

  if (preq->rq_extend != NULL)
    {
    /* FORMAT: { EXECQONLY } */
    if (strstr(preq->rq_extend, EXECQUEONLY))
      exec_only = true;
    }

  if ((type == tjstTruncatedServer) ||
      (type == tjstTruncatedQueue))
    {
    handle_truncated_qstat(exec_only, cntl->sc_condensed, preq);

    return;
    } /* END if ((type == tjstTruncatedServer) || ...) */
  else if (type == tjstJob)
    {
    pjob = svr_find_job(preq->rq_ind.rq_status.rq_id, FALSE);

    if (pjob != NULL)
      {
      if ((rc = status_job(pjob, preq, pal, &preply->brp_un.brp_status,
                           cntl->sc_condensed, &bad)))
        req_reject(rc, bad, preq, NULL, NULL);
      else
        reply_send_svr(preq);

      unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
      }
    else
      {
      req_reject(PBSE_JOBNOTFOUND, bad, preq, NULL, NULL);
      }
    }
  else
    {
    if (type == tjstArray)
      {
      pa = get_array(preq->rq_ind.rq_status.rq_id);

      if (pa == NULL)
        {
        req_reject(PBSE_UNKARRAYID, 0, preq, NULL, "unable to find array");
        return;
        }
      }
    else if ((type == tjstSummarizeArraysQueue) ||
             (type == tjstSummarizeArraysServer))
      update_array_statuses();

    iter = get_correct_status_iterator(cntl);

    for (pjob = get_next_status_job(cntl, job_array_index, pa, iter);
         pjob != NULL;
         pjob = get_next_status_job(cntl, job_array_index, pa, iter))
      {
      mutex_mgr job_mutex(pjob->ji_mutex, true);

      /* go ahead and build the status reply for this job */

      if (pjob->ji_being_recycled == true)
        continue;

      if (exec_only)
        {
        if (cntl->sc_pque != NULL)
          {
          if (cntl->sc_pque->qu_qs.qu_type != QTYPE_Execution)
            continue;
          }
        else if (in_execution_queue(pjob, pa) == false)
          continue;
        }

      rc = status_job(pjob, preq, pal, &preply->brp_un.brp_status,
             cntl->sc_condensed, &bad);

      if ((rc != PBSE_NONE) &&
          (rc != PBSE_PERM))
        {
        if (pa != NULL)
          unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);

        req_reject(rc, bad, preq, NULL, NULL);

        delete iter;

        return;
        }
      } /* END for (pjob != NULL) */

    delete iter;

    if (pa != NULL)
      {
      unlock_ai_mutex(pa, __func__, "1", LOGLEVEL);
      }

    reply_send_svr(preq);
    }

  if (LOGLEVEL >= 7)
    {
    log_event(PBSEVENT_SYSTEM, PBS_EVENTCLASS_JOB, "req_statjob",
      "Successfully returned the status of queued jobs\n");
    }

  return;
  }  /* END req_stat_job_step2() */
/**
 * poll_job_task
 *
 * The invocation of this routine is triggered from
 * the pbs_server main_loop code.
 */

void poll_job_task(

  struct work_task *ptask)

  {
  char   *job_id = (char *)ptask->wt_parm1;
  job    *pjob;
  time_t  time_now = time(NULL);
  int     job_state = -1;
  char    log_buf[LOCAL_LOG_BUF_SIZE];

  if (job_id != NULL)
    {
    pjob = svr_find_job(job_id, FALSE);

    if (pjob != NULL)
      {
      mutex_mgr job_mutex(pjob->ji_mutex, true);

      job_state = pjob->ji_qs.ji_state;
      job_mutex.unlock();

      if (job_state == JOB_STATE_RUNNING)
        {
        /* throttle the number of outstanding threads doing job polling.
           This prevents a problem where pbs_server gets hung waiting on
           I/O from the mom */
        pthread_mutex_lock(poll_job_task_mutex);

        if (current_poll_job_tasks < max_poll_job_tasks)
          {
          if ((pjob->ji_qs.ji_un.ji_exect.ji_momaddr == 0) ||
              (!pjob->ji_wattr[JOB_ATR_exec_host].at_val.at_str))
            {
            /* the final unlock below releases poll_job_task_mutex */
            snprintf(log_buf, sizeof(log_buf),
              "Job %s missing MOM's information. Skipping polling on this job",
              pjob->ji_qs.ji_jobid);
            log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf);
            }
          else
            {
            current_poll_job_tasks++;
            pthread_mutex_unlock(poll_job_task_mutex);

            stat_mom_job(job_id);

            pthread_mutex_lock(poll_job_task_mutex);
            current_poll_job_tasks--;
            }
          }

        pthread_mutex_unlock(poll_job_task_mutex);

        /* add another task */
        set_task(WORK_Timed, time_now + JobStatRate, poll_job_task, strdup(job_id), FALSE);
        }
      }

    free(job_id);
    }

  free(ptask->wt_mutex);
  free(ptask);
  }  /* END poll_job_task() */
void process_hold_reply(

  batch_request *preq)

  {
  job           *pjob;
  pbs_attribute  temphold;
  int            newstate;
  int            newsub;
  int            rc;
  char          *pset;
  char           log_buf[LOCAL_LOG_BUF_SIZE];

  /* preq was handled previously */
  if (preq == NULL)
    return;

  preq->rq_conn = preq->rq_orgconn;  /* restore client socket */

  if ((pjob = svr_find_job(preq->rq_ind.rq_hold.rq_orig.rq_objname, FALSE)) == NULL)
    {
    log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB,
      preq->rq_ind.rq_hold.rq_orig.rq_objname, msg_postmomnojob);

    req_reject(PBSE_UNKJOBID, 0, preq, NULL, msg_postmomnojob);
    }
  else
    {
    mutex_mgr job_mutex(pjob->ji_mutex, true);

    if (preq->rq_reply.brp_code != 0)
      {
      rc = get_hold(&preq->rq_ind.rq_hold.rq_orig.rq_attr, (const char **)&pset,
             &temphold);

      if (rc == 0)
        {
        rc = job_attr_def[JOB_ATR_hold].at_set(&pjob->ji_wattr[JOB_ATR_hold],
               &temphold, DECR);
        }

      pjob->ji_qs.ji_substate = JOB_SUBSTATE_RUNNING;  /* reset it */

      pjob->ji_modified = 1;    /* indicate attributes changed */
      svr_evaljobstate(*pjob, newstate, newsub, 0);
      svr_setjobstate(pjob, newstate, newsub, FALSE);  /* saves job */

      if (preq->rq_reply.brp_code != PBSE_NOSUP)
        {
        sprintf(log_buf, msg_mombadhold, preq->rq_reply.brp_code);
        log_event(PBSEVENT_DEBUG, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);
        req_reject(preq->rq_reply.brp_code, 0, preq, NULL, log_buf);
        }
      else
        {
        reply_ack(preq);
        }
      }
    else
      {
      /* record that MOM has a checkpoint file */

      /* PBS_CHECKPOINT_MIGRATEABLE is defined as zero, therefore this code
       * will never fire. And if these flags are not set, start_exec will not
       * try to run the job from the checkpoint image file. */
      pjob->ji_qs.ji_svrflags |= JOB_SVFLG_CHECKPOINT_FILE;

      if (preq->rq_reply.brp_auxcode)  /* checkpoint can be moved */
        {
        pjob->ji_qs.ji_svrflags &= ~JOB_SVFLG_CHECKPOINT_FILE;
        pjob->ji_qs.ji_svrflags |= JOB_SVFLG_HASRUN | JOB_SVFLG_CHECKPOINT_MIGRATEABLE;
        }

      pjob->ji_modified = 1;    /* indicate attributes changed */

      svr_evaljobstate(*pjob, newstate, newsub, 0);
      svr_setjobstate(pjob, newstate, newsub, FALSE);  /* saves job */

      account_record(PBS_ACCT_CHKPNT, pjob, "Checkpointed and held");  /* note in accounting file */

      reply_ack(preq);
      }
    }
  }  /* END process_hold_reply() */
void *req_checkpointjob(

  batch_request *preq) /* I */

  {
  job           *pjob;
  int            rc;
  pbs_attribute *pattr;
  char           log_buf[LOCAL_LOG_BUF_SIZE];
  batch_request *dup_req = NULL;

  if ((pjob = chk_job_request(preq->rq_ind.rq_manager.rq_objname, preq)) == NULL)
    {
    return(NULL);
    }

  mutex_mgr job_mutex(pjob->ji_mutex, true);

  pattr = &pjob->ji_wattr[JOB_ATR_checkpoint];

  if ((pjob->ji_qs.ji_state == JOB_STATE_RUNNING) &&
      ((pattr->at_flags & ATR_VFLAG_SET) &&
       ((csv_find_string(pattr->at_val.at_str, "s") != NULL) ||
        (csv_find_string(pattr->at_val.at_str, "c") != NULL) ||
        (csv_find_string(pattr->at_val.at_str, "enabled") != NULL))))
    {
    /* have MOM attempt checkpointing */

    if ((dup_req = duplicate_request(preq)) == NULL)
      {
      req_reject(PBSE_SYSTEM, 0, preq, NULL, "failure to allocate memory");
      }
    /* The dup_req is freed in relay_to_mom (failure)
     * or in issue_Drequest (success) */
    else if ((rc = relay_to_mom(&pjob, dup_req, NULL)) != PBSE_NONE)
      {
      req_reject(rc, 0, preq, NULL, NULL);
      free_br(dup_req);

      if (pjob == NULL)
        job_mutex.set_unlock_on_exit(false);
      }
    else
      {
      if (pjob != NULL)
        {
        pjob->ji_qs.ji_svrflags |= JOB_SVFLG_CHECKPOINT_FILE;

        job_save(pjob, SAVEJOB_QUICK, 0);
        log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);

        unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
        pjob = NULL;
        }
      else
        job_mutex.set_unlock_on_exit(false);

      process_checkpoint_reply(dup_req);
      }
    }
  else
    {
    /* Job does not have checkpointing enabled, so reject the request */
    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);

    req_reject(PBSE_IVALREQ, 0, preq, NULL, "job is not checkpointable");
    }

  return(NULL);
  }  /* END req_checkpointjob() */