void array_delete_wt(struct work_task *ptask) { struct batch_request *preq; job_array *pa; /*struct work_task *pnew_task;*/ struct work_task *pwtnew; int i; static int last_check = 0; static char *last_id = NULL; preq = ptask->wt_parm1; pa = get_array(preq->rq_ind.rq_delete.rq_objname); if (pa == NULL) { /* jobs must have exited already */ reply_ack(preq); last_check = 0; free(last_id); last_id = NULL; return; } if (last_id == NULL) { last_id = strdup(preq->rq_ind.rq_delete.rq_objname); last_check = time_now; } else if (strcmp(last_id, preq->rq_ind.rq_delete.rq_objname) != 0) { last_check = time_now; free(last_id); last_id = strdup(preq->rq_ind.rq_delete.rq_objname); } else if (time_now - last_check > 10) { int num_jobs; int num_prerun; job *pjob; num_jobs = 0; num_prerun = 0; for (i = 0; i < pa->ai_qs.array_size; i++) { if (pa->jobs[i] == NULL) continue; pjob = (job *)pa->jobs[i]; num_jobs++; if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN) { num_prerun++; /* mom still hasn't gotten job?? delete anyway */ if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0) { /* job has restart file at mom, do end job processing */ change_restart_comment_if_needed(pjob); svr_setjobstate(pjob, JOB_STATE_EXITING, JOB_SUBSTATE_EXITING); pjob->ji_momhandle = -1; /* force new connection */ pwtnew = set_task(WORK_Immed, 0, on_job_exit, (void *)pjob); if (pwtnew) { append_link(&pjob->ji_svrtask, &pwtnew->wt_linkobj, pwtnew); } } else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn) != 0) { /* job has staged-in file, should remove them */ remove_stagein(pjob); job_abt(&pjob, NULL); } else { job_abt(&pjob, NULL); } } } if (num_jobs == num_prerun) { reply_ack(preq); free(last_id); last_id = NULL; return; } } req_deletearray(preq); }
int dispatch_request( int sfds, /* I */ struct batch_request *request) /* I */ { int rc = PBSE_NONE; char log_buf[LOCAL_LOG_BUF_SIZE]; char *job_id = NULL; if (LOGLEVEL >= 5) { sprintf(log_buf,"dispatching request %s on sd=%d", reqtype_to_txt(request->rq_type), sfds); log_record(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf); } switch (request->rq_type) { case PBS_BATCH_QueueJob: rc = req_quejob(request, &job_id); if ((rc != PBSE_NONE) && (job_id != NULL)) close_quejob_by_jobid(job_id); if (job_id != NULL) free(job_id); break; case PBS_BATCH_JobCred: rc = req_jobcredential(request); break; case PBS_BATCH_jobscript: job_id = strdup(request->rq_ind.rq_jobfile.rq_jobid); rc = req_jobscript(request); if ((rc != PBSE_NONE) && (job_id != NULL)) close_quejob_by_jobid(job_id); if (job_id != NULL) free(job_id); break; case PBS_BATCH_RdytoCommit: job_id = strdup(request->rq_ind.rq_rdytocommit); rc = req_rdytocommit(request); if ((rc != PBSE_NONE) && (job_id != NULL)) close_quejob_by_jobid(job_id); if (job_id != NULL) free(job_id); break; case PBS_BATCH_Commit: job_id = strdup(request->rq_ind.rq_commit); rc = req_commit(request); if ((rc != PBSE_NONE) && (job_id != NULL)) close_quejob_by_jobid(job_id); if (job_id != NULL) free(job_id); break; case PBS_BATCH_DeleteJob: /* if this is a server size job delete request, then the request * could also be for an entire array. we check to see if the * request object name is an array id. If so we hand off the the * req_deletearray() function. If not we pass along to the normal * req_deletejob() function. */ if (is_array(request->rq_ind.rq_delete.rq_objname)) rc = req_deletearray(request); else rc = req_deletejob(request); break; case PBS_BATCH_HoldJob: if (is_array(request->rq_ind.rq_hold.rq_orig.rq_objname)) rc = req_holdarray(request); else rc = req_holdjob(request); break; case PBS_BATCH_CheckpointJob: rc = req_checkpointjob(request); break; case PBS_BATCH_LocateJob: rc = req_locatejob(request); break; case PBS_BATCH_Manager: rc = req_manager(request); break; case PBS_BATCH_MessJob: rc = req_messagejob(request); break; case PBS_BATCH_AsyModifyJob: case PBS_BATCH_ModifyJob: if (is_array(request->rq_ind.rq_delete.rq_objname)) rc = req_modifyarray(request); else req_modifyjob(request); break; case PBS_BATCH_Rerun: rc = req_rerunjob(request); break; case PBS_BATCH_MoveJob: rc = req_movejob(request); break; case PBS_BATCH_OrderJob: rc = req_orderjob(request); break; case PBS_BATCH_Rescq: rc = req_rescq(request); break; case PBS_BATCH_ReserveResc: rc = req_rescreserve(request); break; case PBS_BATCH_ReleaseResc: rc = req_rescfree(request); break; case PBS_BATCH_ReleaseJob: if (is_array(request->rq_ind.rq_delete.rq_objname)) rc = req_releasearray(request); else rc = req_releasejob(request); break; case PBS_BATCH_RunJob: case PBS_BATCH_AsyrunJob: globalset_del_sock(request->rq_conn); rc = req_runjob(request); break; case PBS_BATCH_SelectJobs: case PBS_BATCH_SelStat: /* handle special 'truncated' keyword */ if (!strncasecmp(request->rq_ind.rq_status.rq_id, "truncated", strlen("truncated"))) rc =req_stat_job(request); else rc = req_selectjobs(request); break; case PBS_BATCH_Shutdown: req_shutdown(request); break; case PBS_BATCH_SignalJob: case PBS_BATCH_AsySignalJob: rc = req_signaljob(request); break; case PBS_BATCH_GpuCtrl: rc = req_gpuctrl_svr(request); break; case PBS_BATCH_MvJobFile: rc = req_mvjobfile(request); break; case PBS_BATCH_StatusQue: rc = req_stat_que(request); break; case PBS_BATCH_StatusNode: rc = req_stat_node(request); break; case PBS_BATCH_StatusSvr: rc = req_stat_svr(request); break; /* DIAGTODO: handle PBS_BATCH_StatusDiag and define req_stat_diag() */ case PBS_BATCH_TrackJob: rc = req_track(request); break; case PBS_BATCH_RegistDep: if (is_array(request->rq_ind.rq_register.rq_parent)) { rc = req_registerarray(request); } else { rc = req_register(request); } break; case PBS_BATCH_AuthenUser: /* determine if user is valid */ rc = req_authenuser( request); break; case PBS_BATCH_AltAuthenUser: break; case PBS_BATCH_JobObit: rc = req_jobobit(request); break; case PBS_BATCH_StageIn: rc = req_stagein(request); break; case PBS_BATCH_StatusJob: rc = req_stat_job(request); break; default: req_reject(PBSE_UNKREQ, 0, request, NULL, NULL); if (sfds != PBS_LOCAL_CONNECTION) close_conn(sfds, FALSE); break; } /* END switch (request->rq_type) */ return(rc); } /* END dispatch_request() */
void array_delete_wt( struct work_task *ptask) { struct batch_request *preq; job_array *pa; int i; char log_buf[LOCAL_LOG_BUF_SIZE]; int num_jobs = 0; int num_prerun = 0; job *pjob; preq = get_remove_batch_request((char *)ptask->wt_parm1); free(ptask->wt_mutex); free(ptask); if (preq == NULL) return; pa = get_array(preq->rq_ind.rq_delete.rq_objname); if (pa == NULL) { /* jobs must have exited already */ reply_ack(preq); return; } for (i = 0; i < pa->ai_qs.array_size; i++) { if (pa->job_ids[i] == NULL) continue; if ((pjob = svr_find_job(pa->job_ids[i], FALSE)) == NULL) { free(pa->job_ids[i]); pa->job_ids[i] = NULL; } else { num_jobs++; if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN) { num_prerun++; /* mom still hasn't gotten job?? delete anyway */ if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0) { /* job has restart file at mom, do end job processing */ change_restart_comment_if_needed(pjob); svr_setjobstate(pjob, JOB_STATE_EXITING, JOB_SUBSTATE_EXITING, FALSE); pjob->ji_momhandle = -1; /* force new connection */ if (LOGLEVEL >= 7) { sprintf(log_buf, "calling on_job_exit from %s", __func__); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf); } set_task(WORK_Immed, 0, on_job_exit, strdup(pjob->ji_qs.ji_jobid), FALSE); unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); } } else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn) != 0) { /* job has staged-in file, should remove them */ remove_stagein(&pjob); if (pjob != NULL) { /* job_abt() calls svr_job_purge which will try to lock the array again */ pthread_mutex_unlock(pa->ai_mutex); job_abt(&pjob, NULL); pthread_mutex_lock(pa->ai_mutex); } } else { /* job_abt() calls svr_job_purge which will try to lock the array again */ pthread_mutex_unlock(pa->ai_mutex); job_abt(&pjob, NULL); pthread_mutex_lock(pa->ai_mutex); } } /* END if (ji_substate == JOB_SUBSTATE_PRERUN) */ } /* END for each job in array */ pthread_mutex_unlock(pa->ai_mutex); if (LOGLEVEL >= 7) { sprintf(log_buf, "%s: unlocked ai_mutex", __func__); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, __func__, log_buf); } if (num_jobs == num_prerun) { reply_ack(preq); } else { req_deletearray(preq); } } /* END array_delete_wt() */
void dispatch_request( int sfds, /* I */ struct batch_request *request) /* I */ { char *id = "dispatch_request"; if (LOGLEVEL >= 5) { sprintf(log_buffer,"dispatching request %s on sd=%d", reqtype_to_txt(request->rq_type), sfds); log_record( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, id, log_buffer); } switch (request->rq_type) { case PBS_BATCH_QueueJob: net_add_close_func(sfds, close_quejob); req_quejob(request); break; case PBS_BATCH_JobCred: req_jobcredential(request); break; case PBS_BATCH_jobscript: req_jobscript(request); break; case PBS_BATCH_RdytoCommit: req_rdytocommit(request); break; case PBS_BATCH_Commit: req_commit(request); net_add_close_func(sfds, (void (*)())0); break; case PBS_BATCH_DeleteJob: #ifdef PBS_MOM req_deletejob(request); #else /* if this is a server size job delete request, then the request could also be * for an entire array. we check to see if the request object name is an array id. * if so we hand off the the req_deletearray() function. If not we pass along to the * normal req_deltejob() function. */ if (is_array(request->rq_ind.rq_delete.rq_objname)) { req_deletearray(request); } else { req_deletejob(request); } #endif break; case PBS_BATCH_HoldJob: #ifdef PBS_MOM req_holdjob(request); #else if (is_array(request->rq_ind.rq_hold.rq_orig.rq_objname)) { req_holdarray(request); } else { req_holdjob(request); } #endif break; case PBS_BATCH_CheckpointJob: req_checkpointjob(request); break; #ifndef PBS_MOM case PBS_BATCH_LocateJob: req_locatejob(request); break; case PBS_BATCH_Manager: req_manager(request); break; #endif /* END !PBS_MOM */ case PBS_BATCH_MessJob: req_messagejob(request); break; case PBS_BATCH_AsyModifyJob: case PBS_BATCH_ModifyJob: #ifndef PBS_MOM if (is_array(request->rq_ind.rq_delete.rq_objname)) { req_modifyarray(request); } else { req_modifyjob(request); } #else /* END ifndef PBS_MOM */ req_modifyjob(request); #endif /* PBS_MOM */ break; case PBS_BATCH_Rerun: req_rerunjob(request); break; #ifndef PBS_MOM case PBS_BATCH_MoveJob: req_movejob(request); break; case PBS_BATCH_OrderJob: req_orderjob(request); break; case PBS_BATCH_Rescq: req_rescq(request); break; case PBS_BATCH_ReserveResc: req_rescreserve(request); break; case PBS_BATCH_ReleaseResc: req_rescfree(request); break; case PBS_BATCH_ReleaseJob: if (is_array(request->rq_ind.rq_delete.rq_objname)) { req_releasearray(request); } else { req_releasejob(request); } break; case PBS_BATCH_RunJob: case PBS_BATCH_AsyrunJob: req_runjob(request); break; case PBS_BATCH_SelectJobs: case PBS_BATCH_SelStat: /* handle special 'truncated' keyword */ if (!strncasecmp(request->rq_ind.rq_status.rq_id, "truncated", strlen("truncated"))) req_stat_job(request); else req_selectjobs(request); break; #endif /* !PBS_MOM */ case PBS_BATCH_Shutdown: req_shutdown(request); break; case PBS_BATCH_SignalJob: case PBS_BATCH_AsySignalJob: req_signaljob(request); break; case PBS_BATCH_GpuCtrl: req_gpuctrl(request); break; case PBS_BATCH_StatusJob: req_stat_job(request); break; case PBS_BATCH_MvJobFile: req_mvjobfile(request); break; #ifndef PBS_MOM /* server only functions */ case PBS_BATCH_StatusQue: req_stat_que(request); break; case PBS_BATCH_StatusNode: req_stat_node(request); break; case PBS_BATCH_StatusSvr: req_stat_svr(request); break; /* DIAGTODO: handle PBS_BATCH_StatusDiag and define req_stat_diag() */ case PBS_BATCH_TrackJob: req_track(request); break; case PBS_BATCH_RegistDep: if (is_array(request->rq_ind.rq_register.rq_parent)) { req_registerarray(request); } else { req_register(request); } break; case PBS_BATCH_AuthenUser: /* determine if user is valid */ req_authenuser(request); break; case PBS_BATCH_AltAuthenUser: break; case PBS_BATCH_JobObit: req_jobobit(request); break; case PBS_BATCH_StageIn: req_stagein(request); break; #else /* MOM only functions */ case PBS_BATCH_ReturnFiles: req_returnfiles(request); break; case PBS_BATCH_CopyFiles: req_cpyfile(request); break; case PBS_BATCH_DelFiles: req_delfile(request); break; #endif /* !PBS_MOM */ default: req_reject(PBSE_UNKREQ, 0, request, NULL, NULL); close_client(sfds); break; } /* END switch (request->rq_type) */ return; } /* END dispatch_request() */