END_TEST

START_TEST(test_one)
  {
  int rc;

  initialize_all_tasks_array(&task_list_event);
  initialize_task_recycler();

  if (task_list_timed == NULL)
    task_list_timed = new std::list<timed_task>();

  rc = initialize_threadpool(&request_pool, 5, 50, 60);
  fail_unless(rc == PBSE_NONE, "initialize_threadpool failed", rc);

  struct work_task *pWorkTask = set_task(WORK_Timed, 357, check_nodes, NULL, 0);
  fail_unless(pWorkTask != NULL);
  struct work_task *pWorkTask2 = set_task(WORK_Timed, 356, check_nodes, NULL, 0);
  fail_unless(pWorkTask2 != NULL);
  struct work_task *pWorkTask3 = set_task(WORK_Timed, 358, check_nodes, NULL, 0);
  fail_unless(pWorkTask3 != NULL);

  rc = dispatch_task(pWorkTask);
  fail_unless(rc == PBSE_NONE, "dispatch_task failed", rc);

  delete_task(pWorkTask);

  int iter = -1;
  struct work_task *pRecycled = next_task_from_recycler(&tr.tasks, &iter);
  fprintf(stderr, "%p %p\n", (void *)pWorkTask, (void *)pRecycled);
  fail_unless(pRecycled == pWorkTask);

  fail_unless(task_is_in_threadpool(pWorkTask2));
  }
acl_int64 aio_timer_callback::set_task(unsigned int id, acl_int64 delay)
{
	aio_timer_task* task = NULL;
	std::list<aio_timer_task*>::iterator it = tasks_.begin();

	for (; it != tasks_.end(); ++it)
	{
		if ((*it)->id == id)
		{
			task = (*it);
			tasks_.erase(it);
			length_--;
			break;
		}
	}

	if (task == NULL)
	{
		task = NEW aio_timer_task();
		task->delay = delay;
		task->id = id;
	}
	else
		task->delay = delay;

	return set_task(task);
}
void chkpt_xfr_hold(

  batch_request *preq,
  job           *pjob)

  {
  char log_buf[LOCAL_LOG_BUF_SIZE];

  if ((preq == NULL) ||
      (preq->rq_extra == NULL) ||
      (pjob == NULL))
    return;

  if (LOGLEVEL >= 7)
    {
    sprintf(log_buf, "BLCR copy completed (state is %s-%s)",
      PJobState[pjob->ji_qs.ji_state],
      PJobSubState[pjob->ji_qs.ji_substate]);

    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);
    }

  free_br(preq);

  set_task(WORK_Immed, 0, mom_cleanup_checkpoint_hold, strdup(pjob->ji_qs.ji_jobid), FALSE);

  return;
  }  /* END chkpt_xfr_hold() */
/**
 * @brief
 *	cnvrt_delete - delete the reservation when it has had no reservation
 *	job for 10 minutes
 *
 * @param[in,out]	ptask - work task structure which contains the reservation
 */
void
cnvrt_delete(struct work_task *ptask)
{
	int flag = FALSE;
	resc_resv *ptmp, *presv;
	struct work_task *wt;

	ptmp = (resc_resv *)ptask->wt_parm1;
	presv = (resc_resv *)GET_NEXT(svr_allresvs);
	if (presv == NULL || ptmp == NULL)
		return;

	while (presv) {
		if ((presv->ri_wattr[(int)RESV_ATR_convert].at_val.at_str != NULL) &&
			(ptmp->ri_wattr[(int)RESV_ATR_convert].at_val.at_str != NULL)) {
			if (strcmp(presv->ri_wattr[(int)RESV_ATR_convert].at_val.at_str,
				ptmp->ri_wattr[(int)RESV_ATR_convert].at_val.at_str) == 0) {
				flag = TRUE;
				break;
			}
		}
		presv = (resc_resv *)GET_NEXT(presv->ri_allresvs);
	}

	if (presv == NULL && flag == FALSE)
		return;

	if (flag == TRUE && ptmp->ri_qp->qu_numjobs == 0) {
		gen_future_deleteResv(ptmp, 10);
		return;
	}

	wt = set_task(WORK_Timed, (time_now + 600), cnvrt_delete, ptmp);
	append_link(&presv->ri_svrtask, &wt->wt_linkobj, wt);
}
void chkpt_xfr_hold(

  struct work_task *ptask)

  {
  job                  *pjob;
  struct work_task     *ptasknew;
  struct batch_request *preq;

  preq = (struct batch_request *)ptask->wt_parm1;
  pjob = (job *)preq->rq_extra;

  if (LOGLEVEL >= 7)
    {
    sprintf(log_buffer, "BLCR copy completed (state is %s-%s)",
      PJobState[pjob->ji_qs.ji_state],
      PJobSubState[pjob->ji_qs.ji_substate]);

    LOG_EVENT(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buffer);
    }

  release_req(ptask);

  ptasknew = set_task(WORK_Immed, 0, mom_cleanup_checkpoint_hold, (void *)pjob);

  return;
  }  /* END chkpt_xfr_hold() */
/**
 * @brief
 *	Add a task to the mom's deferred command list of commands issued to
 *	the server.
 *
 *	Used only in the case of TPP.
 *
 * @param[in]	stream - stream on which the command is being sent
 * @param[in]	minfo  - the mominfo_t pointer for the mom
 * @param[in]	func   - callback function invoked when the mom responds
 * @param[in]	msgid  - string uniquely identifying this command among others
 * @param[in]	parm1  - first parameter for the work task to be set
 * @param[in]	parm2  - second parameter for the work task to be set
 *
 * @return	Work task structure that was allocated and added to the mom's
 *		deferred command list
 * @retval	NULL   - failure
 * @retval	!NULL  - success
 *
 */
struct work_task *
add_mom_deferred_list(int stream, mominfo_t *minfo, void (*func)(), char *msgid, void *parm1, void *parm2)
{
	struct work_task *ptask = NULL;

	/* WORK_Deferred_cmd is very similar to WORK_Deferred_reply.
	 * However, in the case of WORK_Deferred_reply, wt_parm1 is assumed to
	 * contain a batch_request structure.  In cases where there is no
	 * batch_request structure associated, we use the WORK_Deferred_cmd
	 * event type to differentiate it in process_DreplyRPP.
	 */
	ptask = set_task(WORK_Deferred_cmd, (long)stream, func, parm1);
	if (ptask == NULL) {
		log_err(errno, __func__, "could not set_task");
		return NULL;
	}

	ptask->wt_aux2 = 1; /* set to rpp */
	ptask->wt_parm2 = parm2;
	ptask->wt_event2 = msgid;

	/* Remove this task from the event list, as we will be adding it to the
	 * deferred list anyway and there is no child process whose exit needs
	 * to be reaped. */
	delete_link(&ptask->wt_linkall);

	/* append to the mom's deferred command list */
	append_link(&(((mom_svrinfo_t *)(minfo->mi_data))->msr_deferred_cmds),
		&ptask->wt_linkobj2, ptask);

	return ptask;
}
acl_int64 event_timer::set_task(unsigned int id, acl_int64 delay)
{
	if (delay < 0) {
		logger_error("invalid task, id: %u, delay: %lld", id, delay);
		return -1;
	}

	event_task* task = NULL;
	std::list<event_task*>::iterator it = tasks_.begin();

	for (; it != tasks_.end(); ++it) {
		if ((*it)->id == id) {
			task = (*it);
			tasks_.erase(it);
			length_--;
			break;
		}
	}

	if (task == NULL) {
		task = NEW event_task();
		task->delay = delay;
		task->id = id;
	} else
		task->delay = delay;

	return set_task(task);
}
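// Usage sketch (not part of the original sources): set_task(id, delay) above
// either re-arms an existing task with the same id or allocates a new one and
// hands it to a per-task overload that is not shown here.  The subclass name,
// the delay value and unit, and the exact timer_callback() signature below are
// assumptions; the trigger() snippets later in this list suggest that
// timer_callback(id) is the virtual hook invoked when a task expires.
#include <stdio.h>
// (the event_timer header from the surrounding project is assumed to be included)

class heartbeat_timer : public event_timer
{
protected:
	// assumed signature of the expiration hook called from trigger()
	void timer_callback(unsigned int id)
	{
		printf("timer task %u fired\r\n", id);
		set_task(id, 1000000);	// re-arm the same id; delay unit assumed
	}
};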
int main(void)
{
	W5100_RESET_DDR  |= (1 << W5100_RESET);
	W5100_RESET_PORT |= (1 << W5100_RESET);
	_delay_ms(100);

	w5100.hard_reset();
	_delay_ms(100);

	Spi.init();
	UART.init();
	_delay_ms(100);

	w5100.soft_reset();
	_delay_ms(100);

	w5100.set_mac(DEV_MAC_5, DEV_MAC_4, DEV_MAC_3, DEV_MAC_2, DEV_MAC_1, DEV_MAC_0);
	w5100.set_ip(DEV_IP_3, DEV_IP_2, DEV_IP_1, DEV_IP_0);
	w5100.set_mask(NET_MASK_3, NET_MASK_2, NET_MASK_1, NET_MASK_0);
	w5100.set_gateway(NET_GW_3, NET_GW_2, NET_GW_1, NET_GW_0);
	w5100.set_other_options();
	UART.write_str("w5100 settings success\n\r");

	socket0.open_tcp(LOCAL_SERVER_PORT);
	socket0.listen();

	init_dispatcher();
	run_dispatcher();
	device_init();

	set_task(task_socket0_poll, 0);
	set_task(task_socket1_poll, 0);
	set_timer_task(task_heartbeat, 0, HEARTBEAT_INTERVAL_MS);
	set_timer_task(task_outcoming_requests_queue_service, 0, REQUESTS_QUEUE_SERVICE_INTERVAL_MS);

	sprintf(large_txt_buf, "start s0 and s1 polling\r\n");
	UART.write_str(large_txt_buf);

	while (1) {
		task_manager();
	}
}
int main(void)
{
	init_devices();

	//
	// start at least one task here
	//
	set_task(7);	// task7 runs
	set_task(6);	// task6 runs

	// main loop
	while (1) {
		if (tick_flag) {
			tick_flag = 0;
			task_dispatch();	// well....
		}
	}

	return 0;
}
int process_task(unsigned char *taskdata)
{
	int task, del;

	task = *(unsigned int *)(taskdata);

	switch (task) {
	case 0:
		xlog("task test");
		del = 1;
		break;
	case 1:
		del = set_task((void *)(taskdata), (int (*)(void *, struct character *))set_clan_rank);
		break;
	case 2:
		del = set_task((void *)(taskdata), (int (*)(void *, struct character *))fire_from_clan);
		break;
	case 5:
		del = set_task((void *)(taskdata), (int (*)(void *, struct character *))set_flags);
		break;
	default:
		elog("deleting unknown task %d", task);
		del = 1;
		break;
	}

	return del;
}
acl_int64 event_timer::trigger(void)
{
	// sanity check
	if (tasks_.empty())
		return TIMER_EMPTY;

	acl_assert(length_ > 0);

	set_time();

	std::list<event_task*> tasks;

	// pull the expired timer tasks out of the timer
	for (std::list<event_task*>::iterator it = tasks_.begin();
		it != tasks_.end();)
	{
		if ((*it)->when > present_)
			break;
		tasks.push_back(*it);
		it = tasks_.erase(it);
		length_--;
	}

	if (tasks.empty())
	{
		acl_assert(!tasks_.empty());
		event_task* first = tasks_.front();
		acl_int64 delay = first->when - present_;
		return delay < 0 ? 0 : delay;
	}

	for (std::list<event_task*>::iterator it = tasks.begin();
		it != tasks.end(); ++it)
	{
		set_task(*it);
		// call the subclass virtual function to trigger the timer task
		timer_callback((*it)->id);
	}

	tasks.clear();

	// the subclass may have removed all timer tasks inside timer_callback
	if (tasks_.empty())
		return TIMER_EMPTY;

	event_task* first = tasks_.front();
	acl_int64 delay = first->when - present_;

	if (delay < 0)
		return 0;
	else if (delay > first->delay) /* xxx */
		return first->delay;
	else
		return delay;
}
acl_int64 event_timer::trigger(void)
{
	// sanity check
	if (tasks_.empty())
		return TIMER_EMPTY;

	acl_assert(length_ > 0);

	set_time();

	std::list<event_task*>::iterator it, next;
	std::list<event_task*> tasks;
	event_task* task;

	// pull the expired timer tasks out of the timer
	for (it = tasks_.begin(); it != tasks_.end(); it = next)
	{
		if ((*it)->when > present_)
			break;
		next = it;
		++next;
		task = *it;
		tasks_.erase(it);
		length_--;
		tasks.push_back(task);
	}

	// the expired tasks may already have been removed by the user
	if (tasks.empty())
	{
		acl_assert(!tasks_.empty());
		event_task* first = tasks_.front();
		acl_int64 delay = first->when - present_;
		return delay < 0 ? 0 : delay;
	}

	for (it = tasks.begin(); it != tasks.end(); ++it)
	{
		set_task(*it);
		// call the subclass virtual function to trigger the timer task
		timer_callback((*it)->id);
	}

	tasks.clear();

	// the subclass may have removed all timer tasks inside timer_callback
	if (tasks_.empty())
		return TIMER_EMPTY;

	event_task* first = tasks_.front();
	acl_int64 delay = first->when - present_;
	return delay < 0 ? 0 : delay;
}
int que_to_local_svr(

  struct batch_request *preq)  /* I */

  {
  preq->rq_fromsvr = 1;
  preq->rq_perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR | ATR_DFLAG_SvWR;

  if (preq->rq_id == NULL)
    get_batch_request_id(preq);

  set_task(WORK_Immed, 0, reissue_to_svr, preq->rq_id, TRUE);

  return(PBSE_NONE);
  }  /* END que_to_local_svr() */
/**
 * poll_job_task
 *
 * The invocation of this routine is triggered from
 * the pbs_server main_loop code.  The check of
 * SRV_ATR_PollJobs appears to be redundant.
 */
void poll_job_task(

  struct work_task *ptask)

  {
  char   *job_id = (char *)ptask->wt_parm1;
  job    *pjob;
  time_t  time_now = time(NULL);
  long    poll_jobs = 0;
  int     job_state = -1;

  if (job_id != NULL)
    {
    pjob = svr_find_job(job_id, FALSE);

    if (pjob != NULL)
      {
      mutex_mgr job_mutex(pjob->ji_mutex, true);

      job_state = pjob->ji_qs.ji_state;
      job_mutex.unlock();

      get_svr_attr_l(SRV_ATR_PollJobs, &poll_jobs);

      if ((poll_jobs) && (job_state == JOB_STATE_RUNNING))
        {
        /* We need to throttle the number of outstanding threads that are
           doing job polling.  This prevents a problem where pbs_server
           gets hung waiting on I/O from the mom. */
        pthread_mutex_lock(poll_job_task_mutex);

        if (current_poll_job_tasks < max_poll_job_tasks)
          {
          current_poll_job_tasks++;
          pthread_mutex_unlock(poll_job_task_mutex);

          stat_mom_job(job_id);

          pthread_mutex_lock(poll_job_task_mutex);
          current_poll_job_tasks--;
          }

        pthread_mutex_unlock(poll_job_task_mutex);

        /* add another task */
        set_task(WORK_Timed, time_now + JobStatRate, poll_job_task, strdup(job_id), FALSE);
        }
      }

    free(job_id);
    }

  free(ptask->wt_mutex);
  free(ptask);
  }  /* END poll_job_task() */
int
issue_to_svr(char *servern, struct batch_request *preq, void (*replyfunc)(struct work_task *))
{
	int	 do_retry = 0;
	int	 handle;
	pbs_net_t svraddr;
	char	*svrname;
	unsigned int port = pbs_server_port_dis;
	struct work_task *pwt;
	extern int pbs_failover_active;
	extern char primary_host[];
	extern char server_host[];

	(void)strcpy(preq->rq_host, servern);
	preq->rq_fromsvr = 1;
	preq->rq_perm = ATR_DFLAG_MGRD | ATR_DFLAG_MGWR | ATR_DFLAG_SvWR;
	svrname = parse_servername(servern, &port);

	if ((pbs_failover_active != 0) && (svrname != NULL)) {
		/* we are the active secondary server in a failover config;    */
		/* if the message is going to the primary, then redirect to me */
		size_t len;

		len = strlen(svrname);
		if (strncasecmp(svrname, primary_host, len) == 0) {
			if ((primary_host[(int)len] == '\0') ||
				(primary_host[(int)len] == '.'))
				svrname = server_host;
		}
	}

	svraddr = get_hostaddr(svrname);
	if (svraddr == (pbs_net_t)0) {
		if (pbs_errno == PBS_NET_RC_RETRY)
			/* non-fatal error - retry */
			do_retry = 1;
	} else {
		handle = svr_connect(svraddr, port, process_Dreply, ToServerDIS, PROT_TCP);
		if (handle >= 0)
			return (issue_Drequest(handle, preq, replyfunc, 0, 0));
		else if (handle == PBS_NET_RC_RETRY)
			do_retry = 1;
	}

	/* if we reached here it didn't go; do we retry? */

	if (do_retry) {
		pwt = set_task(WORK_Timed,
			(long)(time_now + (2 * PBS_NET_RETRY_TIME)),
			reissue_to_svr, (void *)preq);
		pwt->wt_parm2 = (void *)replyfunc;
		return (0);
	} else
		return (-1);
}
void kernel_main()
{
	int i = 0xa0000;
	for( ; i <= 0xaffff; ++i ){
		asm_write_mem( i, 1 );
	}

	// Delete boot sector and temporary data.
	for( i = 0x1000; i <= 0x9fc00; ++i ){
		asm_write_mem( i, 0 );
	}

	init_palette();

	i = 0xa0000;
	for( ; i <= 0xaffff; ++i ){
		asm_write_mem( i, 0 );
	}

	asm_disable_intr();
	InitGDT();
	InitIDT();
	InitPIC();
	asm_enable_intr();

	set_task( 1, task_a, stack[ 0 ] + 2048 );
	set_task( 2, task_b, stack[ 1 ] + 2048 );
	switch_task_2( TASK_INFO_ADDR, TASK_INFO_ADDR + sizeof( TaskInfo ) );

	for(;;){
		print_str( 0, 20, "fin" );
		asm_halt();
	}

	asm_halt();
}
void track_save(

  struct work_task *pwt)  /* unused */

  {
  int        fd;
  char      *myid = "save_track";
  time_t     time_now = time(NULL);
  work_task *wt;

  /* set task for next round trip */

  if (pwt)  /* set up another work task for the next time period */
    {
    free(pwt->wt_mutex);
    free(pwt);

    wt = set_task(WORK_Timed, (long)time_now + PBS_SAVE_TRACK_TM, track_save, NULL, FALSE);

    if (wt == NULL)
      log_err(errno, myid, "Unable to set task for save");
    }

  if (server.sv_trackmodifed == 0)
    return;  /* nothing to do this time */

  fd = open(path_track, O_WRONLY, 0);

  if (fd < 0)
    {
    log_err(errno, myid, "Unable to open tracking file");
    return;
    }

  if (write(fd, (char *)server.sv_track, server.sv_tracksize * sizeof(struct tracking)) !=
      (ssize_t)(server.sv_tracksize * sizeof(struct tracking)))
    {
    log_err(errno, myid, "failed to write to track file");
    }

  if (close(fd) < 0)
    {
    log_err(errno, myid, "failed to close track file after saving");
    return;
    }

  server.sv_trackmodifed = 0;

  return;
  }
struct work_task *apply_job_delete_nanny(

  struct job *pjob,
  int         delay)  /* I */

  {
  struct work_task *pwtnew;
  enum work_type    tasktype;

  /* short-circuit if nanny isn't enabled */

  if (!server.sv_attr[SRV_ATR_JobNanny].at_val.at_long)
    {
    remove_job_delete_nanny(pjob);  /* in case it was recently disabled */

    return(NULL);
    }

  if (delay == 0)
    {
    tasktype = WORK_Immed;
    }
  else if (delay > 0)
    {
    tasktype = WORK_Timed;
    }
  else
    {
    log_err(-1, "apply_job_delete_nanny", "negative delay requested for nanny");

    return(NULL);
    }

  /* first, surgically remove any existing nanny tasks */

  remove_job_delete_nanny(pjob);

  /* second, add a nanny task at the requested time */

  pwtnew = set_task(tasktype, delay, job_delete_nanny, (void *)pjob);

  if (pwtnew)
    {
    /* ensure that the work task will be removed if the job goes away */

    append_link(&pjob->ji_svrtask, &pwtnew->wt_linkobj, pwtnew);
    }

  return(pwtnew);
  }  /* END apply_job_delete_nanny() */
GList *get_values(int num_tasks)
{
	int i;
	int period, wcet;
	GList *list = NULL;

	for (i = 0; i < num_tasks; i++) {
		printf("Digite o Period e o Wcet da Tarefa %d:\t", i + 1);
		scanf("%d %d", &period, &wcet);
		list = g_list_append(list, set_task(period, wcet, 0));
	}

	return list;
}
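/* Illustrative sketch only (not from the original source): get_values() above
 * calls a three-argument set_task(period, wcet, 0) and stores the returned
 * pointer in a GList, but the definition is not shown.  The struct layout,
 * the field names, and the meaning of the third argument are assumptions. */
#include <glib.h>

typedef struct {
    int period;   /* task period */
    int wcet;     /* worst-case execution time */
    int offset;   /* guessed meaning of the third set_task() argument */
} Task;

Task *set_task(int period, int wcet, int offset)
{
    Task *t = g_new(Task, 1);   /* GLib allocator; aborts on OOM */
    t->period = period;
    t->wcet = wcet;
    t->offset = offset;
    return t;
}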
/**
 * poll_job_task
 *
 * The invocation of this routine is triggered from
 * the pbs_server main_loop code.
 */
void poll_job_task(

  struct work_task *ptask)

  {
  char   *job_id = (char *)ptask->wt_parm1;
  job    *pjob;
  time_t  time_now = time(NULL);
  long    poll_jobs = 0;
  long    job_stat_rate;

  free(ptask->wt_mutex);
  free(ptask);

  if (job_id != NULL)
    {
    pjob = svr_find_job(job_id, FALSE);

    if (pjob != NULL)
      {
      mutex_mgr job_mutex(pjob->ji_mutex, true);
      int       job_state = -1;

      job_state = pjob->ji_qs.ji_state;

      // only do things for running jobs
      if (job_state == JOB_STATE_RUNNING)
        {
        job_mutex.unlock();
        get_svr_attr_l(SRV_ATR_JobStatRate, &job_stat_rate);

        if (time(NULL) - pjob->ji_last_reported_time > job_stat_rate)
          {
          get_svr_attr_l(SRV_ATR_PollJobs, &poll_jobs);

          if (poll_jobs)
            stat_mom_job(job_id);
          }

        /* add another task */
        set_task(WORK_Timed, time_now + (job_stat_rate / 3), poll_job_task, strdup(job_id), FALSE);
        }
      }

    free(job_id);
    }
  }  /* END poll_job_task() */
void queue_a_retry_task(

  batch_request *preq,                           /* I */
  void         (*replyfunc)(struct work_task *)) /* I */

  {
  /* create a new batch_request because preq is going to be freed when
   * issue_to_svr returns success */
  batch_request    *new_preq = duplicate_request(preq, -1);
  struct work_task *pwt;

  get_batch_request_id(new_preq);

  pwt = set_task(WORK_Timed, (time(NULL) + PBS_NET_RETRY_TIME), reissue_to_svr, new_preq->rq_id, TRUE);

  pwt->wt_parmfunc = replyfunc;

  pthread_mutex_unlock(pwt->wt_mutex);
  }  /* END queue_a_retry_task() */
struct psi_process *
psi_arch_process(const pid_t pid)
{
    struct kinfo_proc p;
    struct psi_process *proci;

    if (get_kinfo_proc(pid, &p) == -1) {
        return NULL;
    }

    proci = psi_calloc(sizeof(struct psi_process));
    if (proci == NULL) {
        return NULL;
    }

    if (set_exe(proci, &p) == -1) goto cleanup;
    if (set_cwd(proci, &p) == -1) goto cleanup;
    if (set_kp_proc(proci, &p) == -1) goto cleanup;
    if (set_kp_eproc(proci, &p) == -1) goto cleanup;
    if (set_task(proci, &p) == -1) goto cleanup;

    if (proci->utime_status == PSI_STATUS_PRIVS || proci->stime_status == PSI_STATUS_PRIVS)
        proci->cputime_status = PSI_STATUS_PRIVS;
    else {
        proci->cputime = calc_cputime(proci->utime, proci->stime);
        proci->cputime_status = PSI_STATUS_OK;
    }

    if (proci->command_status == PSI_STATUS_PRIVS) {
        /* Ensure Process.command always has a value, as per our
         * contract with the user. */
        proci->command = psi_strdup("");
        proci->command_status = PSI_STATUS_OK;
    }

    return proci;

  cleanup:
    psi_free_process(proci);
    return NULL;
}
// Runs in the timer interrupt
inline uint8_t timer_manager(void)
{
	uint8_t index;

	for (index = 0; index < MAIN_TIMER_QUEUE_SIZE; index++)
	{
		if (TimerQueue[index].task_ptr == NULL)
			continue;

		if (TimerQueue[index].task_time_elapsed > 0)
		{
			TimerQueue[index].task_time_elapsed--;
		}
		else
		{
			set_task(TimerQueue[index].task_ptr);

			if (TimerQueue[index].flags & FLAG_PERSISTENT)
				TimerQueue[index].task_time_elapsed = TimerQueue[index].task_time;
			else
				TimerQueue[index].task_ptr = NULL;
		}
	}

	return E_OK;
}
void chkpt_xfr_hold(

  struct work_task *ptask)

  {
  job                  *pjob;
  struct batch_request *preq;
  char                  log_buf[LOCAL_LOG_BUF_SIZE];

  preq = get_remove_batch_request((char *)ptask->wt_parm1);

  free(ptask->wt_mutex);
  free(ptask);

  if ((preq == NULL) ||
      (preq->rq_extra == NULL))
    return;

  if ((pjob = svr_find_job(preq->rq_extra, FALSE)) == NULL)
    return;

  if (LOGLEVEL >= 7)
    {
    sprintf(log_buf, "BLCR copy completed (state is %s-%s)",
      PJobState[pjob->ji_qs.ji_state],
      PJobSubState[pjob->ji_qs.ji_substate]);

    log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf);
    }

  free_br(preq);

  set_task(WORK_Immed, 0, mom_cleanup_checkpoint_hold, strdup(pjob->ji_qs.ji_jobid), FALSE);

  unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);

  return;
  }  /* END chkpt_xfr_hold() */
int apply_job_delete_nanny(

  struct job *pjob,
  int         delay)  /* I */

  {
  enum work_type tasktype;
  long           nanny = FALSE;

  /* short-circuit if nanny isn't enabled or we already have a delete nanny */
  get_svr_attr_l(SRV_ATR_JobNanny, &nanny);

  if ((nanny == FALSE) ||
      (pjob->ji_has_delete_nanny == TRUE))
    {
    return(PBSE_NONE);
    }

  if (delay == 0)
    {
    tasktype = WORK_Immed;
    }
  else if (delay > 0)
    {
    tasktype = WORK_Timed;
    }
  else
    {
    log_err(-1, __func__, "negative delay requested for nanny");

    return(-1);
    }

  pjob->ji_has_delete_nanny = TRUE;

  /* add a nanny task at the requested time */
  set_task(tasktype, delay, job_delete_nanny, strdup(pjob->ji_qs.ji_jobid), FALSE);

  return(PBSE_NONE);
  }  /* END apply_job_delete_nanny() */
static void post_delete_mom1(

  struct work_task *pwt)

  {
  int                   delay = 0;
  int                   dellen = strlen(deldelaystr);
  job                  *pjob;
  pbs_queue            *pque;
  char                 *preq_clt_id;
  struct batch_request *preq_sig;        /* signal request to MOM */
  struct batch_request *preq_clt = NULL; /* original client request */
  int                   rc;
  time_t                time_now = time(NULL);

  preq_sig = get_remove_batch_request((char *)pwt->wt_parm1);

  free(pwt->wt_mutex);
  free(pwt);

  if (preq_sig == NULL)
    return;

  rc          = preq_sig->rq_reply.brp_code;
  preq_clt_id = preq_sig->rq_extra;

  free_br(preq_sig);

  if (preq_clt_id != NULL)
    {
    preq_clt = get_remove_batch_request(preq_clt_id);
    free(preq_clt_id);
    }

  /* the client request has been handled another way, nothing left to do */
  if (preq_clt == NULL)
    return;

  pjob = svr_find_job(preq_clt->rq_ind.rq_delete.rq_objname, FALSE);

  if (pjob == NULL)
    {
    /* job has gone away */
    req_reject(PBSE_UNKJOBID, 0, preq_clt, NULL, NULL);

    return;
    }

  if (rc)
    {
    /* mom rejected request */

    if (rc == PBSE_UNKJOBID)
      {
      /* MOM claims no knowledge, so just purge it */
      log_event(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        "MOM rejected signal during delete");

      /* remove the resources assigned to the job */
      free_nodes(pjob);
      set_resc_assigned(pjob, DECR);

      svr_job_purge(pjob);

      reply_ack(preq_clt);
      }
    else
      {
      req_reject(rc, 0, preq_clt, NULL, NULL);

      unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL);
      }

    return;
    }

  if (preq_clt->rq_extend)
    {
    if (strncmp(preq_clt->rq_extend, deldelaystr, dellen) == 0)
      {
      delay = atoi(preq_clt->rq_extend + dellen);
      }
    }

  reply_ack(preq_clt);  /* don't need it, reply now */

  /*
   * if no delay was specified in the original request, see if the
   * kill_delay queue attribute is set
   */
  if (delay == 0)
    {
    if ((pque = get_jobs_queue(&pjob)) != NULL)
      {
      pthread_mutex_lock(server.sv_attr_mutex);
      delay = attr_ifelse_long(&pque->qu_attr[QE_ATR_KillDelay],
                               &server.sv_attr[SRV_ATR_KillDelay],
                               2);
      pthread_mutex_unlock(server.sv_attr_mutex);
      unlock_queue(pque, __func__, NULL, LOGLEVEL);
      }
    else if (pjob != NULL)
      return;
    }

  set_task(WORK_Timed, delay + time_now, post_delete_mom2, strdup(pjob->ji_qs.ji_jobid), FALSE);

  /*
   * Since the first signal has succeeded, let's reschedule the
   * nanny to be 1 minute after the second phase.
   */
  apply_job_delete_nanny(pjob, time_now + delay + 60);

  unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL);
  }  /* END post_delete_mom1() */
int execute_job_delete( job *pjob, /* M */ char *Msg, /* I */ struct batch_request *preq) /* I */ { struct work_task *pwtnew; int rc; char *sigt = "SIGTERM"; int has_mutex = TRUE; char log_buf[LOCAL_LOG_BUF_SIZE]; time_t time_now = time(NULL); long force_cancel = FALSE; long array_compatible = FALSE; chk_job_req_permissions(&pjob,preq); if (pjob == NULL) { /* preq is rejected in chk_job_req_permissions here */ return(-1); } if (pjob->ji_qs.ji_state == JOB_STATE_TRANSIT) { /* see note in req_delete - not sure this is possible still, * but the deleted code is irrelevant now. I will leave this * part --dbeer */ unlock_ji_mutex(pjob, __func__, "1", LOGLEVEL); return(-1); } if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN || pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN || pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN1 || pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN2 || pjob->ji_qs.ji_substate == JOB_SUBSTATE_RERUN3 ) { /* If JOB_SUBSTATE_PRERUN being sent to MOM, wait till she gets it going */ /* retry in one second */ /* If JOB_SUBSTATE_RERUN, RERUN1, RERUN2 or RERUN3 the job is being requeued. Wait until finished */ static time_t cycle_check_when = 0; static char cycle_check_jid[PBS_MAXSVRJOBID + 1]; if (cycle_check_when != 0) { if (!strcmp(pjob->ji_qs.ji_jobid, cycle_check_jid) && (time_now - cycle_check_when > 10)) { /* state not updated after 10 seconds */ /* did the mom ever get it? delete it anyways... */ cycle_check_jid[0] = '\0'; cycle_check_when = 0; goto jump; } if (time_now - cycle_check_when > 20) { /* give up after 20 seconds */ cycle_check_jid[0] = '\0'; cycle_check_when = 0; } } /* END if (cycle_check_when != 0) */ if (cycle_check_when == 0) { /* new PRERUN job located */ cycle_check_when = time_now; strcpy(cycle_check_jid, pjob->ji_qs.ji_jobid); } sprintf(log_buf, "job cannot be deleted, state=PRERUN, requeuing delete request"); log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buf); pwtnew = set_task(WORK_Timed,time_now + 1,post_delete_route,preq,FALSE); unlock_ji_mutex(pjob, __func__, "2", LOGLEVEL); if (pwtnew == NULL) { req_reject(PBSE_SYSTEM, 0, preq, NULL, NULL); return(-1); } else { return(ROUTE_DELETE); } } /* END if (pjob->ji_qs.ji_substate == JOB_SUBSTATE_PRERUN) */ jump: /* * Log delete and if requesting client is not job owner, send mail. 
*/ sprintf(log_buf, "requestor=%s@%s", preq->rq_user, preq->rq_host); /* NOTE: should annotate accounting record with extend message (NYI) */ account_record(PBS_ACCT_DEL, pjob, log_buf); sprintf(log_buf, msg_manager, msg_deletejob, preq->rq_user, preq->rq_host); log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buf); /* NOTE: should incorporate job delete message */ if (Msg != NULL) { /* have text message in request extension, add it */ strcat(log_buf, "\n"); strcat(log_buf, Msg); } if ((svr_chk_owner(preq, pjob) != 0) && (pjob->ji_has_delete_nanny == FALSE)) { /* only send email if owner did not delete job and job deleted has not been previously attempted */ svr_mailowner(pjob, MAIL_DEL, MAIL_FORCE, log_buf); /* * If we sent mail and already sent the extra message * then reset message so we don't trigger a redundant email * in job_abt() */ if (Msg != NULL) { Msg = NULL; } } if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0) { /* job has restart file at mom, change restart comment if failed */ change_restart_comment_if_needed(pjob); } if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) { /* * setup a nanny task to make sure the job is actually deleted (see the * comments at job_delete_nanny()). */ if (pjob->ji_has_delete_nanny == TRUE) { unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL); req_reject(PBSE_IVALREQ, 0, preq, NULL, "job cancel in progress"); return(-1); } apply_job_delete_nanny(pjob, time_now + 60); /* * Send signal request to MOM. The server will automagically * pick up and "finish" off the client request when MOM replies. */ get_batch_request_id(preq); if ((rc = issue_signal(&pjob, sigt, post_delete_mom1, strdup(preq->rq_id)))) { /* cant send to MOM */ req_reject(rc, 0, preq, NULL, NULL); } /* normally will ack reply when mom responds */ if (pjob != NULL) { sprintf(log_buf, msg_delrunjobsig, sigt); log_event(PBSEVENT_JOB,PBS_EVENTCLASS_JOB,pjob->ji_qs.ji_jobid,log_buf); unlock_ji_mutex(pjob, __func__, "4", LOGLEVEL); } return(-1); } /* END if (pjob->ji_qs.ji_state == JOB_STATE_RUNNING) */ /* make a cleanup task if set */ get_svr_attr_l(SRV_ATR_JobForceCancelTime, &force_cancel); if (force_cancel > 0) { char *dup_jobid = strdup(pjob->ji_qs.ji_jobid); set_task(WORK_Timed, time_now + force_cancel, ensure_deleted, dup_jobid, FALSE); } /* if configured, and this job didn't have a slot limit hold, free a job * held with the slot limit hold */ get_svr_attr_l(SRV_ATR_MoabArrayCompatible, &array_compatible); if ((array_compatible != FALSE) && ((pjob->ji_wattr[JOB_ATR_hold].at_val.at_long & HOLD_l) == FALSE)) { if ((pjob->ji_arraystruct != NULL) && (pjob->ji_is_array_template == FALSE)) { int i; int newstate; int newsub; job *tmp; job_array *pa = get_jobs_array(&pjob); if (pjob == NULL) return(-1); for (i = 0; i < pa->ai_qs.array_size; i++) { if (pa->job_ids[i] == NULL) continue; if (!strcmp(pa->job_ids[i], pjob->ji_qs.ji_jobid)) continue; if ((tmp = svr_find_job(pa->job_ids[i], FALSE)) == NULL) { free(pa->job_ids[i]); pa->job_ids[i] = NULL; } else { if (tmp->ji_wattr[JOB_ATR_hold].at_val.at_long & HOLD_l) { tmp->ji_wattr[JOB_ATR_hold].at_val.at_long &= ~HOLD_l; if (tmp->ji_wattr[JOB_ATR_hold].at_val.at_long == 0) { tmp->ji_wattr[JOB_ATR_hold].at_flags &= ~ATR_VFLAG_SET; } svr_evaljobstate(tmp, &newstate, &newsub, 1); svr_setjobstate(tmp, newstate, newsub, FALSE); job_save(tmp, SAVEJOB_FULL, 0); unlock_ji_mutex(tmp, __func__, "5", LOGLEVEL); break; } unlock_ji_mutex(tmp, __func__, "6", LOGLEVEL); } } if (LOGLEVEL >= 7) { sprintf(log_buf, "%s: unlocking ai_mutex", 
__func__); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf); } pthread_mutex_unlock(pa->ai_mutex); } } /* END MoabArrayCompatible check */ if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0) { /* job has restart file at mom, do end job processing */ svr_setjobstate(pjob, JOB_STATE_EXITING, JOB_SUBSTATE_EXITING, FALSE); /* force new connection */ pjob->ji_momhandle = -1; if (LOGLEVEL >= 7) { sprintf(log_buf, "calling on_job_exit from %s", __func__); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf); } set_task(WORK_Immed, 0, on_job_exit, strdup(pjob->ji_qs.ji_jobid), FALSE); } else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_StagedIn) != 0) { /* job has staged-in file, should remove them */ remove_stagein(&pjob); if (pjob != NULL) job_abt(&pjob, Msg); has_mutex = FALSE; } else { /* * the job is not transitting (though it may have been) and * is not running, so put in into a complete state. */ struct pbs_queue *pque; int KeepSeconds = 0; svr_setjobstate(pjob, JOB_STATE_COMPLETE, JOB_SUBSTATE_COMPLETE, FALSE); if ((pque = get_jobs_queue(&pjob)) != NULL) { pque->qu_numcompleted++; unlock_queue(pque, __func__, NULL, LOGLEVEL); if (LOGLEVEL >= 7) { sprintf(log_buf, "calling on_job_exit from %s", __func__); log_event(PBSEVENT_JOB, PBS_EVENTCLASS_JOB, pjob->ji_qs.ji_jobid, log_buf); } pthread_mutex_lock(server.sv_attr_mutex); KeepSeconds = attr_ifelse_long( &pque->qu_attr[QE_ATR_KeepCompleted], &server.sv_attr[SRV_ATR_KeepCompleted], 0); pthread_mutex_unlock(server.sv_attr_mutex); } else KeepSeconds = 0; if (pjob != NULL) { set_task(WORK_Timed, time_now + KeepSeconds, on_job_exit, strdup(pjob->ji_qs.ji_jobid), FALSE); } else has_mutex = FALSE; } /* END else if ((pjob->ji_qs.ji_svrflags & JOB_SVFLG_CHECKPOINT_FILE) != 0) */ if (has_mutex == TRUE) unlock_ji_mutex(pjob, __func__, "7", LOGLEVEL); return(PBSE_NONE); } /* END execute_job_delete() */
void delay_and_send_sig_kill(

  batch_request *preq_sig)

  {
  int            delay = 0;
  job           *pjob;
  pbs_queue     *pque;
  batch_request *preq_clt = NULL;  /* original client request */
  int            rc;
  time_t         time_now = time(NULL);
  char           log_buf[LOCAL_LOG_BUF_SIZE];

  if (preq_sig == NULL)
    return;

  rc = preq_sig->rq_reply.brp_code;

  if (preq_sig->rq_extend != NULL)
    {
    preq_clt = get_remove_batch_request(preq_sig->rq_extend);
    }

  /* the client request has been handled another way, nothing left to do */
  if (preq_clt == NULL)
    return;

  if ((pjob = chk_job_request(preq_clt->rq_ind.rq_rerun, preq_clt)) == NULL)
    {
    /* job has gone away, chk_job_request() calls req_reject() on failure */
    return;
    }

  mutex_mgr pjob_mutex = mutex_mgr(pjob->ji_mutex, true);

  if (rc)
    {
    /* mom rejected request */

    if (rc == PBSE_UNKJOBID)
      {
      /* MOM claims no knowledge, so just purge it */
      log_event(
        PBSEVENT_JOB,
        PBS_EVENTCLASS_JOB,
        pjob->ji_qs.ji_jobid,
        "MOM rejected signal during rerun");

      /* remove the resources assigned to the job */
      free_nodes(pjob);
      set_resc_assigned(pjob, DECR);
      unlock_ji_mutex(pjob, __func__, "3", LOGLEVEL);

      svr_job_purge(pjob);

      reply_ack(preq_clt);
      }
    else
      {
      pjob_mutex.unlock();
      req_reject(rc, 0, preq_clt, NULL, NULL);
      }

    return;
    }

  // Apply the user delay first so it takes precedence.
  if (pjob->ji_wattr[JOB_ATR_user_kill_delay].at_flags & ATR_VFLAG_SET)
    delay = pjob->ji_wattr[JOB_ATR_user_kill_delay].at_val.at_long;

  if ((pque = get_jobs_queue(&pjob)) != NULL)
    {
    mutex_mgr pque_mutex   = mutex_mgr(pque->qu_mutex, true);
    mutex_mgr server_mutex = mutex_mgr(server.sv_attr_mutex, false);

    if (delay == 0)
      {
      delay = attr_ifelse_long(&pque->qu_attr[QE_ATR_KillDelay],
                               &server.sv_attr[SRV_ATR_KillDelay],
                               0);
      }
    }
  else
    {
    /* why is pque NULL?  something went wrong */
    snprintf(log_buf, LOCAL_LOG_BUF_SIZE, "jobid %s returned a null queue",
      pjob->ji_qs.ji_jobid);
    req_reject(PBSE_UNKQUE, 0, preq_clt, NULL, log_buf);

    return;
    }

  pjob_mutex.unlock();
  reply_ack(preq_clt);

  set_task(WORK_Timed, delay + time_now, send_sig_kill, strdup(pjob->ji_qs.ji_jobid), FALSE);
  }  // END delay_and_send_sig_kill()
int send_job( job *jobp, pbs_net_t hostaddr, /* host address, host byte order */ int port, /* service port, host byte order */ int move_type, /* move, route, or execute */ void (*post_func)(struct work_task *), /* after move */ void *data) /* ptr to optional batch_request to be put */ /* in the work task structure */ { tlist_head attrl; enum conn_type cntype = ToServerDIS; int con; char *destin = jobp->ji_qs.ji_destin; int encode_type; int i; int NumRetries; char *id = "send_job"; attribute *pattr; pid_t pid; struct attropl *pqjatr; /* list (single) of attropl for quejob */ char *safail = "sigaction failed\n"; char *spfail = "sigprocmask failed\n"; char script_name[MAXPATHLEN + 1]; sigset_t child_set, all_set; struct sigaction child_action; struct work_task *ptask; mbool_t Timeout = FALSE; char *pc; sigemptyset(&child_set); sigaddset(&child_set, SIGCHLD); sigfillset(&all_set); /* block SIGCHLD until work task is established */ if (sigprocmask(SIG_BLOCK, &child_set, NULL) == -1) { log_err(errno,id,spfail); pbs_errno = PBSE_SYSTEM; log_event( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, jobp->ji_qs.ji_jobid, "cannot set signal mask"); return(ROUTE_PERM_FAILURE); } if (LOGLEVEL >= 6) { sprintf(log_buffer,"about to send job - type=%d", move_type); log_event( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, jobp->ji_qs.ji_jobid, "forking in send_job"); } pid = fork(); if (pid == -1) { /* error on fork */ log_err(errno, id, "fork failed\n"); if (sigprocmask(SIG_UNBLOCK, &child_set, NULL) == -1) log_err(errno, id, spfail); pbs_errno = PBSE_SYSTEM; return(ROUTE_PERM_FAILURE); } if (pid != 0) { /* The parent (main server) */ /* create task to monitor job startup */ /* CRI: need way to report to scheduler job is starting, not started */ ptask = set_task(WORK_Deferred_Child, pid, post_func, jobp); if (ptask == NULL) { log_err(errno, id, msg_err_malloc); return(ROUTE_PERM_FAILURE); } ptask->wt_parm2 = data; append_link( &((job *)jobp)->ji_svrtask, &ptask->wt_linkobj, ptask); /* now can unblock SIGCHLD */ if (sigprocmask(SIG_UNBLOCK, &child_set, NULL) == -1) log_err(errno, id, spfail); if (LOGLEVEL >= 1) { extern long DispatchTime[]; extern job *DispatchJob[]; extern char *DispatchNode[]; extern time_t time_now; struct pbsnode *NP; /* record job dispatch time */ int jindex; for (jindex = 0;jindex < 20;jindex++) { if (DispatchJob[jindex] == NULL) { DispatchTime[jindex] = time_now; DispatchJob[jindex] = jobp; if ((NP = PGetNodeFromAddr(hostaddr)) != NULL) DispatchNode[jindex] = NP->nd_name; else DispatchNode[jindex] = NULL; break; } } } /* SUCCESS */ return(ROUTE_DEFERRED); } /* END if (pid != 0) */ /* * the child process * * set up signal catcher for error return */ rpp_terminate(); child_action.sa_handler = net_move_die; sigfillset(&child_action.sa_mask); child_action.sa_flags = 0; if (sigaction(SIGHUP, &child_action, NULL)) log_err(errno, id, safail); if (sigaction(SIGINT, &child_action, NULL)) log_err(errno, id, safail); if (sigaction(SIGQUIT, &child_action, NULL)) log_err(errno, id, safail); /* signal handling is set, now unblock */ if (sigprocmask(SIG_UNBLOCK, &child_set, NULL) == -1) log_err(errno, id, spfail); /* encode job attributes to be moved */ CLEAR_HEAD(attrl); /* select attributes/resources to send based on move type */ if (move_type == MOVE_TYPE_Exec) { /* moving job to MOM - ie job start */ resc_access_perm = ATR_DFLAG_MOM; encode_type = ATR_ENCODE_MOM; cntype = ToServerDIS; } else { /* moving job to alternate server? 
*/ resc_access_perm = ATR_DFLAG_USWR | ATR_DFLAG_OPWR | ATR_DFLAG_MGWR | ATR_DFLAG_SvRD; encode_type = ATR_ENCODE_SVR; /* clear default resource settings */ svr_dequejob(jobp); } pattr = jobp->ji_wattr; for (i = 0;i < JOB_ATR_LAST;i++) { if (((job_attr_def + i)->at_flags & resc_access_perm) || ((strncmp((job_attr_def + i)->at_name,"session_id",10) == 0) && (jobp->ji_wattr[JOB_ATR_checkpoint_name].at_flags & ATR_VFLAG_SET))) { (job_attr_def + i)->at_encode( pattr + i, &attrl, (job_attr_def + i)->at_name, NULL, encode_type); } } /* END for (i) */ attrl_fixlink(&attrl); /* put together the job script file name */ strcpy(script_name, path_jobs); if (jobp->ji_wattr[JOB_ATR_job_array_request].at_flags & ATR_VFLAG_SET) { strcat(script_name, jobp->ji_arraystruct->ai_qs.fileprefix); } else { strcat(script_name, jobp->ji_qs.ji_fileprefix); } strcat(script_name, JOB_SCRIPT_SUFFIX); pbs_errno = 0; con = -1; for (NumRetries = 0;NumRetries < RETRY;NumRetries++) { int rc; /* connect to receiving server with retries */ if (NumRetries > 0) { /* recycle after an error */ if (con >= 0) svr_disconnect(con); /* check pbs_errno from previous attempt */ if (should_retry_route(pbs_errno) == -1) { sprintf(log_buffer, "child failed in previous commit request for job %s", jobp->ji_qs.ji_jobid); log_err(pbs_errno, id, log_buffer); exit(1); /* fatal error, don't retry */ } sleep(1 << NumRetries); } /* NOTE: on node hangs, svr_connect is successful */ if ((con = svr_connect(hostaddr, port, 0, cntype)) == PBS_NET_RC_FATAL) { sprintf(log_buffer, "send_job failed to %lx port %d", hostaddr, port); log_err(pbs_errno, id, log_buffer); exit(1); } if (con == PBS_NET_RC_RETRY) { pbs_errno = 0; /* should retry */ continue; } /* * if the job is substate JOB_SUBSTATE_TRNOUTCM which means * we are recovering after being down or a late failure, we * just want to send the "ready-to-commit/commit" */ if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUTCM) { if (jobp->ji_qs.ji_substate != JOB_SUBSTATE_TRNOUT) { jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUT; job_save(jobp, SAVEJOB_QUICK); } pqjatr = &((svrattrl *)GET_NEXT(attrl))->al_atopl; if ((pc = PBSD_queuejob( con, jobp->ji_qs.ji_jobid, destin, pqjatr, NULL)) == NULL) { if ((pbs_errno == PBSE_EXPIRED) || (pbs_errno == PBSE_READ_REPLY_TIMEOUT)) { /* queue job timeout based on pbs_tcp_timeout */ Timeout = TRUE; } if ((pbs_errno == PBSE_JOBEXIST) && (move_type == MOVE_TYPE_Exec)) { /* already running, mark it so */ log_event( PBSEVENT_ERROR, PBS_EVENTCLASS_JOB, jobp->ji_qs.ji_jobid, "MOM reports job already running"); exit(0); } sprintf(log_buffer, "send of job to %s failed error = %d", destin, pbs_errno); log_event( PBSEVENT_JOB, PBS_EVENTCLASS_JOB, jobp->ji_qs.ji_jobid, log_buffer); continue; } /* END if ((pc = PBSD_queuejob() == NULL) */ free(pc); if (jobp->ji_qs.ji_svrflags & JOB_SVFLG_SCRIPT) { if (PBSD_jscript(con, script_name, jobp->ji_qs.ji_jobid) != 0) continue; } /* XXX may need to change the logic below, if we are sending the job to a mom on the same host and the mom and server are not sharing the same spool directory, then we still need to move the file */ if ((move_type == MOVE_TYPE_Exec) && (jobp->ji_qs.ji_svrflags & JOB_SVFLG_HASRUN) && (hostaddr != pbs_server_addr)) { /* send files created on prior run */ if ((move_job_file(con,jobp,StdOut) != 0) || (move_job_file(con,jobp,StdErr) != 0) || (move_job_file(con,jobp,Checkpoint) != 0)) { continue; } } /* ignore signals */ if (sigprocmask(SIG_BLOCK, &all_set, NULL) == -1) log_err(errno, id, "sigprocmask\n"); 
jobp->ji_qs.ji_substate = JOB_SUBSTATE_TRNOUTCM; job_save(jobp, SAVEJOB_QUICK); } else { /* ignore signals */ if (sigprocmask(SIG_BLOCK, &all_set, NULL) == -1) log_err(errno, id, "sigprocmask\n"); } if (PBSD_rdytocmt(con, jobp->ji_qs.ji_jobid) != 0) { if (sigprocmask(SIG_UNBLOCK, &all_set, NULL) == -1) log_err(errno, id, "sigprocmask\n"); continue; } if ((rc = PBSD_commit(con, jobp->ji_qs.ji_jobid)) != 0) { int errno2; /* NOTE: errno is modified by log_err */ errno2 = errno; sprintf(log_buffer, "send_job commit failed, rc=%d (%s)", rc, (connection[con].ch_errtxt != NULL) ? connection[con].ch_errtxt : "N/A"); log_ext(errno2, id, log_buffer, LOG_WARNING); /* if failure occurs, pbs_mom should purge job and pbs_server should set * job state to idle w/error msg */ if (errno2 == EINPROGRESS) { /* request is still being processed */ /* increase tcp_timeout in qmgr? */ Timeout = TRUE; /* do we need a continue here? */ sprintf(log_buffer, "child commit request timed-out for job %s, increase tcp_timeout?", jobp->ji_qs.ji_jobid); log_ext(errno2, id, log_buffer, LOG_WARNING); /* don't retry on timeout--break out and report error! */ break; } else { sprintf(log_buffer, "child failed in commit request for job %s", jobp->ji_qs.ji_jobid); log_ext(errno2, id, log_buffer, LOG_CRIT); /* FAILURE */ exit(1); } } /* END if ((rc = PBSD_commit(con,jobp->ji_qs.ji_jobid)) != 0) */ svr_disconnect(con); /* child process is done */ /* SUCCESS */ exit(0); } /* END for (NumRetries) */ if (con >= 0) svr_disconnect(con); if (Timeout == TRUE) { /* 10 indicates that job migrate timed out, server will mark node down * and abort the job - see post_sendmom() */ sprintf(log_buffer, "child timed-out attempting to start job %s", jobp->ji_qs.ji_jobid); log_ext(pbs_errno, id, log_buffer, LOG_WARNING); exit(10); } if (should_retry_route(pbs_errno) == -1) { sprintf(log_buffer, "child failed and will not retry job %s", jobp->ji_qs.ji_jobid); log_err(pbs_errno, id, log_buffer); exit(1); } exit(2); /*NOTREACHED*/ return(ROUTE_SUCCESS); } /* END send_job() */
acl_int64 aio_timer_callback::trigger(void)
{
	// sanity check
	if (tasks_.empty())
		return TIMER_EMPTY;

	acl_assert(length_ > 0);

	set_time();

	std::list<aio_timer_task*>::iterator it, next;
	std::list<aio_timer_task*> tasks;
	aio_timer_task* task;

	// pull the expired timer tasks out of the timer
	for (it = tasks_.begin(); it != tasks_.end(); it = next)
	{
		if ((*it)->when > present_)
			break;
		next = it;
		++next;
		task = *it;
		tasks_.erase(it);
		length_--;
		tasks.push_back(task);
	}

	// the expired tasks may already have been removed by the user
	if (tasks.empty())
	{
		acl_assert(!tasks_.empty());
		aio_timer_task* first = tasks_.front();
		acl_int64 delay = first->when - present_;
		return delay < 0 ? 0 : delay;
	}

	// Put the expired tasks back into the timer's task list and start
	// triggering them.  The trigger must be marked busy (locked) first,
	// to keep the subclass from destroying this object inside the callback.
	set_locked();

	// Reset the destroy-on-unlock flag: while the timer is locked, other
	// objects must not destroy it directly; if the flag is set to true in
	// the meantime, the object should destroy itself once it is unlocked.
	destroy_on_unlock_ = false;

	for (it = tasks.begin(); it != tasks.end(); ++it)
	{
		set_task(*it);
		timer_callback((*it)->id);
	}

	tasks.clear();

	// allow the subclass to invoke destruction in later operations
	unset_locked();

	// the subclass may have removed all timer tasks inside timer_callback
	if (tasks_.empty())
		return TIMER_EMPTY;

	aio_timer_task* first = tasks_.front();
	acl_int64 delay = first->when - present_;

	// if an external caller asked to release this object while it was
	// locked, release it here
	if (destroy_on_unlock_)
	{
		destroy();
		return -1;
	}

	return delay < 0 ? 0 : delay;
}