/*
 * Test if a batch launch request should be deferred.
 *
 * Requests of any type other than REQUEST_BATCH_JOB_LAUNCH are never
 * deferred. For batch launches the job must still be running or suspended,
 * and its node(s) must be ready, unless the wait has already lasted at
 * least slurm_get_resume_timeout() seconds.
 *
 * IN/OUT queued_req_ptr - queued request to test; first_attempt and
 *	last_attempt are updated to track how long the request has waited
 * RET -1: abort the request, pending job cancelled (or its node is missing)
 *      0: execute the request now
 *      1: defer the request
 */
static int _batch_launch_defer(queued_request_t *queued_req_ptr)
{
	agent_arg_t *agent_arg_ptr;
	batch_job_launch_msg_t *launch_msg_ptr;
	time_t now = time(NULL);
	struct job_record *job_ptr;
	int delay_time, nodes_ready = 0;
	/* Initialized because the return code of job_node_ready() is
	 * ignored below; an unset value must read as "not ready" */
	int tmp = 0;

	agent_arg_ptr = queued_req_ptr->agent_arg_ptr;
	if (agent_arg_ptr->msg_type != REQUEST_BATCH_JOB_LAUNCH)
		return 0;

	if (difftime(now, queued_req_ptr->last_attempt) < 10) {
		/* Reduce overhead by only testing once every 10 secs */
		return 1;
	}

	launch_msg_ptr = (batch_job_launch_msg_t *)agent_arg_ptr->msg_args;
	job_ptr = find_job_record(launch_msg_ptr->job_id);
	if ((job_ptr == NULL) ||
	    (!IS_JOB_RUNNING(job_ptr) && !IS_JOB_SUSPENDED(job_ptr))) {
		info("agent(batch_launch): removed pending request for "
		     "cancelled job %u", launch_msg_ptr->job_id);
		return -1;	/* job cancelled while waiting */
	}

	if (job_ptr->wait_all_nodes) {
		(void) job_node_ready(launch_msg_ptr->job_id, &tmp);
		if (tmp == (READY_JOB_STATE | READY_NODE_STATE)) {
			nodes_ready = 1;
			if (launch_msg_ptr->alias_list &&
			    !strcmp(launch_msg_ptr->alias_list, "TBD")) {
				/* Update launch RPC with correct node
				 * aliases. job_ptr was validated above, so
				 * no second lookup is needed. */
				xfree(launch_msg_ptr->alias_list);
				launch_msg_ptr->alias_list =
					xstrdup(job_ptr->alias_list);
			}
		}
	} else {
#ifdef HAVE_FRONT_END
		nodes_ready = 1;
#else
		struct node_record *node_ptr;
		char *hostname;

		hostname = hostlist_deranged_string_xmalloc(
					agent_arg_ptr->hostlist);
		node_ptr = find_node_record(hostname);
		if (node_ptr == NULL) {
			error("agent(batch_launch) removed pending request for "
			      "job %u, missing node %s",
			      launch_msg_ptr->job_id, hostname);
			xfree(hostname);
			return -1;	/* invalid request?? */
		}
		xfree(hostname);
		if (!IS_NODE_POWER_SAVE(node_ptr) &&
		    !IS_NODE_NO_RESPOND(node_ptr)) {
			nodes_ready = 1;
		}
#endif
	}

	if (!nodes_ready) {
		if (queued_req_ptr->last_attempt == 0) {
			/* First time this request had to wait */
			queued_req_ptr->first_attempt = now;
			queued_req_ptr->last_attempt = now;
			return 1;
		}
		if (difftime(now, queued_req_ptr->first_attempt) <
		    slurm_get_resume_timeout()) {
			queued_req_ptr->last_attempt = now;
			return 1;
		}
		error("agent waited too long for nodes to respond, "
		      "sending batch request anyway...");
	}

	/* Launch now (nodes ready or wait timed out): extend the job's
	 * end_time by the time elapsed since its start_time */
	delay_time = difftime(now, job_ptr->start_time);
	if (delay_time && (job_ptr->time_limit != INFINITE) &&
	    (!wiki2_sched)) {
		info("Job %u launch delayed by %d secs, "
		     "updating end_time", launch_msg_ptr->job_id, delay_time);
		job_ptr->end_time += delay_time;
	}
	queued_req_ptr->last_attempt = (time_t) 0;
	return 0;
}
/* Perform any power change work to nodes */ static void _do_power_work(time_t now) { static time_t last_log = 0, last_work_scan = 0; int i, wake_cnt = 0, sleep_cnt = 0, susp_total = 0; time_t delta_t; uint32_t susp_state; bitstr_t *wake_node_bitmap = NULL, *sleep_node_bitmap = NULL; struct node_record *node_ptr; bool run_suspend = false; /* Set limit on counts of nodes to have state changed */ delta_t = now - last_work_scan; if (delta_t >= 60) { suspend_cnt_f = 0.0; resume_cnt_f = 0.0; } else { float rate = (60 - delta_t) / 60.0; suspend_cnt_f *= rate; resume_cnt_f *= rate; } suspend_cnt = (suspend_cnt_f + 0.5); resume_cnt = (resume_cnt_f + 0.5); if (now > (last_suspend + suspend_timeout)) { /* ready to start another round of node suspends */ run_suspend = true; if (last_suspend) { bit_nclear(suspend_node_bitmap, 0, (node_record_count - 1)); last_suspend = (time_t) 0; } } last_work_scan = now; /* Build bitmaps identifying each node which should change state */ for (i=0, node_ptr=node_record_table_ptr; i<node_record_count; i++, node_ptr++) { susp_state = IS_NODE_POWER_SAVE(node_ptr); if (susp_state) susp_total++; /* Resume nodes as appropriate */ if (susp_state && ((resume_rate == 0) || (resume_cnt < resume_rate)) && (bit_test(suspend_node_bitmap, i) == 0) && (IS_NODE_ALLOCATED(node_ptr) || (node_ptr->last_idle > (now - idle_time)))) { if (wake_node_bitmap == NULL) { wake_node_bitmap = bit_alloc(node_record_count); } wake_cnt++; resume_cnt++; resume_cnt_f++; node_ptr->node_state &= (~NODE_STATE_POWER_SAVE); node_ptr->node_state |= NODE_STATE_POWER_UP; node_ptr->node_state |= NODE_STATE_NO_RESPOND; bit_clear(power_node_bitmap, i); bit_clear(avail_node_bitmap, i); node_ptr->last_response = now + resume_timeout; bit_set(wake_node_bitmap, i); } /* Suspend nodes as appropriate */ if (run_suspend && (susp_state == 0) && ((suspend_rate == 0) || (suspend_cnt < suspend_rate)) && IS_NODE_IDLE(node_ptr) && (node_ptr->sus_job_cnt == 0) && (!IS_NODE_COMPLETING(node_ptr)) && 
(!IS_NODE_POWER_UP(node_ptr)) && (node_ptr->last_idle < (now - idle_time)) && ((exc_node_bitmap == NULL) || (bit_test(exc_node_bitmap, i) == 0))) { if (sleep_node_bitmap == NULL) { sleep_node_bitmap = bit_alloc(node_record_count); } sleep_cnt++; suspend_cnt++; suspend_cnt_f++; node_ptr->node_state |= NODE_STATE_POWER_SAVE; bit_set(power_node_bitmap, i); bit_set(sleep_node_bitmap, i); bit_set(suspend_node_bitmap, i); last_suspend = now; } } if (((now - last_log) > 600) && (susp_total > 0)) { info("Power save mode: %d nodes", susp_total); last_log = now; } if (sleep_node_bitmap) { char *nodes; nodes = bitmap2node_name(sleep_node_bitmap); if (nodes) _do_suspend(nodes); else error("power_save: bitmap2nodename"); xfree(nodes); FREE_NULL_BITMAP(sleep_node_bitmap); /* last_node_update could be changed already by another thread! last_node_update = now; */ } if (wake_node_bitmap) { char *nodes; nodes = bitmap2node_name(wake_node_bitmap); if (nodes) _do_resume(nodes); else error("power_save: bitmap2nodename"); xfree(nodes); FREE_NULL_BITMAP(wake_node_bitmap); /* last_node_update could be changed already by another thread! last_node_update = now; */ } }