/*
 * Wait for the Blue Gene block assigned to an allocation to become usable.
 *
 * Polls slurm_job_node_ready() for alloc->job_id, pausing a little longer
 * before each successive poll, until the job and its nodes are reported
 * ready, the job is found to be gone, a fatal error is returned, the user
 * asks to abandon the job (destroy_job), or the time budget is used up.
 *
 * alloc IN - allocation response; job_id, node_cnt and select_jobinfo are read
 * RET 1 if job and nodes are ready for job to begin, 0 otherwise
 */
static int _wait_bluegene_block_ready(resource_allocation_response_msg_t *alloc)
{
	int is_ready = 0, i, rc;
	char *block_id = NULL;
	double cur_delay = 0;	/* seconds charged against max_delay so far */
	double cur_sleep = 0;	/* seconds to pause before the current poll */
	/* Time budget in seconds: a fixed part plus an increment per node. */
	int max_delay = BG_FREE_PREVIOUS_BLOCK + BG_MIN_BLOCK_BOOT +
			(BG_INCR_BLOCK_BOOT * alloc->node_cnt);

	/* block_id is only used in log messages and is released below */
	select_g_select_jobinfo_get(alloc->select_jobinfo,
				    SELECT_JOBDATA_BLOCK_ID, &block_id);

	for (i = 0; cur_delay < max_delay; i++) {
		/* The first pass (i == 0) polls immediately, without sleeping. */
		cur_sleep = POLL_SLEEP * i;
		if (i == 1) {
			debug("Waiting for block %s to become ready for job",
			      block_id);
		}
		if (i) {
			/*
			 * NOTE(review): the interval grows with every pass and
			 * POSIX allows usleep() to reject intervals of one
			 * second or more - confirm this is acceptable on the
			 * target platforms.
			 */
			usleep(1000000 * cur_sleep);
			rc = _blocks_dealloc();
			/*
			 * Only a 0 or -1 result charges this pause against the
			 * budget; presumably any other value means blocks are
			 * still being freed - confirm against _blocks_dealloc().
			 */
			if ((rc == 0) || (rc == -1))
				cur_delay += cur_sleep;
			debug2("still waiting");
		}

		rc = slurm_job_node_ready(alloc->job_id);

		if (rc == READY_JOB_FATAL)
			break;				/* fatal error */
		if ((rc == READY_JOB_ERROR) || (rc == EAGAIN)) {
			/*
			 * Honor a cancel request here too; otherwise it would
			 * be ignored until max_delay expires whenever the
			 * controller keeps failing to answer.
			 */
			if (destroy_job)
				break;
			continue;			/* retry */
		}
		if ((rc & READY_JOB_STATE) == 0)	/* job killed */
			break;
		if (rc & READY_NODE_STATE) {		/* job and node ready */
			is_ready = 1;
			break;
		}
		if (destroy_job)
			break;
	}

	if (is_ready)
		debug("Block %s is ready for job", block_id);
	else if (!destroy_job)
		error("Block %s still not ready", block_id);
	/* else: destroy_job set (e.g. slurmctld not responding); stay quiet */

	xfree(block_id);

	return is_ready;
}
/*
 * Block until the Blue Gene block backing an allocation is ready for the job.
 *
 * The controller is queried through slurm_job_node_ready() once right away
 * and then again after every POLL_SLEEP pause, until it reports the job and
 * node ready, the job is gone, a fatal error occurs, or the delay budget
 * (derived from alloc->node_cnt) has been consumed.
 *
 * alloc IN - allocation response; job_id, node_cnt and select_jobinfo are read
 * RET SLURM_SUCCESS once job and node are ready, SLURM_ERROR otherwise
 */
static int _wait_bluegene_block_ready(resource_allocation_response_msg_t *alloc)
{
	char *block_id = NULL;
	int status = SLURM_ERROR;
	int rc = 0;
	int waited = 0;		/* delay charged against max_delay so far */
	int attempt;
	int max_delay = BG_FREE_PREVIOUS_BLOCK + BG_MIN_BLOCK_BOOT +
			(BG_INCR_BLOCK_BOOT * alloc->node_cnt);

	/* The block id is fetched for the log messages only. */
	select_g_select_jobinfo_get(alloc->select_jobinfo,
				    SELECT_JOBDATA_BLOCK_ID, &block_id);

	for (attempt = 0; waited < max_delay; attempt++) {
		/* Every pass but the first starts with a pause. */
		if (attempt != 0) {
			if (attempt == 1) {
				info("Waiting for block %s to become ready for "
				     "job", block_id);
			} else {
				debug("still waiting");
			}

			sleep(POLL_SLEEP);

			/* Only a 0 or -1 result counts the pause as waited. */
			rc = _blocks_dealloc();
			if ((rc == 0) || (rc == -1))
				waited += POLL_SLEEP;
		}

		rc = slurm_job_node_ready(alloc->job_id);

		/* fatal error: give up at once */
		if (rc == READY_JOB_FATAL)
			break;

		/* Anything other than a transient error is a state bitmask. */
		if ((rc != READY_JOB_ERROR) && (rc != EAGAIN)) {
			/* job killed */
			if (!(rc & READY_JOB_STATE))
				break;

			/* job and node ready */
			if (rc & READY_NODE_STATE) {
				status = SLURM_SUCCESS;
				break;
			}
		}
		/* otherwise: retry on the next pass */
	}

	if (status == SLURM_SUCCESS) {
		info("Block %s is ready for job %u", block_id, alloc->job_id);
	} else if (rc & READY_JOB_STATE) {
		info("Problem running job %u", alloc->job_id);
	} else {
		info("Job %u no longer running", alloc->job_id);
	}

	xfree(block_id);

	return status;
}