/* * Perform any work required to terminate a jobs on a block. * bg_block_id IN - block name * RET - SLURM_SUCCESS or an error code * * NOTE: The job is killed before the function returns. This can take * many seconds. Do not call from slurmctld or any other entity that * can not wait. */ int term_jobs_on_block(char *bg_block_id) { int rc = SLURM_SUCCESS; bg_action_t *bg_action_ptr; bg_action_ptr = xmalloc(sizeof(bg_action_t)); bg_action_ptr->op = TERM_OP; bg_action_ptr->bg_block_id = xstrdup(bg_block_id); _block_op(bg_action_ptr); return rc; }
/*
 * Update block user and reboot as needed.
 * bg_action_ptr IN - sync action describing the job; it is either
 *                    destroyed here (block already initialized) or
 *                    passed on to _block_op()
 * bg_record IN     - block the job is being synced onto
 *
 * NOTE: block_state_mutex needs to be locked before coming in.  It is
 * released only around the slurm_fail_job() call and re-acquired
 * before returning.
 */
static void _sync_agent(bg_action_t *bg_action_ptr, bg_record_t *bg_record)
{
	struct job_record *job_ptr = bg_action_ptr->job_ptr;

	/* end_time is presumably a time_t (declared elsewhere); cast it
	 * so the argument really is the long that %ld expects. */
	debug3("Queue sync of job %u in BG block %s ending at %ld",
	       job_ptr->job_id, bg_action_ptr->bg_block_id,
	       (long) job_ptr->end_time);

	last_bg_update = time(NULL);

	ba_sync_job_to_block(bg_record, job_ptr);

	set_select_jobinfo(job_ptr->select_jobinfo->data,
			   SELECT_JOBDATA_BLOCK_PTR,
			   bg_record);

	/* Account for the job's cpus and make sure the block is tracked
	 * in both the job_running and booted lists (no duplicates). */
	num_unused_cpus -= job_ptr->total_cpus;

	if (!block_ptr_exist_in_list(bg_lists->job_running, bg_record))
		list_push(bg_lists->job_running, bg_record);

	if (!block_ptr_exist_in_list(bg_lists->booted, bg_record))
		list_push(bg_lists->booted, bg_record);

	if (bg_record->state == BG_BLOCK_INITED) {
		int sync_user_rc;

		/* Block is already up: the job is no longer configuring */
		job_ptr->job_state &= (~JOB_CONFIGURING);
		last_job_update = time(NULL);

		/* Just in case reset the boot flags */
		bg_record->boot_state = 0;
		bg_record->boot_count = 0;

		sync_user_rc = bridge_block_sync_users(bg_record);
		if (sync_user_rc == SLURM_ERROR) {
			/* Drop the mutex while failing the job, then take
			 * it back so the caller's locking is unchanged. */
			slurm_mutex_unlock(&block_state_mutex);
			(void) slurm_fail_job(job_ptr->job_id, JOB_BOOT_FAIL);
			slurm_mutex_lock(&block_state_mutex);
		}
		_destroy_bg_action(bg_action_ptr);
	} else {
		if (bg_record->state != BG_BLOCK_BOOTING) {
			error("Block %s isn't ready and isn't "
			      "being configured! Starting job again.",
			      bg_action_ptr->bg_block_id);
		} else {
			debug("Block %s is booting, job ok",
			      bg_action_ptr->bg_block_id);
		}
		/* the function _block_op calls will destroy the
		   bg_action_ptr */
		_block_op(bg_action_ptr);
	}
}
/* * Perform any work required to terminate a job * job_ptr IN - pointer to the job being terminated * RET - SLURM_SUCCESS or an error code * * NOTE: This happens in parallel with srun and slurmd terminating * the job. Insure that this function, mpirun and the epilog can * all deal with termination race conditions. */ int term_job(struct job_record *job_ptr) { int rc = SLURM_SUCCESS; bg_action_t *bg_action_ptr = NULL; bg_action_ptr = xmalloc(sizeof(bg_action_t)); bg_action_ptr->op = TERM_OP; bg_action_ptr->job_ptr = job_ptr; get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_BLOCK_ID, &(bg_action_ptr->bg_block_id)); info("Queue termination of job %u in BG block %s", job_ptr->job_id, bg_action_ptr->bg_block_id); _block_op(bg_action_ptr); return rc; }
/* * Perform any setup required to initiate a job * job_ptr IN - pointer to the job being initiated * RET - SLURM_SUCCESS or an error code * * NOTE: This happens in parallel with srun and slurmd spawning * the job. A prolog script is expected to defer initiation of * the job script until the BG block is available for use. */ extern int start_job(struct job_record *job_ptr) { int rc = SLURM_SUCCESS; bg_record_t *bg_record = NULL; bg_action_t *bg_action_ptr = NULL; select_jobinfo_t *jobinfo = job_ptr->select_jobinfo->data; slurm_mutex_lock(&block_state_mutex); bg_record = jobinfo->bg_record; if (!bg_record || !block_ptr_exist_in_list(bg_lists->main, bg_record)) { slurm_mutex_unlock(&block_state_mutex); error("bg_record %s doesn't exist, requested for job (%d)", jobinfo->bg_block_id, job_ptr->job_id); return SLURM_ERROR; } if ((jobinfo->conn_type[0] != SELECT_NAV) && (jobinfo->conn_type[0] < SELECT_SMALL)) { int dim; for (dim=0; dim<SYSTEM_DIMENSIONS; dim++) jobinfo->conn_type[dim] = bg_record->conn_type[dim]; } /* If it isn't 0 then it was setup previous (sub-block) */ if (jobinfo->geometry[SYSTEM_DIMENSIONS] == 0) memcpy(jobinfo->geometry, bg_record->geo, sizeof(bg_record->geo)); if (bg_record->job_list) { /* Mark the ba_mp cnodes as used now. */ ba_mp_t *ba_mp = list_peek(bg_record->ba_mp_list); xassert(ba_mp); xassert(ba_mp->cnode_bitmap); bit_or(ba_mp->cnode_bitmap, jobinfo->units_avail); if (!find_job_in_bg_record(bg_record, job_ptr->job_id)) list_append(bg_record->job_list, job_ptr); } else { bg_record->job_running = job_ptr->job_id; bg_record->job_ptr = job_ptr; } job_ptr->job_state |= JOB_CONFIGURING; bg_action_ptr = xmalloc(sizeof(bg_action_t)); bg_action_ptr->op = START_OP; bg_action_ptr->job_ptr = job_ptr; /* FIXME: The below get_select_jobinfo calls could be avoided * by just using the jobinfo as we do above. 
*/ get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_BLOCK_ID, &(bg_action_ptr->bg_block_id)); get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_REBOOT, &(bg_action_ptr->reboot)); get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_CONN_TYPE, &(bg_action_ptr->conn_type)); get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_MLOADER_IMAGE, &(bg_action_ptr->mloaderimage)); #ifdef HAVE_BG_L_P # ifdef HAVE_BGL get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_BLRTS_IMAGE, &(bg_action_ptr->blrtsimage)); if (!bg_action_ptr->blrtsimage) { bg_action_ptr->blrtsimage = xstrdup(bg_conf->default_blrtsimage); set_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_BLRTS_IMAGE, bg_action_ptr->blrtsimage); } # elif defined HAVE_BGP get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_CONN_TYPE, &(bg_action_ptr->conn_type)); # endif get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_LINUX_IMAGE, &(bg_action_ptr->linuximage)); if (!bg_action_ptr->linuximage) { bg_action_ptr->linuximage = xstrdup(bg_conf->default_linuximage); set_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_LINUX_IMAGE, bg_action_ptr->linuximage); } get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_RAMDISK_IMAGE, &(bg_action_ptr->ramdiskimage)); if (!bg_action_ptr->ramdiskimage) { bg_action_ptr->ramdiskimage = xstrdup(bg_conf->default_ramdiskimage); set_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_RAMDISK_IMAGE, bg_action_ptr->ramdiskimage); } #endif if (!bg_action_ptr->mloaderimage) { bg_action_ptr->mloaderimage = xstrdup(bg_conf->default_mloaderimage); set_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_MLOADER_IMAGE, bg_action_ptr->mloaderimage); } num_unused_cpus -= job_ptr->total_cpus; if (!block_ptr_exist_in_list(bg_lists->job_running, bg_record)) list_push(bg_lists->job_running, bg_record); if (!block_ptr_exist_in_list(bg_lists->booted, 
bg_record)) list_push(bg_lists->booted, bg_record); /* Just in case something happens to free this block before we start the job we will make it so this job doesn't get blown away. */ bg_record->modifying = 1; last_bg_update = time(NULL); slurm_mutex_unlock(&block_state_mutex); info("Queue start of job %u in BG block %s", job_ptr->job_id, bg_action_ptr->bg_block_id); _block_op(bg_action_ptr); return rc; }
/* * Perform any setup required to initiate a job * job_ptr IN - pointer to the job being initiated * RET - SLURM_SUCCESS or an error code * * NOTE: This happens in parallel with srun and slurmd spawning * the job. A prolog script is expected to defer initiation of * the job script until the BG block is available for use. */ extern int start_job(struct job_record *job_ptr) { int rc = SLURM_SUCCESS; bg_record_t *bg_record = NULL; bg_action_t *bg_action_ptr = NULL; bg_action_ptr = xmalloc(sizeof(bg_action_t)); bg_action_ptr->op = START_OP; bg_action_ptr->job_ptr = job_ptr; get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_BLOCK_ID, &(bg_action_ptr->bg_block_id)); get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_REBOOT, &(bg_action_ptr->reboot)); get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_CONN_TYPE, &(bg_action_ptr->conn_type)); get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_MLOADER_IMAGE, &(bg_action_ptr->mloaderimage)); #ifdef HAVE_BG_L_P # ifdef HAVE_BGL get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_BLRTS_IMAGE, &(bg_action_ptr->blrtsimage)); if (!bg_action_ptr->blrtsimage) { bg_action_ptr->blrtsimage = xstrdup(bg_conf->default_blrtsimage); set_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_BLRTS_IMAGE, bg_action_ptr->blrtsimage); } # elif defined HAVE_BGP get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_CONN_TYPE, &(bg_action_ptr->conn_type)); # endif get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_LINUX_IMAGE, &(bg_action_ptr->linuximage)); if (!bg_action_ptr->linuximage) { bg_action_ptr->linuximage = xstrdup(bg_conf->default_linuximage); set_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_LINUX_IMAGE, bg_action_ptr->linuximage); } get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_RAMDISK_IMAGE, &(bg_action_ptr->ramdiskimage)); if (!bg_action_ptr->ramdiskimage) { bg_action_ptr->ramdiskimage = 
xstrdup(bg_conf->default_ramdiskimage); set_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_RAMDISK_IMAGE, bg_action_ptr->ramdiskimage); } #endif if (!bg_action_ptr->mloaderimage) { bg_action_ptr->mloaderimage = xstrdup(bg_conf->default_mloaderimage); set_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_MLOADER_IMAGE, bg_action_ptr->mloaderimage); } slurm_mutex_lock(&block_state_mutex); bg_record = find_bg_record_in_list(bg_lists->main, bg_action_ptr->bg_block_id); if (!bg_record) { slurm_mutex_unlock(&block_state_mutex); error("bg_record %s doesn't exist, requested for job (%d)", bg_action_ptr->bg_block_id, job_ptr->job_id); _destroy_bg_action(bg_action_ptr); return SLURM_ERROR; } last_bg_update = time(NULL); if (bg_record->job_list) { if (!find_job_in_bg_record(bg_record, job_ptr->job_id)) list_append(bg_record->job_list, job_ptr); } else { bg_record->job_running = bg_action_ptr->job_ptr->job_id; bg_record->job_ptr = bg_action_ptr->job_ptr; } num_unused_cpus -= job_ptr->total_cpus; if (!block_ptr_exist_in_list(bg_lists->job_running, bg_record)) list_push(bg_lists->job_running, bg_record); if (!block_ptr_exist_in_list(bg_lists->booted, bg_record)) list_push(bg_lists->booted, bg_record); /* Just incase something happens to free this block before we start the job we will make it so this job doesn't get blown away. */ bg_record->modifying = 1; slurm_mutex_unlock(&block_state_mutex); info("Queue start of job %u in BG block %s", job_ptr->job_id, bg_action_ptr->bg_block_id); _block_op(bg_action_ptr); return rc; }
/*
 * Synchronize BG block state to that of currently active jobs.
 * This can recover from slurmctld crashes when block usership
 * changes were queued
 * job_list IN - list of struct job_record pointers to examine
 * RET - SLURM_SUCCESS, or SLURM_ERROR when job_list is NULL or no
 *       allocated-block list could be obtained
 *
 * NOTE: The body runs only once per process; later calls return
 * SLURM_SUCCESS immediately.  The one-shot flag is set before job_list
 * is validated, so a first call with a NULL job_list also uses it up.
 */
extern int sync_jobs(List job_list)
{
	ListIterator job_iterator;
	struct job_record *job_ptr = NULL;
	bg_action_t *bg_action_ptr = NULL;
	List block_list = NULL;
	static bool run_already = false;

	/* Execute only on initial startup. We don't support bgblock
	 * creation on demand today, so there is no need to re-sync data. */
	if (run_already)
		return SLURM_SUCCESS;
	run_already = true;

	if (!job_list) {
		error("sync_jobs: no job_list");
		return SLURM_ERROR;
	}

	/* Ensure that all running jobs own the specified block */
	block_list = _get_all_allocated_blocks();
	job_iterator = list_iterator_create(job_list);
	while ((job_ptr = list_next(job_iterator))) {
		bool good_block = true;

		if (!IS_JOB_RUNNING(job_ptr))
			continue;

		bg_action_ptr = xmalloc(sizeof(bg_action_t));
		bg_action_ptr->op = SYNC_OP;
		bg_action_ptr->job_ptr = job_ptr;

		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_BLOCK_ID,
				   &(bg_action_ptr->bg_block_id));
#ifdef HAVE_BG_L_P
# ifdef HAVE_BGL
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_BLRTS_IMAGE,
				   &(bg_action_ptr->blrtsimage));
# else
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_CONN_TYPE,
				   &(bg_action_ptr->conn_type));
# endif
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_LINUX_IMAGE,
				   &(bg_action_ptr->linuximage));
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_RAMDISK_IMAGE,
				   &(bg_action_ptr->ramdiskimage));
#endif
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_MLOADER_IMAGE,
				   &(bg_action_ptr->mloaderimage));

		/* A running job is only synced when it names a block, has
		 * a node list, and _excise_block() succeeds for that
		 * block (presumably removing it from block_list - verify
		 * against _excise_block). */
		if (bg_action_ptr->bg_block_id == NULL) {
			error("Running job %u has bgblock==NULL",
			      job_ptr->job_id);
			good_block = false;
		} else if (job_ptr->nodes == NULL) {
			error("Running job %u has nodes==NULL",
			      job_ptr->job_id);
			good_block = false;
		} else if (_excise_block(block_list,
					 bg_action_ptr->bg_block_id,
					 job_ptr->nodes)
			   != SLURM_SUCCESS) {
			error("Kill job %u belongs to defunct "
			      "bgblock %s",
			      job_ptr->job_id,
			      bg_action_ptr->bg_block_id);
			good_block = false;
		}

		if (!good_block) {
			/* Fail the job now and drop the unused action */
			job_ptr->job_state = JOB_FAILED | JOB_COMPLETING;
			job_ptr->end_time = time(NULL);
			last_job_update = time(NULL);
			_destroy_bg_action(bg_action_ptr);
			continue;
		}

		/* end_time is assigned from time() above, so it is
		 * presumably a time_t; cast it so the argument really
		 * is the long that %ld expects. */
		debug3("Queue sync of job %u in BG block %s "
		       "ending at %ld",
		       job_ptr->job_id, bg_action_ptr->bg_block_id,
		       (long) job_ptr->end_time);
		_block_op(bg_action_ptr);
	}
	list_iterator_destroy(job_iterator);

	/* Ensure that all other blocks are free of users */
	if (block_list) {
		bridge_reset_block_list(block_list);
		list_destroy(block_list);
	} else {
		/* this should never happen,
		 * vestigial logic */
		error("sync_jobs: no block_list");
		return SLURM_ERROR;
	}
	return SLURM_SUCCESS;
}