/*
 * Re-attach a job to its block during a sync and, depending on the
 * block's state, either sync the block users right away or hand the
 * action off to _block_op().
 *
 * bg_action_ptr IN - queued action describing the job; ownership passes
 *                    to this function (it is destroyed here or by the
 *                    call to _block_op())
 * bg_record     IN - block the job is running on
 *
 * NOTE: block_state_mutex must be locked before coming in.  It is
 * released only around the slurm_fail_job() call and is re-acquired
 * before returning.
 */
static void _sync_agent(bg_action_t *bg_action_ptr, bg_record_t *bg_record)
{
	struct job_record *job = bg_action_ptr->job_ptr;

	debug3("Queue sync of job %u in BG block %s ending at %ld",
	       job->job_id, bg_action_ptr->bg_block_id, job->end_time);

	last_bg_update = time(NULL);

	/* Tie the job and the block back together. */
	ba_sync_job_to_block(bg_record, job);
	set_select_jobinfo(job->select_jobinfo->data,
			   SELECT_JOBDATA_BLOCK_PTR, bg_record);

	num_unused_cpus -= job->total_cpus;

	/* Track the block as both running a job and booted, once each. */
	if (!block_ptr_exist_in_list(bg_lists->job_running, bg_record))
		list_push(bg_lists->job_running, bg_record);
	if (!block_ptr_exist_in_list(bg_lists->booted, bg_record))
		list_push(bg_lists->booted, bg_record);

	if (bg_record->state != BG_BLOCK_INITED) {
		if (bg_record->state == BG_BLOCK_BOOTING) {
			debug("Block %s is booting, job ok",
			      bg_action_ptr->bg_block_id);
		} else {
			error("Block %s isn't ready and isn't "
			      "being configured! Starting job again.",
			      bg_action_ptr->bg_block_id);
		}

		/* the function _block_op calls will destroy the
		 * bg_action_ptr */
		_block_op(bg_action_ptr);
		return;
	}

	/* The block is already initialized: the job is no longer
	 * configuring. */
	job->job_state &= (~JOB_CONFIGURING);
	last_job_update = time(NULL);

	/* Just in case reset the boot flags */
	bg_record->boot_state = 0;
	bg_record->boot_count = 0;

	if (bridge_block_sync_users(bg_record) == SLURM_ERROR) {
		/* Drop the block lock while the job is being failed. */
		slurm_mutex_unlock(&block_state_mutex);
		(void) slurm_fail_job(job->job_id, JOB_BOOT_FAIL);
		slurm_mutex_lock(&block_state_mutex);
	}

	_destroy_bg_action(bg_action_ptr);
}
static void *_block_agent(void *args) { bg_action_t *bg_action_ptr = (bg_action_t *)args; if (bg_action_ptr->op == START_OP) _start_agent(bg_action_ptr); else if (bg_action_ptr->op == TERM_OP) bridge_block_post_job(bg_action_ptr->bg_block_id, bg_action_ptr->job_ptr); _destroy_bg_action(bg_action_ptr); return NULL; }
/*
 * Synchronize BG block state to that of currently active jobs.
 * This can recover from slurmctld crashes when block usership
 * changes were queued.
 *
 * job_list IN - list of job records to examine
 * RET - SLURM_SUCCESS, or SLURM_ERROR when job_list is NULL or the
 *       list of job-less blocks is missing
 *
 * NOTE: Only the first call does any work; every later call returns
 * SLURM_SUCCESS immediately.  The "already run" flag is set before
 * job_list is validated, so a first call with a NULL list also
 * disables later calls.
 */
extern int sync_jobs(List job_list)
{
	ListIterator itr;
	struct job_record *job_ptr = NULL;
	List block_list = NULL, kill_list = NULL;
	static bool run_already = false;
	bg_record_t *bg_record = NULL;

	/* Execute only on initial startup. We don't support bgblock
	 * creation on demand today, so there is no need to re-sync data. */
	if (run_already)
		return SLURM_SUCCESS;
	run_already = true;

	if (!job_list) {
		error("sync_jobs: no job_list");
		return SLURM_ERROR;
	}

	slurm_mutex_lock(&block_state_mutex);

	/* Insure that all running jobs own the specified block */
	itr = list_iterator_create(job_list);
	while ((job_ptr = list_next(itr))) {
		bg_action_t *bg_action_ptr = NULL;

		if (!IS_JOB_RUNNING(job_ptr) && !IS_JOB_COMPLETING(job_ptr))
			continue;

		/* Start every job with no block selected.  Without this a
		 * job rejected by the first two checks below would keep
		 * the record found for the previous job and be synced
		 * against the wrong block instead of being killed. */
		bg_record = NULL;

		bg_action_ptr = xmalloc(sizeof(bg_action_t));
		if (IS_JOB_COMPLETING(job_ptr))
			bg_action_ptr->op = TERM_OP;
		else
			bg_action_ptr->op = START_OP;
		bg_action_ptr->job_ptr = job_ptr;

		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_BLOCK_ID,
				   &(bg_action_ptr->bg_block_id));
#ifdef HAVE_BG_L_P
# ifdef HAVE_BGL
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_BLRTS_IMAGE,
				   &(bg_action_ptr->blrtsimage));
# else
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_CONN_TYPE,
				   &(bg_action_ptr->conn_type));
# endif
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_LINUX_IMAGE,
				   &(bg_action_ptr->linuximage));
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_RAMDISK_IMAGE,
				   &(bg_action_ptr->ramdiskimage));
#endif
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_MLOADER_IMAGE,
				   &(bg_action_ptr->mloaderimage));

		/* The block is only looked up when the job carries both a
		 * block id and a node list. */
		if (bg_action_ptr->bg_block_id == NULL) {
			error("Running job %u has bgblock==NULL",
			      job_ptr->job_id);
		} else if (job_ptr->nodes == NULL) {
			error("Running job %u has nodes==NULL",
			      job_ptr->job_id);
		} else if (!(bg_record = find_bg_record_in_list(
				     bg_lists->main,
				     bg_action_ptr->bg_block_id))) {
			error("Kill job %u belongs to defunct "
			      "bgblock %s",
			      job_ptr->job_id,
			      bg_action_ptr->bg_block_id);
		}

		if (!bg_record) {
			/* Can't fail it just now, we have locks in place. */
			bg_status_add_job_kill_list(job_ptr, &kill_list);
			_destroy_bg_action(bg_action_ptr);
			continue;
		}

		/* _sync_agent will destroy the bg_action_ptr */
		_sync_agent(bg_action_ptr, bg_record);
	}
	list_iterator_destroy(itr);

	/* Collect a minimal copy (id and mp_str only) of every block that
	 * has no job, so the users can be cleared after the lock is
	 * released. */
	block_list = list_create(destroy_bg_record);
	itr = list_iterator_create(bg_lists->main);
	while ((bg_record = list_next(itr))) {
		bg_record_t *rm_record;

		if (bg_record->job_ptr
		    || (bg_record->job_list
			&& list_count(bg_record->job_list)))
			continue;

		rm_record = xmalloc(sizeof(bg_record_t));
		rm_record->magic = BLOCK_MAGIC;
		rm_record->bg_block_id = xstrdup(bg_record->bg_block_id);
		rm_record->mp_str = xstrdup(bg_record->mp_str);
		list_append(block_list, rm_record);
	}
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&block_state_mutex);

	if (kill_list) {
		/* slurmctld is already locked up, so handle this right after
		 * the unlock of block_state_mutex. */
		bg_status_process_kill_job_list(kill_list, JOB_BOOT_FAIL, 1);
		FREE_NULL_LIST(kill_list);
	}

	/* Insure that all other blocks are free of users */
	if (block_list) {
		itr = list_iterator_create(block_list);
		while ((bg_record = list_next(itr))) {
			info("Queue clearing of users of BG block %s",
			     bg_record->bg_block_id);
			term_jobs_on_block(bg_record->bg_block_id);
		}
		list_iterator_destroy(itr);
		FREE_NULL_LIST(block_list);
	} else {
		/* this should never happen,
		 * vestigial logic */
		error("sync_jobs: no block_list");
		return SLURM_ERROR;
	}

	return SLURM_SUCCESS;
}
/* * Perform any setup required to initiate a job * job_ptr IN - pointer to the job being initiated * RET - SLURM_SUCCESS or an error code * * NOTE: This happens in parallel with srun and slurmd spawning * the job. A prolog script is expected to defer initiation of * the job script until the BG block is available for use. */ extern int start_job(struct job_record *job_ptr) { int rc = SLURM_SUCCESS; bg_record_t *bg_record = NULL; bg_action_t *bg_action_ptr = NULL; bg_action_ptr = xmalloc(sizeof(bg_action_t)); bg_action_ptr->op = START_OP; bg_action_ptr->job_ptr = job_ptr; get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_BLOCK_ID, &(bg_action_ptr->bg_block_id)); get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_REBOOT, &(bg_action_ptr->reboot)); get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_CONN_TYPE, &(bg_action_ptr->conn_type)); get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_MLOADER_IMAGE, &(bg_action_ptr->mloaderimage)); #ifdef HAVE_BG_L_P # ifdef HAVE_BGL get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_BLRTS_IMAGE, &(bg_action_ptr->blrtsimage)); if (!bg_action_ptr->blrtsimage) { bg_action_ptr->blrtsimage = xstrdup(bg_conf->default_blrtsimage); set_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_BLRTS_IMAGE, bg_action_ptr->blrtsimage); } # elif defined HAVE_BGP get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_CONN_TYPE, &(bg_action_ptr->conn_type)); # endif get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_LINUX_IMAGE, &(bg_action_ptr->linuximage)); if (!bg_action_ptr->linuximage) { bg_action_ptr->linuximage = xstrdup(bg_conf->default_linuximage); set_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_LINUX_IMAGE, bg_action_ptr->linuximage); } get_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_RAMDISK_IMAGE, &(bg_action_ptr->ramdiskimage)); if (!bg_action_ptr->ramdiskimage) { bg_action_ptr->ramdiskimage = 
xstrdup(bg_conf->default_ramdiskimage); set_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_RAMDISK_IMAGE, bg_action_ptr->ramdiskimage); } #endif if (!bg_action_ptr->mloaderimage) { bg_action_ptr->mloaderimage = xstrdup(bg_conf->default_mloaderimage); set_select_jobinfo(job_ptr->select_jobinfo->data, SELECT_JOBDATA_MLOADER_IMAGE, bg_action_ptr->mloaderimage); } slurm_mutex_lock(&block_state_mutex); bg_record = find_bg_record_in_list(bg_lists->main, bg_action_ptr->bg_block_id); if (!bg_record) { slurm_mutex_unlock(&block_state_mutex); error("bg_record %s doesn't exist, requested for job (%d)", bg_action_ptr->bg_block_id, job_ptr->job_id); _destroy_bg_action(bg_action_ptr); return SLURM_ERROR; } last_bg_update = time(NULL); if (bg_record->job_list) { if (!find_job_in_bg_record(bg_record, job_ptr->job_id)) list_append(bg_record->job_list, job_ptr); } else { bg_record->job_running = bg_action_ptr->job_ptr->job_id; bg_record->job_ptr = bg_action_ptr->job_ptr; } num_unused_cpus -= job_ptr->total_cpus; if (!block_ptr_exist_in_list(bg_lists->job_running, bg_record)) list_push(bg_lists->job_running, bg_record); if (!block_ptr_exist_in_list(bg_lists->booted, bg_record)) list_push(bg_lists->booted, bg_record); /* Just incase something happens to free this block before we start the job we will make it so this job doesn't get blown away. */ bg_record->modifying = 1; slurm_mutex_unlock(&block_state_mutex); info("Queue start of job %u in BG block %s", job_ptr->job_id, bg_action_ptr->bg_block_id); _block_op(bg_action_ptr); return rc; }
/*
 * Synchronize BG block state to that of currently active jobs.
 * This can recover from slurmctld crashes when block usership
 * changes were queued.
 *
 * job_list IN - list of job records to examine
 * RET - SLURM_SUCCESS, or SLURM_ERROR when job_list is NULL or no list
 *       of allocated blocks was obtained
 *
 * Only the first call does any work; later calls return SLURM_SUCCESS
 * at once.
 *
 * NOTE(review): another definition of sync_jobs() is visible earlier in
 * this source; both cannot live in one translation unit - confirm which
 * one is current.
 */
extern int sync_jobs(List job_list)
{
	static bool run_already = false;
	ListIterator job_itr;
	struct job_record *job_ptr = NULL;
	List allocated_blocks = NULL;

	/* Execute only on initial startup. We don't support bgblock
	 * creation on demand today, so there is no need to re-sync data. */
	if (run_already)
		return SLURM_SUCCESS;
	run_already = true;

	if (!job_list) {
		error("sync_jobs: no job_list");
		return SLURM_ERROR;
	}

	/* Insure that all running jobs own the specified block */
	allocated_blocks = _get_all_allocated_blocks();
	job_itr = list_iterator_create(job_list);
	while ((job_ptr = list_next(job_itr))) {
		bg_action_t *action;
		bool block_ok = false;

		if (!IS_JOB_RUNNING(job_ptr))
			continue;

		action = xmalloc(sizeof(bg_action_t));
		action->op = SYNC_OP;
		action->job_ptr = job_ptr;

		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_BLOCK_ID,
				   &(action->bg_block_id));
#ifdef HAVE_BG_L_P
# ifdef HAVE_BGL
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_BLRTS_IMAGE,
				   &(action->blrtsimage));
# else
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_CONN_TYPE,
				   &(action->conn_type));
# endif
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_LINUX_IMAGE,
				   &(action->linuximage));
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_RAMDISK_IMAGE,
				   &(action->ramdiskimage));
#endif
		get_select_jobinfo(job_ptr->select_jobinfo->data,
				   SELECT_JOBDATA_MLOADER_IMAGE,
				   &(action->mloaderimage));

		/* The job is usable only if it names a block, has nodes,
		 * and that block can be excised from the allocated list. */
		if (action->bg_block_id == NULL) {
			error("Running job %u has bgblock==NULL",
			      job_ptr->job_id);
		} else if (job_ptr->nodes == NULL) {
			error("Running job %u has nodes==NULL",
			      job_ptr->job_id);
		} else if (_excise_block(allocated_blocks,
					 action->bg_block_id,
					 job_ptr->nodes) != SLURM_SUCCESS) {
			error("Kill job %u belongs to defunct "
			      "bgblock %s",
			      job_ptr->job_id,
			      action->bg_block_id);
		} else {
			block_ok = true;
		}

		if (block_ok) {
			debug3("Queue sync of job %u in BG block %s "
			       "ending at %ld",
			       job_ptr->job_id,
			       action->bg_block_id,
			       job_ptr->end_time);
			_block_op(action);
			continue;
		}

		/* Mark the job failed and drop the unused action. */
		job_ptr->job_state = JOB_FAILED | JOB_COMPLETING;
		job_ptr->end_time = time(NULL);
		last_job_update = time(NULL);
		_destroy_bg_action(action);
	}
	list_iterator_destroy(job_itr);

	if (!allocated_blocks) {
		/* this should never happen,
		 * vestigial logic */
		error("sync_jobs: no block_list");
		return SLURM_ERROR;
	}

	/* Insure that all other blocks are free of users */
	bridge_reset_block_list(allocated_blocks);
	list_destroy(allocated_blocks);

	return SLURM_SUCCESS;
}