/* * Trigger a job's burst buffer stage-out to begin * * Returns a SLURM errno. */ extern int bb_p_job_start_stage_out(struct job_record *job_ptr) { //FIXME: How to handle various job terminate states (e.g. requeue, failure), user script controlled? //FIXME: Test for memory leaks bb_alloc_t *bb_ptr; char **script_argv, *resp; int i, status = 0; char jobid_buf[32]; if (bb_state.bb_config.debug_flag) { info("%s: %s: %s", plugin_type, __func__, jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf))); } if (!bb_state.bb_config.start_stage_out) return SLURM_ERROR; if ((job_ptr->burst_buffer == NULL) || (job_ptr->burst_buffer[0] == '\0') || (_get_bb_size(job_ptr) == 0)) return SLURM_SUCCESS; pthread_mutex_lock(&bb_state.bb_mutex); bb_ptr = bb_find_alloc_rec(&bb_state, job_ptr); if (!bb_ptr) { /* No job buffers. Assuming use of persistent buffers only */ debug("%s: %s bb_rec not found", __func__, jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf))); } else { script_argv = _build_stage_args(bb_state.bb_config.start_stage_out, "start_stage_out", job_ptr, bb_ptr->size); if (script_argv) { bb_ptr->state = BB_STATE_STAGING_OUT; bb_ptr->state_time = time(NULL); resp = bb_run_script("StartStageOut", bb_state.bb_config.start_stage_out, script_argv, -1, &status); if (resp) { error("%s: StartStageOut: %s", __func__, resp); xfree(resp); } for (i = 0; script_argv[i]; i++) xfree(script_argv[i]); xfree(script_argv); } else { bb_ptr->state = BB_STATE_STAGED_OUT; bb_ptr->state_time = time(NULL); } } pthread_mutex_unlock(&bb_state.bb_mutex); return SLURM_SUCCESS; }
/* * Terminate any file staging and completely release burst buffer resources * * Returns a SLURM errno. */ extern int bb_p_job_cancel(struct job_record *job_ptr) { bb_alloc_t *bb_ptr; char **script_argv, *resp; int i, status = 0; char jobid_buf[32]; if (bb_state.bb_config.debug_flag) { info("%s: %s: %s", plugin_type, __func__, jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf))); } if (!bb_state.bb_config.stop_stage_out) return SLURM_ERROR; if ((job_ptr->burst_buffer == NULL) || (job_ptr->burst_buffer[0] == '\0') || (_get_bb_size(job_ptr) == 0)) return SLURM_SUCCESS; pthread_mutex_lock(&bb_state.bb_mutex); bb_ptr = bb_find_alloc_rec(&bb_state, job_ptr); if (!bb_ptr) { _stop_stage_out(job_ptr->job_id); } else { script_argv = _build_stage_args(bb_state.bb_config.stop_stage_out, "stop_stage_out", job_ptr, 0); if (script_argv) { bb_ptr->state = BB_STATE_STAGED_OUT; bb_ptr->state_time = time(NULL); resp = bb_run_script("StopStageOut", bb_state.bb_config.stop_stage_out, script_argv, -1, &status); if (resp) { error("%s: StopStageOut: %s", __func__, resp); xfree(resp); } for (i = 0; script_argv[i]; i++) xfree(script_argv[i]); xfree(script_argv); } else { _stop_stage_out(job_ptr->job_id); bb_ptr->cancelled = true; bb_ptr->end_time = 0; bb_ptr->state = BB_STATE_STAGED_OUT; bb_ptr->state_time = time(NULL); } } pthread_mutex_unlock(&bb_state.bb_mutex); return SLURM_SUCCESS; }
/* * Trigger a job's burst buffer stage-out to begin * * Returns a SLURM errno. */ extern int bb_p_job_start_stage_out(struct job_record *job_ptr) { bb_alloc_t *bb_ptr; char **script_argv, *resp; int i; if (debug_flag) { info("%s: %s", __func__, plugin_type); info("%s: job_id:%u", __func__, job_ptr->job_id); } if ((job_ptr->burst_buffer == NULL) || (job_ptr->burst_buffer[0] == '\0') || (_get_bb_size(job_ptr) == 0)) return SLURM_SUCCESS; pthread_mutex_lock(&bb_mutex); bb_ptr = _find_bb_job_rec(job_ptr); if (!bb_ptr) { /* No job buffers. Assuming use of persistent buffers only */ debug("%s: job_id:%u bb_rec not found", __func__, job_ptr->job_id); } else { script_argv = _build_stage_args(start_stage_out, "stage_out", job_ptr); if (script_argv) { bb_ptr->state = BB_STATE_STAGING_OUT; resp = _run_script("StartStageOut", start_stage_out, script_argv, -1); if (resp) { error("%s: StartStageOut: %s", __func__, resp); xfree(resp); } for (i = 0; script_argv[i]; i++) xfree(script_argv[i]); xfree(script_argv); } else { bb_ptr->state = BB_STATE_STAGED_OUT; } } pthread_mutex_unlock(&bb_mutex); return SLURM_SUCCESS; }
static void _alloc_job_bb(struct job_record *job_ptr, uint64_t bb_size) { char **script_argv, *resp; bb_alloc_t *bb_ptr; int i, status = 0; bb_job_t *bb_spec; char jobid_buf[32]; bb_spec = xmalloc(sizeof(bb_job_t)); bb_spec->total_size = bb_size; bb_ptr = bb_alloc_job(&bb_state, job_ptr, bb_spec); xfree(bb_spec); if (bb_state.bb_config.debug_flag) { info("%s: start stage-in %s", __func__, jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf))); } script_argv = _build_stage_args(bb_state.bb_config.start_stage_in, "start_stage_in", job_ptr, bb_size); if (script_argv) { bb_ptr->state = BB_STATE_STAGING_IN; bb_ptr->state_time = time(NULL); resp = bb_run_script("StartStageIn", bb_state.bb_config.start_stage_in, script_argv, -1, &status); if (resp) { error("%s: StartStageIn: %s", __func__, resp); xfree(resp); } for (i = 0; script_argv[i]; i++) xfree(script_argv[i]); xfree(script_argv); } else { bb_ptr->state = BB_STATE_STAGED_IN; bb_ptr->state_time = time(NULL); } }