/*
 * Initialize context for plugin
 */
extern int launch_init(void)
{
	int retval = SLURM_SUCCESS;
	char *plugin_type = "launch";
	char *type = NULL;

	if (init_run && plugin_context)
		return retval;

	slurm_mutex_lock(&plugin_context_lock);

	if (plugin_context)
		goto done;

	type = slurm_get_launch_type();
	plugin_context = plugin_context_create(
		plugin_type, type, (void **)&ops, syms, sizeof(syms));
	if (!plugin_context) {
		error("cannot create %s context for %s", plugin_type, type);
		retval = SLURM_ERROR;
		goto done;
	}
	init_run = true;

done:
	slurm_mutex_unlock(&plugin_context_lock);
	xfree(type);
	return retval;
}
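/*
 * Illustrative only, not part of this file: callers never touch
 * plugin_context directly. Each launch_g_*() wrapper first calls
 * launch_init() (lazy and thread-safe via plugin_context_lock above)
 * and then dispatches through the ops table that
 * plugin_context_create() filled in. The ops member name
 * "step_launch" below is a hypothetical stand-in for whatever
 * symbols the syms[] table actually declares.
 */
extern int launch_g_step_launch_sketch(void *launch_args)
{
	if (launch_init() < 0)
		return SLURM_ERROR;
	return (*(ops.step_launch))(launch_args);
}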
/*
 * opt_default(): used by initialize_and_process_args to set defaults
 */
static void _opt_default(void)
{
	opt.account	= NULL;
	opt.batch	= false;
	opt.clusters	= NULL;
#ifdef HAVE_FRONT_END
	opt.ctld	= true;
#else
	{
		char *launch_type = slurm_get_launch_type();
		/* do this for all but slurm (poe, aprun, etc...) */
		if (strcmp(launch_type, "launch/slurm"))
			opt.ctld = true;
		else
			opt.ctld = false;
		xfree(launch_type);
	}
#endif
	opt.interactive	= false;
	opt.job_cnt	= 0;
	opt.job_list	= NULL;
	opt.job_name	= NULL;
	opt.nodelist	= NULL;
	opt.partition	= NULL;
	opt.qos		= NULL;
	opt.reservation	= NULL;
	opt.signal	= (uint16_t) NO_VAL;
	opt.state	= JOB_END;
	opt.user_id	= 0;
	opt.user_name	= NULL;
	opt.verbose	= 0;
	opt.wckey	= NULL;
}
/*
 * opt_default(): used by initialize_and_process_args to set defaults
 */
static void _opt_default(void)
{
	opt.account	= NULL;
	opt.batch	= false;
	opt.clusters	= NULL;
#ifdef HAVE_FRONT_END
	opt.ctld	= true;
#else
	{
		char *launch_type = slurm_get_launch_type();
		if (!strcmp(launch_type, "launch/poe"))
			opt.ctld = true;
		else
			opt.ctld = false;
		xfree(launch_type);
	}
#endif
	opt.interactive	= false;
	opt.job_cnt	= 0;
	opt.job_name	= NULL;
	opt.nodelist	= NULL;
	opt.partition	= NULL;
	opt.qos		= NULL;
	opt.reservation	= NULL;
	opt.signal	= (uint16_t)-1; /* no signal specified */
	opt.state	= JOB_END;
	opt.user_id	= 0;
	opt.user_name	= NULL;
	opt.verbose	= 0;
	opt.wckey	= NULL;
}
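/*
 * A minimal sketch (not from this file) of how the ctld flag set
 * above is typically consumed: when opt.ctld is true the signal is
 * routed through slurmctld, otherwise it can go to the step
 * directly. slurm_kill_job_step() and slurm_signal_job_step() are
 * public slurm.h calls; the wrapper name is hypothetical.
 */
static int _signal_step_sketch(uint32_t job_id, uint32_t step_id)
{
	if (opt.ctld)
		return slurm_kill_job_step(job_id, step_id, opt.signal);
	return slurm_signal_job_step(job_id, step_id, opt.signal);
}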
static int _handle_suspend(int fd, stepd_step_rec_t *job, uid_t uid)
{
	static int launch_poe = -1;
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	uint16_t job_core_spec = (uint16_t) NO_VAL;

	safe_read(fd, &job_core_spec, sizeof(uint16_t));

	debug("_handle_suspend for step:%u.%u uid:%ld core_spec:%u",
	      job->jobid, job->stepid, (long)uid, job_core_spec);

	if (!_slurm_authorized_user(uid)) {
		debug("job step suspend request from uid %ld for job %u.%u",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		goto done;
	}

	if (job->cont_id == 0) {
		debug("step %u.%u invalid container [cont_id:%"PRIu64"]",
		      job->jobid, job->stepid, job->cont_id);
		rc = -1;
		errnum = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	acct_gather_suspend_poll();

	if (launch_poe == -1) {
		char *launch_type = slurm_get_launch_type();
		if (!strcmp(launch_type, "launch/poe"))
			launch_poe = 1;
		else
			launch_poe = 0;
		xfree(launch_type);
	}

	/*
	 * Signal the container
	 */
	pthread_mutex_lock(&suspend_mutex);
	if (suspended) {
		rc = -1;
		errnum = ESLURMD_STEP_SUSPENDED;
		pthread_mutex_unlock(&suspend_mutex);
		goto done;
	} else {
		if (!job->batch && switch_g_job_step_pre_suspend(job))
			error("switch_g_job_step_pre_suspend: %m");

		/* SIGTSTP is sent first to let MPI daemons stop their tasks,
		 * then wait 2 seconds, then send SIGSTOP to the spawned
		 * process's container to stop everything else.
		 *
		 * In some cases, 1 second has proven insufficient. Longer
		 * delays may help ensure that all MPI tasks have been stopped
		 * (that depends upon the MPI implementation used), but will
		 * also lengthen the period during which more than one job can
		 * be running on each resource (not good). */
		if (launch_poe == 0) {
			/* IBM MPI seems to periodically hang upon receipt
			 * of SIGTSTP. */
			if (proctrack_g_signal(job->cont_id, SIGTSTP) < 0) {
				verbose("Error suspending %u.%u (SIGTSTP): %m",
					job->jobid, job->stepid);
			} else
				sleep(2);
		}

		if (proctrack_g_signal(job->cont_id, SIGSTOP) < 0) {
			verbose("Error suspending %u.%u (SIGSTOP): %m",
				job->jobid, job->stepid);
		} else
			verbose("Suspended %u.%u", job->jobid, job->stepid);
		suspended = true;
	}
	if (!job->batch && switch_g_job_step_post_suspend(job))
		error("switch_g_job_step_post_suspend: %m");
	if (!job->batch && core_spec_g_suspend(job->cont_id, job_core_spec))
		error("core_spec_g_suspend: %m");
	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code and errno */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	return SLURM_SUCCESS;
rwfail:
	return SLURM_FAILURE;
}
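/*
 * Sketch of the peer side of this fd protocol, assuming the same
 * safe_read()/safe_write() helpers and their rwfail: convention:
 * send the core-spec count, then read back the return code and
 * errno written at done: above. The function name is hypothetical.
 */
static int _suspend_request_sketch(int fd, uint16_t job_core_spec)
{
	int rc, errnum;

	safe_write(fd, &job_core_spec, sizeof(uint16_t));
	safe_read(fd, &rc, sizeof(int));
	safe_read(fd, &errnum, sizeof(int));
	if (rc < 0)
		errno = errnum;
	return rc;
rwfail:
	return SLURM_FAILURE;
}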
static void _launch_app(srun_job_t *job, List srun_job_list, bool got_alloc)
{
	ListIterator opt_iter, job_iter;
	opt_t *opt_local = NULL;
	_launch_app_data_t *opts;
	int total_ntasks = 0, total_nnodes = 0, step_cnt = 0, node_offset = 0;
	pthread_mutex_t step_mutex = PTHREAD_MUTEX_INITIALIZER;
	pthread_cond_t step_cond = PTHREAD_COND_INITIALIZER;
	srun_job_t *first_job = NULL;
	char *launch_type, *pack_node_list = NULL;
	bool need_mpir = false;
	uint16_t *tmp_task_cnt = NULL, *pack_task_cnts = NULL;
	uint32_t **tmp_tids = NULL, **pack_tids = NULL;

	launch_type = slurm_get_launch_type();
	if (launch_type && strstr(launch_type, "slurm"))
		need_mpir = true;
	xfree(launch_type);

	if (srun_job_list) {
		int pack_step_cnt = list_count(srun_job_list);
		first_job = (srun_job_t *) list_peek(srun_job_list);
		if (!opt_list) {
			if (first_job)
				fini_srun(first_job, got_alloc, &global_rc, 0);
			fatal("%s: have srun_job_list, but no opt_list",
			      __func__);
		}

		job_iter = list_iterator_create(srun_job_list);
		while ((job = (srun_job_t *) list_next(job_iter))) {
			char *node_list = NULL;
			int i, node_inx;
			total_ntasks += job->ntasks;
			total_nnodes += job->nhosts;

			xrealloc(pack_task_cnts,
				 sizeof(uint16_t) * total_nnodes);
			(void) slurm_step_ctx_get(job->step_ctx,
						  SLURM_STEP_CTX_TASKS,
						  &tmp_task_cnt);
			if (!tmp_task_cnt) {
				fatal("%s: job %u has NULL task array",
				      __func__, job->jobid);
				break;	/* To eliminate CLANG error */
			}
			memcpy(pack_task_cnts + node_offset, tmp_task_cnt,
			       sizeof(uint16_t) * job->nhosts);

			xrealloc(pack_tids,
				 sizeof(uint32_t *) * total_nnodes);
			(void) slurm_step_ctx_get(job->step_ctx,
						  SLURM_STEP_CTX_TIDS,
						  &tmp_tids);
			if (!tmp_tids) {
				fatal("%s: job %u has NULL task ID array",
				      __func__, job->jobid);
				break;	/* To eliminate CLANG error */
			}
			for (node_inx = 0; node_inx < job->nhosts;
			     node_inx++) {
				uint32_t *node_tids;
				node_tids = xmalloc(sizeof(uint32_t) *
						    tmp_task_cnt[node_inx]);
				for (i = 0; i < tmp_task_cnt[node_inx]; i++) {
					node_tids[i] = tmp_tids[node_inx][i] +
						       job->pack_task_offset;
				}
				pack_tids[node_offset + node_inx] = node_tids;
			}

			(void) slurm_step_ctx_get(job->step_ctx,
						  SLURM_STEP_CTX_NODE_LIST,
						  &node_list);
			if (!node_list) {
				fatal("%s: job %u has NULL hostname",
				      __func__, job->jobid);
			}
			if (pack_node_list)
				xstrfmtcat(pack_node_list, ",%s", node_list);
			else
				pack_node_list = xstrdup(node_list);
			xfree(node_list);
			node_offset += job->nhosts;
		}
		list_iterator_reset(job_iter);
		_reorder_pack_recs(&pack_node_list, &pack_task_cnts,
				   &pack_tids, total_nnodes);

		if (need_mpir)
			mpir_init(total_ntasks);

		opt_iter = list_iterator_create(opt_list);
		while ((opt_local = (opt_t *) list_next(opt_iter))) {
			job = (srun_job_t *) list_next(job_iter);
			if (!job) {
				slurm_mutex_lock(&step_mutex);
				while (step_cnt > 0)
					slurm_cond_wait(&step_cond,
							&step_mutex);
				slurm_mutex_unlock(&step_mutex);
				if (first_job) {
					fini_srun(first_job, got_alloc,
						  &global_rc, 0);
				}
				fatal("%s: job allocation count does not match request count (%d != %d)",
				      __func__, list_count(srun_job_list),
				      list_count(opt_list));
				break;	/* To eliminate CLANG error */
			}
			slurm_mutex_lock(&step_mutex);
			step_cnt++;
			slurm_mutex_unlock(&step_mutex);
			job->pack_node_list = xstrdup(pack_node_list);
			if ((pack_step_cnt > 1) && pack_task_cnts) {
				xassert(node_offset == job->pack_nnodes);
				job->pack_task_cnts =
					xmalloc(sizeof(uint16_t) *
						job->pack_nnodes);
				memcpy(job->pack_task_cnts, pack_task_cnts,
				       sizeof(uint16_t) * job->pack_nnodes);
				job->pack_tids =
					xmalloc(sizeof(uint32_t *) *
						job->pack_nnodes);
				memcpy(job->pack_tids, pack_tids,
				       sizeof(uint32_t *) *
				       job->pack_nnodes);
			}
			opts = xmalloc(sizeof(_launch_app_data_t));
			opts->got_alloc  = got_alloc;
			opts->job        = job;
			opts->opt_local  = opt_local;
			opts->step_cond  = &step_cond;
			opts->step_cnt   = &step_cnt;
			opts->step_mutex = &step_mutex;
			opt_local->pack_step_cnt = pack_step_cnt;

			slurm_thread_create_detached(NULL, _launch_one_app,
						     opts);
		}
		xfree(pack_node_list);
		xfree(pack_task_cnts);
		list_iterator_destroy(job_iter);
		list_iterator_destroy(opt_iter);
		slurm_mutex_lock(&step_mutex);
		while (step_cnt > 0)
			slurm_cond_wait(&step_cond, &step_mutex);
		slurm_mutex_unlock(&step_mutex);

		if (first_job)
			fini_srun(first_job, got_alloc, &global_rc, 0);
	} else {
		if (need_mpir)
			mpir_init(job->ntasks);
		opts = xmalloc(sizeof(_launch_app_data_t));
		opts->got_alloc = got_alloc;
		opts->job       = job;
		opts->opt_local = &opt;
		opt.pack_step_cnt = 1;
		_launch_one_app(opts);
		fini_srun(job, got_alloc, &global_rc, 0);
	}
}
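/*
 * Counterpart sketch (body details omitted; field names taken from
 * the _launch_app_data_t usage above): each detached
 * _launch_one_app() thread must decrement *step_cnt under
 * *step_mutex and signal *step_cond when it finishes, otherwise the
 * while (step_cnt > 0) wait in _launch_app() never returns. In the
 * single-job path the xmalloc'd opts leaves step_mutex NULL, hence
 * the guard.
 */
static void *_launch_one_app(void *data)
{
	_launch_app_data_t *opts = (_launch_app_data_t *) data;

	/* ... set up and launch the step for opts->job here ... */

	if (opts->step_mutex) {
		slurm_mutex_lock(opts->step_mutex);
		(*opts->step_cnt)--;
		slurm_cond_broadcast(opts->step_cond);
		slurm_mutex_unlock(opts->step_mutex);
	}
	xfree(opts);
	return NULL;
}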