extern int launch_p_step_launch(
	srun_job_t *job, slurm_step_io_fds_t *cio_fds,
	uint32_t *global_rc, slurm_step_launch_callbacks_t *step_callbacks)
{
	slurm_step_launch_params_t launch_params;
	slurm_step_launch_callbacks_t callbacks;
	int rc = 0;
	bool first_launch = false;

	slurm_step_launch_params_t_init(&launch_params);
	memcpy(&callbacks, step_callbacks, sizeof(callbacks));

	if (!task_state) {
		task_state = task_state_create(job->ntasks);
		local_srun_job = job;
		local_global_rc = global_rc;
		first_launch = true;
	} else
		task_state_alter(task_state, job->ntasks);

	launch_params.gid = opt.gid;
	launch_params.alias_list = job->alias_list;
	launch_params.argc = opt.argc;
	launch_params.argv = opt.argv;
	launch_params.multi_prog = opt.multi_prog ? true : false;
	launch_params.cwd = opt.cwd;
	launch_params.slurmd_debug = opt.slurmd_debug;
	launch_params.buffered_stdio = !opt.unbuffered;
	launch_params.labelio = opt.labelio ? true : false;
	launch_params.remote_output_filename = fname_remote_string(job->ofname);
	launch_params.remote_input_filename = fname_remote_string(job->ifname);
	launch_params.remote_error_filename = fname_remote_string(job->efname);
	launch_params.task_prolog = opt.task_prolog;
	launch_params.task_epilog = opt.task_epilog;
	launch_params.cpu_bind = opt.cpu_bind;
	launch_params.cpu_bind_type = opt.cpu_bind_type;
	launch_params.mem_bind = opt.mem_bind;
	launch_params.mem_bind_type = opt.mem_bind_type;
	launch_params.open_mode = opt.open_mode;
	if (opt.acctg_freq >= 0)
		launch_params.acctg_freq = opt.acctg_freq;
	launch_params.pty = opt.pty;
	if (opt.cpus_set)
		launch_params.cpus_per_task = opt.cpus_per_task;
	else
		launch_params.cpus_per_task = 1;
	launch_params.cpu_freq = opt.cpu_freq;
	launch_params.task_dist = opt.distribution;
	launch_params.ckpt_dir = opt.ckpt_dir;
	launch_params.restart_dir = opt.restart_dir;
	launch_params.preserve_env = opt.preserve_env;
	launch_params.spank_job_env = opt.spank_job_env;
	launch_params.spank_job_env_size = opt.spank_job_env_size;
	launch_params.user_managed_io = opt.user_managed_io;
	memcpy(&launch_params.local_fds, cio_fds, sizeof(slurm_step_io_fds_t));

	if (MPIR_being_debugged) {
		launch_params.parallel_debug = true;
		pmi_server_max_threads(1);
	} else {
		launch_params.parallel_debug = false;
	}

	/* Normally this isn't used, but if an outside process (other
	 * than srun, e.g. poe) is using this logic to launch tasks,
	 * then we can use this to signal the step. */
	callbacks.task_start = _task_start;

	/* If poe is using this code with multi-prog, it always returns
	 * 1 for each task, which could be confusing since no real
	 * error happened. */
	if (!launch_params.multi_prog
	    || (!callbacks.step_signal
		|| (callbacks.step_signal == launch_g_fwd_signal))) {
		callbacks.task_finish = _task_finish;
	}

	mpir_init(job->ctx_params.task_count);

	update_job_state(job, SRUN_JOB_LAUNCHING);
	launch_start_time = time(NULL);
	if (first_launch) {
		if (slurm_step_launch(job->step_ctx, &launch_params,
				      &callbacks) != SLURM_SUCCESS) {
			rc = errno;
			*local_global_rc = errno;
			error("Application launch failed: %m");
			slurm_step_launch_abort(job->step_ctx);
			slurm_step_launch_wait_finish(job->step_ctx);
			goto cleanup;
		}
	} else {
		if (slurm_step_launch_add(job->step_ctx, &launch_params,
					  job->nodelist, job->fir_nodeid)
		    != SLURM_SUCCESS) {
			rc = errno;
			*local_global_rc = errno;
			error("Application launch add failed: %m");
			slurm_step_launch_abort(job->step_ctx);
			slurm_step_launch_wait_finish(job->step_ctx);
			goto cleanup;
		}
	}

	update_job_state(job, SRUN_JOB_STARTING);
	if (slurm_step_launch_wait_start(job->step_ctx) == SLURM_SUCCESS) {
		update_job_state(job, SRUN_JOB_RUNNING);
		/* Only set up MPIR structures if the step launched
		 * correctly. */
		if (opt.multi_prog)
			mpir_set_multi_name(job->ctx_params.task_count,
					    launch_params.argv[0]);
		else
			mpir_set_executable_names(launch_params.argv[0]);
		MPIR_debug_state = MPIR_DEBUG_SPAWNED;
		if (opt.debugger_test)
			mpir_dump_proctable();
		else
			MPIR_Breakpoint(job);
	} else {
		info("Job step %u.%u aborted before step completely launched.",
		     job->jobid, job->stepid);
	}

cleanup:
	return rc;
}
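/*
 * For reference: the MPIR_* symbols used above belong to the de facto
 * standard "MPIR process acquisition" interface that parallel debuggers
 * (e.g. TotalView) read out of the srun process. A minimal sketch of the
 * conventional declarations follows; the exact definitions live in srun's
 * debugger support code (srun's MPIR_Breakpoint() additionally takes the
 * srun_job_t, as called above), so treat this as an illustration of the
 * convention, not SLURM's literal source.
 */
typedef struct {
	char *host_name;	/* node on which the task runs   */
	char *executable_name;	/* path of the task's executable */
	int   pid;		/* process id of the task        */
} MPIR_PROCDESC;

extern MPIR_PROCDESC *MPIR_proctable;	/* one entry per launched task */
extern int MPIR_proctable_size;
extern volatile int MPIR_being_debugged;	/* nonzero when a debugger
						 * is attached */
extern volatile int MPIR_debug_state;	/* e.g. MPIR_DEBUG_SPAWNED once
					 * all tasks are launched */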
/*
 * Extracted example (SLURM user-managed IO test program). The includes
 * below are what this extract needs to compile; TASKS_PER_NODE and
 * _do_task_work() are defined elsewhere in the original program, so a
 * labeled placeholder and a prototype stand in for them here (a hedged
 * sketch of _do_task_work() follows after main()).
 */
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

#include <slurm/slurm.h>
#include <slurm/slurm_errno.h>

#ifndef TASKS_PER_NODE
#define TASKS_PER_NODE 1	/* placeholder; the original test defines
				 * its own per-node task count */
#endif

static void _do_task_work(int *fd_array, int tasks);

int main(int argc, char *argv[])
{
	int i, min_nodes = 1, max_nodes = 1, nodes, tasks = 0, rc = 0;
	job_desc_msg_t job_req;
	resource_allocation_response_msg_t *job_resp;
	slurm_step_ctx_params_t step_params[1];
	slurm_step_ctx_t *ctx = NULL;
	slurm_step_launch_params_t launch[1];
	char *task_argv[3];
	int *fd_array = NULL;
	int num_fd;

	if (argc > 1) {
		i = atoi(argv[1]);
		if (i > 0)
			min_nodes = i;
	}
	if (argc > 2) {
		i = atoi(argv[2]);
		if (i > 0)
			max_nodes = i;
	}
	if (max_nodes < min_nodes)
		max_nodes = min_nodes;

	/* Create a job allocation */
	slurm_init_job_desc_msg(&job_req);
	job_req.min_nodes  = min_nodes;
	job_req.max_nodes  = max_nodes;
	job_req.user_id    = getuid();
	job_req.group_id   = getgid();
	job_req.time_limit = 1;
	if (slurm_allocate_resources(&job_req, &job_resp)) {
		slurm_perror("slurm_allocate_resources");
		printf("INFO: min_nodes=%u max_nodes=%u user_id=%u group_id=%u\n",
		       job_req.min_nodes, job_req.max_nodes,
		       job_req.user_id, job_req.group_id);
		exit(0);
	}
	printf("job_id %u\n", job_resp->job_id);
	fflush(stdout);

	/* Wait for the allocation request to be satisfied */
	if ((job_resp->node_list == NULL) ||
	    (strlen(job_resp->node_list) == 0)) {
		printf("Waiting for resource allocation\n");
		fflush(stdout);
		while ((job_resp->node_list == NULL) ||
		       (strlen(job_resp->node_list) == 0)) {
			sleep(5);
			if (slurm_allocation_lookup_lite(job_resp->job_id,
							 &job_resp) &&
			    (slurm_get_errno() != ESLURM_JOB_PENDING)) {
				slurm_perror("slurm_allocation_lookup_lite");
				exit(0);
			}
		}
	}
	nodes = job_resp->node_cnt;
	if (argc > 3)
		tasks = atoi(argv[3]);
	if (tasks < 1)
		tasks = nodes * TASKS_PER_NODE;
	if (tasks < nodes) {
		fprintf(stderr, "Invalid task count argument\n");
		exit(1);
	}
	printf("Starting %d tasks on %d nodes\n", tasks, nodes);
	fflush(stdout);

	/*
	 * Create a job step context.
	 */
	slurm_step_ctx_params_t_init(step_params);
	step_params->job_id = job_resp->job_id;
	step_params->min_nodes = nodes;
	step_params->task_count = tasks;
	ctx = slurm_step_ctx_create(step_params);
	if ((ctx == NULL) && (slurm_get_errno() == ESLURM_PROLOG_RUNNING)) {
		printf("SlurmctldProlog is still running, "
		       "sleep and try again\n");
		sleep(10);
		ctx = slurm_step_ctx_create(step_params);
	}
	if (ctx == NULL) {
		slurm_perror("slurm_step_ctx_create");
		rc = 1;
		goto done;
	}

	/*
	 * Hack to run one task per node, regardless of what we set up
	 * when we created the job step context.
	 */
	if (slurm_step_ctx_daemon_per_node_hack(ctx) != SLURM_SUCCESS) {
		slurm_perror("slurm_step_ctx_daemon_per_node_hack");
		rc = 1;
		goto done;
	}

	/*
	 * Launch the tasks using "user managed" IO.
	 * "user managed" IO means a TCP stream for each task, directly
	 * connected to the stdin, stdout, and stderr of the task.
	 */
	slurm_step_launch_params_t_init(launch);
	task_argv[0] = "./test7.3.io";
	launch->argv = task_argv;
	launch->argc = 1;
	launch->user_managed_io = true;	/* This is the key to using
					 * "user managed" IO */
	if (slurm_step_launch(ctx, launch, NULL) != SLURM_SUCCESS) {
		slurm_perror("slurm_step_launch");
		rc = 1;
		goto done;
	}
	if (slurm_step_launch_wait_start(ctx) != SLURM_SUCCESS) {
		slurm_perror("slurm_step_launch_wait_start");
		rc = 1;
		goto done;
	}
	slurm_step_ctx_get(ctx, SLURM_STEP_CTX_USER_MANAGED_SOCKETS,
			   &num_fd, &fd_array);

	/* Interact with the launched tasks as desired */
	_do_task_work(fd_array, tasks);

	for (i = 0; i < tasks; i++)
		close(fd_array[i]);

	slurm_step_launch_wait_finish(ctx);

	/* Terminate the job, killing all tasks */
done:
	slurm_kill_job(job_resp->job_id, SIGKILL, 0);

	/* Clean up storage */
	slurm_free_resource_allocation_response_msg(job_resp);
	if (ctx)
		slurm_step_ctx_destroy(ctx);
	exit(0);
}
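/*
 * A minimal sketch of _do_task_work(), which the original test program
 * defines elsewhere. It assumes only what the comments above state: each
 * fd in fd_array is a connected TCP stream tied to the corresponding
 * task's stdin/stdout/stderr (what user-managed IO provides). The message
 * contents and response handling are illustrative, not the actual test
 * logic.
 */
static void _do_task_work(int *fd_array, int tasks)
{
	char buf[1024];
	ssize_t n;
	int i;

	for (i = 0; i < tasks; i++) {
		/* Anything written to the socket appears on the
		 * task's stdin. */
		if (write(fd_array[i], "hello\n", 6) < 0) {
			perror("write");
			continue;
		}
		/* Anything the task writes to stdout or stderr can be
		 * read back from the same socket. */
		n = read(fd_array[i], buf, sizeof(buf) - 1);
		if (n > 0) {
			buf[n] = '\0';
			printf("task %d: %s", i, buf);
		}
	}
}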