/*
 * launch_p_step_launch - run the user command (poe) in a forked child and
 *	wait for it to finish.
 *
 * IN job            - not referenced by this implementation
 * IN cio_fds        - descriptors the child dup2()s onto stdin/stdout/stderr
 * OUT global_rc     - receives the raw status word filled in by waitpid()
 *                     (stored as-is, not decoded with WEXITSTATUS & co.)
 * IN step_callbacks - not referenced by this implementation
 *
 * RET 0 in the parent once the wait has been attempted (even when waitpid()
 *     itself fails), 1 if fork() fails.  In the child, 1 is returned only
 *     when stdio redirection or execvp() fails.
 */
extern int launch_p_step_launch(
	srun_job_t *job, slurm_step_io_fds_t *cio_fds, uint32_t *global_rc,
	slurm_step_launch_callbacks_t *step_callbacks)
{
	int child_status = 0;

	if (opt.export_env)
		_build_user_env();

	poe_pid = fork();
	if (poe_pid < 0) {
		error("fork: %m");
		return 1;
	}

	if (poe_pid == 0) {
		/* Child: own process group, signals unblocked, then exec. */
		setpgrp();
		_unblock_signals();

		/* Point fds 0, 1 and 2 at the descriptors we were handed;
		 * evaluation stops at the first dup2() that fails. */
		if (!((dup2(cio_fds->input.fd, 0) != -1) &&
		      (dup2(cio_fds->out.fd, 1) != -1) &&
		      (dup2(cio_fds->err.fd, 2) != -1))) {
			error("dup2: %m");
			/* NOTE(review): this returns into the caller while
			 * still running in the forked child - confirm the
			 * caller copes with that. */
			return 1;
		}

		execvp(opt.argv[0], opt.argv);

		/* Only reached when execvp() could not replace the image. */
		error("execv(poe) error: %m");
		return 1;
	}

	/* Parent: block until the child is gone. */
	if (waitpid(poe_pid, &child_status, 0) < 0)
		error("Unable to reap poe child process");
	*global_rc = child_status;

	/* Whatever waitpid() reported describes the child, not a failure of
	 * this function, so the launch itself always reports success here. */
	return 0;
}
/*
 * launch_p_step_launch - fill in the step launch parameters from the srun
 *	options and the job, start (or extend) the step, and wait for the
 *	tasks to start.
 *
 * The first call (no task_state yet) creates the task state, remembers
 * "job" and "global_rc" in file-level variables, sets *global_rc to NO_VAL
 * and launches with slurm_step_launch().  Later calls alter the existing
 * task state and add to the step with slurm_step_launch_add().
 *
 * IN job            - job/step being launched
 * IN cio_fds        - local stdio descriptors, copied into the launch params
 * OUT global_rc     - recorded on the first call only; on launch failure the
 *                     recorded pointer (local_global_rc) receives errno
 * IN step_callbacks - caller's callbacks; a private copy is made and its
 *                     task_start (and usually task_finish) entries replaced
 *
 * RET 0 if the launch request succeeded (including the case where the step
 *     aborted before all tasks started, which is only logged), otherwise
 *     the errno left by the failed launch call.
 */
extern int launch_p_step_launch(
	srun_job_t *job, slurm_step_io_fds_t *cio_fds, uint32_t *global_rc,
	slurm_step_launch_callbacks_t *step_callbacks)
{
	slurm_step_launch_params_t launch_params;
	slurm_step_launch_callbacks_t callbacks;
	int rc = 0;
	bool first_launch = false;

	slurm_step_launch_params_t_init(&launch_params);
	/* Work on a copy so the caller's callback table is left untouched. */
	memcpy(&callbacks, step_callbacks, sizeof(callbacks));

	if (!task_state) {
		task_state = task_state_create(job->ntasks);
		local_srun_job = job;
		local_global_rc = global_rc;
		*local_global_rc = NO_VAL;
		first_launch = true;
	} else
		task_state_alter(task_state, job->ntasks);

	launch_params.gid = opt.gid;
	launch_params.alias_list = job->alias_list;
	launch_params.argc = opt.argc;
	launch_params.argv = opt.argv;
	launch_params.multi_prog = opt.multi_prog ? true : false;
	launch_params.cwd = opt.cwd;
	launch_params.slurmd_debug = opt.slurmd_debug;
	/* The option records a request for *un*buffered I/O while the launch
	 * parameter asks whether stdio *is* buffered, so the sense must be
	 * inverted. */
	launch_params.buffered_stdio = !opt.unbuffered;
	launch_params.labelio = opt.labelio ? true : false;
	launch_params.remote_output_filename =
		fname_remote_string(job->ofname);
	launch_params.remote_input_filename =
		fname_remote_string(job->ifname);
	launch_params.remote_error_filename =
		fname_remote_string(job->efname);
	launch_params.partition = job->partition;
	launch_params.profile = opt.profile;
	launch_params.task_prolog = opt.task_prolog;
	launch_params.task_epilog = opt.task_epilog;
	launch_params.cpu_bind = opt.cpu_bind;
	launch_params.cpu_bind_type = opt.cpu_bind_type;
	launch_params.mem_bind = opt.mem_bind;
	launch_params.mem_bind_type = opt.mem_bind_type;
	launch_params.accel_bind_type = opt.accel_bind_type;
	launch_params.open_mode = opt.open_mode;
	/* A negative value leaves whatever the params init call set. */
	if (opt.acctg_freq >= 0)
		launch_params.acctg_freq = opt.acctg_freq;
	launch_params.pty = opt.pty;
	if (opt.cpus_set)
		launch_params.cpus_per_task = opt.cpus_per_task;
	else
		launch_params.cpus_per_task = 1;
	launch_params.cpu_freq_min = opt.cpu_freq_min;
	launch_params.cpu_freq_max = opt.cpu_freq_max;
	launch_params.cpu_freq_gov = opt.cpu_freq_gov;
	launch_params.task_dist = opt.distribution;
	launch_params.ckpt_dir = opt.ckpt_dir;
	launch_params.restart_dir = opt.restart_dir;
	launch_params.preserve_env = opt.preserve_env;
	launch_params.spank_job_env = opt.spank_job_env;
	launch_params.spank_job_env_size = opt.spank_job_env_size;
	launch_params.user_managed_io = opt.user_managed_io;
	launch_params.ntasks_per_board = job->ntasks_per_board;
	launch_params.ntasks_per_core = job->ntasks_per_core;
	launch_params.ntasks_per_socket = job->ntasks_per_socket;

	if (opt.export_env)
		launch_params.env = _build_user_env();

	memcpy(&launch_params.local_fds, cio_fds, sizeof(slurm_step_io_fds_t));

	if (MPIR_being_debugged) {
		launch_params.parallel_debug = true;
		pmi_server_max_threads(1);
	} else {
		launch_params.parallel_debug = false;
	}

	/* Normally this isn't used, but if an outside process (other
	 * than srun (poe)) is using this logic to launch tasks then we
	 * can use this to signal the step. */
	callbacks.task_start = _task_start;

	/* If poe is using this code with multi-prog it always returns
	 * 1 for each task which could be confusing since no real
	 * error happened.  So only install our task_finish handler when
	 * this is not multi-prog, or when the step_signal callback is unset
	 * or is launch_g_fwd_signal. */
	if (!launch_params.multi_prog ||
	    (!callbacks.step_signal ||
	     (callbacks.step_signal == launch_g_fwd_signal))) {
		callbacks.task_finish = _task_finish;
	}

	mpir_init(job->ntasks);

	update_job_state(job, SRUN_JOB_LAUNCHING);
	launch_start_time = time(NULL);
	if (first_launch) {
		if (slurm_step_launch(job->step_ctx, &launch_params,
				      &callbacks) != SLURM_SUCCESS) {
			/* Capture errno before any later call can clobber it */
			rc = errno;
			*local_global_rc = errno;
			error("Application launch failed: %m");
			slurm_step_launch_abort(job->step_ctx);
			slurm_step_launch_wait_finish(job->step_ctx);
			goto cleanup;
		}
	} else {
		if (slurm_step_launch_add(job->step_ctx, &launch_params,
					  job->nodelist, job->fir_nodeid)
		    != SLURM_SUCCESS) {
			/* Capture errno before any later call can clobber it */
			rc = errno;
			*local_global_rc = errno;
			error("Application launch add failed: %m");
			slurm_step_launch_abort(job->step_ctx);
			slurm_step_launch_wait_finish(job->step_ctx);
			goto cleanup;
		}
	}

	update_job_state(job, SRUN_JOB_STARTING);
	if (slurm_step_launch_wait_start(job->step_ctx) == SLURM_SUCCESS) {
		update_job_state(job, SRUN_JOB_RUNNING);
		/* Only set up MPIR structures if the step launched
		 * correctly. */
		if (opt.multi_prog)
			mpir_set_multi_name(job->ntasks,
					    launch_params.argv[0]);
		else
			mpir_set_executable_names(launch_params.argv[0]);

		MPIR_debug_state = MPIR_DEBUG_SPAWNED;
		if (opt.debugger_test)
			mpir_dump_proctable();
		else
			MPIR_Breakpoint(job);
	} else {
		/* Not treated as an error here: rc stays 0. */
		info("Job step %u.%u aborted before step completely launched.",
		     job->jobid, job->stepid);
	}

cleanup:
	return rc;
}