/* Convert an array of task IDs into a list of host names * RET: the string, caller must xfree() this value */ static char *_task_ids_to_host_list(int ntasks, uint32_t taskids[]) { int i; hostset_t hs; char *hosts; slurm_step_layout_t *sl; if ((sl = launch_common_get_slurm_step_layout(local_srun_job)) == NULL) return (xstrdup("Unknown")); hs = hostset_create(NULL); for (i = 0; i < ntasks; i++) { char *host = slurm_step_layout_host_name(sl, taskids[i]); if (host) { hostset_insert(hs, host); free(host); } else { error("Could not identify host name for task %u", taskids[i]); } } hosts = _hostset_to_string(hs); hostset_destroy(hs); return (hosts); }
/* Convert an array of task IDs into a list of host names * RET: the string, caller must xfree() this value */ static char *_task_ids_to_host_list(int ntasks, uint32_t *taskids) { int i, task_cnt = 0; hostset_t hs; char *hosts; slurm_step_layout_t *sl; if ((sl = launch_common_get_slurm_step_layout(local_srun_job)) == NULL) return (xstrdup("Unknown")); /* If overhead of determining the hostlist is too high then srun * communications will timeout and fail, so return "Unknown" instead. * * See slurm_step_layout_host_id() in src/common/slurm_step_layout.c * for details. */ for (i = 0; i < sl->node_cnt; i++) { task_cnt += sl->tasks[i]; } if (task_cnt > 100000) return (xstrdup("Unknown")); hs = hostset_create(NULL); for (i = 0; i < ntasks; i++) { char *host = slurm_step_layout_host_name(sl, taskids[i]); if (host) { hostset_insert(hs, host); free(host); } else { error("Could not identify host name for task %u", taskids[i]); } } hosts = _hostset_to_string(hs); hostset_destroy(hs); return (hosts); }
/*
 * Fill in cio_fds with the file descriptors used locally for the step's
 * stdin, stdout and stderr, as described by job->ifname, job->ofname and
 * job->efname.
 * IN  job     - job whose ifname/ofname/efname describe the requested I/O
 * OUT cio_fds - in/out/err entries are set for every stream accepted by
 *		 _is_local_file() (err is also set when it shares stdout)
 *
 * If a named file cannot be opened, the failure is logged with error() and
 * the process terminates through exit(error_exit).
 */
extern void launch_common_set_stdio_fds(srun_job_t *job,
					slurm_step_io_fds_t *cio_fds)
{
	bool share_out_err = false;
	bool truncate_output;
	int out_flags = O_CREAT | O_WRONLY | O_APPEND;

	/*
	 * An explicit open mode takes precedence; otherwise the
	 * job_file_append setting of the configuration decides whether
	 * output files are truncated.
	 */
	if (opt.open_mode == OPEN_MODE_APPEND) {
		truncate_output = false;
	} else if (opt.open_mode == OPEN_MODE_TRUNCATE) {
		truncate_output = true;
	} else {
		slurm_ctl_conf_t *conf = slurm_conf_lock();

		truncate_output = !conf->job_file_append;
		slurm_conf_unlock();
	}
	if (truncate_output)
		out_flags |= O_TRUNC;

	/*
	 * stdin: open the named file only when a name is given and the
	 * taskid is -1, otherwise use our own stdin
	 */
	if (_is_local_file(job->ifname)) {
		if ((job->ifname->name != NULL) &&
		    (job->ifname->taskid == -1)) {
			cio_fds->in.fd = open(job->ifname->name, O_RDONLY);
			if (cio_fds->in.fd == -1) {
				error("Could not open stdin file: %m");
				exit(error_exit);
			}
		} else {
			cio_fds->in.fd = STDIN_FILENO;
		}

		if (job->ifname->type == IO_ONE) {
			cio_fds->in.taskid = job->ifname->taskid;
			cio_fds->in.nodeid = slurm_step_layout_host_id(
				launch_common_get_slurm_step_layout(job),
				job->ifname->taskid);
		}
	}

	/*
	 * stdout: same rule as stdin, using the output open flags and
	 * mode 0644 for a named file
	 */
	if (_is_local_file(job->ofname)) {
		if ((job->ofname->name != NULL) &&
		    (job->ofname->taskid == -1)) {
			cio_fds->out.fd = open(job->ofname->name, out_flags,
					       0644);
			if (cio_fds->out.fd == -1) {
				error("Could not open stdout file: %m");
				exit(error_exit);
			}
		} else {
			cio_fds->out.fd = STDOUT_FILENO;
		}

		/* Identical stdout and stderr names share one descriptor */
		if ((job->ofname->name != NULL) &&
		    (job->efname->name != NULL) &&
		    (strcmp(job->ofname->name, job->efname->name) == 0))
			share_out_err = true;
	}

	/*
	 * stderr: gets its own file descriptor only when it is not sharing
	 * the one already set up for stdout
	 */
	if (share_out_err) {
		debug3("stdout and stderr sharing a file");
		cio_fds->err.fd = cio_fds->out.fd;
		cio_fds->err.taskid = cio_fds->out.taskid;
		return;
	}

	if (!_is_local_file(job->efname))
		return;

	if ((job->efname->name != NULL) && (job->efname->taskid == -1)) {
		cio_fds->err.fd = open(job->efname->name, out_flags, 0644);
		if (cio_fds->err.fd == -1) {
			error("Could not open stderr file: %m");
			exit(error_exit);
		}
	} else {
		cio_fds->err.fd = STDERR_FILENO;
	}
}