/*
 * Install the cluster record carried by an allocation response as the
 * current working cluster.
 *
 * msg IN/OUT: allocation response; its working_cluster_rec pointer is
 *	moved into the working_cluster_rec variable and cleared in msg.
 *	msg, working_cluster_rec, node_list and node_addr are asserted
 *	to be non-NULL.
 */
extern void
slurm_setup_remote_working_cluster(resource_allocation_response_msg_t *msg)
{
	slurmdb_cluster_rec_t *rec;

	xassert(msg);
	xassert(msg->working_cluster_rec);
	xassert(msg->node_list);
	xassert(msg->node_addr);

	/* Release whatever record a previous call installed. */
	if (working_cluster_rec)
		slurmdb_destroy_cluster_rec(working_cluster_rec);

	/* Move the record out of the message so it is not referenced twice. */
	rec = (slurmdb_cluster_rec_t *) msg->working_cluster_rec;
	msg->working_cluster_rec = NULL;
	working_cluster_rec = rec;

	rec->plugin_id_select =
		select_get_plugin_id_pos(rec->plugin_id_select);

	slurm_set_addr(&rec->control_addr, rec->control_port,
		       rec->control_host);

	if (setenvf(NULL, "SLURM_CLUSTER_NAME", "%s", rec->name) < 0)
		error("unable to set SLURM_CLUSTER_NAME in environment");

	add_remote_nodes_to_conf_tbls(msg->node_list, msg->node_addr);
}
/* * Set environment variables associated with the frequency variables. */ extern int cpu_freq_set_env(char* var, uint32_t argmin, uint32_t argmax, uint32_t arggov) { uint32_t min, max, gov; char bfgov[32], bfmin[32], bfmax[32], bfall[96]; bfgov[0] = '\0'; bfmin[0] = '\0'; bfmax[0] = '\0'; /* * Default value from command line is NO_VAL, * Default value from slurmstepd for batch jobs is 0 * Convert slurmstepd values to command line ones. */ min = argmin; if (min == 0) min = NO_VAL; max = argmax; if (max == 0) max = NO_VAL; gov = arggov; if (gov == 0) gov = NO_VAL; if ((min == NO_VAL) && (max == NO_VAL) && (gov == NO_VAL)) return SLURM_SUCCESS; if (min != NO_VAL) { if (min & CPU_FREQ_RANGE_FLAG) { cpu_freq_to_string(bfmin, sizeof(bfmin), min); } else { sprintf(bfmin, "%u", min); } } if (max != NO_VAL) { if (max & CPU_FREQ_RANGE_FLAG) { cpu_freq_to_string(bfmax, sizeof(bfmax), max); } else { sprintf(bfmax, "%u", max); } } if (gov != NO_VAL) { cpu_freq_to_string(bfgov, sizeof(bfgov), gov); } if ((min != NO_VAL) && (max != NO_VAL) && (gov != NO_VAL)) { sprintf(bfall, "%s-%s:%s", bfmin, bfmax, bfgov); } else if ((min != NO_VAL) && (max != NO_VAL)) { sprintf(bfall, "%s-%s", bfmin, bfmax); } else if (max != NO_VAL) { sprintf(bfall, "%s", bfmax); } else if (gov != NO_VAL) { sprintf(bfall, "%s", bfgov); } if (setenvf(NULL, var, "%s", bfall)) { error("Unable to set %s", var); return SLURM_FAILURE; } return SLURM_SUCCESS; }
/*
 * Make sure the job's TMPDIR names a usable directory.
 *
 * If TMPDIR is absent from job->env it is set to /tmp. Otherwise an
 * attempt is made to create it (mode 0700); if mkdir() fails and the
 * path is missing, is not a directory, or fails the X_OK|W_OK access
 * check, the problem is logged and TMPDIR is reset to /tmp.
 */
static void _make_tmpdir(slurmd_job_t *job)
{
	struct stat dir_stat;
	int create_errno;
	char *dir = getenvp(job->env, "TMPDIR");

	if (!dir) {
		/* task may want it set */
		setenvf(&job->env, "TMPDIR", "/tmp");
		return;
	}

	if (mkdir(dir, 0700) >= 0)
		return;

	/* Keep mkdir()'s errno: stat() below may overwrite it. */
	create_errno = errno;

	if (stat(dir, &dir_stat)) {
		/* path does not exist: show why it could not be created */
		error("Unable to create TMPDIR [%s]: %s",
		      dir, strerror(create_errno));
	} else if (!S_ISDIR(dir_stat.st_mode)) {
		error("TMPDIR [%s] is not a directory", dir);
	}

	/* Eaccess wasn't introduced until glibc 2.4 but euidaccess
	 * has been around for a while.  So to make sure we
	 * still work with older systems we include this check.
	 */

#if defined(__FreeBSD__)
#define	__GLIBC__ 		(1)
#define __GLIBC_PREREQ(a,b)	(1)
#endif
#if defined __GLIBC__ && __GLIBC_PREREQ(2, 4)
	else if (eaccess(dir, X_OK|W_OK)) /* check permissions */
#else
	else if (euidaccess(dir, X_OK|W_OK))
#endif
		error("TMPDIR [%s] is not writeable", dir);
	else
		return;	/* directory already exists and is usable */

	error("Setting TMPDIR to /tmp");
	setenvf(&job->env, "TMPDIR", "/tmp");
}
/* Set SLURM_SUBMIT_DIR environment variable with current state */ static void _set_submit_dir_env(void) { work_dir = xmalloc(MAXPATHLEN + 1); if ((getcwd(work_dir, MAXPATHLEN)) == NULL) { error("getcwd failed: %m"); exit(error_exit); } if (setenvf(NULL, "SLURM_SUBMIT_DIR", "%s", work_dir) < 0) { error("unable to set SLURM_SUBMIT_DIR in environment"); return; } }
/*
 * Add the environment variables specific to the cluster type.
 *
 * cluster_flags  IN: CLUSTER_FLAG_* bits selecting the cluster type
 * dest           IN/OUT: environment array to update
 * select_jobinfo IN: select plugin data queried for block/reservation ids
 * RET SLURM_SUCCESS, or SLURM_FAILURE when the required block id
 *     (CLUSTER_FLAG_BG) or reservation id (CLUSTER_FLAG_CRAYXT) is unset.
 */
static int _setup_particulars(uint32_t cluster_flags,
			      char ***dest,
			      dynamic_plugin_data_t *select_jobinfo)
{
	if (cluster_flags & CLUSTER_FLAG_BG) {
		char *block_id = NULL;

		select_g_select_jobinfo_get(select_jobinfo,
					    SELECT_JOBDATA_BLOCK_ID,
					    &block_id);
		if (!block_id) {
			error("Can't set MPIRUN_PARTITION "
			      "environment variable");
			return SLURM_FAILURE;
		}

		/* check to see if this is a HTC block or not. */
		if (cluster_flags & CLUSTER_FLAG_BGP) {
			uint16_t conn_type[HIGHEST_DIMENSIONS];

			select_g_select_jobinfo_get(select_jobinfo,
						    SELECT_JOBDATA_CONN_TYPE,
						    &conn_type);
			/* SUBMIT_POOL over rides HTC_SUBMIT_POOL */
			if (conn_type[0] > SELECT_SMALL)
				setenvf(dest, "SUBMIT_POOL", "%s", block_id);
		}

		setenvf(dest, "MPIRUN_PARTITION", "%s", block_id);
		setenvf(dest, "MPIRUN_NOFREE", "%d", 1);
		setenvf(dest, "MPIRUN_NOALLOCATE", "%d", 1);
		xfree(block_id);
		return SLURM_SUCCESS;
	}

	if (cluster_flags & CLUSTER_FLAG_CRAYXT) {
		uint32_t reservation = 0;

		select_g_select_jobinfo_get(select_jobinfo,
					    SELECT_JOBDATA_RESV_ID,
					    &reservation);
		if (!reservation) {
			error("Can't set BASIL_RESERVATION_ID "
			      "environment variable");
			return SLURM_FAILURE;
		}
		setenvf(dest, "BASIL_RESERVATION_ID", "%u", reservation);
		return SLURM_SUCCESS;
	}

	if (cluster_flags & CLUSTER_FLAG_AIX)
		setenvf(dest, "LOADLBATCH", "%s", "yes");

	return SLURM_SUCCESS;
}
int _mpi_init (char *mpi_type) { int retval = SLURM_SUCCESS; char *full_type = NULL; int got_default = 0; slurm_mutex_lock( &context_lock ); if ( g_context ) goto done; if (mpi_type == NULL) { mpi_type = slurm_get_mpi_default(); got_default = 1; } if (mpi_type == NULL) { error("No MPI default set."); retval = SLURM_ERROR; goto done; } setenvf (NULL, "SLURM_MPI_TYPE", "%s", mpi_type); full_type = xmalloc(sizeof(char) * (strlen(mpi_type)+5)); sprintf(full_type,"mpi/%s",mpi_type); g_context = _slurm_mpi_context_create(full_type); xfree(full_type); if ( g_context == NULL ) { error( "cannot create a context for %s", mpi_type); retval = SLURM_ERROR; goto done; } if ( _slurm_mpi_get_ops( g_context ) == NULL ) { error( "cannot resolve plugin operations for %s", mpi_type); _slurm_mpi_context_destroy( g_context ); g_context = NULL; retval = SLURM_ERROR; } done: if(got_default) xfree(mpi_type); slurm_mutex_unlock( &context_lock ); return retval; }
/*
 * Put libcr_run.so at the front of LD_PRELOAD in the step's environment.
 *
 * Any existing LD_PRELOAD entries are kept after it, except entries
 * starting with "libcr_run.so" or "libcr_omit.so", which are dropped.
 * This is applied unconditionally (not only to batch steps).
 *
 * job IN/OUT: step record whose env array is updated
 * RET SLURM_SUCCESS always
 */
extern int slurm_ckpt_stepd_prefork(stepd_step_rec_t *job)
{
	char *old_env = NULL, *new_env = NULL, *ptr = NULL, *save_ptr = NULL;

	/*
	 * I was thinking that a thread can be created here to
	 * communicate with the tasks via sockets/pipes.
	 * Maybe this is not needed - we can modify MVAPICH2
	 */

	/* set LD_PRELOAD for batch script shell */
	old_env = getenvp(job->env, "LD_PRELOAD");
	if (old_env) {
		/* search and replace all libcr_run and libcr_omit
		 * the old env value is messed up --
		 * it will be replaced */
		while ((ptr = strtok_r(old_env, " :", &save_ptr))) {
			old_env = NULL;	/* continue tokenising same string */
			if (!strncmp(ptr, "libcr_run.so", 12) ||
			    !strncmp(ptr, "libcr_omit.so", 13))
				continue;
			xstrcat(new_env, ptr);
			xstrcat(new_env, ":");
		}
	}

	ptr = xstrdup("libcr_run.so");
	if (new_env)
		xstrfmtcat(ptr, ":%s", new_env);

	/*
	 * The value derives from the job's LD_PRELOAD, so it must never be
	 * used as the format string itself: a '%' in it would be
	 * interpreted as a conversion specification.
	 */
	setenvf(&job->env, "LD_PRELOAD", "%s", ptr);

	xfree(new_env);
	xfree(ptr);

	return SLURM_SUCCESS;
}
/*
 * Current process is running as the user when this is called.
 *
 * Build the task's final environment, run the pre-launch hooks and
 * task prologs, then execve() the task. Does not return: on any
 * failure the process exits.
 *
 * job IN/OUT: job step record; job->env is replaced by the task's
 *	environment
 * i   IN: index of the task in job->task[]
 */
void exec_task(slurmd_job_t *job, int i)
{
	uint32_t *gtids;		/* pointer to array of ranks */
	int fd, j;
	slurmd_task_info_t *task = job->task[i];
	char **tmp_env;
	int saved_errno;

	if (i == 0)
		_make_tmpdir(job);

	gtids = xmalloc(job->node_tasks * sizeof(uint32_t));
	for (j = 0; j < job->node_tasks; j++)
		gtids[j] = job->task[j]->gtid;
	job->envtp->sgtids = _uint32_array_to_str(job->node_tasks, gtids);
	xfree(gtids);

	job->envtp->jobid = job->jobid;
	job->envtp->stepid = job->stepid;
	job->envtp->nodeid = job->nodeid;
	job->envtp->cpus_on_node = job->cpus;
	job->envtp->procid = task->gtid;
	job->envtp->localid = task->id;
	job->envtp->task_pid = getpid();
	job->envtp->distribution = job->task_dist;
	job->envtp->cpu_bind = xstrdup(job->cpu_bind);
	job->envtp->cpu_bind_type = job->cpu_bind_type;
	job->envtp->cpu_freq = job->cpu_freq;
	job->envtp->mem_bind = xstrdup(job->mem_bind);
	job->envtp->mem_bind_type = job->mem_bind_type;
	/* NOTE(review): overrides the task_dist value stored above */
	job->envtp->distribution = -1;
	job->envtp->ckpt_dir = xstrdup(job->ckpt_dir);
	job->envtp->batch_flag = job->batch;

	/* Modify copy of job's environment. Do not alter in place or
	 * concurrent searches of the environment can generate invalid memory
	 * references. */
	job->envtp->env = env_array_copy((const char **) job->env);
	setup_env(job->envtp, false);
	setenvf(&job->envtp->env, "SLURMD_NODENAME", "%s", conf->node_name);
	tmp_env = job->env;
	job->env = job->envtp->env;
	env_array_free(tmp_env);
	job->envtp->env = NULL;

	xfree(job->envtp->task_count);

	if (task->argv[0] && *task->argv[0] != '/') {
		/*
		 * Normally the client (srun) expands the command name
		 * to a fully qualified path, but in --multi-prog mode it
		 * is left up to the server to search the PATH for the
		 * executable.
		 */
		task->argv[0] = _build_path(task->argv[0], job->env);
	}

	if (!job->batch) {
		if (interconnect_attach(job->switch_job, &job->env,
					job->nodeid, (uint32_t) i,
					job->nnodes, job->ntasks,
					task->gtid) < 0) {
			error("Unable to attach to interconnect: %m");
			log_fini();
			exit(1);
		}

		if (_setup_mpi(job, i) != SLURM_SUCCESS) {
			error("Unable to configure MPI plugin: %m");
			log_fini();
			exit(1);
		}
	}

	/* task-specific pre-launch activities */

	if (spank_user_task (job, i) < 0) {
		error ("Failed to invoke task plugin stack");
		exit (1);
	}

	/* task plugin hook */
	if (pre_launch(job)) {
		error ("Failed task affinity setup");
		exit (1);
	}

	if (conf->task_prolog) {
		char *my_prolog;

		/* copy under the lock so the path stays valid while used */
		slurm_mutex_lock(&conf->config_mutex);
		my_prolog = xstrdup(conf->task_prolog);
		slurm_mutex_unlock(&conf->config_mutex);
		_run_script_and_set_env("slurm task_prolog", my_prolog, job);
		xfree(my_prolog);
	}
	if (job->task_prolog) {
		_run_script_and_set_env("user task_prolog",
					job->task_prolog, job);
	}

	if (!job->batch)
		pdebug_stop_current(job);
	if (job->env == NULL) {
		debug("job->env is NULL");
		job->env = (char **)xmalloc(sizeof(char *));
		job->env[0] = (char *)NULL;
	}

	if (job->restart_dir) {
		info("restart from %s", job->restart_dir);
		/* no return on success */
		checkpoint_restart_task(job, job->restart_dir, task->gtid);
		error("Restart task failed: %m");
		exit(errno);
	}

	if (task->argv[0] == NULL) {
		error("No executable program specified for this task");
		exit(2);
	}

	/* Do this last so you don't worry too much about the users
	   limits including the slurmstepd in with it.
	*/
	if (set_user_limits(job) < 0) {
		debug("Unable to set user limits");
		log_fini();
		exit(5);
	}

	execve(task->argv[0], task->argv, job->env);
	/* open()/read()/error() below may overwrite errno: keep execve's */
	saved_errno = errno;

	/*
	 * print error message and clean up if execve() returns:
	 */
	if ((saved_errno == ENOENT) &&
	    ((fd = open(task->argv[0], O_RDONLY)) >= 0)) {
		char buf[256], *eol;
		int sz;

		/* leave room for the terminator added below */
		sz = read(fd, buf, sizeof(buf) - 1);
		close(fd);
		if ((sz >= 3) && (strncmp(buf, "#!", 2) == 0)) {
			/* read() does not NUL-terminate; do it before strchr */
			buf[sz] = '\0';
			eol = strchr(buf, '\n');
			if (eol)
				eol[0] = '\0';
			errno = saved_errno;
			error("execve(): bad interpreter(%s): %m", buf+2);
			exit(saved_errno);
		}
	}
	errno = saved_errno;
	error("execve(): %s: %m", task->argv[0]);
	exit(saved_errno);
}
/*
 * Process TaskProlog output
 * "export NAME=value" adds environment variables
 * "unset  NAME"       clears an environment variable
 * "print  <whatever>" writes that to the job's stdout
 *
 * buf IN: NUL-terminated script output, one directive per line; it is
 *	modified temporarily while parsing and restored afterwards
 * env IN/OUT: environment array updated by export/unset
 *
 * Lines that match no directive, or that are malformed (no '=' on an
 * export line, empty variable name), are skipped.
 */
static void _proc_stdout(char *buf, char ***env)
{
	bool end_buf = false;
	int len;
	char *buf_ptr, *name_ptr, *val_ptr;
	char *end_line, *equal_ptr, *name_end;
	char saved_name_end, saved_eol;

	buf_ptr = buf;
	while (buf_ptr[0]) {
		end_line = strchr(buf_ptr, '\n');
		if (!end_line) {
			/* last line has no newline: stop at the terminator */
			end_line = buf_ptr + strlen(buf_ptr);
			end_buf = true;
		}
		if (!strncmp(buf_ptr, "print ", 6)) {
			buf_ptr += 6;
			/* isspace() matches '\n': never skip past this line */
			while ((buf_ptr < end_line) &&
			       isspace((unsigned char) buf_ptr[0]))
				buf_ptr++;
			/* include the newline, but never the NUL terminator */
			len = end_line - buf_ptr;
			if (!end_buf)
				len++;
			if (len > 0) {
				safe_write(1, buf_ptr, len);
			}
		} else if (!strncmp(buf_ptr, "export ",7)) {
			name_ptr = buf_ptr + 7;
			while ((name_ptr < end_line) &&
			       isspace((unsigned char) name_ptr[0]))
				name_ptr++;
			equal_ptr = strchr(name_ptr, '=');
			if (!equal_ptr || (equal_ptr > end_line))
				goto rwfail;
			val_ptr = equal_ptr + 1;
			/* trim blanks before '=', but not past the name */
			name_end = equal_ptr;
			while ((name_end > name_ptr) &&
			       isspace((unsigned char) name_end[-1]))
				name_end--;
			if (name_end == name_ptr)
				goto rwfail;	/* empty variable name */
			/* split name and value in place, restore afterwards */
			saved_name_end = name_end[0];
			saved_eol = end_line[0];
			name_end[0] = '\0';
			end_line[0] = '\0';
			debug("export name:%s:val:%s:", name_ptr, val_ptr);
			if (setenvf(env, name_ptr, "%s", val_ptr)) {
				error("Unable to set %s environment variable",
				      name_ptr);
			}
			name_end[0] = saved_name_end;
			end_line[0] = saved_eol;
		} else if (!strncmp(buf_ptr, "unset ", 6)) {
			name_ptr = buf_ptr + 6;
			while ((name_ptr < end_line) &&
			       isspace((unsigned char) name_ptr[0]))
				name_ptr++;
			if (name_ptr == end_line)
				goto rwfail;	/* no variable name given */
			/* trim trailing blanks from the name */
			name_end = end_line;
			while ((name_end > name_ptr) &&
			       isspace((unsigned char) name_end[-1]))
				name_end--;
			saved_name_end = name_end[0];
			name_end[0] = '\0';
			debug(" unset name:%s:", name_ptr);
			unsetenvp(*env, name_ptr);
			name_end[0] = saved_name_end;
		}

rwfail:		 /* process rest of script output */
		if (end_buf)
			break;
		buf_ptr = end_line + 1;
	}
	return;
}
/*
 * Process TaskProlog output
 * "export NAME=value" adds environment variables
 * "unset  NAME"       clears an environment variable
 * "print  <whatever>" writes that to the job's stdout
 *
 * buf IN: NUL-terminated script output, one directive per line; it is
 *	modified temporarily while parsing and restored afterwards
 * job IN/OUT: step record; job->env is updated by export/unset, and an
 *	export of SLURM_PROLOG_CPU_MASK also replaces job->cpu_bind and
 *	re-runs task_g_pre_launch() (exiting the process if that fails)
 *
 * Lines that match no directive, or that are malformed (no '=' on an
 * export line, empty variable name), are skipped.
 */
static void _proc_stdout(char *buf, stepd_step_rec_t *job)
{
	bool end_buf = false;
	int len;
	char *buf_ptr, *name_ptr, *val_ptr;
	char *end_line, *equal_ptr, *name_end;
	char saved_name_end, saved_eol;
	char ***env = &job->env;

	buf_ptr = buf;
	while (buf_ptr[0]) {
		end_line = strchr(buf_ptr, '\n');
		if (!end_line) {
			/* last line has no newline: stop at the terminator */
			end_line = buf_ptr + strlen(buf_ptr);
			end_buf = true;
		}
		if (!xstrncmp(buf_ptr, "print ", 6)) {
			buf_ptr += 6;
			/* isspace() matches '\n': never skip past this line */
			while ((buf_ptr < end_line) &&
			       isspace((unsigned char) buf_ptr[0]))
				buf_ptr++;
			/* include the newline, but never the NUL terminator */
			len = end_line - buf_ptr;
			if (!end_buf)
				len++;
			if (len > 0) {
				safe_write(1, buf_ptr, len);
			}
		} else if (!xstrncmp(buf_ptr, "export ",7)) {
			name_ptr = buf_ptr + 7;
			while ((name_ptr < end_line) &&
			       isspace((unsigned char) name_ptr[0]))
				name_ptr++;
			equal_ptr = strchr(name_ptr, '=');
			if (!equal_ptr || (equal_ptr > end_line))
				goto rwfail;
			val_ptr = equal_ptr + 1;
			/* trim blanks before '=', but not past the name */
			name_end = equal_ptr;
			while ((name_end > name_ptr) &&
			       isspace((unsigned char) name_end[-1]))
				name_end--;
			if (name_end == name_ptr)
				goto rwfail;	/* empty variable name */
			/* split name and value in place, restore afterwards */
			saved_name_end = name_end[0];
			saved_eol = end_line[0];
			name_end[0] = '\0';
			end_line[0] = '\0';
			if (!xstrcmp(name_ptr, "SLURM_PROLOG_CPU_MASK")) {
				job->cpu_bind_type = CPU_BIND_MASK;
				xfree(job->cpu_bind);
				job->cpu_bind = xstrdup(val_ptr);
				if (task_g_pre_launch(job)) {
					error("Failed SLURM_PROLOG_CPU_MASK "
					      "setup");
					exit(1);
				}
			}
			debug("export name:%s:val:%s:", name_ptr, val_ptr);
			if (setenvf(env, name_ptr, "%s", val_ptr)) {
				error("Unable to set %s environment variable",
				      name_ptr);
			}
			name_end[0] = saved_name_end;
			end_line[0] = saved_eol;
		} else if (!xstrncmp(buf_ptr, "unset ", 6)) {
			name_ptr = buf_ptr + 6;
			while ((name_ptr < end_line) &&
			       isspace((unsigned char) name_ptr[0]))
				name_ptr++;
			if (name_ptr == end_line)
				goto rwfail;	/* no variable name given */
			/* trim trailing blanks from the name */
			name_end = end_line;
			while ((name_end > name_ptr) &&
			       isspace((unsigned char) name_end[-1]))
				name_end--;
			saved_name_end = name_end[0];
			name_end[0] = '\0';
			debug(" unset name:%s:", name_ptr);
			unsetenvp(*env, name_ptr);
			name_end[0] = saved_name_end;
		}

rwfail:		 /* process rest of script output */
		if (end_buf)
			break;
		buf_ptr = end_line + 1;
	}
	return;
}
/*
 * Current process is running as the user when this is called.
 *
 * Build the task's final environment, run the pre-launch hooks and
 * task prologs, then execve() the task. Does not return: on any
 * failure the process exits.
 *
 * job           IN/OUT: step record; job->env is replaced by the
 *	task's environment
 * local_proc_id IN: index of the task in job->task[]
 */
extern void exec_task(stepd_step_rec_t *job, int local_proc_id)
{
	uint32_t *gtids;		/* pointer to array of ranks */
	int fd, j;
	stepd_step_task_info_t *task = job->task[local_proc_id];
	char **tmp_env;
	int saved_errno;
	uint32_t node_offset = 0, task_offset = 0;

	/* NO_VAL means "no offset"; otherwise shift node/task ids */
	if (job->node_offset != NO_VAL)
		node_offset = job->node_offset;
	if (job->pack_task_offset != NO_VAL)
		task_offset = job->pack_task_offset;

	gtids = xmalloc(job->node_tasks * sizeof(uint32_t));
	for (j = 0; j < job->node_tasks; j++)
		gtids[j] = job->task[j]->gtid + task_offset;
	job->envtp->sgtids = _uint32_array_to_str(job->node_tasks, gtids);
	xfree(gtids);

	if (job->pack_jobid != NO_VAL)
		job->envtp->jobid = job->pack_jobid;
	else
		job->envtp->jobid = job->jobid;
	job->envtp->stepid = job->stepid;
	job->envtp->nodeid = job->nodeid + node_offset;
	job->envtp->cpus_on_node = job->cpus;
	job->envtp->procid = task->gtid + task_offset;
	job->envtp->localid = task->id;
	job->envtp->task_pid = getpid();
	job->envtp->distribution = job->task_dist;
	job->envtp->cpu_bind = xstrdup(job->cpu_bind);
	job->envtp->cpu_bind_type = job->cpu_bind_type;
	job->envtp->cpu_freq_min = job->cpu_freq_min;
	job->envtp->cpu_freq_max = job->cpu_freq_max;
	job->envtp->cpu_freq_gov = job->cpu_freq_gov;
	job->envtp->mem_bind = xstrdup(job->mem_bind);
	job->envtp->mem_bind_type = job->mem_bind_type;
	/* NOTE(review): overrides the task_dist value stored above */
	job->envtp->distribution = -1;
	job->envtp->ckpt_dir = xstrdup(job->ckpt_dir);
	job->envtp->batch_flag = job->batch;
	job->envtp->uid = job->uid;
	job->envtp->user_name = xstrdup(job->user_name);

	/*
	 * Modify copy of job's environment. Do not alter in place or
	 * concurrent searches of the environment can generate invalid memory
	 * references.
	 */
	job->envtp->env = env_array_copy((const char **) job->env);
	setup_env(job->envtp, false);
	setenvf(&job->envtp->env, "SLURM_JOB_GID", "%d", job->gid);
	setenvf(&job->envtp->env, "SLURMD_NODENAME", "%s", conf->node_name);
	if (job->tres_bind) {
		setenvf(&job->envtp->env, "SLURMD_TRES_BIND", "%s",
			job->tres_bind);
	}
	if (job->tres_freq) {
		setenvf(&job->envtp->env, "SLURMD_TRES_FREQ", "%s",
			job->tres_freq);
	}
	tmp_env = job->env;
	job->env = job->envtp->env;
	env_array_free(tmp_env);
	job->envtp->env = NULL;

	xfree(job->envtp->task_count);

	if (task->argv[0] && *task->argv[0] != '/') {
		/*
		 * Normally the client (srun) expands the command name
		 * to a fully qualified path, but in --multi-prog mode it
		 * is left up to the server to search the PATH for the
		 * executable.
		 */
		task->argv[0] = _build_path(task->argv[0], job->env, NULL);
	}

	if (!job->batch && (job->stepid != SLURM_EXTERN_CONT)) {
		if (switch_g_job_attach(job->switch_job, &job->env,
					job->nodeid, (uint32_t) local_proc_id,
					job->nnodes, job->ntasks,
					task->gtid) < 0) {
			error("Unable to attach to interconnect: %m");
			log_fini();
			exit(1);
		}

		if (_setup_mpi(job, local_proc_id) != SLURM_SUCCESS) {
			error("Unable to configure MPI plugin: %m");
			log_fini();
			exit(1);
		}
	}

	/* task-specific pre-launch activities */

	/* task plugin hook */
	if (task_g_pre_launch(job)) {
		error("Failed to invoke task plugins: task_p_pre_launch error");
		exit(1);
	}
	if (!job->batch &&
	    (job->accel_bind_type || job->tres_bind || job->tres_freq)) {
		/*
		 * Modify copy of job's environment. Do not alter in place or
		 * concurrent searches of the environment can generate invalid
		 * memory references.
		 *
		 * Also sets GRES frequency as needed.
		 */
		job->envtp->env = env_array_copy((const char **) job->env);
		gres_plugin_step_set_env(&job->envtp->env, job->step_gres_list,
					 job->accel_bind_type, job->tres_bind,
					 job->tres_freq, local_proc_id);
		tmp_env = job->env;
		job->env = job->envtp->env;
		env_array_free(tmp_env);
		/* as in the first swap: keep a single reference to the array */
		job->envtp->env = NULL;
	}

	if (spank_user_task(job, local_proc_id) < 0) {
		error("Failed to invoke spank plugin stack");
		exit(1);
	}

	if (conf->task_prolog) {
		char *my_prolog;

		/* copy under the lock so the path stays valid while used */
		slurm_mutex_lock(&conf->config_mutex);
		my_prolog = xstrdup(conf->task_prolog);
		slurm_mutex_unlock(&conf->config_mutex);
		_run_script_and_set_env("slurm task_prolog", my_prolog, job);
		xfree(my_prolog);
	}
	if (job->task_prolog) {
		_run_script_and_set_env("user task_prolog",
					job->task_prolog, job);
	}

	/*
	 * Set TMPDIR after running prolog scripts, since TMPDIR
	 * might be set or changed in one of the prolog scripts.
	 */
	if (local_proc_id == 0)
		_make_tmpdir(job);

	if (!job->batch)
		pdebug_stop_current(job);
	if (job->env == NULL) {
		debug("job->env is NULL");
		job->env = (char **)xmalloc(sizeof(char *));
		job->env[0] = (char *)NULL;
	}

	if (job->restart_dir) {
		info("restart from %s", job->restart_dir);
		/* no return on success */
		checkpoint_restart_task(job, job->restart_dir, task->gtid);
		error("Restart task failed: %m");
		exit(errno);
	}

	if (task->argv[0] == NULL) {
		error("No executable program specified for this task");
		exit(2);
	}

	/* Do this last so you don't worry too much about the users
	   limits including the slurmstepd in with it.
	*/
	if (set_user_limits(job) < 0) {
		debug("Unable to set user limits");
		log_fini();
		exit(5);
	}

	execve(task->argv[0], task->argv, job->env);
	/* open()/read()/error() below may overwrite errno: keep execve's */
	saved_errno = errno;

	/*
	 * print error message and clean up if execve() returns:
	 */
	if ((saved_errno == ENOENT) &&
	    ((fd = open(task->argv[0], O_RDONLY)) >= 0)) {
		char buf[256], *eol;
		int sz;

		/* leave room for the terminator added below */
		sz = read(fd, buf, sizeof(buf) - 1);
		close(fd);
		if ((sz >= 3) && (xstrncmp(buf, "#!", 2) == 0)) {
			/*
			 * Terminate right after the bytes actually read so
			 * uninitialised bytes are never scanned or logged.
			 */
			buf[sz] = '\0';
			eol = strchr(buf, '\n');
			if (eol)
				eol[0] = '\0';
			slurm_seterrno(saved_errno);
			error("execve(): bad interpreter(%s): %m", buf+2);
			exit(saved_errno);
		}
	}
	slurm_seterrno(saved_errno);
	error("execve(): %s: %m", task->argv[0]);
	exit(saved_errno);
}
int main (int argc, char *argv[]) { struct context ctx; struct pmi_simple_ops ops = { .kvs_put = s_kvs_put, .kvs_get = s_kvs_get, .barrier_enter = s_barrier_enter, .response_send = s_send_response, }; struct pmi_simple_client *cli; int spawned = -1, initialized = -1; int rank = -1, size = -1; int universe_size = -1; int name_len = -1, key_len = -1, val_len = -1; char *name = NULL, *val = NULL, *val2 = NULL, *val3 = NULL; char *key = NULL; int rc; plan (NO_PLAN); if (!(ctx.kvs = zhash_new ())) oom (); ctx.size = 1; ok (socketpair (PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ctx.fds) == 0, "socketpair returned client,server file descriptors"); ctx.pmi = pmi_simple_server_create (&ops, 42, ctx.size, ctx.size, "bleepgorp", &ctx); ok (ctx.pmi != NULL, "created simple pmi server context"); ctx.buflen = pmi_simple_server_get_maxrequest (ctx.pmi); ctx.buf = xzmalloc (ctx.buflen); ok (pthread_create (&ctx.t, NULL, server_thread, &ctx) == 0, "pthread_create successfully started server"); setenvf ("PMI_FD", 1, "%d", ctx.fds[0]); setenvf ("PMI_RANK", 1, "%d", 0); setenvf ("PMI_SIZE", 1, "%d", ctx.size); ok ((cli = pmi_simple_client_create ()) != NULL, "pmi_simple_client_create OK"); ok (pmi_simple_client_initialized (cli, &initialized) == PMI_SUCCESS && initialized == 0, "pmi_simple_client_initialized OK, initialized=0"); ok (pmi_simple_client_init (cli, &spawned) == PMI_SUCCESS && spawned == 0, "pmi_simple_client_init OK, spawned=0"); ok (pmi_simple_client_initialized (cli, &initialized) == PMI_SUCCESS && initialized == 1, "pmi_simple_client_initialized OK, initialized=1"); /* retrieve basic params */ ok (pmi_simple_client_get_size (cli, &size) == PMI_SUCCESS && size == 1, "pmi_simple_client_get_size OK, size=%d", size); ok (pmi_simple_client_get_rank (cli, &rank) == PMI_SUCCESS && rank == 0, "pmi_simple_client_get_rank OK, rank=%d", rank); ok (pmi_simple_client_get_universe_size (cli, &universe_size) == PMI_SUCCESS && universe_size == size, "pmi_simple_client_get_universe_size 
OK, universe_size=%d", universe_size); ok (pmi_simple_client_kvs_get_name_length_max (cli, &name_len) == PMI_SUCCESS && name_len > 0, "pmi_simple_client_kvs_get_name_length_max OK, name_len=%d", name_len); ok (pmi_simple_client_kvs_get_key_length_max (cli, &key_len) == PMI_SUCCESS && key_len > 0, "pmi_simple_client_kvs_get_key_length_max OK, key_len=%d", key_len); ok (pmi_simple_client_kvs_get_value_length_max (cli, &val_len) == PMI_SUCCESS && val_len > 0, "pmi_simple_client_kvs_get_value_length_max OK, val_len=%d", val_len); name = xzmalloc (name_len); ok (pmi_simple_client_kvs_get_my_name (cli, name, name_len) == PMI_SUCCESS && strlen (name) > 0, "pmi_simple_client_kvs_get_my_name OK, name=%s", name); /* put foo=bar / commit / barier / get foo */ ok (pmi_simple_client_kvs_put (cli, name, "foo", "bar") == PMI_SUCCESS, "pmi_simple_client_kvs_put foo=bar OK"); ok (pmi_simple_client_kvs_commit (cli, name) == PMI_SUCCESS, "pmi_simple_client_kvs_commit OK"); ok (pmi_simple_client_barrier (cli) == PMI_SUCCESS, "pmi_simple_client_barrier OK"); val = xzmalloc (val_len); ok (pmi_simple_client_kvs_get (cli, name, "foo", val, val_len) == PMI_SUCCESS && !strcmp (val, "bar"), "pmi_simple_client_kvs_get foo OK, val=%s", val); /* put long=... / get long */ val2 = xzmalloc (val_len); memset (val2, 'x', val_len - 1); ok (pmi_simple_client_kvs_put (cli, name, "long", val2) == PMI_SUCCESS, "pmi_simple_client_kvs_put long=xxx... 
OK"); memset (val, 'y', val_len); /* not null terminated */ ok (pmi_simple_client_kvs_get (cli, name, "long", val, val_len) == PMI_SUCCESS && strnlen (val2, val_len) < val_len && strcmp (val, val2) == 0, "pmi_simple_client_kvs_get long OK, val=xxx..."); /* put: value too long */ val3 = xzmalloc (val_len + 1); memset (val3, 'y', val_len); rc = pmi_simple_client_kvs_put (cli, name, "toolong", val3); ok (rc == PMI_ERR_INVALID_VAL_LENGTH, "pmi_simple_client_kvs_put val too long fails"); /* put: key too long */ key = xzmalloc (key_len + 1); memset (key, 'z', key_len); rc = pmi_simple_client_kvs_put (cli, name, key, "abc"); ok (rc == PMI_ERR_INVALID_KEY_LENGTH, "pmi_simple_client_kvs_put key too long fails"); /* get: key too long */ rc = pmi_simple_client_kvs_get (cli, name, key, val, val_len); ok (rc == PMI_ERR_INVALID_KEY_LENGTH, "pmi_simple_client_kvs_get key too long fails"); /* get: no exist */ rc = pmi_simple_client_kvs_get (cli, name, "noexist", val, val_len); ok (rc == PMI_ERR_INVALID_KEY, "pmi_simple_client_kvs_get unknown key fails"); /* barrier: entry failure */ rig_barrier_entry_failure = 1; ok (pmi_simple_client_barrier (cli) == PMI_FAIL, "pmi_simple_client_barrier with entry function failure fails"); rig_barrier_entry_failure = 0; rig_barrier_exit_failure = 1; ok (pmi_simple_client_barrier (cli) == PMI_FAIL, "pmi_simple_client_barrier with exit function failure fails"); rig_barrier_exit_failure = 0; ok (pmi_simple_client_barrier (cli) == PMI_SUCCESS, "pmi_simple_client_barrier OK (rigged errors cleared)"); /* finalize */ ok (pmi_simple_client_finalize (cli) == PMI_SUCCESS, "pmi_simple_client_finalize OK"); ok (pthread_join (ctx.t, NULL) == 0, "pthread join successfully reaped server"); free (name); free (val); free (val2); free (val3); free (key); pmi_simple_client_destroy (cli); if (ctx.pmi) pmi_simple_server_destroy (ctx.pmi); close (ctx.fds[0]); close (ctx.fds[1]); zhash_destroy (&ctx.kvs); done_testing (); return 0; }
/*
 * Parse a --hint argument and convert it into the implied settings.
 *
 * arg             IN: hint string; tokens are separated by ',' or ';'
 * min_sockets     OUT: set to NO_VAL by "compute_bound"
 * min_cores       OUT: set by "compute_bound" / "memory_bound"
 * min_threads     OUT: set by every recognised hint except "help"
 * ntasks_per_core IN/OUT: set to INFINITE by "multithread" if NO_VAL
 * cpu_bind_type   IN/OUT: binding flags to update; may be NULL, in
 *	which case the hint is exported as SLURM_HINT instead
 *
 * RET false when every token was recognised and applied;
 *     true when arg is NULL, "help" was requested (help text is
 *     printed), or a token is not recognised (an error is logged).
 *     NOTE(review): true therefore appears to mean "caller should stop"
 *     rather than "valid" - confirm against callers.
 */
bool verify_hint(const char *arg, int *min_sockets, int *min_cores,
		 int *min_threads, int *ntasks_per_core,
		 cpu_bind_type_t *cpu_bind_type)
{
	char *buf, *p, *tok;

	if (!arg) {
		return true;
	}

	buf = xstrdup(arg);
	p = buf;
	/* change all ',' delimiters not followed by a digit to ';'  */
	/* simplifies parsing tokens while keeping map/mask together */
	while (p[0] != '\0') {
		if ((p[0] == ',') && (!isdigit((unsigned char) p[1])))
			p[0] = ';';
		p++;
	}

	p = buf;
	while ((tok = strsep(&p, ";"))) {
		if (xstrcasecmp(tok, "help") == 0) {
			printf(
"Application hint options:\n"
" --hint= Bind tasks according to application hints\n"
" compute_bound use all cores in each socket\n"
" memory_bound use only one core in each socket\n"
" [no]multithread [don't] use extra threads with in-core multi-threading\n"
" help show this help message\n");
			xfree(buf);	/* do not leak the working copy */
			return true;
		} else if (xstrcasecmp(tok, "compute_bound") == 0) {
			*min_sockets = NO_VAL;
			*min_cores   = NO_VAL;
			*min_threads = 1;
			if (cpu_bind_type)
				*cpu_bind_type |= CPU_BIND_TO_CORES;
		} else if (xstrcasecmp(tok, "memory_bound") == 0) {
			*min_cores   = 1;
			*min_threads = 1;
			if (cpu_bind_type)
				*cpu_bind_type |= CPU_BIND_TO_CORES;
		} else if (xstrcasecmp(tok, "multithread") == 0) {
			*min_threads = NO_VAL;
			if (cpu_bind_type) {
				*cpu_bind_type |= CPU_BIND_TO_THREADS;
				*cpu_bind_type &=
					(~CPU_BIND_ONE_THREAD_PER_CORE);
			}
			if (*ntasks_per_core == NO_VAL)
				*ntasks_per_core = INFINITE;
		} else if (xstrcasecmp(tok, "nomultithread") == 0) {
			*min_threads = 1;
			if (cpu_bind_type) {
				*cpu_bind_type |= CPU_BIND_TO_THREADS;
				*cpu_bind_type |= CPU_BIND_ONE_THREAD_PER_CORE;
			}
		} else {
			error("unrecognized --hint argument \"%s\", "
			      "see --hint=help", tok);
			xfree(buf);
			return true;
		}
	}

	if (!cpu_bind_type)
		setenvf(NULL, "SLURM_HINT", "%s", arg);

	xfree(buf);
	return false;
}
int _mpi_init (char *mpi_type) { int retval = SLURM_SUCCESS; char *plugin_type = "mpi"; char *type = NULL; int got_default = 0; if (init_run && g_context) return retval; slurm_mutex_lock( &context_lock ); if ( g_context ) goto done; if (mpi_type == NULL) { mpi_type = slurm_get_mpi_default(); got_default = 1; } if (mpi_type == NULL) { error("No MPI default set."); retval = SLURM_ERROR; goto done; } if (!strcmp(mpi_type, "list")) { char *plugin_dir; plugrack_t mpi_rack; mpi_rack = plugrack_create(); if (!mpi_rack) { error("Unable to create a plugin manager"); exit(0); } plugrack_set_major_type(mpi_rack, "mpi"); plugin_dir = slurm_get_plugin_dir(); plugrack_read_dir(mpi_rack, plugin_dir); plugrack_print_all_plugin(mpi_rack); exit(0); } setenvf(NULL, "SLURM_MPI_TYPE", "%s", mpi_type); type = xstrdup_printf("mpi/%s", mpi_type); g_context = plugin_context_create( plugin_type, type, (void **)&ops, syms, sizeof(syms)); if (!g_context) { error("cannot create %s context for %s", plugin_type, type); retval = SLURM_ERROR; goto done; } init_run = true; done: xfree(type); if (got_default) xfree(mpi_type); slurm_mutex_unlock( &context_lock ); return retval; }
/*
 * Run a task prolog script.  Also read the stdout of the script and set
 * 	environment variables in the task's environment as specified
 *	in the script's standard output.
 * name IN: class of program ("system prolog", "user prolog", etc.)
 * path IN: pathname of program to run
 * job IN/OUT: pointer to associated job, can update job->env
 *	if prolog
 * RET -1 if the script is not accessible or pipe()/fork() fails;
 *     0 if path is NULL or empty, or if waitpid() fails;
 *     otherwise the raw wait status reported by waitpid().
 */
static int
_run_script_and_set_env(const char *name, const char *path,
			stepd_step_rec_t *job)
{
	int status, rc;
	pid_t cpid;
	int pfd[2];
	char buf[4096];
	FILE *f;

	xassert(job->env);
	if (path == NULL || path[0] == '\0')
		return 0;

	debug("[job %u] attempting to run %s [%s]", job->jobid, name, path);

	if (access(path, R_OK | X_OK) < 0) {
		error("Could not run %s [%s]: %m", name, path);
		return -1;
	}
	if (pipe(pfd) < 0) {
		error("executing %s: pipe: %m", name);
		return -1;
	}
	if ((cpid = fork()) < 0) {
		error("executing %s: fork: %m", name);
		/* no child will use the pipe: release both ends */
		close(pfd[0]);
		close(pfd[1]);
		return -1;
	}
	if (cpid == 0) {
		char *argv[2];

		setenvf(&job->env, "SLURM_SCRIPT_CONTEXT", "prolog_task");

		argv[0] = xstrdup(path);
		argv[1] = NULL;
		/* script's stdout goes to the pipe read by the parent */
		if (dup2(pfd[1], 1) == -1)
			error("couldn't do the dup: %m");
		close(2);
		close(0);
		close(pfd[0]);
		close(pfd[1]);
		/* own process group, so killpg() below reaches descendants */
		setpgid(0, 0);
		execve(path, argv, job->env);
		error("execve(%s): %m", path);
		exit(127);
	}

	close(pfd[1]);
	f = fdopen(pfd[0], "r");
	if (f == NULL) {
		error("Cannot open pipe device: %m");
		log_fini();
		exit(1);
	}
	/*
	 * Stop on the first NULL from fgets(): that covers both EOF and a
	 * read error. Testing only feof() would loop forever on an error.
	 */
	while (fgets(buf, sizeof(buf) - 1, f) != NULL)
		_proc_stdout(buf, job);
	fclose(f);

	while (1) {
		rc = waitpid(cpid, &status, 0);
		if (rc < 0) {
			if (errno == EINTR)
				continue;
			error("waidpid: %m");
			return 0;
		} else  {
			killpg(cpid, SIGKILL);  /* kill children too */
			return status;
		}
	}

	/* NOTREACHED */
}
int setup_env(env_t *env, bool preserve_env) { int rc = SLURM_SUCCESS; char *dist = NULL, *lllp_dist = NULL; char addrbuf[INET_ADDRSTRLEN]; uint32_t cluster_flags = slurmdb_setup_cluster_flags(); if (env == NULL) return SLURM_ERROR; if (env->task_pid && setenvf(&env->env, "SLURM_TASK_PID", "%d", (int)env->task_pid)) { error("Unable to set SLURM_TASK_PID environment variable"); rc = SLURM_FAILURE; } if (!preserve_env && env->ntasks) { if(setenvf(&env->env, "SLURM_NTASKS", "%d", env->ntasks)) { error("Unable to set SLURM_NTASKS " "environment variable"); rc = SLURM_FAILURE; } if(setenvf(&env->env, "SLURM_NPROCS", "%d", env->ntasks)) { error("Unable to set SLURM_NPROCS " "environment variable"); rc = SLURM_FAILURE; } } if (env->cpus_per_task && setenvf(&env->env, "SLURM_CPUS_PER_TASK", "%d", env->cpus_per_task) ) { error("Unable to set SLURM_CPUS_PER_TASK"); rc = SLURM_FAILURE; } if (env->ntasks_per_node && setenvf(&env->env, "SLURM_NTASKS_PER_NODE", "%d", env->ntasks_per_node) ) { error("Unable to set SLURM_NTASKS_PER_NODE"); rc = SLURM_FAILURE; } if (env->ntasks_per_socket && setenvf(&env->env, "SLURM_NTASKS_PER_SOCKET", "%d", env->ntasks_per_socket) ) { error("Unable to set SLURM_NTASKS_PER_SOCKET"); rc = SLURM_FAILURE; } if (env->ntasks_per_core && setenvf(&env->env, "SLURM_NTASKS_PER_CORE", "%d", env->ntasks_per_core) ) { error("Unable to set SLURM_NTASKS_PER_CORE"); rc = SLURM_FAILURE; } if (env->cpus_on_node && setenvf(&env->env, "SLURM_CPUS_ON_NODE", "%d", env->cpus_on_node) ) { error("Unable to set SLURM_CPUS_ON_NODE"); rc = SLURM_FAILURE; } _set_distribution(env->distribution, &dist, &lllp_dist); if(dist) if (setenvf(&env->env, "SLURM_DISTRIBUTION", "%s", dist)) { error("Can't set SLURM_DISTRIBUTION env variable"); rc = SLURM_FAILURE; } if(env->distribution == SLURM_DIST_PLANE) if (setenvf(&env->env, "SLURM_DIST_PLANESIZE", "%u", env->plane_size)) { error("Can't set SLURM_DIST_PLANESIZE " "env variable"); rc = SLURM_FAILURE; } if(lllp_dist) if 
(setenvf(&env->env, "SLURM_DIST_LLLP", "%s", lllp_dist)) { error("Can't set SLURM_DIST_LLLP env variable"); rc = SLURM_FAILURE; } if (env->cpu_bind_type) { char *str_verbose, *str_bind_type, *str_bind_list; char *str_bind; int len; if (env->batch_flag) { unsetenvp(env->env, "SBATCH_CPU_BIND_VERBOSE"); unsetenvp(env->env, "SBATCH_CPU_BIND_TYPE"); unsetenvp(env->env, "SBATCH_CPU_BIND_LIST"); unsetenvp(env->env, "SBATCH_CPU_BIND"); } else { unsetenvp(env->env, "SLURM_CPU_BIND_VERBOSE"); unsetenvp(env->env, "SLURM_CPU_BIND_TYPE"); unsetenvp(env->env, "SLURM_CPU_BIND_LIST"); unsetenvp(env->env, "SLURM_CPU_BIND"); } str_verbose = xstrdup (""); if (env->cpu_bind_type & CPU_BIND_VERBOSE) { xstrcat(str_verbose, "verbose"); } else { xstrcat(str_verbose, "quiet"); } str_bind_type = xstrdup (""); if (env->cpu_bind_type & CPU_BIND_TO_THREADS) { xstrcat(str_bind_type, "threads,"); } else if (env->cpu_bind_type & CPU_BIND_TO_CORES) { xstrcat(str_bind_type, "cores,"); } else if (env->cpu_bind_type & CPU_BIND_TO_SOCKETS) { xstrcat(str_bind_type, "sockets,"); } else if (env->cpu_bind_type & CPU_BIND_TO_LDOMS) { xstrcat(str_bind_type, "ldoms,"); } if (env->cpu_bind_type & CPU_BIND_NONE) { xstrcat(str_bind_type, "none"); } else if (env->cpu_bind_type & CPU_BIND_RANK) { xstrcat(str_bind_type, "rank"); } else if (env->cpu_bind_type & CPU_BIND_MAP) { xstrcat(str_bind_type, "map_cpu:"); } else if (env->cpu_bind_type & CPU_BIND_MASK) { xstrcat(str_bind_type, "mask_cpu:"); } else if (env->cpu_bind_type & CPU_BIND_LDRANK) { xstrcat(str_bind_type, "rank_ldom"); } else if (env->cpu_bind_type & CPU_BIND_LDMAP) { xstrcat(str_bind_type, "map_ldom:"); } else if (env->cpu_bind_type & CPU_BIND_LDMASK) { xstrcat(str_bind_type, "mask_ldom:"); } len = strlen(str_bind_type); if (len) { /* remove a possible trailing ',' */ if (str_bind_type[len-1] == ',') { str_bind_type[len-1] = '\0'; } } str_bind_list = xstrdup (""); if (env->cpu_bind) { xstrcat(str_bind_list, env->cpu_bind); } str_bind = xstrdup (""); 
xstrcat(str_bind, str_verbose); if (str_bind[0] && str_bind_type && str_bind_type[0]) xstrcatchar(str_bind, ','); xstrcat(str_bind, str_bind_type); xstrcat(str_bind, str_bind_list); if (env->batch_flag) { if (setenvf(&env->env, "SBATCH_CPU_BIND_VERBOSE", str_verbose)) { error("Unable to set SBATCH_CPU_BIND_VERBOSE"); rc = SLURM_FAILURE; } if (setenvf(&env->env, "SBATCH_CPU_BIND_TYPE", str_bind_type)) { error("Unable to set SBATCH_CPU_BIND_TYPE"); rc = SLURM_FAILURE; } if (setenvf(&env->env, "SBATCH_CPU_BIND_LIST", str_bind_list)) { error("Unable to set SBATCH_CPU_BIND_LIST"); rc = SLURM_FAILURE; } if (setenvf(&env->env, "SBATCH_CPU_BIND", str_bind)) { error("Unable to set SBATCH_CPU_BIND"); rc = SLURM_FAILURE; } } else { if (setenvf(&env->env, "SLURM_CPU_BIND_VERBOSE", str_verbose)) { error("Unable to set SLURM_CPU_BIND_VERBOSE"); rc = SLURM_FAILURE; } if (setenvf(&env->env, "SLURM_CPU_BIND_TYPE", str_bind_type)) { error("Unable to set SLURM_CPU_BIND_TYPE"); rc = SLURM_FAILURE; } if (setenvf(&env->env, "SLURM_CPU_BIND_LIST", str_bind_list)) { error("Unable to set SLURM_CPU_BIND_LIST"); rc = SLURM_FAILURE; } if (setenvf(&env->env, "SLURM_CPU_BIND", str_bind)) { error("Unable to set SLURM_CPU_BIND"); rc = SLURM_FAILURE; } } } if (env->mem_bind_type) { char *str_verbose, *str_bind_type, *str_bind_list; char *str_bind; if (env->batch_flag) { unsetenvp(env->env, "SBATCH_MEM_BIND_VERBOSE"); unsetenvp(env->env, "SBATCH_MEM_BIND_TYPE"); unsetenvp(env->env, "SBATCH_MEM_BIND_LIST"); unsetenvp(env->env, "SBATCH_MEM_BIND"); } else { unsetenvp(env->env, "SLURM_MEM_BIND_VERBOSE"); unsetenvp(env->env, "SLURM_MEM_BIND_TYPE"); unsetenvp(env->env, "SLURM_MEM_BIND_LIST"); unsetenvp(env->env, "SLURM_MEM_BIND"); } str_verbose = xstrdup (""); if (env->mem_bind_type & MEM_BIND_VERBOSE) { xstrcat(str_verbose, "verbose"); } else { xstrcat(str_verbose, "quiet"); } str_bind_type = xstrdup (""); if (env->mem_bind_type & MEM_BIND_NONE) { xstrcat(str_bind_type, "none"); } else if 
(env->mem_bind_type & MEM_BIND_RANK) { xstrcat(str_bind_type, "rank"); } else if (env->mem_bind_type & MEM_BIND_MAP) { xstrcat(str_bind_type, "map_mem:"); } else if (env->mem_bind_type & MEM_BIND_MASK) { xstrcat(str_bind_type, "mask_mem:"); } else if (env->mem_bind_type & MEM_BIND_LOCAL) { xstrcat(str_bind_type, "local"); } str_bind_list = xstrdup (""); if (env->mem_bind) { xstrcat(str_bind_list, env->mem_bind); } str_bind = xstrdup (""); xstrcat(str_bind, str_verbose); if (str_bind[0]) { /* add ',' if str_verbose */ xstrcatchar(str_bind, ','); } xstrcat(str_bind, str_bind_type); xstrcat(str_bind, str_bind_list); if (env->batch_flag) { if (setenvf(&env->env, "SBATCH_MEM_BIND_VERBOSE", str_verbose)) { error("Unable to set SBATCH_MEM_BIND_VERBOSE"); rc = SLURM_FAILURE; } if (setenvf(&env->env, "SBATCH_MEM_BIND_TYPE", str_bind_type)) { error("Unable to set SBATCH_MEM_BIND_TYPE"); rc = SLURM_FAILURE; } if (setenvf(&env->env, "SBATCH_MEM_BIND_LIST", str_bind_list)) { error("Unable to set SBATCH_MEM_BIND_LIST"); rc = SLURM_FAILURE; } if (setenvf(&env->env, "SBATCH_MEM_BIND", str_bind)) { error("Unable to set SBATCH_MEM_BIND"); rc = SLURM_FAILURE; } } else { if (setenvf(&env->env, "SLURM_MEM_BIND_VERBOSE", str_verbose)) { error("Unable to set SLURM_MEM_BIND_VERBOSE"); rc = SLURM_FAILURE; } if (setenvf(&env->env, "SLURM_MEM_BIND_TYPE", str_bind_type)) { error("Unable to set SLURM_MEM_BIND_TYPE"); rc = SLURM_FAILURE; } if (setenvf(&env->env, "SLURM_MEM_BIND_LIST", str_bind_list)) { error("Unable to set SLURM_MEM_BIND_LIST"); rc = SLURM_FAILURE; } if (setenvf(&env->env, "SLURM_MEM_BIND", str_bind)) { error("Unable to set SLURM_MEM_BIND"); rc = SLURM_FAILURE; } } } if (env->overcommit && (setenvf(&env->env, "SLURM_OVERCOMMIT", "1"))) { error("Unable to set SLURM_OVERCOMMIT environment variable"); rc = SLURM_FAILURE; } if (env->slurmd_debug && setenvf(&env->env, "SLURMD_DEBUG", "%d", env->slurmd_debug)) { error("Can't set SLURMD_DEBUG environment variable"); rc = 
SLURM_FAILURE; } if (env->labelio && setenvf(&env->env, "SLURM_LABELIO", "1")) { error("Unable to set SLURM_LABELIO environment variable"); rc = SLURM_FAILURE; } if (env->select_jobinfo) { _setup_particulars(cluster_flags, &env->env, env->select_jobinfo); } if (env->jobid >= 0) { if (setenvf(&env->env, "SLURM_JOB_ID", "%d", env->jobid)) { error("Unable to set SLURM_JOB_ID environment"); rc = SLURM_FAILURE; } /* and for backwards compatability... */ if (setenvf(&env->env, "SLURM_JOBID", "%d", env->jobid)) { error("Unable to set SLURM_JOBID environment"); rc = SLURM_FAILURE; } } if (env->nodeid >= 0 && setenvf(&env->env, "SLURM_NODEID", "%d", env->nodeid)) { error("Unable to set SLURM_NODEID environment"); rc = SLURM_FAILURE; } if (env->procid >= 0 && setenvf(&env->env, "SLURM_PROCID", "%d", env->procid)) { error("Unable to set SLURM_PROCID environment"); rc = SLURM_FAILURE; } if (env->localid >= 0 && setenvf(&env->env, "SLURM_LOCALID", "%d", env->localid)) { error("Unable to set SLURM_LOCALID environment"); rc = SLURM_FAILURE; } if (env->stepid >= 0 && setenvf(&env->env, "SLURM_STEPID", "%d", env->stepid)) { error("Unable to set SLURM_STEPID environment"); rc = SLURM_FAILURE; } if (!preserve_env && env->nhosts && setenvf(&env->env, "SLURM_NNODES", "%d", env->nhosts)) { error("Unable to set SLURM_NNODES environment var"); rc = SLURM_FAILURE; } if (env->nodelist && setenvf(&env->env, "SLURM_NODELIST", "%s", env->nodelist)) { error("Unable to set SLURM_NODELIST environment var."); rc = SLURM_FAILURE; } if (!preserve_env && env->task_count && setenvf (&env->env, "SLURM_TASKS_PER_NODE", "%s", env->task_count)) { error ("Can't set SLURM_TASKS_PER_NODE env variable"); rc = SLURM_FAILURE; } if (env->comm_port && setenvf (&env->env, "SLURM_SRUN_COMM_PORT", "%u", env->comm_port)) { error ("Can't set SLURM_SRUN_COMM_PORT env variable"); rc = SLURM_FAILURE; } if (env->cli) { slurm_print_slurm_addr (env->cli, addrbuf, INET_ADDRSTRLEN); /* * XXX: Eventually, need a function for 
slurm_addrs that * returns just the IP address (not addr:port) */ if ((dist = strchr (addrbuf, ':')) != NULL) *dist = '\0'; setenvf (&env->env, "SLURM_LAUNCH_NODE_IPADDR", "%s", addrbuf); } if (env->sgtids && setenvf(&env->env, "SLURM_GTIDS", "%s", env->sgtids)) { error("Unable to set SLURM_GTIDS environment variable"); rc = SLURM_FAILURE; } if(cluster_flags & CLUSTER_FLAG_AIX) { char res_env[128]; char *debug_env = (char *)getenv("SLURM_LL_API_DEBUG"); int debug_num = 0; /* MP_POERESTART_ENV causes a warning message for "poe", but * is needed for "poerestart". Presently we have no means to * determine what command a user will execute. We could * possibly add a "srestart" command which would set * MP_POERESTART_ENV, but that presently seems unnecessary. */ /* setenvf(&env->env, "MP_POERESTART_ENV", res_env); */ if (debug_env) debug_num = atoi(debug_env); snprintf(res_env, sizeof(res_env), "SLURM_LL_API_DEBUG=%d", debug_num); /* Required for AIX/POE systems indicating pre-allocation */ setenvf(&env->env, "LOADLBATCH", "yes"); setenvf(&env->env, "LOADL_ACTIVE", "3.2.0"); } if (env->pty_port && setenvf(&env->env, "SLURM_PTY_PORT", "%hu", env->pty_port)) { error("Can't set SLURM_PTY_PORT env variable"); rc = SLURM_FAILURE; } if (env->ws_col && setenvf(&env->env, "SLURM_PTY_WIN_COL", "%hu", env->ws_col)) { error("Can't set SLURM_PTY_WIN_COL env variable"); rc = SLURM_FAILURE; } if (env->ws_row && setenvf(&env->env, "SLURM_PTY_WIN_ROW", "%hu", env->ws_row)) { error("Can't set SLURM_PTY_WIN_ROW env variable"); rc = SLURM_FAILURE; } if (env->ckpt_dir && setenvf(&env->env, "SLURM_CHECKPOINT_IMAGE_DIR", "%s", env->ckpt_dir)) { error("Can't set SLURM_CHECKPOINT_IMAGE_DIR env variable"); rc = SLURM_FAILURE; } if (env->restart_cnt && setenvf(&env->env, "SLURM_RESTART_COUNT", "%u", env->restart_cnt)) { error("Can't set SLURM_RESTART_COUNT env variable"); rc = SLURM_FAILURE; } return rc; }