/*
 * Initialize the MPI plugin from within slurmstepd.
 *
 * env IN/OUT - pointer to the step environment. SLURM_MPI_TYPE is looked
 *              up in it and removed once _mpi_init() has succeeded.
 * RET SLURM_SUCCESS, or SLURM_ERROR when _mpi_init() reports an error
 *     (SLURM_MPI_TYPE is then left in the environment).
 */
int mpi_hook_slurmstepd_init (char ***env)
{
	char *mpi_type = getenvp (*env, "SLURM_MPI_TYPE");

	/*
	 * The lookup may come back empty; a NULL pointer must never be
	 * handed to a "%s" conversion.
	 */
	debug("mpi type = %s", mpi_type ? mpi_type : "(null)");

	if (_mpi_init(mpi_type) == SLURM_ERROR)
		return SLURM_ERROR;

	unsetenvp (*env, "SLURM_MPI_TYPE");

	return SLURM_SUCCESS;
}
/*
 * Apply the file mode creation mask carried in SLURM_UMASK.
 *
 * job IN - step record whose environment is searched for SLURM_UMASK
 * RET SLURM_SUCCESS after the mask has been applied, SLURM_ERROR when
 *     SLURM_UMASK is not present in the job environment
 */
extern int set_umask(stepd_step_rec_t *job)
{
	char *umask_str = getenvp(job->env, "SLURM_UMASK");
	mode_t new_mask;

	if (umask_str == NULL) {
		debug("Couldn't find SLURM_UMASK in environment");
		return SLURM_ERROR;
	}

	/* The value is octal; convert it before the variable is removed. */
	new_mask = strtol(umask_str, (char **)NULL, 8);

	unsetenvp(job->env, "SLURM_UMASK");
	umask(new_mask);

	return SLURM_SUCCESS;
}
/*
 * Apply the file mode creation mask carried in SLURM_UMASK.
 *
 * The variable is removed from the job environment only for the extern
 * container and batch script steps; for every other step it is left in
 * place. A missing variable is logged unless the step is the extern
 * container.
 *
 * job IN - step record whose environment is searched for SLURM_UMASK
 * RET SLURM_SUCCESS after the mask has been applied, SLURM_ERROR when
 *     SLURM_UMASK is not present in the job environment
 */
extern int set_umask(stepd_step_rec_t *job)
{
	char *umask_str = getenvp(job->env, "SLURM_UMASK");
	mode_t new_mask;
	int drop_var;

	if (umask_str == NULL) {
		if (job->stepid != SLURM_EXTERN_CONT)
			debug("Couldn't find SLURM_UMASK in environment");
		return SLURM_ERROR;
	}

	/* The value is octal; convert it before the variable is removed. */
	new_mask = strtol(umask_str, (char **)NULL, 8);

	drop_var = (job->stepid == SLURM_EXTERN_CONT) ||
		   (job->stepid == SLURM_BATCH_SCRIPT);
	if (drop_var)
		unsetenvp(job->env, "SLURM_UMASK");

	umask(new_mask);

	return SLURM_SUCCESS;
}
static void _set_rlimits(char **env) { slurm_rlimits_info_t *rli; char env_name[25] = "SLURM_RLIMIT_"; char *env_value, *p; struct rlimit r; //unsigned long env_num; rlim_t env_num; for (rli=get_slurm_rlimits_info(); rli->name; rli++) { if (rli->propagate_flag != PROPAGATE_RLIMITS) continue; strcpy(&env_name[sizeof("SLURM_RLIMIT_")-1], rli->name); env_value = getenvp(env, env_name); if (env_value == NULL) continue; unsetenvp(env, env_name); if (getrlimit(rli->resource, &r) < 0) { error("getrlimit(%s): %m", env_name+6); continue; } env_num = strtol(env_value, &p, 10); if (p && (p[0] != '\0')) { error("Invalid environment %s value %s", env_name, env_value); continue; } if (r.rlim_cur == env_num) continue; r.rlim_cur = (rlim_t) env_num; if (setrlimit(rli->resource, &r) < 0) { error("setrlimit(%s): %m", env_name+6); continue; } } }
/*
 * Process TaskProlog output
 * "export NAME=value"	adds environment variables
 * "unset  NAME"	clears an environment variable
 * "print  <whatever>"	writes that to the job's stdout
 *
 * Each line is handled on its own; parsing never looks past the end of
 * the current line. Lines that match none of the commands, or that are
 * malformed (no '=', empty name), are skipped silently.
 *
 * buf IN     - NUL-terminated script output. It is modified in place
 *              while a line is parsed (temporary NUL terminators) and
 *              restored to its original content afterwards.
 * env IN/OUT - environment updated by "export" and "unset"
 */
static void _proc_stdout(char *buf, char ***env)
{
	bool end_buf = false;
	int len;
	char *buf_ptr, *name_ptr, *val_ptr;
	char *end_line, *equal_ptr, *name_end;
	char saved;

	buf_ptr = buf;
	while (buf_ptr[0]) {
		end_line = strchr(buf_ptr, '\n');
		if (!end_line) {
			/* last line has no newline: stop at the terminator */
			end_line = buf_ptr + strlen(buf_ptr);
			end_buf = true;
		}

		if (!strncmp(buf_ptr, "print ", 6)) {
			buf_ptr += 6;
			while ((buf_ptr < end_line) &&
			       isspace((unsigned char) buf_ptr[0]))
				buf_ptr++;
			if (buf_ptr < end_line) {
				/*
				 * Include the newline when there is one, but
				 * never the NUL terminator of the buffer.
				 */
				len = end_line - buf_ptr;
				if (!end_buf)
					len++;
				safe_write(1, buf_ptr, len);
			}
		} else if (!strncmp(buf_ptr, "export ",7)) {
			name_ptr = buf_ptr + 7;
			while ((name_ptr < end_line) &&
			       isspace((unsigned char) name_ptr[0]))
				name_ptr++;
			equal_ptr = strchr(name_ptr, '=');
			if (!equal_ptr || (equal_ptr > end_line))
				goto rwfail;
			val_ptr = equal_ptr + 1;

			/* trim white space between the name and the '=' */
			name_end = equal_ptr;
			while ((name_end > name_ptr) &&
			       isspace((unsigned char) name_end[-1]))
				name_end--;
			if (name_end == name_ptr)
				goto rwfail;	/* empty variable name */

			/* temporarily terminate the name and the value */
			saved = name_end[0];
			name_end[0] = '\0';
			end_line[0] = '\0';
			debug("export name:%s:val:%s:", name_ptr, val_ptr);
			if (setenvf(env, name_ptr, "%s", val_ptr)) {
				error("Unable to set %s environment variable",
				      buf_ptr);
			}
			/* put back exactly what was overwritten */
			name_end[0] = saved;
			if (end_buf)
				end_line[0] = '\0';
			else
				end_line[0] = '\n';
		} else if (!strncmp(buf_ptr, "unset ", 6)) {
			name_ptr = buf_ptr + 6;
			while ((name_ptr < end_line) &&
			       isspace((unsigned char) name_ptr[0]))
				name_ptr++;
			if ((name_ptr[0] == '\n') || (name_ptr[0] == '\0'))
				goto rwfail;

			/*
			 * Trim trailing white space. name_ptr[0] is not
			 * white space, so this stops inside the line.
			 */
			name_end = end_line;
			while (isspace((unsigned char) name_end[-1]))
				name_end--;

			saved = name_end[0];
			name_end[0] = '\0';
			debug(" unset name:%s:", name_ptr);
			unsetenvp(*env, name_ptr);
			name_end[0] = saved;
		}

rwfail:		/* process rest of script output */
		if (end_buf)
			break;
		buf_ptr = end_line + 1;
	}
	return;
}
/*
 * Process TaskProlog output
 * "export NAME=value"	adds environment variables
 * "unset  NAME"	clears an environment variable
 * "print  <whatever>"	writes that to the job's stdout
 *
 * Exporting SLURM_PROLOG_CPU_MASK additionally replaces the job's CPU
 * binding with the exported mask and re-runs task_g_pre_launch(); the
 * process exits with status 1 if that fails.
 *
 * Each line is handled on its own; parsing never looks past the end of
 * the current line. Lines that match none of the commands, or that are
 * malformed (no '=', empty name), are skipped silently.
 *
 * buf IN     - NUL-terminated script output. It is modified in place
 *              while a line is parsed (temporary NUL terminators) and
 *              restored to its original content afterwards.
 * job IN/OUT - step record; its environment is updated by "export" and
 *              "unset"
 */
static void _proc_stdout(char *buf, stepd_step_rec_t *job)
{
	bool end_buf = false;
	int len;
	char *buf_ptr, *name_ptr, *val_ptr;
	char *end_line, *equal_ptr, *name_end;
	char saved;
	char ***env = &job->env;

	buf_ptr = buf;
	while (buf_ptr[0]) {
		end_line = strchr(buf_ptr, '\n');
		if (!end_line) {
			/* last line has no newline: stop at the terminator */
			end_line = buf_ptr + strlen(buf_ptr);
			end_buf = true;
		}

		if (!xstrncmp(buf_ptr, "print ", 6)) {
			buf_ptr += 6;
			while ((buf_ptr < end_line) &&
			       isspace((unsigned char) buf_ptr[0]))
				buf_ptr++;
			if (buf_ptr < end_line) {
				/*
				 * Include the newline when there is one, but
				 * never the NUL terminator of the buffer.
				 */
				len = end_line - buf_ptr;
				if (!end_buf)
					len++;
				safe_write(1, buf_ptr, len);
			}
		} else if (!xstrncmp(buf_ptr, "export ",7)) {
			name_ptr = buf_ptr + 7;
			while ((name_ptr < end_line) &&
			       isspace((unsigned char) name_ptr[0]))
				name_ptr++;
			equal_ptr = strchr(name_ptr, '=');
			if (!equal_ptr || (equal_ptr > end_line))
				goto rwfail;
			val_ptr = equal_ptr + 1;

			/* trim white space between the name and the '=' */
			name_end = equal_ptr;
			while ((name_end > name_ptr) &&
			       isspace((unsigned char) name_end[-1]))
				name_end--;
			if (name_end == name_ptr)
				goto rwfail;	/* empty variable name */

			/* temporarily terminate the name and the value */
			saved = name_end[0];
			name_end[0] = '\0';
			end_line[0] = '\0';
			if (!xstrcmp(name_ptr, "SLURM_PROLOG_CPU_MASK")) {
				/* the prolog overrides the CPU binding */
				job->cpu_bind_type = CPU_BIND_MASK;
				xfree(job->cpu_bind);
				job->cpu_bind = xstrdup(val_ptr);
				if (task_g_pre_launch(job)) {
					error("Failed SLURM_PROLOG_CPU_MASK "
					      "setup");
					exit(1);
				}
			}
			debug("export name:%s:val:%s:", name_ptr, val_ptr);
			if (setenvf(env, name_ptr, "%s", val_ptr)) {
				error("Unable to set %s environment variable",
				      buf_ptr);
			}
			/* put back exactly what was overwritten */
			name_end[0] = saved;
			if (end_buf)
				end_line[0] = '\0';
			else
				end_line[0] = '\n';
		} else if (!xstrncmp(buf_ptr, "unset ", 6)) {
			name_ptr = buf_ptr + 6;
			while ((name_ptr < end_line) &&
			       isspace((unsigned char) name_ptr[0]))
				name_ptr++;
			if ((name_ptr[0] == '\n') || (name_ptr[0] == '\0'))
				goto rwfail;

			/*
			 * Trim trailing white space. name_ptr[0] is not
			 * white space, so this stops inside the line.
			 */
			name_end = end_line;
			while (isspace((unsigned char) name_end[-1]))
				name_end--;

			saved = name_end[0];
			name_end[0] = '\0';
			debug(" unset name:%s:", name_ptr);
			unsetenvp(*env, name_ptr);
			name_end[0] = saved;
		}

rwfail:		/* process rest of script output */
		if (end_buf)
			break;
		buf_ptr = end_line + 1;
	}
	return;
}
static int _setup_stepd_job_info(const stepd_step_rec_t *job, char ***env) { char *p; int i; memset(&job_info, 0, sizeof(job_info)); job_info.jobid = job->jobid; job_info.stepid = job->stepid; job_info.nnodes = job->nnodes; job_info.nodeid = job->nodeid; job_info.ntasks = job->ntasks; job_info.ltasks = job->node_tasks; job_info.gtids = xmalloc(job->node_tasks * sizeof(uint32_t)); for (i = 0; i < job->node_tasks; i ++) { job_info.gtids[i] = job->task[i]->gtid; } p = getenvp(*env, PMI2_PMI_DEBUGGED_ENV); if (p) { job_info.pmi_debugged = atoi(p); } else { job_info.pmi_debugged = 0; } p = getenvp(*env, PMI2_SPAWN_SEQ_ENV); if (p) { /* spawned */ job_info.spawn_seq = atoi(p); unsetenvp(*env, PMI2_SPAWN_SEQ_ENV); p = getenvp(*env, PMI2_SPAWNER_JOBID_ENV); job_info.spawner_jobid = xstrdup(p); unsetenvp(*env, PMI2_SPAWNER_JOBID_ENV); } else { job_info.spawn_seq = 0; job_info.spawner_jobid = NULL; } p = getenvp(*env, PMI2_PMI_JOBID_ENV); if (p) { job_info.pmi_jobid = xstrdup(p); unsetenvp(*env, PMI2_PMI_JOBID_ENV); } else { xstrfmtcat(job_info.pmi_jobid, "%u.%u", job->jobid, job->stepid); } p = getenvp(*env, PMI2_STEP_NODES_ENV); if (!p) { error("mpi/pmi2: unable to find nodes in job environment"); return SLURM_ERROR; } else { job_info.step_nodelist = xstrdup(p); unsetenvp(*env, PMI2_STEP_NODES_ENV); } /* * how to get the mapping info from stepd directly? * there is the task distribution info in the launch_tasks_request_msg_t, * but it is not stored in the stepd_step_rec_t. */ p = getenvp(*env, PMI2_PROC_MAPPING_ENV); if (!p) { error("PMI2_PROC_MAPPING_ENV not found"); return SLURM_ERROR; } else { job_info.proc_mapping = xstrdup(p); unsetenvp(*env, PMI2_PROC_MAPPING_ENV); } job_info.job_env = env_array_copy((const char **)*env); job_info.MPIR_proctable = NULL; job_info.srun_opt = NULL; return SLURM_SUCCESS; }
static int _setup_stepd_tree_info(const stepd_step_rec_t *job, char ***env) { hostlist_t hl; char srun_host[64]; uint16_t port; char *p; int tree_width; /* job info available */ memset(&tree_info, 0, sizeof(tree_info)); hl = hostlist_create(job_info.step_nodelist); p = hostlist_nth(hl, job_info.nodeid); /* strdup-ed */ tree_info.this_node = xstrdup(p); free(p); /* this only controls the upward communication tree width */ p = getenvp(*env, PMI2_TREE_WIDTH_ENV); if (p) { tree_width = atoi(p); if (tree_width < 2) { info("invalid PMI2 tree width value (%d) detected. " "fallback to default value.", tree_width); tree_width = slurm_get_tree_width(); } } else { tree_width = slurm_get_tree_width(); } /* TODO: cannot launch 0 tasks on node */ /* * In tree position calculation, root of the tree is srun with id 0. * Stepd's id will be its nodeid plus 1. */ reverse_tree_info(job_info.nodeid + 1, job_info.nnodes + 1, tree_width, &tree_info.parent_id, &tree_info.num_children, &tree_info.depth, &tree_info.max_depth); tree_info.parent_id --; /* restore real nodeid */ if (tree_info.parent_id < 0) { /* parent is srun */ tree_info.parent_node = NULL; } else { p = hostlist_nth(hl, tree_info.parent_id); tree_info.parent_node = xstrdup(p); free(p); } hostlist_destroy(hl); tree_info.pmi_port = 0; /* not used */ p = getenvp(*env, "SLURM_SRUN_COMM_HOST"); if (!p) { error("mpi/pmi2: unable to find srun comm ifhn in env"); return SLURM_ERROR; } else { strncpy(srun_host, p, 64); } p = getenvp(*env, PMI2_SRUN_PORT_ENV); if (!p) { error("mpi/pmi2: unable to find srun pmi2 port in env"); return SLURM_ERROR; } else { port = atoi(p); unsetenvp(*env, PMI2_SRUN_PORT_ENV); } tree_info.srun_addr = xmalloc(sizeof(slurm_addr_t)); slurm_set_addr(tree_info.srun_addr, port, srun_host); /* init kvs seq to 0. TODO: reduce array size */ tree_info.children_kvs_seq = xmalloc(sizeof(uint32_t) * job_info.nnodes); return SLURM_SUCCESS; }
/*
 * Export the fields of an env_t into its environment array (env->env)
 * as SLURM_* / SBATCH_* variables.
 *
 * Fields that are zero/NULL (or negative for the id fields) are skipped.
 * When preserve_env is true, SLURM_NTASKS, SLURM_NPROCS, SLURM_NNODES and
 * SLURM_TASKS_PER_NODE are not written. Processing continues after a
 * failure so that as many variables as possible are set.
 *
 * env IN/OUT       - description of the job/step; env->env is updated
 * preserve_env IN  - leave the task/node count variables untouched
 * RET SLURM_SUCCESS, SLURM_ERROR when env is NULL, or SLURM_FAILURE when
 *     at least one (checked) variable could not be set
 */
int setup_env(env_t *env, bool preserve_env)
{
	int rc = SLURM_SUCCESS;
	char *dist = NULL, *lllp_dist = NULL;
	char addrbuf[INET_ADDRSTRLEN];
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	if (env == NULL)
		return SLURM_ERROR;

	if (env->task_pid
	    && setenvf(&env->env, "SLURM_TASK_PID", "%d",
		       (int)env->task_pid)) {
		error("Unable to set SLURM_TASK_PID environment variable");
		rc = SLURM_FAILURE;
	}

	if (!preserve_env && env->ntasks) {
		if (setenvf(&env->env, "SLURM_NTASKS", "%d", env->ntasks)) {
			error("Unable to set SLURM_NTASKS "
			      "environment variable");
			rc = SLURM_FAILURE;
		}
		if (setenvf(&env->env, "SLURM_NPROCS", "%d", env->ntasks)) {
			error("Unable to set SLURM_NPROCS "
			      "environment variable");
			rc = SLURM_FAILURE;
		}
	}

	if (env->cpus_per_task
	    && setenvf(&env->env, "SLURM_CPUS_PER_TASK", "%d",
		       env->cpus_per_task) ) {
		error("Unable to set SLURM_CPUS_PER_TASK");
		rc = SLURM_FAILURE;
	}

	if (env->ntasks_per_node
	    && setenvf(&env->env, "SLURM_NTASKS_PER_NODE", "%d",
		       env->ntasks_per_node) ) {
		error("Unable to set SLURM_NTASKS_PER_NODE");
		rc = SLURM_FAILURE;
	}

	if (env->ntasks_per_socket
	    && setenvf(&env->env, "SLURM_NTASKS_PER_SOCKET", "%d",
		       env->ntasks_per_socket) ) {
		error("Unable to set SLURM_NTASKS_PER_SOCKET");
		rc = SLURM_FAILURE;
	}

	if (env->ntasks_per_core
	    && setenvf(&env->env, "SLURM_NTASKS_PER_CORE", "%d",
		       env->ntasks_per_core) ) {
		error("Unable to set SLURM_NTASKS_PER_CORE");
		rc = SLURM_FAILURE;
	}

	if (env->cpus_on_node
	    && setenvf(&env->env, "SLURM_CPUS_ON_NODE", "%d",
		       env->cpus_on_node) ) {
		error("Unable to set SLURM_CPUS_ON_NODE");
		rc = SLURM_FAILURE;
	}

	_set_distribution(env->distribution, &dist, &lllp_dist);
	if (dist
	    && setenvf(&env->env, "SLURM_DISTRIBUTION", "%s", dist)) {
		error("Can't set SLURM_DISTRIBUTION env variable");
		rc = SLURM_FAILURE;
	}

	if ((env->distribution == SLURM_DIST_PLANE)
	    && setenvf(&env->env, "SLURM_DIST_PLANESIZE", "%u",
		       env->plane_size)) {
		error("Can't set SLURM_DIST_PLANESIZE "
		      "env variable");
		rc = SLURM_FAILURE;
	}

	if (lllp_dist
	    && setenvf(&env->env, "SLURM_DIST_LLLP", "%s", lllp_dist)) {
		error("Can't set SLURM_DIST_LLLP env variable");
		rc = SLURM_FAILURE;
	}

	if (env->cpu_bind_type) {
		/*
		 * NOTE(review): the four strings below are xstrdup'ed and
		 * not released anywhere in this function.
		 */
		char *str_verbose, *str_bind_type, *str_bind_list;
		char *str_bind;
		int len;

		if (env->batch_flag) {
			unsetenvp(env->env, "SBATCH_CPU_BIND_VERBOSE");
			unsetenvp(env->env, "SBATCH_CPU_BIND_TYPE");
			unsetenvp(env->env, "SBATCH_CPU_BIND_LIST");
			unsetenvp(env->env, "SBATCH_CPU_BIND");
		} else {
			unsetenvp(env->env, "SLURM_CPU_BIND_VERBOSE");
			unsetenvp(env->env, "SLURM_CPU_BIND_TYPE");
			unsetenvp(env->env, "SLURM_CPU_BIND_LIST");
			unsetenvp(env->env, "SLURM_CPU_BIND");
		}

		str_verbose = xstrdup ("");
		if (env->cpu_bind_type & CPU_BIND_VERBOSE) {
			xstrcat(str_verbose, "verbose");
		} else {
			xstrcat(str_verbose, "quiet");
		}

		str_bind_type = xstrdup ("");
		if (env->cpu_bind_type & CPU_BIND_TO_THREADS) {
			xstrcat(str_bind_type, "threads,");
		} else if (env->cpu_bind_type & CPU_BIND_TO_CORES) {
			xstrcat(str_bind_type, "cores,");
		} else if (env->cpu_bind_type & CPU_BIND_TO_SOCKETS) {
			xstrcat(str_bind_type, "sockets,");
		} else if (env->cpu_bind_type & CPU_BIND_TO_LDOMS) {
			xstrcat(str_bind_type, "ldoms,");
		}
		if (env->cpu_bind_type & CPU_BIND_NONE) {
			xstrcat(str_bind_type, "none");
		} else if (env->cpu_bind_type & CPU_BIND_RANK) {
			xstrcat(str_bind_type, "rank");
		} else if (env->cpu_bind_type & CPU_BIND_MAP) {
			xstrcat(str_bind_type, "map_cpu:");
		} else if (env->cpu_bind_type & CPU_BIND_MASK) {
			xstrcat(str_bind_type, "mask_cpu:");
		} else if (env->cpu_bind_type & CPU_BIND_LDRANK) {
			xstrcat(str_bind_type, "rank_ldom");
		} else if (env->cpu_bind_type & CPU_BIND_LDMAP) {
			xstrcat(str_bind_type, "map_ldom:");
		} else if (env->cpu_bind_type & CPU_BIND_LDMASK) {
			xstrcat(str_bind_type, "mask_ldom:");
		}
		len = strlen(str_bind_type);
		if (len) {		/* remove a possible trailing ',' */
			if (str_bind_type[len-1] == ',') {
				str_bind_type[len-1] = '\0';
			}
		}

		str_bind_list = xstrdup ("");
		if (env->cpu_bind) {
			xstrcat(str_bind_list, env->cpu_bind);
		}

		str_bind = xstrdup ("");
		xstrcat(str_bind, str_verbose);
		if (str_bind[0] && str_bind_type && str_bind_type[0])
			xstrcatchar(str_bind, ',');
		xstrcat(str_bind, str_bind_type);
		xstrcat(str_bind, str_bind_list);

		/*
		 * The values are data, not format strings (the bind list is
		 * taken from env->cpu_bind), so always pass them via "%s".
		 */
		if (env->batch_flag) {
			if (setenvf(&env->env, "SBATCH_CPU_BIND_VERBOSE",
				    "%s", str_verbose)) {
				error("Unable to set SBATCH_CPU_BIND_VERBOSE");
				rc = SLURM_FAILURE;
			}
			if (setenvf(&env->env, "SBATCH_CPU_BIND_TYPE",
				    "%s", str_bind_type)) {
				error("Unable to set SBATCH_CPU_BIND_TYPE");
				rc = SLURM_FAILURE;
			}
			if (setenvf(&env->env, "SBATCH_CPU_BIND_LIST",
				    "%s", str_bind_list)) {
				error("Unable to set SBATCH_CPU_BIND_LIST");
				rc = SLURM_FAILURE;
			}
			if (setenvf(&env->env, "SBATCH_CPU_BIND",
				    "%s", str_bind)) {
				error("Unable to set SBATCH_CPU_BIND");
				rc = SLURM_FAILURE;
			}
		} else {
			if (setenvf(&env->env, "SLURM_CPU_BIND_VERBOSE",
				    "%s", str_verbose)) {
				error("Unable to set SLURM_CPU_BIND_VERBOSE");
				rc = SLURM_FAILURE;
			}
			if (setenvf(&env->env, "SLURM_CPU_BIND_TYPE",
				    "%s", str_bind_type)) {
				error("Unable to set SLURM_CPU_BIND_TYPE");
				rc = SLURM_FAILURE;
			}
			if (setenvf(&env->env, "SLURM_CPU_BIND_LIST",
				    "%s", str_bind_list)) {
				error("Unable to set SLURM_CPU_BIND_LIST");
				rc = SLURM_FAILURE;
			}
			if (setenvf(&env->env, "SLURM_CPU_BIND",
				    "%s", str_bind)) {
				error("Unable to set SLURM_CPU_BIND");
				rc = SLURM_FAILURE;
			}
		}
	}

	if (env->mem_bind_type) {
		/*
		 * NOTE(review): the four strings below are xstrdup'ed and
		 * not released anywhere in this function.
		 */
		char *str_verbose, *str_bind_type, *str_bind_list;
		char *str_bind;

		if (env->batch_flag) {
			unsetenvp(env->env, "SBATCH_MEM_BIND_VERBOSE");
			unsetenvp(env->env, "SBATCH_MEM_BIND_TYPE");
			unsetenvp(env->env, "SBATCH_MEM_BIND_LIST");
			unsetenvp(env->env, "SBATCH_MEM_BIND");
		} else {
			unsetenvp(env->env, "SLURM_MEM_BIND_VERBOSE");
			unsetenvp(env->env, "SLURM_MEM_BIND_TYPE");
			unsetenvp(env->env, "SLURM_MEM_BIND_LIST");
			unsetenvp(env->env, "SLURM_MEM_BIND");
		}

		str_verbose = xstrdup ("");
		if (env->mem_bind_type & MEM_BIND_VERBOSE) {
			xstrcat(str_verbose, "verbose");
		} else {
			xstrcat(str_verbose, "quiet");
		}

		str_bind_type = xstrdup ("");
		if (env->mem_bind_type & MEM_BIND_NONE) {
			xstrcat(str_bind_type, "none");
		} else if (env->mem_bind_type & MEM_BIND_RANK) {
			xstrcat(str_bind_type, "rank");
		} else if (env->mem_bind_type & MEM_BIND_MAP) {
			xstrcat(str_bind_type, "map_mem:");
		} else if (env->mem_bind_type & MEM_BIND_MASK) {
			xstrcat(str_bind_type, "mask_mem:");
		} else if (env->mem_bind_type & MEM_BIND_LOCAL) {
			xstrcat(str_bind_type, "local");
		}

		str_bind_list = xstrdup ("");
		if (env->mem_bind) {
			xstrcat(str_bind_list, env->mem_bind);
		}

		str_bind = xstrdup ("");
		xstrcat(str_bind, str_verbose);
		if (str_bind[0]) {		/* add ',' if str_verbose */
			xstrcatchar(str_bind, ',');
		}
		xstrcat(str_bind, str_bind_type);
		xstrcat(str_bind, str_bind_list);

		/*
		 * The values are data, not format strings (the bind list is
		 * taken from env->mem_bind), so always pass them via "%s".
		 */
		if (env->batch_flag) {
			if (setenvf(&env->env, "SBATCH_MEM_BIND_VERBOSE",
				    "%s", str_verbose)) {
				error("Unable to set SBATCH_MEM_BIND_VERBOSE");
				rc = SLURM_FAILURE;
			}
			if (setenvf(&env->env, "SBATCH_MEM_BIND_TYPE",
				    "%s", str_bind_type)) {
				error("Unable to set SBATCH_MEM_BIND_TYPE");
				rc = SLURM_FAILURE;
			}
			if (setenvf(&env->env, "SBATCH_MEM_BIND_LIST",
				    "%s", str_bind_list)) {
				error("Unable to set SBATCH_MEM_BIND_LIST");
				rc = SLURM_FAILURE;
			}
			if (setenvf(&env->env, "SBATCH_MEM_BIND",
				    "%s", str_bind)) {
				error("Unable to set SBATCH_MEM_BIND");
				rc = SLURM_FAILURE;
			}
		} else {
			if (setenvf(&env->env, "SLURM_MEM_BIND_VERBOSE",
				    "%s", str_verbose)) {
				error("Unable to set SLURM_MEM_BIND_VERBOSE");
				rc = SLURM_FAILURE;
			}
			if (setenvf(&env->env, "SLURM_MEM_BIND_TYPE",
				    "%s", str_bind_type)) {
				error("Unable to set SLURM_MEM_BIND_TYPE");
				rc = SLURM_FAILURE;
			}
			if (setenvf(&env->env, "SLURM_MEM_BIND_LIST",
				    "%s", str_bind_list)) {
				error("Unable to set SLURM_MEM_BIND_LIST");
				rc = SLURM_FAILURE;
			}
			if (setenvf(&env->env, "SLURM_MEM_BIND",
				    "%s", str_bind)) {
				error("Unable to set SLURM_MEM_BIND");
				rc = SLURM_FAILURE;
			}
		}
	}

	if (env->overcommit
	    && (setenvf(&env->env, "SLURM_OVERCOMMIT", "1"))) {
		error("Unable to set SLURM_OVERCOMMIT environment variable");
		rc = SLURM_FAILURE;
	}

	if (env->slurmd_debug
	    && setenvf(&env->env, "SLURMD_DEBUG", "%d", env->slurmd_debug)) {
		error("Can't set SLURMD_DEBUG environment variable");
		rc = SLURM_FAILURE;
	}

	if (env->labelio
	    && setenvf(&env->env, "SLURM_LABELIO", "1")) {
		error("Unable to set SLURM_LABELIO environment variable");
		rc = SLURM_FAILURE;
	}

	if (env->select_jobinfo) {
		_setup_particulars(cluster_flags, &env->env,
				   env->select_jobinfo);
	}

	if (env->jobid >= 0) {
		if (setenvf(&env->env, "SLURM_JOB_ID", "%d", env->jobid)) {
			error("Unable to set SLURM_JOB_ID environment");
			rc = SLURM_FAILURE;
		}
		/* and for backwards compatibility... */
		if (setenvf(&env->env, "SLURM_JOBID", "%d", env->jobid)) {
			error("Unable to set SLURM_JOBID environment");
			rc = SLURM_FAILURE;
		}
	}

	if (env->nodeid >= 0
	    && setenvf(&env->env, "SLURM_NODEID", "%d", env->nodeid)) {
		error("Unable to set SLURM_NODEID environment");
		rc = SLURM_FAILURE;
	}

	if (env->procid >= 0
	    && setenvf(&env->env, "SLURM_PROCID", "%d", env->procid)) {
		error("Unable to set SLURM_PROCID environment");
		rc = SLURM_FAILURE;
	}

	if (env->localid >= 0
	    && setenvf(&env->env, "SLURM_LOCALID", "%d", env->localid)) {
		error("Unable to set SLURM_LOCALID environment");
		rc = SLURM_FAILURE;
	}

	if (env->stepid >= 0
	    && setenvf(&env->env, "SLURM_STEPID", "%d", env->stepid)) {
		error("Unable to set SLURM_STEPID environment");
		rc = SLURM_FAILURE;
	}

	if (!preserve_env && env->nhosts
	    && setenvf(&env->env, "SLURM_NNODES", "%d", env->nhosts)) {
		error("Unable to set SLURM_NNODES environment var");
		rc = SLURM_FAILURE;
	}

	if (env->nodelist
	    && setenvf(&env->env, "SLURM_NODELIST", "%s", env->nodelist)) {
		error("Unable to set SLURM_NODELIST environment var.");
		rc = SLURM_FAILURE;
	}

	if (!preserve_env && env->task_count
	    && setenvf (&env->env,
			"SLURM_TASKS_PER_NODE", "%s", env->task_count)) {
		error ("Can't set SLURM_TASKS_PER_NODE env variable");
		rc = SLURM_FAILURE;
	}

	if (env->comm_port
	    && setenvf (&env->env, "SLURM_SRUN_COMM_PORT", "%u",
			env->comm_port)) {
		error ("Can't set SLURM_SRUN_COMM_PORT env variable");
		rc = SLURM_FAILURE;
	}

	if (env->cli) {

		slurm_print_slurm_addr (env->cli, addrbuf, INET_ADDRSTRLEN);

		/*
		 *  XXX: Eventually, need a function for slurm_addrs that
		 *   returns just the IP address (not addr:port)
		 */

		/* dist is reused here as a scratch pointer to cut ":port" */
		if ((dist = strchr (addrbuf, ':')) != NULL)
			*dist = '\0';
		setenvf (&env->env, "SLURM_LAUNCH_NODE_IPADDR", "%s", addrbuf);
	}

	if (env->sgtids
	    && setenvf(&env->env, "SLURM_GTIDS", "%s", env->sgtids)) {
		error("Unable to set SLURM_GTIDS environment variable");
		rc = SLURM_FAILURE;
	}

	if (cluster_flags & CLUSTER_FLAG_AIX) {
		char res_env[128];
		char *debug_env = (char *)getenv("SLURM_LL_API_DEBUG");
		int  debug_num = 0;

		/* MP_POERESTART_ENV causes a warning message for "poe", but
		 * is needed for "poerestart". Presently we have no means to
		 * determine what command a user will execute. We could
		 * possibly add a "srestart" command which would set
		 * MP_POERESTART_ENV, but that presently seems unnecessary. */
		/* setenvf(&env->env, "MP_POERESTART_ENV", res_env); */
		if (debug_env)
			debug_num = atoi(debug_env);
		snprintf(res_env, sizeof(res_env), "SLURM_LL_API_DEBUG=%d",
			 debug_num);

		/* Required for AIX/POE systems indicating pre-allocation */
		setenvf(&env->env, "LOADLBATCH", "yes");
		setenvf(&env->env, "LOADL_ACTIVE", "3.2.0");
	}

	if (env->pty_port
	    && setenvf(&env->env, "SLURM_PTY_PORT", "%hu", env->pty_port)) {
		error("Can't set SLURM_PTY_PORT env variable");
		rc = SLURM_FAILURE;
	}
	if (env->ws_col
	    && setenvf(&env->env, "SLURM_PTY_WIN_COL", "%hu", env->ws_col)) {
		error("Can't set SLURM_PTY_WIN_COL env variable");
		rc = SLURM_FAILURE;
	}
	if (env->ws_row
	    && setenvf(&env->env, "SLURM_PTY_WIN_ROW", "%hu", env->ws_row)) {
		error("Can't set SLURM_PTY_WIN_ROW env variable");
		rc = SLURM_FAILURE;
	}
	if (env->ckpt_dir
	    && setenvf(&env->env, "SLURM_CHECKPOINT_IMAGE_DIR", "%s",
		       env->ckpt_dir)) {
		error("Can't set SLURM_CHECKPOINT_IMAGE_DIR env variable");
		rc = SLURM_FAILURE;
	}

	if (env->restart_cnt
	    && setenvf(&env->env, "SLURM_RESTART_COUNT", "%u",
		       env->restart_cnt)) {
		error("Can't set SLURM_RESTART_COUNT env variable");
		rc = SLURM_FAILURE;
	}

	return rc;
}
/*
 * Set one soft resource limit from its SLURM_RLIMIT_<name> environment
 * variable, when the slurm config file has PropagateResourceLimits=YES
 * or the user requested it with srun --propagate.
 *
 * NOTE: this function should only be called right before the exec of a
 * script, after the fork, so as to limit the amount of effect the limits
 * have when combined with the slurmstepd. RLIMIT_FSIZE is the main
 * reason: if the user sets it lower than the size of the current
 * slurmd.log, the stepd will core the next time anything is written to
 * it. So if running +debug2 and the user is getting cores with file
 * system limit errors, this is the reason.
 *
 * env IN/OUT - environment holding the SLURM_RLIMIT_* variable; the
 *              variable is removed once it has been read
 * rli IN     - description of the limit to process
 * RET SLURM_SUCCESS when the limit was set, did not need to change, or
 *     is not to be propagated; SLURM_ERROR when the variable is missing
 *     or getrlimit()/setrlimit() fails
 */
static int _set_limit(char **env, slurm_rlimits_info_t *rli)
{
	unsigned long requested;
	char max_buf[24], cur_buf[24], req_buf[24];
	struct rlimit limit;
	bool user_requested;	/* e.g. TRUE if 'srun --propagate' */
	char var_name[25] = "SLURM_RLIMIT_";
	char *short_name = &var_name[6];	/* "RLIMIT_<name>" */
	const char *origin;

	strcpy(&var_name[sizeof("SLURM_RLIMIT_")-1], rli->name);

	if (_get_env_val(env, var_name, &requested, &user_requested)) {
		debug("Couldn't find %s in environment", var_name);
		return SLURM_ERROR;
	}

	/*
	 * Users shouldn't get the SLURM_RLIMIT_* env vars in their environ
	 */
	unsetenvp(env, var_name);

	/*
	 * The propagated soft rlimit is only applied when the slurm conf
	 * file settings say so, or when the user asked for it.
	 */
	if ((rli->propagate_flag != PROPAGATE_RLIMITS) && !user_requested)
		return SLURM_SUCCESS;

	if (getrlimit(rli->resource, &limit) < 0) {
		error("getrlimit(%s): %m", short_name);
		return SLURM_ERROR;
	}

	origin = user_requested ? "user" : "******";

	/*
	 * Nothing to do if the rlimit won't change
	 */
	if (limit.rlim_cur == (rlim_t) requested) {
		debug2( "_set_limit: %s setrlimit %s no change in value: %lu",
			origin, short_name, (unsigned long) limit.rlim_cur);
		return SLURM_SUCCESS;
	}

	debug2("_set_limit: %-14s: max:%s cur:%s req:%s", short_name,
	       rlim_to_string (limit.rlim_max, max_buf, sizeof (max_buf)),
	       rlim_to_string (limit.rlim_cur, cur_buf, sizeof (cur_buf)),
	       rlim_to_string (requested, req_buf, sizeof (req_buf)) );

	/* raise the hard limit along with the soft one when required */
	limit.rlim_cur = (rlim_t) requested;
	if (limit.rlim_max < limit.rlim_cur)
		limit.rlim_max = limit.rlim_cur;

	if (setrlimit(rli->resource, &limit) >= 0) {
		debug2( "_set_limit: %s setrlimit %s succeeded",
			origin, short_name );
		return SLURM_SUCCESS;
	}

	/*
	 * Report an error only if the user requested propagate
	 */
	if (user_requested) {
		error( "Can't propagate %s of %s from submit host: %m",
			short_name,
			limit.rlim_cur == RLIM_INFINITY ? "'unlimited'" :
			rlim_to_string( limit.rlim_cur, cur_buf,
					sizeof(cur_buf)));
	} else {
		verbose("Can't propagate %s of %s from submit host: %m",
			short_name,
			limit.rlim_cur == RLIM_INFINITY ? "'unlimited'" :
			rlim_to_string( limit.rlim_cur, cur_buf,
					sizeof(cur_buf)));
	}
	return SLURM_ERROR;
}