/* * Set in "dest" the environment variables relevant to a SLURM job * allocation, overwriting any environment variables of the same name. * If the address pointed to by "dest" is NULL, memory will automatically be * xmalloc'ed. The array is terminated by a NULL pointer, and thus is * suitable for use by execle() and other env_array_* functions. * * Sets the variables: * SLURM_JOB_ID * SLURM_JOB_NUM_NODES * SLURM_JOB_NODELIST * SLURM_JOB_CPUS_PER_NODE * LOADLBATCH (AIX only) * SLURM_BG_NUM_NODES, MPIRUN_PARTITION, MPIRUN_NOFREE, and * MPIRUN_NOALLOCATE (BG only) * * Sets OBSOLETE variables (needed for MPI, do not remove): * SLURM_JOBID * SLURM_NNODES * SLURM_NODELIST * SLURM_TASKS_PER_NODE */ int env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc, const job_desc_msg_t *desc) { char *tmp = NULL; char *dist = NULL, *lllp_dist = NULL; slurm_step_layout_t *step_layout = NULL; uint32_t num_tasks = desc->num_tasks; int rc = SLURM_SUCCESS; uint32_t node_cnt = alloc->node_cnt; uint32_t cluster_flags = slurmdb_setup_cluster_flags(); _setup_particulars(cluster_flags, dest, alloc->select_jobinfo); if (cluster_flags & CLUSTER_FLAG_BG) { select_g_select_jobinfo_get(alloc->select_jobinfo, SELECT_JOBDATA_NODE_CNT, &node_cnt); if (!node_cnt) node_cnt = alloc->node_cnt; env_array_overwrite_fmt(dest, "SLURM_BG_NUM_NODES", "%u", node_cnt); } env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", alloc->job_id); env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", node_cnt); env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s", alloc->node_list); _set_distribution(desc->task_dist, &dist, &lllp_dist); if(dist) env_array_overwrite_fmt(dest, "SLURM_DISTRIBUTION", "%s", dist); if(desc->task_dist == SLURM_DIST_PLANE) env_array_overwrite_fmt(dest, "SLURM_DIST_PLANESIZE", "%u", desc->plane_size); if(lllp_dist) env_array_overwrite_fmt(dest, "SLURM_DIST_LLLP", "%s", lllp_dist); tmp = uint32_compressed_to_str(alloc->num_cpu_groups, alloc->cpus_per_node, alloc->cpu_count_reps); env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp); xfree(tmp); /* OBSOLETE, but needed by MPI, do not remove */ env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", alloc->job_id); env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", node_cnt); env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", alloc->node_list); if(num_tasks == NO_VAL) { /* If we know how many tasks we are going to do then we set SLURM_TASKS_PER_NODE */ int i=0; /* If no tasks were given we can figure it out here * by totalling up the cpus and then dividing by the * number of cpus per task */ num_tasks = 0; for (i = 0; i < alloc->num_cpu_groups; i++) { num_tasks += alloc->cpu_count_reps[i] * alloc->cpus_per_node[i]; } if((int)desc->cpus_per_task > 1 && desc->cpus_per_task != (uint16_t)NO_VAL) num_tasks /= desc->cpus_per_task; //num_tasks = desc->min_cpus; } if(desc->task_dist == SLURM_DIST_ARBITRARY) { tmp = desc->req_nodes; env_array_overwrite_fmt(dest, "SLURM_ARBITRARY_NODELIST", "%s", tmp); } else tmp = alloc->node_list; if(!(step_layout = slurm_step_layout_create(tmp, alloc->cpus_per_node, alloc->cpu_count_reps, node_cnt, num_tasks, desc->cpus_per_task, desc->task_dist, desc->plane_size))) return SLURM_ERROR; tmp = _uint16_array_to_str(step_layout->node_cnt, step_layout->tasks); slurm_step_layout_destroy(step_layout); env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp); xfree(tmp); return rc; }
static void _update_job_size(uint32_t job_id)
{
	resource_allocation_response_msg_t *alloc_info;
	char *fname_csh = NULL, *fname_sh = NULL;
	FILE *resize_csh = NULL, *resize_sh = NULL;

	if (!getenv("SLURM_JOBID"))
		return;		/* No job environment here to update */

	if (slurm_allocation_lookup_lite(job_id, &alloc_info) !=
	    SLURM_SUCCESS) {
		slurm_perror("slurm_allocation_lookup_lite");
		return;
	}

	xstrfmtcat(fname_csh, "slurm_job_%u_resize.csh", job_id);
	xstrfmtcat(fname_sh,  "slurm_job_%u_resize.sh",  job_id);
	(void) unlink(fname_csh);
	(void) unlink(fname_sh);

	if (!(resize_csh = fopen(fname_csh, "w"))) {
		fprintf(stderr, "Could not create file %s: %s\n",
			fname_csh, strerror(errno));
		goto fini;
	}
	if (!(resize_sh = fopen(fname_sh, "w"))) {
		fprintf(stderr, "Could not create file %s: %s\n",
			fname_sh, strerror(errno));
		goto fini;
	}
	chmod(fname_csh, 0700);	/* Make files executable */
	chmod(fname_sh,  0700);

	if (getenv("SLURM_NODELIST")) {
		fprintf(resize_sh, "export SLURM_NODELIST=\"%s\"\n",
			alloc_info->node_list);
		fprintf(resize_csh, "setenv SLURM_NODELIST \"%s\"\n",
			alloc_info->node_list);
	}
	if (getenv("SLURM_JOB_NODELIST")) {
		fprintf(resize_sh, "export SLURM_JOB_NODELIST=\"%s\"\n",
			alloc_info->node_list);
		fprintf(resize_csh, "setenv SLURM_JOB_NODELIST \"%s\"\n",
			alloc_info->node_list);
	}
	if (getenv("SLURM_NNODES")) {
		fprintf(resize_sh, "export SLURM_NNODES=%u\n",
			alloc_info->node_cnt);
		fprintf(resize_csh, "setenv SLURM_NNODES %u\n",
			alloc_info->node_cnt);
	}
	if (getenv("SLURM_JOB_NUM_NODES")) {
		fprintf(resize_sh, "export SLURM_JOB_NUM_NODES=%u\n",
			alloc_info->node_cnt);
		fprintf(resize_csh, "setenv SLURM_JOB_NUM_NODES %u\n",
			alloc_info->node_cnt);
	}
	if (getenv("SLURM_JOB_CPUS_PER_NODE")) {
		char *tmp;
		tmp = uint32_compressed_to_str(alloc_info->num_cpu_groups,
					       alloc_info->cpus_per_node,
					       alloc_info->cpu_count_reps);
		fprintf(resize_sh, "export SLURM_JOB_CPUS_PER_NODE=\"%s\"\n",
			tmp);
		fprintf(resize_csh, "setenv SLURM_JOB_CPUS_PER_NODE \"%s\"\n",
			tmp);
		xfree(tmp);
	}
	if (getenv("SLURM_TASKS_PER_NODE")) {
		/* We don't have sufficient information to recreate this */
		fprintf(resize_sh, "unset SLURM_TASKS_PER_NODE\n");
		fprintf(resize_csh, "unsetenv SLURM_TASKS_PER_NODE\n");
	}

	printf("To reset SLURM environment variables, execute\n");
	printf("  For bash or sh shells:  . ./%s\n", fname_sh);
	printf("  For csh shells:         source ./%s\n", fname_csh);

fini:	slurm_free_resource_allocation_response_msg(alloc_info);
	xfree(fname_csh);
	xfree(fname_sh);
	if (resize_csh)
		fclose(resize_csh);
	if (resize_sh)
		fclose(resize_sh);
}
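/*
 * For illustration only: the generated slurm_job_<jobid>_resize.sh could
 * look like the following after a job is resized to 4 nodes (hostnames and
 * counts are hypothetical; a line is only written for each variable already
 * present in the caller's environment):
 *
 *	export SLURM_NODELIST="tux[1-4]"
 *	export SLURM_JOB_NODELIST="tux[1-4]"
 *	export SLURM_NNODES=4
 *	export SLURM_JOB_NUM_NODES=4
 *	export SLURM_JOB_CPUS_PER_NODE="8(x4)"
 *	unset SLURM_TASKS_PER_NODE
 *
 * The .csh variant carries the same values using setenv/unsetenv, and the
 * user is told to source whichever file matches their shell.
 */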
/* * Set in "dest" the environment variables strings relevant to a SLURM batch * job allocation, overwriting any environment variables of the same name. * If the address pointed to by "dest" is NULL, memory will automatically be * xmalloc'ed. The array is terminated by a NULL pointer, and thus is * suitable for use by execle() and other env_array_* functions. * * Sets the variables: * SLURM_JOB_ID * SLURM_JOB_NUM_NODES * SLURM_JOB_NODELIST * SLURM_JOB_CPUS_PER_NODE * ENVIRONMENT=BATCH * HOSTNAME * LOADLBATCH (AIX only) * * Sets OBSOLETE variables (needed for MPI, do not remove): * SLURM_JOBID * SLURM_NNODES * SLURM_NODELIST * SLURM_NTASKS * SLURM_TASKS_PER_NODE */ extern int env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, const char *node_name) { char *tmp = NULL; uint32_t num_nodes = 0; uint32_t num_cpus = 0; int i; slurm_step_layout_t *step_layout = NULL; uint32_t num_tasks = batch->ntasks; uint16_t cpus_per_task; uint16_t task_dist; uint32_t cluster_flags = slurmdb_setup_cluster_flags(); _setup_particulars(cluster_flags, dest, batch->select_jobinfo); /* There is no explicit node count in the batch structure, * so we need to calculate the node count. */ for (i = 0; i < batch->num_cpu_groups; i++) { num_nodes += batch->cpu_count_reps[i]; num_cpus += batch->cpu_count_reps[i] * batch->cpus_per_node[i]; } env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", batch->job_id); env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", num_nodes); if(cluster_flags & CLUSTER_FLAG_BG) env_array_overwrite_fmt(dest, "SLURM_BG_NUM_NODES", "%u", num_nodes); env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s", batch->nodes); tmp = uint32_compressed_to_str(batch->num_cpu_groups, batch->cpus_per_node, batch->cpu_count_reps); env_array_overwrite_fmt(dest, "SLURM_JOB_CPUS_PER_NODE", "%s", tmp); xfree(tmp); env_array_overwrite_fmt(dest, "ENVIRONMENT", "BATCH"); if (node_name) env_array_overwrite_fmt(dest, "HOSTNAME", "%s", node_name); /* OBSOLETE, but needed by MPI, do not remove */ env_array_overwrite_fmt(dest, "SLURM_JOBID", "%u", batch->job_id); env_array_overwrite_fmt(dest, "SLURM_NNODES", "%u", num_nodes); env_array_overwrite_fmt(dest, "SLURM_NODELIST", "%s", batch->nodes); if((batch->cpus_per_task != 0) && (batch->cpus_per_task != (uint16_t) NO_VAL)) cpus_per_task = batch->cpus_per_task; else cpus_per_task = 1; /* default value */ if (cpus_per_task > 1) { env_array_overwrite_fmt(dest, "SLURM_CPUS_PER_TASK", "%u", cpus_per_task); } if(num_tasks) { env_array_overwrite_fmt(dest, "SLURM_NTASKS", "%u", num_tasks); /* keep around for old scripts */ env_array_overwrite_fmt(dest, "SLURM_NPROCS", "%u", num_tasks); } else { num_tasks = num_cpus / cpus_per_task; } if((tmp = getenvp(*dest, "SLURM_ARBITRARY_NODELIST"))) { task_dist = SLURM_DIST_ARBITRARY; } else { tmp = batch->nodes; task_dist = SLURM_DIST_BLOCK; } if(!(step_layout = slurm_step_layout_create(tmp, batch->cpus_per_node, batch->cpu_count_reps, num_nodes, num_tasks, cpus_per_task, task_dist, (uint16_t)NO_VAL))) return SLURM_ERROR; tmp = _uint16_array_to_str(step_layout->node_cnt, step_layout->tasks); slurm_step_layout_destroy(step_layout); env_array_overwrite_fmt(dest, "SLURM_TASKS_PER_NODE", "%s", tmp); xfree(tmp); return SLURM_SUCCESS; }