/* build maps for task layout on nodes */
static int _init_task_layout(slurm_step_layout_t *step_layout,
			     const char *arbitrary_nodes,
			     uint16_t *cpus_per_node,
			     uint32_t *cpu_count_reps,
			     uint16_t cpus_per_task,
			     uint16_t task_dist, uint16_t plane_size)
{
	int cpu_cnt = 0, cpu_inx = 0, i;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
/*	char *name = NULL; */
	uint16_t cpus[step_layout->node_cnt];

	if (step_layout->node_cnt == 0)
		return SLURM_ERROR;
	if (step_layout->tasks)	/* layout already completed */
		return SLURM_SUCCESS;

	if ((int)cpus_per_task < 1 || cpus_per_task == (uint16_t)NO_VAL)
		cpus_per_task = 1;

	step_layout->plane_size = plane_size;

	step_layout->tasks = xmalloc(sizeof(uint16_t)
				     * step_layout->node_cnt);
	step_layout->tids  = xmalloc(sizeof(uint32_t *)
				     * step_layout->node_cnt);

	if (!(cluster_flags & CLUSTER_FLAG_BG)) {
		hostlist_t hl = hostlist_create(step_layout->node_list);
		/* make sure the number of nodes we think we have
		 * is the correct number */
		i = hostlist_count(hl);
		if (step_layout->node_cnt > i)
			step_layout->node_cnt = i;
		hostlist_destroy(hl);
	}
	debug("laying out the %u tasks on %u hosts %s dist %u",
	      step_layout->task_cnt, step_layout->node_cnt,
	      step_layout->node_list, task_dist);
	if (step_layout->node_cnt < 1) {
		error("no hostlist given can't layout tasks");
		return SLURM_ERROR;
	}

	for (i = 0; i < step_layout->node_cnt; i++) {
/*		name = hostlist_shift(hl); */
/*		if (!name) { */
/*			error("hostlist incomplete for this job request"); */
/*			hostlist_destroy(hl); */
/*			return SLURM_ERROR; */
/*		} */
/*		debug2("host %d = %s", i, name); */
/*		free(name); */
		cpus[i] = (cpus_per_node[cpu_inx] / cpus_per_task);
		if (cpus[i] == 0) {
			/* this can be a result of a heterogeneous allocation
			 * (e.g. 4 cpus on one node and 2 on the second with
			 *  cpus_per_task=3)  */
			cpus[i] = 1;
		}
		/* info("got %d cpus", cpus[i]); */
		if ((++cpu_cnt) >= cpu_count_reps[cpu_inx]) {
			/* move to next record */
			cpu_inx++;
			cpu_cnt = 0;
		}
	}

	if ((task_dist == SLURM_DIST_CYCLIC) ||
	    (task_dist == SLURM_DIST_CYCLIC_CYCLIC) ||
	    (task_dist == SLURM_DIST_CYCLIC_BLOCK))
		return _task_layout_cyclic(step_layout, cpus);
	else if ((task_dist == SLURM_DIST_ARBITRARY) &&
		 !(cluster_flags & CLUSTER_FLAG_FE))
		return _task_layout_hostfile(step_layout, arbitrary_nodes);
	else if (task_dist == SLURM_DIST_PLANE)
		return _task_layout_plane(step_layout, cpus);
	else
		return _task_layout_block(step_layout, cpus);
}
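/*
 * A minimal standalone sketch (not part of the Slurm source) of how the
 * run-length-encoded pair cpus_per_node[]/cpu_count_reps[] is expanded into
 * a per-node cpus[] array by the loop in the function above: cpu_count_reps[j]
 * says how many consecutive nodes share the CPU count cpus_per_node[j].  All
 * sample values below are hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* hypothetical allocation: 3 nodes with 8 cpus, then 1 node with 4 */
	uint16_t cpus_per_node[]  = { 8, 4 };
	uint32_t cpu_count_reps[] = { 3, 1 };
	uint16_t cpus_per_task = 3;
	int node_cnt = 4;
	uint16_t cpus[4];
	int cpu_cnt = 0, cpu_inx = 0, i;

	for (i = 0; i < node_cnt; i++) {
		cpus[i] = cpus_per_node[cpu_inx] / cpus_per_task;
		if (cpus[i] == 0)	/* heterogeneous allocation guard */
			cpus[i] = 1;
		if ((++cpu_cnt) >= cpu_count_reps[cpu_inx]) {
			cpu_inx++;	/* move to the next RLE record */
			cpu_cnt = 0;
		}
	}
	for (i = 0; i < node_cnt; i++)
		printf("node %d: %u tasks fit\n", i, cpus[i]);
	/* prints 2, 2, 2, 1: 8/3 = 2 tasks on the first three nodes,
	 * and 4/3 = 1 task on the last */
	return 0;
}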
/* build maps for task layout on nodes */
static int _init_task_layout(slurm_step_layout_req_t *step_layout_req,
			     slurm_step_layout_t *step_layout,
			     const char *arbitrary_nodes)
{
	int cpu_cnt = 0, cpu_inx = 0, cpu_task_cnt = 0, cpu_task_inx = 0, i;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	uint16_t cpus[step_layout->node_cnt];
	uint16_t cpus_per_task[1];
	uint32_t cpus_task_reps[1];

	if (step_layout->node_cnt == 0)
		return SLURM_ERROR;
	if (step_layout->tasks)	/* layout already completed */
		return SLURM_SUCCESS;

	if (!step_layout_req->cpus_per_task) {
		cpus_per_task[0] = 1;
		cpus_task_reps[0] = step_layout_req->num_hosts;
		step_layout_req->cpus_per_task = cpus_per_task;
		step_layout_req->cpus_task_reps = cpus_task_reps;
	}

	if (((int)step_layout_req->cpus_per_task[0] < 1) ||
	    (step_layout_req->cpus_per_task[0] == NO_VAL16)) {
		step_layout_req->cpus_per_task[0] = 1;
		step_layout_req->cpus_task_reps[0] =
			step_layout_req->num_hosts;
	}

	step_layout->plane_size = step_layout_req->plane_size;

	step_layout->tasks = xmalloc(sizeof(uint16_t)
				     * step_layout->node_cnt);
	step_layout->tids  = xmalloc(sizeof(uint32_t *)
				     * step_layout->node_cnt);

	if (!(cluster_flags & CLUSTER_FLAG_BG)) {
		hostlist_t hl = hostlist_create(step_layout->node_list);
		/* make sure the number of nodes we think we have
		 * is the correct number */
		i = hostlist_count(hl);
		if (step_layout->node_cnt > i)
			step_layout->node_cnt = i;
		hostlist_destroy(hl);
	}
	debug("laying out the %u tasks on %u hosts %s dist %u",
	      step_layout->task_cnt, step_layout->node_cnt,
	      step_layout->node_list, step_layout->task_dist);
	if (step_layout->node_cnt < 1) {
		error("no hostlist given can't layout tasks");
		return SLURM_ERROR;
	}

/*	hostlist_t hl = hostlist_create(step_layout->node_list); */
	for (i = 0; i < step_layout->node_cnt; i++) {
/*		char *name = hostlist_shift(hl); */
/*		if (!name) { */
/*			error("hostlist incomplete for this job request"); */
/*			hostlist_destroy(hl); */
/*			return SLURM_ERROR; */
/*		} */
/*		debug2("host %d = %s", i, name); */
/*		free(name); */
		cpus[i] = (step_layout_req->cpus_per_node[cpu_inx] /
			   step_layout_req->cpus_per_task[cpu_task_inx]);
		if (cpus[i] == 0) {
			/* this can be a result of a heterogeneous allocation
			 * (e.g. 4 cpus on one node and 2 on the second with
			 *  step_layout_req->cpus_per_task=3)  */
			cpus[i] = 1;
		}

		if (step_layout->plane_size &&
		    (step_layout->plane_size != NO_VAL16) &&
		    ((step_layout->task_dist & SLURM_DIST_STATE_BASE)
		     != SLURM_DIST_PLANE)) {
			/* plane_size when dist != plane is used to
			 * convey ntasks_per_node.  Adjust the number
			 * of cpus to reflect that. */
			uint16_t cpus_per_node = step_layout->plane_size *
				step_layout_req->cpus_per_task[cpu_task_inx];
			if (cpus[i] > cpus_per_node)
				cpus[i] = cpus_per_node;
		}

		/* info("got %d cpus", cpus[i]); */
		if ((++cpu_cnt) >= step_layout_req->cpu_count_reps[cpu_inx]) {
			/* move to next record */
			cpu_inx++;
			cpu_cnt = 0;
		}
		if ((++cpu_task_cnt) >=
		    step_layout_req->cpus_task_reps[cpu_task_inx]) {
			/* move to next record */
			cpu_task_inx++;
			cpu_task_cnt = 0;
		}
	}

	if ((step_layout->task_dist & SLURM_DIST_NODEMASK) ==
	    SLURM_DIST_NODECYCLIC)
		return _task_layout_cyclic(step_layout, cpus);
	else if (((step_layout->task_dist & SLURM_DIST_STATE_BASE) ==
		  SLURM_DIST_ARBITRARY) &&
		 !(cluster_flags & CLUSTER_FLAG_FE))
		return _task_layout_hostfile(step_layout, arbitrary_nodes);
	else if ((step_layout->task_dist & SLURM_DIST_STATE_BASE) ==
		 SLURM_DIST_PLANE)
		return _task_layout_plane(step_layout, cpus);
	else
		return _task_layout_block(step_layout, cpus);
}
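/*
 * A minimal standalone sketch (again not part of the Slurm source) of the key
 * structural change in the reworked function above: cpus_per_task is now
 * itself run-length encoded (cpus_per_task[]/cpus_task_reps[]), so the loop
 * keeps two independent cursors, one per RLE stream, and the two streams may
 * change records on different iterations.  All sample values are hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* hypothetical: 2 nodes with 8 cpus, then 2 nodes with 4 ... */
	uint16_t cpus_per_node[]  = { 8, 4 };
	uint32_t cpu_count_reps[] = { 2, 2 };
	/* ... and cpus_per_task = 2 on the first 3 nodes, 1 on the last */
	uint16_t cpus_per_task[]  = { 2, 1 };
	uint32_t cpus_task_reps[] = { 3, 1 };
	int node_cnt = 4;
	uint16_t cpus[4];
	int cpu_cnt = 0, cpu_inx = 0, cpu_task_cnt = 0, cpu_task_inx = 0, i;

	for (i = 0; i < node_cnt; i++) {
		cpus[i] = cpus_per_node[cpu_inx] /
			  cpus_per_task[cpu_task_inx];
		if (cpus[i] == 0)	/* heterogeneous allocation guard */
			cpus[i] = 1;
		/* the two RLE cursors advance independently */
		if ((++cpu_cnt) >= cpu_count_reps[cpu_inx]) {
			cpu_inx++;
			cpu_cnt = 0;
		}
		if ((++cpu_task_cnt) >= cpus_task_reps[cpu_task_inx]) {
			cpu_task_inx++;
			cpu_task_cnt = 0;
		}
	}
	for (i = 0; i < node_cnt; i++)
		printf("node %d: %u tasks fit\n", i, cpus[i]);
	/* prints 4, 4, 2, 4: the cpu stream switches records after node 1,
	 * the task stream after node 2 */
	return 0;
}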