/* To effectively deal with heterogeneous nodes, we fake a cyclic * distribution to figure out how many cpus are needed on each node. * * This routine is a slightly modified "version" of the routine * _task_layout_block in src/common/dist_tasks.c. We do not need to * assign tasks to job->hostid[] and job->tids[][] at this point so * the cpu allocation is the same for cyclic and block. * * For the consumable resources support we need to determine what * "node/CPU/Core/thread"-tuplets will be allocated for a given job. * In the past we assumed that we only allocated one task per CPU (at * that point the lowest level of logical processor) and didn't allow * the use of overcommit. We have changed this philosophy and are now * allowing people to overcommit their resources and expect the system * administrator to enable the task/affinity plug-in which will then * bind all of a job's tasks to its allocated resources thereby * avoiding interference between co-allocated running jobs. * * In the consumable resources environment we need to determine the * layout schema within slurmctld. * * We have a core_bitmap of all available cores. 
All we're doing here * is removing cores that are not needed based on the task count, and * the choice of cores to remove is based on the distribution: * - "cyclic" removes cores "evenly", starting from the last socket, * - "block" removes cores from the "last" socket(s) * - "plane" removes cores "in chunks" */ extern int cr_dist(struct job_record *job_ptr, const uint16_t cr_type) { int error_code, cr_cpu = 1; if (((job_ptr->job_resrcs->node_req == NODE_CR_RESERVED) || (job_ptr->details->whole_node != 0)) && (job_ptr->details->core_spec == 0)) { /* the job has been allocated an EXCLUSIVE set of nodes, * so it gets all of the bits in the core_bitmap and * all of the available CPUs in the cpus array */ int size = bit_size(job_ptr->job_resrcs->core_bitmap); bit_nset(job_ptr->job_resrcs->core_bitmap, 0, size-1); return SLURM_SUCCESS; } _log_select_maps("cr_dist/start", job_ptr->job_resrcs->node_bitmap, job_ptr->job_resrcs->core_bitmap); if (job_ptr->details->task_dist == SLURM_DIST_PLANE) { /* perform a plane distribution on the 'cpus' array */ error_code = _compute_plane_dist(job_ptr); if (error_code != SLURM_SUCCESS) { error("cons_res: cr_dist: Error in " "_compute_plane_dist"); return error_code; } } else { /* perform a cyclic distribution on the 'cpus' array */ error_code = _compute_c_b_task_dist(job_ptr); if (error_code != SLURM_SUCCESS) { error("cons_res: cr_dist: Error in " "_compute_c_b_task_dist"); return error_code; } } /* now sync up the core_bitmap with the allocated 'cpus' array * based on the given distribution AND resource setting */ if ((cr_type & CR_CORE) || (cr_type & CR_SOCKET)) cr_cpu = 0; if (cr_cpu) { _block_sync_core_bitmap(job_ptr, cr_type); return SLURM_SUCCESS; } /* * If SelectTypeParameters mentions to use a block distribution for * cores by default, use that kind of distribution if no particular * cores distribution specified. 
* Note : cyclic cores distribution, which is the default, is treated * by the next code block */ if ( slurmctld_conf.select_type_param & CR_CORE_DEFAULT_DIST_BLOCK ) { switch(job_ptr->details->task_dist) { case SLURM_DIST_ARBITRARY: case SLURM_DIST_BLOCK: case SLURM_DIST_CYCLIC: case SLURM_DIST_UNKNOWN: _block_sync_core_bitmap(job_ptr, cr_type); return SLURM_SUCCESS; } } /* Determine the number of logical processors per node needed * for this job. Make sure below matches the layouts in * lllp_distribution in plugins/task/affinity/dist_task.c (FIXME) */ switch(job_ptr->details->task_dist) { case SLURM_DIST_BLOCK_BLOCK: case SLURM_DIST_CYCLIC_BLOCK: case SLURM_DIST_PLANE: _block_sync_core_bitmap(job_ptr, cr_type); break; case SLURM_DIST_ARBITRARY: case SLURM_DIST_BLOCK: case SLURM_DIST_CYCLIC: case SLURM_DIST_BLOCK_CYCLIC: case SLURM_DIST_CYCLIC_CYCLIC: case SLURM_DIST_BLOCK_CFULL: case SLURM_DIST_CYCLIC_CFULL: case SLURM_DIST_UNKNOWN: error_code = _cyclic_sync_core_bitmap(job_ptr, cr_type); break; default: error("select/cons_res: invalid task_dist entry"); return SLURM_ERROR; } _log_select_maps("cr_dist/fini", job_ptr->job_resrcs->node_bitmap, job_ptr->job_resrcs->core_bitmap); return error_code; }
/*
 * cr_dist - synchronize a job's core_bitmap with its allocated 'cpus'
 *	array according to the requested task distribution.
 *
 * To deal effectively with heterogeneous nodes we fake a cyclic
 * distribution to figure out how many CPUs are needed on each node.
 * This is a slightly modified version of _task_layout_block in
 * src/common/dist_tasks.c; tasks are not assigned to job->hostid[]
 * or job->tids[][] at this point, so the CPU allocation is the same
 * for cyclic and block.
 *
 * For consumable resources we must determine which
 * node/CPU/core/thread tuples will be allocated for a given job.
 * Overcommit is permitted; the system administrator is expected to
 * enable the task/affinity plugin, which binds each job's tasks to
 * its allocated resources and thereby avoids interference between
 * co-allocated running jobs.  The layout schema is determined here,
 * within slurmctld.
 *
 * We start from a core_bitmap of all available cores and remove the
 * cores not needed for the task count.  Which cores are removed
 * depends on the distribution:
 *   - "cyclic" removes cores "evenly", starting from the last socket
 *   - "block"  removes cores from the "last" socket(s)
 *   - "plane"  removes cores "in chunks"
 *
 * IN job_ptr - job to be allocated resources
 * IN cr_type - allocation type (sockets, cores, etc.)
 * IN preempt_mode - true if testing with simulated preempted jobs
 * IN avail_core_bitmap - system-wide bitmap of cores originally
 *	available to the job, only used to identify specialized cores
 */
extern int cr_dist(struct job_record *job_ptr, const uint16_t cr_type,
		   bool preempt_mode, bitstr_t *avail_core_bitmap)
{
	int rc;

	if (job_ptr->details->core_spec != NO_VAL16) {
		/* The job has been allocated all non-specialized
		 * cores; no specific CPU selection is needed. */
		return SLURM_SUCCESS;
	}

	if ((job_ptr->job_resrcs->node_req == NODE_CR_RESERVED) ||
	    (job_ptr->details->whole_node == 1)) {
		/* The job was allocated an EXCLUSIVE set of nodes, so
		 * it gets every bit in the core_bitmap and all of the
		 * available CPUs in the cpus array.  Up to this point
		 * we might not have the correct CPU count (a core
		 * count ignoring specialized cores); fix that too. */
		_clear_spec_cores(job_ptr, avail_core_bitmap);
		return SLURM_SUCCESS;
	}

	_log_select_maps("cr_dist/start",
			 job_ptr->job_resrcs->node_bitmap,
			 job_ptr->job_resrcs->core_bitmap);

	if ((job_ptr->details->task_dist & SLURM_DIST_STATE_BASE) ==
	    SLURM_DIST_PLANE) {
		/* plane distribution over the 'cpus' array */
		rc = _compute_plane_dist(job_ptr);
		if (rc != SLURM_SUCCESS) {
			error("cons_res: cr_dist: Error in "
			      "_compute_plane_dist");
			return rc;
		}
	} else {
		/* cyclic distribution over the 'cpus' array */
		rc = _compute_c_b_task_dist(job_ptr);
		if (rc != SLURM_SUCCESS) {
			error("cons_res: cr_dist: Error in "
			      "_compute_c_b_task_dist");
			return rc;
		}
	}

	/* Now sync the core_bitmap with the allocated 'cpus' array,
	 * based on the given distribution AND the resource setting.
	 * When allocating whole CPUs (neither CR_CORE nor CR_SOCKET)
	 * a block sync is always sufficient. */
	if (!(cr_type & (CR_CORE | CR_SOCKET))) {
		_block_sync_core_bitmap(job_ptr, cr_type);
		return SLURM_SUCCESS;
	}

	/* If SelectTypeParameters asks for a block distribution of
	 * cores by default, use it when no particular core
	 * distribution was specified.  Cyclic core distribution, the
	 * default, is handled by the switch further below. */
	if (slurmctld_conf.select_type_param & CR_CORE_DEFAULT_DIST_BLOCK) {
		switch (job_ptr->details->task_dist & SLURM_DIST_NODEMASK) {
		case SLURM_DIST_ARBITRARY:
		case SLURM_DIST_BLOCK:
		case SLURM_DIST_CYCLIC:
		case SLURM_DIST_UNKNOWN:
			_block_sync_core_bitmap(job_ptr, cr_type);
			return SLURM_SUCCESS;
		}
	}

	/* Determine the number of logical processors per node needed
	 * for this job.  Keep this in sync with the layouts in
	 * lllp_distribution in plugins/task/affinity/dist_task.c
	 * (FIXME). */
	switch (job_ptr->details->task_dist & SLURM_DIST_NODESOCKMASK) {
	case SLURM_DIST_BLOCK_BLOCK:
	case SLURM_DIST_CYCLIC_BLOCK:
	case SLURM_DIST_PLANE:
		_block_sync_core_bitmap(job_ptr, cr_type);
		rc = SLURM_SUCCESS;
		break;
	case SLURM_DIST_ARBITRARY:
	case SLURM_DIST_BLOCK:
	case SLURM_DIST_CYCLIC:
	case SLURM_DIST_BLOCK_CYCLIC:
	case SLURM_DIST_CYCLIC_CYCLIC:
	case SLURM_DIST_BLOCK_CFULL:
	case SLURM_DIST_CYCLIC_CFULL:
	case SLURM_DIST_UNKNOWN:
		rc = _cyclic_sync_core_bitmap(job_ptr, cr_type,
					      preempt_mode);
		break;
	default:
		error("select/cons_res: invalid task_dist entry");
		return SLURM_ERROR;
	}

	_log_select_maps("cr_dist/fini",
			 job_ptr->job_resrcs->node_bitmap,
			 job_ptr->job_resrcs->core_bitmap);
	return rc;
}