static int get_thread_affinity_layout(FILE *fplog,
                                      const t_commrec *cr,
                                      const gmx_hw_info_t *hwinfo,
                                      int nthreads,
                                      int pin_offset, int *pin_stride,
                                      const int **locality_order)
{
    int         nhwthreads, npkg, ncores, nhwthreads_per_core, rc;
    const int  *pkg_id;
    const int  *core_id;
    const int  *hwthread_id;
    gmx_bool    bPickPinStride;

    if (pin_offset < 0)
    {
        gmx_fatal(FARGS, "Negative thread pinning offset requested");
    }
    if (*pin_stride < 0)
    {
        gmx_fatal(FARGS, "Negative thread pinning stride requested");
    }

    rc = gmx_cpuid_topology(hwinfo->cpuid_info, &nhwthreads, &npkg, &ncores,
                            &nhwthreads_per_core,
                            &pkg_id, &core_id, &hwthread_id, locality_order);

    if (rc != 0)
    {
        /* topology information not available or invalid, ignore it */
        nhwthreads      = hwinfo->nthreads_hw_avail;
        *locality_order = NULL;

        if (nhwthreads <= 0)
        {
            /* We don't know anything about the hardware, don't pin */
            md_print_warn(cr, fplog,
                          "NOTE: We don't know how many logical cores we have, will not pin threads");

            return -1;
        }
    }

    if (nthreads > nhwthreads)
    {
        /* We are oversubscribing, don't pin */
        md_print_warn(NULL, fplog,
                      "WARNING: Oversubscribing the CPU, will not pin threads");

        return -1;
    }

    if (pin_offset + nthreads > nhwthreads)
    {
        /* We are oversubscribing, don't pin */
        md_print_warn(NULL, fplog,
                      "WARNING: The requested pin offset is too large for the available logical cores,\n"
                      "         will not pin threads");

        return -1;
    }

    /* do we need to choose the pinning stride? */
    bPickPinStride = (*pin_stride == 0);

    if (bPickPinStride)
    {
        if (rc == 0 && pin_offset + nthreads*nhwthreads_per_core <= nhwthreads)
        {
            /* Put one thread on each physical core */
            *pin_stride = nhwthreads_per_core;
        }
        else
        {
            /* We don't know if we have SMT, and if we do, we don't know
             * if hw threads in the same physical core are consecutive.
             * Without SMT the pinning layout should not matter too much,
             * so we assume a consecutive layout and maximally spread out
             * the threads at equal threads per core.
             * Note that IBM is the major non-x86 case with cpuid support
             * and probably threads are already pinned by the queuing system,
             * so we wouldn't end up here in the first place.
             */
            *pin_stride = (nhwthreads - pin_offset)/nthreads;
        }
    }
    else
    {
        /* Check the placement of the thread with the largest index to make
         * sure that the offset & stride don't cause pinning beyond the last
         * hardware thread. */
        if (pin_offset + (nthreads-1)*(*pin_stride) >= nhwthreads)
        {
            /* We are oversubscribing, don't pin */
            md_print_warn(NULL, fplog,
                          "WARNING: The requested pinning stride is too large for the available logical cores,\n"
                          "         will not pin threads");

            return -1;
        }
    }

    if (fplog != NULL)
    {
        fprintf(fplog, "Pinning threads with a%s logical core stride of %d\n",
                bPickPinStride ? "n auto-selected" : " user-specified",
                *pin_stride);
    }

    return 0;
}
static int get_thread_affinity_layout(FILE *fplog,
                                      const t_commrec *cr,
                                      const gmx_hw_info_t *hwinfo,
                                      int nthreads,
                                      int pin_offset, int *pin_stride,
                                      const int **locality_order)
{
    int         nhwthreads, npkg, ncores, nhwthreads_per_core, rc;
    const int  *pkg_id;
    const int  *core_id;
    const int  *hwthread_id;

    if (pin_offset < 0)
    {
        gmx_fatal(FARGS, "Negative thread pinning offset requested");
    }
    if (*pin_stride < 0)
    {
        gmx_fatal(FARGS, "Negative thread pinning stride requested");
    }

    rc = gmx_cpuid_topology(hwinfo->cpuid_info, &nhwthreads, &npkg, &ncores,
                            &nhwthreads_per_core,
                            &pkg_id, &core_id, &hwthread_id, locality_order);

    if (rc != 0)
    {
        nhwthreads      = hwinfo->nthreads_hw_avail;
        *locality_order = NULL;

        if (nhwthreads <= 0)
        {
            /* We don't know anything about the hardware, don't pin */
            md_print_warn(cr, fplog,
                          "We don't know how many logical cores we have, will not pin threads");

            return -1;
        }
    }

    if (pin_offset + nthreads > nhwthreads)
    {
        /* We are oversubscribing, don't pin */
        md_print_warn(NULL, fplog,
                      "More threads requested than available logical cores, will not pin threads");

        return -1;
    }

    /* Check if we need to choose the pinning stride */
    if (*pin_stride == 0)
    {
        if (rc == 0 && pin_offset + nthreads*nhwthreads_per_core <= nhwthreads)
        {
            /* Put one thread on each physical core */
            *pin_stride = nhwthreads_per_core;
        }
        else
        {
            /* We don't know if we have SMT, and if we do, we don't know
             * if hw threads in the same physical core are consecutive.
             * Without SMT the pinning layout should not matter too much,
             * so we assume a consecutive layout and maximally spread out
             * the threads at equal threads per core.
             * Note that IBM is the major non-x86 case with cpuid support
             * and probably threads are already pinned by the queuing system,
             * so we wouldn't end up here in the first place.
             */
            *pin_stride = (nhwthreads - pin_offset)/nthreads;
        }

        if (fplog != NULL)
        {
            fprintf(fplog, "Pinning threads with a logical core stride of %d\n",
                    *pin_stride);
        }
    }
    else
    {
        if (pin_offset + nthreads*(*pin_stride) > nhwthreads)
        {
            /* We are oversubscribing, don't pin */
            md_print_warn(NULL, fplog,
                          "The requested pinning stride is too large for the available logical cores, will not pin threads");

            return -1;
        }
    }

    return 0;
}
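/*
 * Illustrative sketch only (an assumption, not code from the source above):
 * it shows how the offset, stride and locality_order produced by
 * get_thread_affinity_layout() could be combined into a per-thread logical
 * core index. The function name pin_threads_example and the affinity-setting
 * step are hypothetical; the OS/thread-library specific pinning call is left
 * as a stub.
 */
static void pin_threads_example(FILE *fplog, const t_commrec *cr,
                                const gmx_hw_info_t *hwinfo,
                                int nthreads, int offset, int stride)
{
    const int *locality_order;
    int        t;

    if (get_thread_affinity_layout(fplog, cr, hwinfo, nthreads,
                                   offset, &stride, &locality_order) != 0)
    {
        /* Layout invalid or CPU oversubscribed: leave the threads unpinned */
        return;
    }

    for (t = 0; t < nthreads; t++)
    {
        /* Logical core index for thread t; remap through the topology-derived
         * locality order when the cpuid topology was available. */
        int index = offset + t*stride;
        int core  = (locality_order != NULL) ? locality_order[index] : index;

        /* An affinity call such as pthread_setaffinity_np() or a thread-MPI
         * equivalent would pin thread t to 'core' here. */
        (void) core;
    }
}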