// Resolve the NUMA node hosting the given CUDA GPU device index.
// Returns the node's os_index, or 0 when the device cannot be located
// or when built without GPU/hwloc support.
unsigned int Hwloc::getNumaNodeOfGpu( unsigned int gpu ) {
   unsigned int numaId = 0;
#ifdef GPU_DEV
#ifdef HWLOC
   // Map the CUDA runtime device index to its PCI device object.
   hwloc_obj_t pciDev = hwloc_cudart_get_device_pcidev( _hwlocTopology, gpu );
   if ( pciDev != NULL ) {
      // Walk up the topology tree to the enclosing NUMA node, if any.
      hwloc_obj_t numaAncestor = hwloc_get_ancestor_obj_by_type( _hwlocTopology, HWLOC_OBJ_NODE, pciDev );
      if ( numaAncestor != NULL ) {
         numaId = numaAncestor->os_index;
      }
   }
#endif
#endif
   return numaId;
}
/*
 * initializeMICDevices
 *
 * Discover Intel MIC coprocessor devices and attach them to this Chip.
 * Iterates OS device indices starting at 0 until
 * hwloc_intel_mic_get_device_osdev_by_index() returns NULL.
 *
 * @param chip_obj - the hwloc object for this chip's NUMA node, or NULL
 *                   on a non-NUMA machine (every MIC attaches here).
 * @param topology - the hwloc topology to query.
 * @return PBSE_NONE.
 *
 * FIX: removed the unused local 'is_in_tree' and a redundant
 * 'PCI_Device new_device' declaration in the NUMA branch that was
 * shadowed by the inner declaration and never used.
 */
int Chip::initializeMICDevices(hwloc_obj_t chip_obj, hwloc_topology_t topology)
  {
  /* Start at index 0 and go until the lookup returns NULL. */
  for (int idx = 0; ; idx++)
    {
    hwloc_obj_t mic_obj = hwloc_intel_mic_get_device_osdev_by_index(topology, idx);

    if (mic_obj == NULL)
      break;

    if (chip_obj == NULL)
      {
      /* This came from the non-NUMA path: attach every MIC here.
       * NOTE(review): total_mics/available_mics are not incremented on
       * this path, mirroring the original behavior — confirm intended. */
      PCI_Device new_device;
      new_device.initializePCIDevice(mic_obj, idx, topology);
      this->devices.push_back(new_device);
      }
    else
      {
      /* NUMA: only attach MICs whose NUMA-node ancestor is this chip. */
      hwloc_obj_t ancestor_obj = hwloc_get_ancestor_obj_by_type(topology, HWLOC_OBJ_NODE, mic_obj);

      if ((ancestor_obj != NULL) &&
          (ancestor_obj->logical_index == chip_obj->logical_index))
        {
        PCI_Device new_device;
        new_device.initializePCIDevice(mic_obj, idx, topology);
        this->devices.push_back(new_device);
        this->total_mics++;
        this->available_mics++;
        }
      }
    }

  return(PBSE_NONE);
  }
// Return the os_index of the NUMA node containing the given CPU (PU).
// Falls back to 0 when the CPU is unknown, the machine is not NUMA, or
// hwloc support is compiled out.
//
// FIX: hwloc_get_pu_obj_by_os_index() returns NULL for an OS index that
// does not exist (or is offline); the original passed that NULL straight
// into hwloc_get_ancestor_obj_by_type(), which dereferences its obj
// argument — a null-pointer crash. Also unified the duplicated returns
// and matched the local's type to the unsigned return type.
unsigned int Hwloc::getNumaNodeOfCpu ( unsigned int cpu ) {
   unsigned int numaNodeId = 0;
#ifdef HWLOC
   hwloc_obj_t pu = hwloc_get_pu_obj_by_os_index( _hwlocTopology, cpu );
   if ( pu != NULL ) {
      // Now we have the PU object, go find its parent numa node.
      hwloc_obj_t numaNode = hwloc_get_ancestor_obj_by_type( _hwlocTopology, HWLOC_OBJ_NODE, pu );
      // If the machine is not NUMA, numaNode is NULL; keep the default 0.
      if ( numaNode != NULL ) {
         numaNodeId = numaNode->os_index;
      }
   }
#endif
   return numaNodeId;
}
/*
 * initializeNVIDIADevices
 *
 * Discover NVIDIA GPUs via NVML, locate each one in the hwloc topology,
 * and store it on the appropriate chip.
 *
 * @param machine_obj - the hwloc machine object (unused directly; locality
 *                      is resolved per device via the topology).
 * @param topology    - the hwloc topology to query.
 * @return PBSE_NONE (NVML failures are logged, not fatal).
 *
 * FIX: when nvmlDeviceGetHandleByIndex() fails, the original only logged
 * the error and then used the uninitialized/invalid 'gpu' handle with
 * hwloc — now we 'continue' to the next index. Also removed the unused
 * local 'is_in_tree'.
 */
int Machine::initializeNVIDIADevices(hwloc_obj_t machine_obj, hwloc_topology_t topology)
  {
  nvmlReturn_t rc;

  /* Initialize the NVML handle.
   *
   * nvmlInit should be called once before invoking any other methods in the
   * NVML library. A reference count of the number of initializations is
   * maintained. Shutdown only occurs when the reference count reaches zero. */
  rc = nvmlInit();
  if (rc != NVML_SUCCESS && rc != NVML_ERROR_ALREADY_INITIALIZED)
    {
    log_nvml_error(rc, NULL, __func__);
    return(PBSE_NONE);
    }

  unsigned int device_count = 0;

  /* Get the device count. */
  rc = nvmlDeviceGetCount(&device_count);
  if (rc == NVML_SUCCESS)
    {
    nvmlDevice_t gpu;

    /* Get the nvml device handle at each index. */
    for (unsigned int idx = 0; idx < device_count; idx++)
      {
      rc = nvmlDeviceGetHandleByIndex(idx, &gpu);
      if (rc != NVML_SUCCESS)
        {
        /* TODO: get gpuid from nvmlDevice_t struct */
        log_nvml_error(rc, NULL, __func__);
        /* 'gpu' is not valid on failure; skip this index. */
        continue;
        }

      /* Use the hwloc library to determine device locality. */
      hwloc_obj_t gpu_obj = hwloc_nvml_get_device_osdev(topology, gpu);
      if (gpu_obj == NULL)
        {
        /* This was not an nvml device. Look for a "card" device
         * (GeForce or Quadra). */
        gpu_obj = this->get_non_nvml_device(topology, gpu);
        if (gpu_obj == NULL)
          continue;
        }

      /* Only store devices that are actually part of this machine's tree. */
      hwloc_obj_t ancestor_obj = hwloc_get_ancestor_obj_by_type(topology, HWLOC_OBJ_MACHINE, gpu_obj);
      if (ancestor_obj != NULL)
        {
        PCI_Device new_device;
        new_device.initializePCIDevice(gpu_obj, idx, topology);
        store_device_on_appropriate_chip(new_device);
        }
      }
    }
  else
    {
    log_nvml_error(rc, NULL, __func__);
    }

  /* Shutdown the NVML handle.
   *
   * nvmlShutdown should be called after NVML work is done, once for each
   * call to nvmlInit(). For backwards compatibility, no error is reported
   * if nvmlShutdown() is called more times than nvmlInit(). */
  rc = nvmlShutdown();
  if (rc != NVML_SUCCESS)
    {
    log_nvml_error(rc, NULL, __func__);
    }

  return(PBSE_NONE);
  }
static void getNumCPUs(void) { // // accessible cores // // // Hwloc can't tell us the number of accessible cores directly, so // get that by counting the parent cores of the accessible PUs. // // // We could seemingly use hwloc_topology_get_allowed_cpuset() to get // the set of accessible PUs here. But that seems not to reflect the // schedaffinity settings, so use hwloc_get_proc_cpubind() instead. // hwloc_cpuset_t logAccSet; CHK_ERR_ERRNO((logAccSet = hwloc_bitmap_alloc()) != NULL); if (hwloc_get_proc_cpubind(topology, getpid(), logAccSet, 0) != 0) { #ifdef __APPLE__ const int errRecoverable = (errno == ENOSYS); // no cpubind on macOS #else const int errRecoverable = 0; #endif if (errRecoverable) { hwloc_bitmap_fill(logAccSet); } else { REPORT_ERR_ERRNO(hwloc_get_proc_cpubind(topology, getpid(), logAccSet, 0) == 0); } } hwloc_bitmap_and(logAccSet, logAccSet, hwloc_topology_get_online_cpuset(topology)); hwloc_cpuset_t physAccSet; CHK_ERR_ERRNO((physAccSet = hwloc_bitmap_alloc()) != NULL); #define NEXT_PU(pu) \ hwloc_get_next_obj_inside_cpuset_by_type(topology, logAccSet, \ HWLOC_OBJ_PU, pu) for (hwloc_obj_t pu = NEXT_PU(NULL); pu != NULL; pu = NEXT_PU(pu)) { hwloc_obj_t core; CHK_ERR_ERRNO((core = hwloc_get_ancestor_obj_by_type(topology, HWLOC_OBJ_CORE, pu)) != NULL); hwloc_bitmap_set(physAccSet, core->logical_index); } #undef NEXT_PU numCPUsPhysAcc = hwloc_bitmap_weight(physAccSet); hwloc_bitmap_free(physAccSet); CHK_ERR(numCPUsPhysAcc > 0); // // all cores // numCPUsPhysAll = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE); CHK_ERR(numCPUsPhysAll > 0); // // accessible PUs // numCPUsLogAcc = hwloc_bitmap_weight(logAccSet); CHK_ERR(numCPUsLogAcc > 0); hwloc_bitmap_free(logAccSet); // // all PUs // numCPUsLogAll = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU); CHK_ERR(numCPUsLogAll > 0); }