static int hwloc_gl_backend_notify_new_object(struct hwloc_backend *backend, struct hwloc_obj *pcidev) { struct hwloc_topology *topology = backend->topology; struct hwloc_gl_backend_data_s *data = backend->private_data; unsigned i, res; if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno GL detection (not thissystem)\n"); return 0; } if (HWLOC_OBJ_PCI_DEVICE != pcidev->type) return 0; if (data->nr_display == (unsigned) -1) { /* first call, lookup all display */ hwloc_gl_query_devices(data); /* if it fails, data->nr_display = 0 so we won't do anything below and in next callbacks */ } if (!data->nr_display) /* found no display */ return 0; /* now the display array is ready to use */ res = 0; for(i=0; i<data->nr_display; i++) { struct hwloc_gl_display_info_s *info = &data->display[i]; hwloc_obj_t osdev; if (info->pcidomain != pcidev->attr->pcidev.domain) continue; if (info->pcibus != pcidev->attr->pcidev.bus) continue; if (info->pcidevice != pcidev->attr->pcidev.dev) continue; if (info->pcifunc != pcidev->attr->pcidev.func) continue; osdev = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1); osdev->name = strdup(info->name); osdev->logical_index = -1; osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU; hwloc_obj_add_info(osdev, "Backend", "GL"); hwloc_obj_add_info(osdev, "GPUVendor", "NVIDIA Corporation"); if (info->productname) hwloc_obj_add_info(osdev, "GPUModel", info->productname); hwloc_insert_object_by_parent(topology, pcidev, osdev); res++; /* there may be others */ } return res; }
static int hwloc_look_pci(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; struct hwloc_obj *first_obj = NULL, *last_obj = NULL; int ret; struct pci_device_iterator *iter; struct pci_device *pcidev; #ifdef HWLOC_LINUX_SYS DIR *dir; #endif if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (hwloc_get_next_pcidev(topology, NULL)) { hwloc_debug("%s", "PCI objects already added, ignoring pci backend.\n"); return 0; } if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno PCI detection (not thissystem)\n"); return 0; } hwloc_debug("%s", "\nScanning PCI buses...\n"); /* initialize PCI scanning */ ret = pci_system_init(); if (ret) { hwloc_debug("%s", "Can not initialize libpciaccess\n"); return -1; } iter = pci_slot_match_iterator_create(NULL); /* iterate over devices */ for (pcidev = pci_device_next(iter); pcidev; pcidev = pci_device_next(iter)) { const char *vendorname, *devicename, *fullname; unsigned char config_space_cache[CONFIG_SPACE_CACHESIZE]; struct hwloc_obj *obj; unsigned os_index; unsigned domain; unsigned device_class; unsigned short tmp16; char name[128]; unsigned offset; /* initialize the config space in case we fail to read it (missing permissions, etc). */ memset(config_space_cache, 0xff, CONFIG_SPACE_CACHESIZE); pci_device_probe(pcidev); pci_device_cfg_read(pcidev, config_space_cache, 0, CONFIG_SPACE_CACHESIZE, NULL); /* try to read the domain */ domain = pcidev->domain; /* try to read the device_class */ device_class = pcidev->device_class >> 8; /* fixup SR-IOV buggy VF device/vendor IDs */ if (0xffff == pcidev->vendor_id && 0xffff == pcidev->device_id) { /* SR-IOV puts ffff:ffff in Virtual Function config space. * The actual VF device ID is stored at a special (dynamic) location in the Physical Function config space. * VF and PF have the same vendor ID. * * libpciaccess just returns ffff:ffff, needs to be fixed. * linuxpci is OK because sysfs files are already fixed the kernel. * (pciutils is OK when it uses those Linux sysfs files.) * * Reading these files is an easy way to work around the libpciaccess issue on Linux, * but we have no way to know if this is caused by SR-IOV or not. * * TODO: * If PF has CAP_ID_PCIX or CAP_ID_EXP (offset>0), * look for extended capability PCI_EXT_CAP_ID_SRIOV (need extended config space (more than 256 bytes)), * then read the VF device ID after it (PCI_IOV_DID bytes later). * Needs access to extended config space (needs root on Linux). * TODO: * Add string info attributes in VF and PF objects? */ #ifdef HWLOC_LINUX_SYS /* Workaround for Linux (the kernel returns the VF device/vendor IDs). */ char path[64]; char value[16]; FILE *file; size_t read; snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/vendor", domain, pcidev->bus, pcidev->dev, pcidev->func); file = fopen(path, "r"); if (file) { read = fread(value, 1, sizeof(value), file); fclose(file); if (read) /* fixup the pciaccess struct so that pci_device_get_vendor_name() is correct later. */ pcidev->vendor_id = strtoul(value, NULL, 16); } snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/device", domain, pcidev->bus, pcidev->dev, pcidev->func); file = fopen(path, "r"); if (file) { read = fread(value, 1, sizeof(value), file); fclose(file); if (read) /* fixup the pciaccess struct so that pci_device_get_device_name() is correct later. */ pcidev->device_id = strtoul(value, NULL, 16); } #endif } /* might be useful for debugging (note that domain might be truncated) */ os_index = (domain << 20) + (pcidev->bus << 12) + (pcidev->dev << 4) + pcidev->func; obj = hwloc_alloc_setup_object(HWLOC_OBJ_PCI_DEVICE, os_index); obj->attr->pcidev.domain = domain; obj->attr->pcidev.bus = pcidev->bus; obj->attr->pcidev.dev = pcidev->dev; obj->attr->pcidev.func = pcidev->func; obj->attr->pcidev.vendor_id = pcidev->vendor_id; obj->attr->pcidev.device_id = pcidev->device_id; obj->attr->pcidev.class_id = device_class; obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID]; obj->attr->pcidev.linkspeed = 0; /* unknown */ offset = hwloc_pci_find_cap(config_space_cache, PCI_CAP_ID_EXP); if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE) hwloc_pci_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed); hwloc_pci_prepare_bridge(obj, config_space_cache); if (obj->type == HWLOC_OBJ_PCI_DEVICE) { memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_VENDOR_ID], sizeof(tmp16)); obj->attr->pcidev.subvendor_id = tmp16; memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_ID], sizeof(tmp16)); obj->attr->pcidev.subdevice_id = tmp16; } else { /* TODO: * bridge must lookup PCI_CAP_ID_SSVID and then look at offset+PCI_SSVID_VENDOR/DEVICE_ID * cardbus must look at PCI_CB_SUBSYSTEM_VENDOR_ID and PCI_CB_SUBSYSTEM_ID */ } /* get the vendor name */ vendorname = pci_device_get_vendor_name(pcidev); if (vendorname && *vendorname) hwloc_obj_add_info(obj, "PCIVendor", vendorname); /* get the device name */ devicename = pci_device_get_device_name(pcidev); if (devicename && *devicename) hwloc_obj_add_info(obj, "PCIDevice", devicename); /* generate or get the fullname */ snprintf(name, sizeof(name), "%s%s%s", vendorname ? vendorname : "", vendorname && devicename ? " " : "", devicename ? devicename : ""); fullname = name; if (*name) obj->name = strdup(name); hwloc_debug(" %04x:%02x:%02x.%01x %04x %04x:%04x %s\n", domain, pcidev->bus, pcidev->dev, pcidev->func, device_class, pcidev->vendor_id, pcidev->device_id, fullname && *fullname ? fullname : "??"); /* queue the object for now */ if (first_obj) last_obj->next_sibling = obj; else first_obj = obj; last_obj = obj; } /* finalize device scanning */ pci_iterator_destroy(iter); pci_system_cleanup(); #ifdef HWLOC_LINUX_SYS dir = opendir("/sys/bus/pci/slots/"); if (dir) { struct dirent *dirent; while ((dirent = readdir(dir)) != NULL) { char path[64]; FILE *file; if (dirent->d_name[0] == '.') continue; snprintf(path, sizeof(path), "/sys/bus/pci/slots/%s/address", dirent->d_name); file = fopen(path, "r"); if (file) { unsigned domain, bus, dev; if (fscanf(file, "%x:%x:%x", &domain, &bus, &dev) == 3) { hwloc_obj_t obj = first_obj; while (obj) { if (obj->attr->pcidev.domain == domain && obj->attr->pcidev.bus == bus && obj->attr->pcidev.dev == dev && obj->attr->pcidev.func == 0) { hwloc_obj_add_info(obj, "PCISlot", dirent->d_name); break; } obj = obj->next_sibling; } } fclose(file); } } closedir(dir); } #endif return hwloc_insert_pci_device_list(backend, first_obj); }
static int hwloc_nvml_discover(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; nvmlReturn_t ret; unsigned nb, i; if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno NVML detection (not thissystem)\n"); return 0; } ret = nvmlInit(); if (NVML_SUCCESS != ret) return 0; ret = nvmlDeviceGetCount(&nb); if (NVML_SUCCESS != ret || !nb) { nvmlShutdown(); return 0; } for(i=0; i<nb; i++) { nvmlPciInfo_t pci; nvmlDevice_t device; hwloc_obj_t osdev, parent; char buffer[64]; ret = nvmlDeviceGetHandleByIndex(i, &device); assert(ret == NVML_SUCCESS); osdev = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1); snprintf(buffer, sizeof(buffer), "nvml%d", i); osdev->name = strdup(buffer); osdev->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN; osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU; hwloc_obj_add_info(osdev, "Backend", "NVML"); hwloc_obj_add_info(osdev, "GPUVendor", "NVIDIA Corporation"); buffer[0] = '\0'; ret = nvmlDeviceGetName(device, buffer, sizeof(buffer)); hwloc_obj_add_info(osdev, "GPUModel", buffer); /* these may fail with NVML_ERROR_NOT_SUPPORTED on old devices */ buffer[0] = '\0'; ret = nvmlDeviceGetSerial(device, buffer, sizeof(buffer)); if (buffer[0] != '\0') hwloc_obj_add_info(osdev, "NVIDIASerial", buffer); buffer[0] = '\0'; ret = nvmlDeviceGetUUID(device, buffer, sizeof(buffer)); if (buffer[0] != '\0') hwloc_obj_add_info(osdev, "NVIDIAUUID", buffer); parent = NULL; if (NVML_SUCCESS == nvmlDeviceGetPciInfo(device, &pci)) { parent = hwloc_pci_belowroot_find_by_busid(topology, pci.domain, pci.bus, pci.device, 0); if (!parent) parent = hwloc_pci_find_busid_parent(topology, pci.domain, pci.bus, pci.device, 0); #if HAVE_DECL_NVMLDEVICEGETMAXPCIELINKGENERATION if (parent && parent->type == HWLOC_OBJ_PCI_DEVICE) { unsigned maxwidth = 0, maxgen = 0; float lanespeed; nvmlDeviceGetMaxPcieLinkWidth(device, &maxwidth); nvmlDeviceGetMaxPcieLinkGeneration(device, &maxgen); /* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding = 0.25GB/s data-rate per lane * PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane * PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane */ lanespeed = maxgen <= 2 ? 2.5 * maxgen * 0.8 : 8.0 * 128/130; /* Gbit/s per lane */ if (lanespeed * maxwidth) /* we found the max link speed, replace the current link speed found by pci (or none) */ parent->attr->pcidev.linkspeed = lanespeed * maxwidth / 8; /* GB/s */ } #endif } if (!parent) parent = hwloc_get_root_obj(topology); hwloc_insert_object_by_parent(topology, parent, osdev); } nvmlShutdown(); return nb; }
static int hwloc_look_pci(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; struct hwloc_obj *first_obj = NULL, *last_obj = NULL; #ifdef HWLOC_HAVE_LIBPCIACCESS int ret; struct pci_device_iterator *iter; struct pci_device *pcidev; #else /* HWLOC_HAVE_PCIUTILS */ struct pci_access *pciaccess; struct pci_dev *pcidev; #endif if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (hwloc_get_next_pcidev(topology, NULL)) { hwloc_debug("%s", "PCI objects already added, ignoring pci backend.\n"); return 0; } if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno PCI detection (not thissystem)\n"); return 0; } hwloc_debug("%s", "\nScanning PCI buses...\n"); /* initialize PCI scanning */ #ifdef HWLOC_HAVE_LIBPCIACCESS ret = pci_system_init(); if (ret) { hwloc_debug("%s", "Can not initialize libpciaccess\n"); return -1; } iter = pci_slot_match_iterator_create(NULL); #else /* HWLOC_HAVE_PCIUTILS */ pciaccess = pci_alloc(); pciaccess->error = hwloc_pci_error; pciaccess->warning = hwloc_pci_warning; if (setjmp(err_buf)) { pci_cleanup(pciaccess); return -1; } pci_init(pciaccess); pci_scan_bus(pciaccess); #endif /* iterate over devices */ #ifdef HWLOC_HAVE_LIBPCIACCESS for (pcidev = pci_device_next(iter); pcidev; pcidev = pci_device_next(iter)) #else /* HWLOC_HAVE_PCIUTILS */ for (pcidev = pciaccess->devices; pcidev; pcidev = pcidev->next) #endif { const char *vendorname, *devicename, *fullname; unsigned char config_space_cache[CONFIG_SPACE_CACHESIZE]; struct hwloc_obj *obj; unsigned os_index; unsigned domain; unsigned device_class; unsigned short tmp16; char name[128]; unsigned offset; #ifdef HWLOC_HAVE_PCI_FIND_CAP struct pci_cap *cap; #endif /* initialize the config space in case we fail to read it (missing permissions, etc). */ memset(config_space_cache, 0xff, CONFIG_SPACE_CACHESIZE); #ifdef HWLOC_HAVE_LIBPCIACCESS pci_device_probe(pcidev); pci_device_cfg_read(pcidev, config_space_cache, 0, CONFIG_SPACE_CACHESIZE, NULL); #else /* HWLOC_HAVE_PCIUTILS */ pci_read_block(pcidev, 0, config_space_cache, CONFIG_SPACE_CACHESIZE); /* doesn't even tell how much it actually reads */ #endif /* try to read the domain */ #if (defined HWLOC_HAVE_LIBPCIACCESS) || (defined HWLOC_HAVE_PCIDEV_DOMAIN) domain = pcidev->domain; #else domain = 0; /* default domain number */ #endif /* try to read the device_class */ #ifdef HWLOC_HAVE_LIBPCIACCESS device_class = pcidev->device_class >> 8; #else /* HWLOC_HAVE_PCIUTILS */ #ifdef HWLOC_HAVE_PCIDEV_DEVICE_CLASS device_class = pcidev->device_class; #else device_class = config_space_cache[PCI_CLASS_DEVICE] | (config_space_cache[PCI_CLASS_DEVICE+1] << 8); #endif #endif /* might be useful for debugging (note that domain might be truncated) */ os_index = (domain << 20) + (pcidev->bus << 12) + (pcidev->dev << 4) + pcidev->func; obj = hwloc_alloc_setup_object(HWLOC_OBJ_PCI_DEVICE, os_index); obj->attr->pcidev.domain = domain; obj->attr->pcidev.bus = pcidev->bus; obj->attr->pcidev.dev = pcidev->dev; obj->attr->pcidev.func = pcidev->func; obj->attr->pcidev.vendor_id = pcidev->vendor_id; obj->attr->pcidev.device_id = pcidev->device_id; obj->attr->pcidev.class_id = device_class; obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID]; obj->attr->pcidev.linkspeed = 0; /* unknown */ #ifdef HWLOC_HAVE_PCI_FIND_CAP cap = pci_find_cap(pcidev, PCI_CAP_ID_EXP, PCI_CAP_NORMAL); offset = cap ? cap->addr : 0; #else offset = hwloc_pci_find_cap(config_space_cache, PCI_CAP_ID_EXP); #endif /* HWLOC_HAVE_PCI_FIND_CAP */ if (0xffff == pcidev->vendor_id && 0xffff == pcidev->device_id) { /* SR-IOV puts ffff:ffff in Virtual Function config space. * The actual VF device ID is stored at a special (dynamic) location in the Physical Function config space. * VF and PF have the same vendor ID. * * libpciaccess just returns ffff:ffff, needs to be fixed. * linuxpci is OK because sysfs files are already fixed the kernel. * pciutils is OK when it uses those Linux sysfs files. * * Reading these files is an easy way to work around the libpciaccess issue on Linux, * but we have no way to know if this is caused by SR-IOV or not. * * TODO: * If PF has CAP_ID_PCIX or CAP_ID_EXP (offset>0), * look for extended capability PCI_EXT_CAP_ID_SRIOV (need extended config space (more than 256 bytes)), * then read the VF device ID after it (PCI_IOV_DID bytes later). * Needs access to extended config space (needs root on Linux). * TODO: * Add string info attributes in VF and PF objects? */ #ifdef HWLOC_LINUX_SYS /* Workaround for Linux (the kernel returns the VF device/vendor IDs). */ char path[64]; char value[16]; FILE *file; snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/vendor", domain, pcidev->bus, pcidev->dev, pcidev->func); file = fopen(path, "r"); if (file) { fread(value, sizeof(value), 1, file); fclose(file); obj->attr->pcidev.vendor_id = strtoul(value, NULL, 16); } snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/device", domain, pcidev->bus, pcidev->dev, pcidev->func); file = fopen(path, "r"); if (file) { fread(value, sizeof(value), 1, file); fclose(file); obj->attr->pcidev.device_id = strtoul(value, NULL, 16); } #endif } if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE) hwloc_pci_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed); hwloc_pci_prepare_bridge(obj, config_space_cache); if (obj->type == HWLOC_OBJ_PCI_DEVICE) { memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_VENDOR_ID], sizeof(tmp16)); obj->attr->pcidev.subvendor_id = tmp16; memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_ID], sizeof(tmp16)); obj->attr->pcidev.subdevice_id = tmp16; } else { /* TODO: * bridge must lookup PCI_CAP_ID_SSVID and then look at offset+PCI_SSVID_VENDOR/DEVICE_ID * cardbus must look at PCI_CB_SUBSYSTEM_VENDOR_ID and PCI_CB_SUBSYSTEM_ID */ } /* starting from pciutils 2.2, pci_lookup_name() takes a variable number * of arguments, and supports the PCI_LOOKUP_NO_NUMBERS flag. */ /* get the vendor name */ #ifdef HWLOC_HAVE_LIBPCIACCESS vendorname = pci_device_get_vendor_name(pcidev); #else /* HWLOC_HAVE_PCIUTILS */ vendorname = pci_lookup_name(pciaccess, name, sizeof(name), #if HAVE_DECL_PCI_LOOKUP_NO_NUMBERS PCI_LOOKUP_VENDOR|PCI_LOOKUP_NO_NUMBERS, pcidev->vendor_id #else PCI_LOOKUP_VENDOR, pcidev->vendor_id, 0, 0, 0 #endif ); #endif /* HWLOC_HAVE_PCIUTILS */ if (vendorname && *vendorname) hwloc_obj_add_info(obj, "PCIVendor", vendorname); /* get the device name */ #ifdef HWLOC_HAVE_LIBPCIACCESS devicename = pci_device_get_device_name(pcidev); #else /* HWLOC_HAVE_PCIUTILS */ devicename = pci_lookup_name(pciaccess, name, sizeof(name), #if HAVE_DECL_PCI_LOOKUP_NO_NUMBERS PCI_LOOKUP_DEVICE|PCI_LOOKUP_NO_NUMBERS, pcidev->vendor_id, pcidev->device_id #else PCI_LOOKUP_DEVICE, pcidev->vendor_id, pcidev->device_id, 0, 0 #endif ); #endif /* HWLOC_HAVE_PCIUTILS */ if (devicename && *devicename) hwloc_obj_add_info(obj, "PCIDevice", devicename); /* generate or get the fullname */ #ifdef HWLOC_HAVE_LIBPCIACCESS snprintf(name, sizeof(name), "%s%s%s", vendorname ? vendorname : "", vendorname && devicename ? " " : "", devicename ? devicename : ""); fullname = name; if (*name) obj->name = strdup(name); #else /* HWLOC_HAVE_PCIUTILS */ fullname = pci_lookup_name(pciaccess, name, sizeof(name), #if HAVE_DECL_PCI_LOOKUP_NO_NUMBERS PCI_LOOKUP_VENDOR|PCI_LOOKUP_DEVICE|PCI_LOOKUP_NO_NUMBERS, pcidev->vendor_id, pcidev->device_id #else PCI_LOOKUP_VENDOR|PCI_LOOKUP_DEVICE, pcidev->vendor_id, pcidev->device_id, 0, 0 #endif ); if (fullname && *fullname) obj->name = strdup(fullname); #endif /* HWLOC_HAVE_PCIUTILS */ hwloc_debug(" %04x:%02x:%02x.%01x %04x %04x:%04x %s\n", domain, pcidev->bus, pcidev->dev, pcidev->func, device_class, pcidev->vendor_id, pcidev->device_id, fullname && *fullname ? fullname : "??"); /* queue the object for now */ if (first_obj) last_obj->next_sibling = obj; else first_obj = obj; last_obj = obj; } /* finalize device scanning */ #ifdef HWLOC_HAVE_LIBPCIACCESS pci_iterator_destroy(iter); pci_system_cleanup(); #else /* HWLOC_HAVE_PCIUTILS */ pci_cleanup(pciaccess); #endif return hwloc_insert_pci_device_list(backend, first_obj); }
static int hwloc_opencl_discover(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; cl_platform_id *platform_ids = NULL; cl_uint nr_platforms; cl_int clret; unsigned j; if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno OpenCL detection (not thissystem)\n"); return 0; } clret = clGetPlatformIDs(0, NULL, &nr_platforms); if (CL_SUCCESS != clret || !nr_platforms) return -1; hwloc_debug("%u OpenCL platforms\n", nr_platforms); platform_ids = malloc(nr_platforms * sizeof(*platform_ids)); if (!platform_ids) return -1; clret = clGetPlatformIDs(nr_platforms, platform_ids, &nr_platforms); if (CL_SUCCESS != clret || !nr_platforms) { free(platform_ids); return -1; } for(j=0; j<nr_platforms; j++) { cl_device_id *device_ids = NULL; cl_uint nr_devices; unsigned i; clret = clGetDeviceIDs(platform_ids[j], CL_DEVICE_TYPE_ALL, 0, NULL, &nr_devices); if (CL_SUCCESS != clret) continue; device_ids = malloc(nr_devices * sizeof(*device_ids)); clret = clGetDeviceIDs(platform_ids[j], CL_DEVICE_TYPE_ALL, nr_devices, device_ids, &nr_devices); if (CL_SUCCESS != clret) { free(device_ids); continue; } for(i=0; i<nr_devices; i++) { cl_platform_id platform_id = 0; cl_device_type type; #ifdef CL_DEVICE_TOPOLOGY_AMD cl_device_topology_amd amdtopo; #endif cl_ulong globalmemsize; cl_uint computeunits; hwloc_obj_t osdev, parent; char buffer[64]; hwloc_debug("This is opencl%dd%d\n", j, i); #ifdef CL_DEVICE_TOPOLOGY_AMD clret = clGetDeviceInfo(device_ids[i], CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL); if (CL_SUCCESS != clret) { hwloc_debug("no AMD-specific device information: %d\n", clret); continue; } else if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) { hwloc_debug("AMD-specific device topology reports non-PCIe device type: %u\n", amdtopo.raw.type); continue; } #else continue; #endif osdev = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1); snprintf(buffer, sizeof(buffer), "opencl%dd%d", j, i); osdev->name = strdup(buffer); osdev->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN; osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC; hwloc_obj_add_info(osdev, "CoProcType", "OpenCL"); hwloc_obj_add_info(osdev, "Backend", "OpenCL"); clGetDeviceInfo(device_ids[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL); if (type == CL_DEVICE_TYPE_GPU) hwloc_obj_add_info(osdev, "OpenCLDeviceType", "GPU"); else if (type == CL_DEVICE_TYPE_ACCELERATOR) hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Accelerator"); else if (type == CL_DEVICE_TYPE_CPU) hwloc_obj_add_info(osdev, "OpenCLDeviceType", "CPU"); else if (type == CL_DEVICE_TYPE_CUSTOM) hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Custom"); else hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Unknown"); buffer[0] = '\0'; clGetDeviceInfo(device_ids[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL); if (buffer[0] != '\0') hwloc_obj_add_info(osdev, "GPUVendor", buffer); buffer[0] = '\0'; #ifdef CL_DEVICE_BOARD_NAME_AMD clGetDeviceInfo(device_ids[i], CL_DEVICE_BOARD_NAME_AMD, sizeof(buffer), buffer, NULL); #else clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL); #endif if (buffer[0] != '\0') hwloc_obj_add_info(osdev, "GPUModel", buffer); snprintf(buffer, sizeof(buffer), "%u", j); hwloc_obj_add_info(osdev, "OpenCLPlatformIndex", buffer); buffer[0] = '\0'; clret = clGetDeviceInfo(device_ids[i], CL_DEVICE_PLATFORM, sizeof(platform_id), &platform_id, NULL); if (CL_SUCCESS == clret) { clGetPlatformInfo(platform_id, CL_PLATFORM_NAME, sizeof(buffer), buffer, NULL); if (buffer[0] != '\0') hwloc_obj_add_info(osdev, "OpenCLPlatformName", buffer); } snprintf(buffer, sizeof(buffer), "%u", i); hwloc_obj_add_info(osdev, "OpenCLPlatformDeviceIndex", buffer); clGetDeviceInfo(device_ids[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(computeunits), &computeunits, NULL); snprintf(buffer, sizeof(buffer), "%u", computeunits); hwloc_obj_add_info(osdev, "OpenCLComputeUnits", buffer); clGetDeviceInfo(device_ids[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(globalmemsize), &globalmemsize, NULL); snprintf(buffer, sizeof(buffer), "%llu", (unsigned long long) globalmemsize / 1024); hwloc_obj_add_info(osdev, "OpenCLGlobalMemorySize", buffer); parent = NULL; #ifdef CL_DEVICE_TOPOLOGY_AMD parent = hwloc_pci_belowroot_find_by_busid(topology, 0, amdtopo.pcie.bus, amdtopo.pcie.device, amdtopo.pcie.function); if (!parent) parent = hwloc_pci_find_busid_parent(topology, 0, amdtopo.pcie.bus, amdtopo.pcie.device, amdtopo.pcie.function); #endif if (!parent) parent = hwloc_get_root_obj(topology); hwloc_insert_object_by_parent(topology, parent, osdev); } free(device_ids); } free(platform_ids); return 0; }
static int hwloc_cuda_backend_notify_new_object(struct hwloc_backend *backend, struct hwloc_obj *pcidev) { struct hwloc_topology *topology = backend->topology; struct hwloc_cuda_backend_data_s *data = backend->private_data; unsigned i; if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno CUDA detection (not thissystem)\n"); return 0; } if (HWLOC_OBJ_PCI_DEVICE != pcidev->type) return 0; if (data->nr_devices == (unsigned) -1) { /* first call, lookup all devices */ hwloc_cuda_query_devices(data); /* if it fails, data->nr_devices = 0 so we won't do anything below and in next callbacks */ } if (!data->nr_devices) /* found no devices */ return 0; for(i=0; i<data->nr_devices; i++) { struct hwloc_cuda_device_info_s *info = &data->devices[i]; char cuda_name[32]; char number[32]; struct cudaDeviceProp prop; hwloc_obj_t cuda_device; cudaError_t cures; unsigned cores; if (info->pcidomain != pcidev->attr->pcidev.domain) continue; if (info->pcibus != pcidev->attr->pcidev.bus) continue; if (info->pcidev != pcidev->attr->pcidev.dev) continue; if (info->pcifunc != pcidev->attr->pcidev.func) continue; cuda_device = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1); snprintf(cuda_name, sizeof(cuda_name), "cuda%d", info->idx); cuda_device->name = strdup(cuda_name); cuda_device->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN; cuda_device->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC; hwloc_obj_add_info(cuda_device, "CoProcType", "CUDA"); hwloc_obj_add_info(cuda_device, "Backend", "CUDA"); hwloc_obj_add_info(cuda_device, "GPUVendor", "NVIDIA Corporation"); cures = cudaGetDeviceProperties(&prop, info->idx); if (!cures) hwloc_obj_add_info(cuda_device, "GPUModel", prop.name); snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.totalGlobalMem) >> 10); hwloc_obj_add_info(cuda_device, "CUDAGlobalMemorySize", number); snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.l2CacheSize) >> 10); hwloc_obj_add_info(cuda_device, "CUDAL2CacheSize", number); snprintf(number, sizeof(number), "%d", prop.multiProcessorCount); hwloc_obj_add_info(cuda_device, "CUDAMultiProcessors", number); cores = hwloc_cuda_cores_per_MP(prop.major, prop.minor); if (cores) { snprintf(number, sizeof(number), "%u", cores); hwloc_obj_add_info(cuda_device, "CUDACoresPerMP", number); } snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.sharedMemPerBlock) >> 10); hwloc_obj_add_info(cuda_device, "CUDASharedMemorySizePerMP", number); hwloc_insert_object_by_parent(topology, pcidev, cuda_device); return 1; } return 0; }
static int hwloc_gl_discover(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; unsigned i, res = 0; int err; if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno GL detection (not thissystem)\n"); return 0; } for (i = 0; i < HWLOC_GL_SERVER_MAX; ++i) { Display* display; char displayName[10]; int opcode, event, error; unsigned j; /* open X server */ snprintf(displayName, sizeof(displayName), ":%u", i); display = XOpenDisplay(displayName); if (!display) continue; /* Check for NV-CONTROL extension (it's per server) */ if(!XQueryExtension(display, "NV-CONTROL", &opcode, &event, &error)) { XCloseDisplay(display); continue; } for (j = 0; j < (unsigned) ScreenCount(display) && j < HWLOC_GL_SCREEN_MAX; j++) { hwloc_obj_t osdev, parent; const int screen = j; unsigned int *ptr_binary_data; int data_length; int gpu_number; int nv_ctrl_pci_bus; int nv_ctrl_pci_device; int nv_ctrl_pci_domain; int nv_ctrl_pci_func; char *productname; char name[64]; /* the server supports NV-CONTROL but it may contain non-NVIDIA screen that don't support it */ if (!XNVCTRLIsNvScreen(display, screen)) continue; /* Gets the GPU number attached to the default screen. */ /* For further details, see the <NVCtrl/NVCtrlLib.h> */ err = XNVCTRLQueryTargetBinaryData (display, NV_CTRL_TARGET_TYPE_X_SCREEN, screen, 0, NV_CTRL_BINARY_DATA_GPUS_USED_BY_XSCREEN, (unsigned char **) &ptr_binary_data, &data_length); if (!err) continue; gpu_number = ptr_binary_data[1]; free(ptr_binary_data); #ifdef NV_CTRL_PCI_DOMAIN /* Gets the ID's of the GPU defined by gpu_number * For further details, see the <NVCtrl/NVCtrlLib.h> */ err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0, NV_CTRL_PCI_DOMAIN, &nv_ctrl_pci_domain); if (!err) continue; #else nv_ctrl_pci_domain = 0; #endif err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0, NV_CTRL_PCI_BUS, &nv_ctrl_pci_bus); if (!err) continue; err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0, NV_CTRL_PCI_DEVICE, &nv_ctrl_pci_device); if (!err) continue; err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0, NV_CTRL_PCI_FUNCTION, &nv_ctrl_pci_func); if (!err) continue; productname = NULL; err = XNVCTRLQueryTargetStringAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0, NV_CTRL_STRING_PRODUCT_NAME, &productname); snprintf(name, sizeof(name), ":%u.%u", i, j); osdev = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1); osdev->name = strdup(name); osdev->logical_index = -1; osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU; hwloc_obj_add_info(osdev, "Backend", "GL"); hwloc_obj_add_info(osdev, "GPUVendor", "NVIDIA Corporation"); if (productname) hwloc_obj_add_info(osdev, "GPUModel", productname); parent = hwloc_pci_belowroot_find_by_busid(topology, nv_ctrl_pci_domain, nv_ctrl_pci_bus, nv_ctrl_pci_device, nv_ctrl_pci_func); if (!parent) parent = hwloc_pci_find_busid_parent(topology, nv_ctrl_pci_domain, nv_ctrl_pci_bus, nv_ctrl_pci_device, nv_ctrl_pci_func); if (!parent) parent = hwloc_get_root_obj(topology); hwloc_insert_object_by_parent(topology, parent, osdev); hwloc_debug("GL device %s (product %s) on PCI 0000:%02x:%02x.%u\n", name, productname, nv_ctrl_pci_domain, nv_ctrl_pci_bus, nv_ctrl_pci_device, nv_ctrl_pci_func); res++; } XCloseDisplay(display); } return res; }
static int hwloc_cuda_discover(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; cudaError_t cures; int nb, i; if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno CUDA detection (not thissystem)\n"); return 0; } cures = cudaGetDeviceCount(&nb); if (cures) return -1; for (i = 0; i < nb; i++) { int domain, bus, dev; char cuda_name[32]; char number[32]; struct cudaDeviceProp prop; hwloc_obj_t cuda_device, parent; unsigned cores; cuda_device = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1); snprintf(cuda_name, sizeof(cuda_name), "cuda%d", i); cuda_device->name = strdup(cuda_name); cuda_device->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN; cuda_device->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC; hwloc_obj_add_info(cuda_device, "CoProcType", "CUDA"); hwloc_obj_add_info(cuda_device, "Backend", "CUDA"); hwloc_obj_add_info(cuda_device, "GPUVendor", "NVIDIA Corporation"); cures = cudaGetDeviceProperties(&prop, i); if (!cures) hwloc_obj_add_info(cuda_device, "GPUModel", prop.name); snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.totalGlobalMem) >> 10); hwloc_obj_add_info(cuda_device, "CUDAGlobalMemorySize", number); snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.l2CacheSize) >> 10); hwloc_obj_add_info(cuda_device, "CUDAL2CacheSize", number); snprintf(number, sizeof(number), "%d", prop.multiProcessorCount); hwloc_obj_add_info(cuda_device, "CUDAMultiProcessors", number); cores = hwloc_cuda_cores_per_MP(prop.major, prop.minor); if (cores) { snprintf(number, sizeof(number), "%u", cores); hwloc_obj_add_info(cuda_device, "CUDACoresPerMP", number); } snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.sharedMemPerBlock) >> 10); hwloc_obj_add_info(cuda_device, "CUDASharedMemorySizePerMP", number); parent = NULL; if (hwloc_cudart_get_device_pci_ids(NULL /* topology unused */, i, &domain, &bus, &dev) == 0) { parent = hwloc_pci_belowroot_find_by_busid(topology, domain, bus, dev, 0); if (!parent) parent = hwloc_pci_find_busid_parent(topology, domain, bus, dev, 0); } if (!parent) parent = hwloc_get_root_obj(topology); hwloc_insert_object_by_parent(topology, parent, cuda_device); } return 0; }