static int hwloc_gl_backend_notify_new_object(struct hwloc_backend *backend, struct hwloc_obj *pcidev) { struct hwloc_topology *topology = backend->topology; struct hwloc_gl_backend_data_s *data = backend->private_data; unsigned i, res; if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno GL detection (not thissystem)\n"); return 0; } if (HWLOC_OBJ_PCI_DEVICE != pcidev->type) return 0; if (data->nr_display == (unsigned) -1) { /* first call, lookup all display */ hwloc_gl_query_devices(data); /* if it fails, data->nr_display = 0 so we won't do anything below and in next callbacks */ } if (!data->nr_display) /* found no display */ return 0; /* now the display array is ready to use */ res = 0; for(i=0; i<data->nr_display; i++) { struct hwloc_gl_display_info_s *info = &data->display[i]; hwloc_obj_t osdev; if (info->pcidomain != pcidev->attr->pcidev.domain) continue; if (info->pcibus != pcidev->attr->pcidev.bus) continue; if (info->pcidevice != pcidev->attr->pcidev.dev) continue; if (info->pcifunc != pcidev->attr->pcidev.func) continue; osdev = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1); osdev->name = strdup(info->name); osdev->logical_index = -1; osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU; hwloc_obj_add_info(osdev, "Backend", "GL"); hwloc_obj_add_info(osdev, "GPUVendor", "NVIDIA Corporation"); if (info->productname) hwloc_obj_add_info(osdev, "GPUModel", info->productname); hwloc_insert_object_by_parent(topology, pcidev, osdev); res++; /* there may be others */ } return res; }
static int hwloc_look_synthetic(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; struct hwloc_synthetic_backend_data_s *data = backend->private_data; hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); unsigned i; assert(!topology->levels[0][0]->cpuset); hwloc_alloc_obj_cpusets(topology->levels[0][0]); topology->support.discovery->pu = 1; /* start with os_index 0 for each level */ for (i = 0; data->level[i].arity > 0; i++) data->level[i].next_os_index = 0; /* ... including the last one */ data->level[i].next_os_index = 0; /* update first level type according to the synthetic type array */ topology->levels[0][0]->type = data->level[0].type; hwloc_synthetic__post_look_hooks(&data->level[0], topology->levels[0][0]); for (i = 0; i < data->level[0].arity; i++) hwloc__look_synthetic(topology, data, 1, cpuset); hwloc_bitmap_free(cpuset); hwloc_obj_add_info(topology->levels[0][0], "Backend", "Synthetic"); hwloc_obj_add_info(topology->levels[0][0], "SyntheticDescription", data->string); return 1; }
void hwloc_look_synthetic(struct hwloc_topology *topology) { hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); unsigned first_cpu = 0, i; topology->support.discovery->pu = 1; /* start with id=0 for each level */ for (i = 0; topology->backend_params.synthetic.arity[i] > 0; i++) topology->backend_params.synthetic.id[i] = 0; /* ... including the last one */ topology->backend_params.synthetic.id[i] = 0; /* update first level type according to the synthetic type array */ topology->levels[0][0]->type = topology->backend_params.synthetic.type[0]; for (i = 0; i < topology->backend_params.synthetic.arity[0]; i++) first_cpu = hwloc__look_synthetic(topology, 1, first_cpu, cpuset); hwloc_bitmap_free(cpuset); hwloc_obj_add_info(topology->levels[0][0], "Backend", "Synthetic"); hwloc_obj_add_info(topology->levels[0][0], "SyntheticDescription", topology->backend_params.synthetic.string); }
int hwloc_look_hardwired_fujitsu_fx10(struct hwloc_topology *topology) { /* FIXME: what if a broken core is disabled? */ unsigned i; hwloc_obj_t obj; hwloc_bitmap_t set; for(i=0; i<16; i++) { set = hwloc_bitmap_alloc(); hwloc_bitmap_set(set, i); obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); obj->cpuset = hwloc_bitmap_dup(set); obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; obj->attr->cache.depth = 1; obj->attr->cache.size = 32*1024; obj->attr->cache.linesize = 128; obj->attr->cache.associativity = 2; hwloc_insert_object_by_cpuset(topology, obj); obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); obj->cpuset = hwloc_bitmap_dup(set); obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; obj->attr->cache.depth = 1; obj->attr->cache.size = 32*1024; obj->attr->cache.linesize = 128; obj->attr->cache.associativity = 2; hwloc_insert_object_by_cpuset(topology, obj); obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i); obj->cpuset = set; hwloc_insert_object_by_cpuset(topology, obj); } set = hwloc_bitmap_alloc(); hwloc_bitmap_set_range(set, 0, 15); obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1); obj->cpuset = hwloc_bitmap_dup(set); obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; obj->attr->cache.depth = 2; obj->attr->cache.size = 12*1024*1024; obj->attr->cache.linesize = 128; obj->attr->cache.associativity = 24; hwloc_insert_object_by_cpuset(topology, obj); obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, 0); obj->cpuset = set; hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu"); hwloc_obj_add_info(obj, "CPUModel", "SPARC64 IXfx"); hwloc_insert_object_by_cpuset(topology, obj); hwloc_setup_pu_level(topology, 16); return 0; }
static hwloc_obj_t insert_task(hwloc_topology_t topology, hwloc_cpuset_t cpuset, const char * name) { hwloc_obj_t obj; hwloc_bitmap_and(cpuset, cpuset, hwloc_topology_get_topology_cpuset(topology)); if (hwloc_bitmap_iszero(cpuset)) return NULL; /* try to insert at exact position */ obj = hwloc_topology_insert_misc_object_by_cpuset(topology, cpuset, name); if (!obj) { /* try to insert in a larger parent */ char *s; hwloc_bitmap_asprintf(&s, cpuset); obj = hwloc_get_obj_covering_cpuset(topology, cpuset); if (obj) { obj = hwloc_topology_insert_misc_object_by_parent(topology, obj, name); fprintf(stderr, "Inserted process `%s' below parent larger than cpuset %s\n", name, s); } else { fprintf(stderr, "Failed to insert process `%s' with cpuset %s\n", name, s); } free(s); } else { hwloc_obj_add_info(obj, "Type", "Process"); } return obj; }
static int hwloc_look_solaris(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; unsigned nbprocs = hwloc_fallback_nbprocessors (topology); int alreadypus = 0; if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return 0; hwloc_alloc_obj_cpusets(topology->levels[0][0]); #ifdef HAVE_LIBLGRP hwloc_look_lgrp(topology); #endif /* HAVE_LIBLGRP */ #ifdef HAVE_LIBKSTAT if (hwloc_look_kstat(topology) > 0) alreadypus = 1; #endif /* HAVE_LIBKSTAT */ if (!alreadypus) hwloc_setup_pu_level(topology, nbprocs); hwloc_obj_add_info(topology->levels[0][0], "Backend", "Solaris"); if (topology->is_thissystem) hwloc_add_uname_info(topology, NULL); return 1; }
static void lstopo_add_collapse_attributes(hwloc_topology_t topology) { hwloc_obj_t obj, collapser = NULL; unsigned collapsed = 0; /* collapse identical PCI devs */ for(obj = hwloc_get_next_pcidev(topology, NULL); obj; obj = hwloc_get_next_pcidev(topology, obj)) { if (collapser) { if (!obj->arity && obj->parent == collapser->parent && obj->attr->pcidev.vendor_id == collapser->attr->pcidev.vendor_id && obj->attr->pcidev.device_id == collapser->attr->pcidev.device_id && obj->attr->pcidev.subvendor_id == collapser->attr->pcidev.subvendor_id && obj->attr->pcidev.subdevice_id == collapser->attr->pcidev.subdevice_id) { /* collapse another one */ hwloc_obj_add_info(obj, "lstopoCollapse", "0"); collapsed++; continue; } else if (collapsed > 1) { /* end this collapsing */ char text[10]; snprintf(text, sizeof(text), "%u", collapsed); hwloc_obj_add_info(collapser, "lstopoCollapse", text); collapser = NULL; collapsed = 0; } } if (!obj->arity) { /* start a new collapsing */ collapser = obj; collapsed = 1; } } if (collapsed > 1) { /* end this collapsing */ char text[10]; snprintf(text, sizeof(text), "%u", collapsed); hwloc_obj_add_info(collapser, "lstopoCollapse", text); } }
void hwloc_look_solaris(struct hwloc_topology *topology) { unsigned nbprocs = hwloc_fallback_nbprocessors (topology); int alreadypus = 0; #ifdef HAVE_LIBLGRP hwloc_look_lgrp(topology); #endif /* HAVE_LIBLGRP */ #ifdef HAVE_LIBKSTAT nbprocs = 0; if (hwloc_look_kstat(topology)) alreadypus = 1; #endif /* HAVE_LIBKSTAT */ if (!alreadypus) hwloc_setup_pu_level(topology, nbprocs); hwloc_obj_add_info(topology->levels[0][0], "Backend", "Solaris"); }
static int hwloc_look_netbsd(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; unsigned nbprocs = hwloc_fallback_nbprocessors(topology); if (!topology->levels[0][0]->cpuset) { /* Nobody (even the x86 backend) created objects yet, setup basic objects */ hwloc_alloc_obj_cpusets(topology->levels[0][0]); hwloc_setup_pu_level(topology, nbprocs); } /* Add NetBSD specific information */ #if (defined HAVE_SYSCTL) && (defined HAVE_SYS_SYSCTL_H) hwloc_netbsd_node_meminfo_info(topology); #endif hwloc_obj_add_info(topology->levels[0][0], "Backend", "NetBSD"); hwloc_add_uname_info(topology, NULL); return 0; }
static int hwloc_opencl_discover(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; enum hwloc_type_filter_e filter; cl_platform_id *platform_ids = NULL; cl_uint nr_platforms; cl_int clret; unsigned j; hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter); if (filter == HWLOC_TYPE_FILTER_KEEP_NONE) return 0; clret = clGetPlatformIDs(0, NULL, &nr_platforms); if (CL_SUCCESS != clret || !nr_platforms) return -1; hwloc_debug("%u OpenCL platforms\n", nr_platforms); platform_ids = malloc(nr_platforms * sizeof(*platform_ids)); if (!platform_ids) return -1; clret = clGetPlatformIDs(nr_platforms, platform_ids, &nr_platforms); if (CL_SUCCESS != clret || !nr_platforms) { free(platform_ids); return -1; } for(j=0; j<nr_platforms; j++) { cl_device_id *device_ids = NULL; cl_uint nr_devices; unsigned i; clret = clGetDeviceIDs(platform_ids[j], CL_DEVICE_TYPE_ALL, 0, NULL, &nr_devices); if (CL_SUCCESS != clret) continue; device_ids = malloc(nr_devices * sizeof(*device_ids)); clret = clGetDeviceIDs(platform_ids[j], CL_DEVICE_TYPE_ALL, nr_devices, device_ids, &nr_devices); if (CL_SUCCESS != clret) { free(device_ids); continue; } for(i=0; i<nr_devices; i++) { cl_platform_id platform_id = 0; cl_device_type type; #ifdef CL_DEVICE_TOPOLOGY_AMD cl_device_topology_amd amdtopo; #endif cl_ulong globalmemsize; cl_uint computeunits; hwloc_obj_t osdev, parent; char buffer[64]; hwloc_debug("This is opencl%ud%u\n", j, i); #ifdef CL_DEVICE_TOPOLOGY_AMD clret = clGetDeviceInfo(device_ids[i], CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL); if (CL_SUCCESS != clret) { hwloc_debug("no AMD-specific device information: %d\n", clret); continue; } else if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) { hwloc_debug("AMD-specific device topology reports non-PCIe device type: %u\n", amdtopo.raw.type); continue; } #else continue; #endif osdev = hwloc_alloc_setup_object(topology, HWLOC_OBJ_OS_DEVICE, -1); snprintf(buffer, sizeof(buffer), "opencl%ud%u", j, i); osdev->name = strdup(buffer); osdev->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN; osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC; osdev->subtype = strdup("OpenCL"); hwloc_obj_add_info(osdev, "Backend", "OpenCL"); clGetDeviceInfo(device_ids[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL); if (type == CL_DEVICE_TYPE_GPU) hwloc_obj_add_info(osdev, "OpenCLDeviceType", "GPU"); else if (type == CL_DEVICE_TYPE_ACCELERATOR) hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Accelerator"); else if (type == CL_DEVICE_TYPE_CPU) hwloc_obj_add_info(osdev, "OpenCLDeviceType", "CPU"); else if (type == CL_DEVICE_TYPE_CUSTOM) hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Custom"); else hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Unknown"); buffer[0] = '\0'; clGetDeviceInfo(device_ids[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL); if (buffer[0] != '\0') hwloc_obj_add_info(osdev, "GPUVendor", buffer); buffer[0] = '\0'; #ifdef CL_DEVICE_BOARD_NAME_AMD clGetDeviceInfo(device_ids[i], CL_DEVICE_BOARD_NAME_AMD, sizeof(buffer), buffer, NULL); #else clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL); #endif if (buffer[0] != '\0') hwloc_obj_add_info(osdev, "GPUModel", buffer); snprintf(buffer, sizeof(buffer), "%u", j); hwloc_obj_add_info(osdev, "OpenCLPlatformIndex", buffer); buffer[0] = '\0'; clret = clGetDeviceInfo(device_ids[i], CL_DEVICE_PLATFORM, sizeof(platform_id), &platform_id, NULL); if (CL_SUCCESS == clret) { clGetPlatformInfo(platform_id, CL_PLATFORM_NAME, sizeof(buffer), buffer, NULL); if (buffer[0] != '\0') hwloc_obj_add_info(osdev, "OpenCLPlatformName", buffer); } snprintf(buffer, sizeof(buffer), "%u", i); hwloc_obj_add_info(osdev, "OpenCLPlatformDeviceIndex", buffer); clGetDeviceInfo(device_ids[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(computeunits), &computeunits, NULL); snprintf(buffer, sizeof(buffer), "%u", computeunits); hwloc_obj_add_info(osdev, "OpenCLComputeUnits", buffer); clGetDeviceInfo(device_ids[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(globalmemsize), &globalmemsize, NULL); snprintf(buffer, sizeof(buffer), "%llu", (unsigned long long) globalmemsize / 1024); hwloc_obj_add_info(osdev, "OpenCLGlobalMemorySize", buffer); parent = NULL; #ifdef CL_DEVICE_TOPOLOGY_AMD parent = hwloc_pcidisc_find_by_busid(topology, 0, amdtopo.pcie.bus, amdtopo.pcie.device, amdtopo.pcie.function); if (!parent) parent = hwloc_pcidisc_find_busid_parent(topology, 0, amdtopo.pcie.bus, amdtopo.pcie.device, amdtopo.pcie.function); #endif if (!parent) parent = hwloc_get_root_obj(topology); hwloc_insert_object_by_parent(topology, parent, osdev); } free(device_ids); } free(platform_ids); return 0; }
static int hwloc_nvml_discover(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; nvmlReturn_t ret; unsigned nb, i; if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno NVML detection (not thissystem)\n"); return 0; } ret = nvmlInit(); if (NVML_SUCCESS != ret) return 0; ret = nvmlDeviceGetCount(&nb); if (NVML_SUCCESS != ret || !nb) { nvmlShutdown(); return 0; } for(i=0; i<nb; i++) { nvmlPciInfo_t pci; nvmlDevice_t device; hwloc_obj_t osdev, parent; char buffer[64]; ret = nvmlDeviceGetHandleByIndex(i, &device); assert(ret == NVML_SUCCESS); osdev = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1); snprintf(buffer, sizeof(buffer), "nvml%d", i); osdev->name = strdup(buffer); osdev->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN; osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU; hwloc_obj_add_info(osdev, "Backend", "NVML"); hwloc_obj_add_info(osdev, "GPUVendor", "NVIDIA Corporation"); buffer[0] = '\0'; ret = nvmlDeviceGetName(device, buffer, sizeof(buffer)); hwloc_obj_add_info(osdev, "GPUModel", buffer); /* these may fail with NVML_ERROR_NOT_SUPPORTED on old devices */ buffer[0] = '\0'; ret = nvmlDeviceGetSerial(device, buffer, sizeof(buffer)); if (buffer[0] != '\0') hwloc_obj_add_info(osdev, "NVIDIASerial", buffer); buffer[0] = '\0'; ret = nvmlDeviceGetUUID(device, buffer, sizeof(buffer)); if (buffer[0] != '\0') hwloc_obj_add_info(osdev, "NVIDIAUUID", buffer); parent = NULL; if (NVML_SUCCESS == nvmlDeviceGetPciInfo(device, &pci)) { parent = hwloc_pci_belowroot_find_by_busid(topology, pci.domain, pci.bus, pci.device, 0); if (!parent) parent = hwloc_pci_find_busid_parent(topology, pci.domain, pci.bus, pci.device, 0); #if HAVE_DECL_NVMLDEVICEGETMAXPCIELINKGENERATION if (parent && parent->type == HWLOC_OBJ_PCI_DEVICE) { unsigned maxwidth = 0, maxgen = 0; float lanespeed; nvmlDeviceGetMaxPcieLinkWidth(device, &maxwidth); nvmlDeviceGetMaxPcieLinkGeneration(device, &maxgen); /* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding = 0.25GB/s data-rate per lane * PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane * PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane */ lanespeed = maxgen <= 2 ? 2.5 * maxgen * 0.8 : 8.0 * 128/130; /* Gbit/s per lane */ if (lanespeed * maxwidth) /* we found the max link speed, replace the current link speed found by pci (or none) */ parent->attr->pcidev.linkspeed = lanespeed * maxwidth / 8; /* GB/s */ } #endif } if (!parent) parent = hwloc_get_root_obj(topology); hwloc_insert_object_by_parent(topology, parent, osdev); } nvmlShutdown(); return nb; }
static int hwloc_look_pci(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; struct hwloc_obj *first_obj = NULL, *last_obj = NULL; #ifdef HWLOC_HAVE_LIBPCIACCESS int ret; struct pci_device_iterator *iter; struct pci_device *pcidev; #else /* HWLOC_HAVE_PCIUTILS */ struct pci_access *pciaccess; struct pci_dev *pcidev; #endif if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (hwloc_get_next_pcidev(topology, NULL)) { hwloc_debug("%s", "PCI objects already added, ignoring pci backend.\n"); return 0; } if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno PCI detection (not thissystem)\n"); return 0; } hwloc_debug("%s", "\nScanning PCI buses...\n"); /* initialize PCI scanning */ #ifdef HWLOC_HAVE_LIBPCIACCESS ret = pci_system_init(); if (ret) { hwloc_debug("%s", "Can not initialize libpciaccess\n"); return -1; } iter = pci_slot_match_iterator_create(NULL); #else /* HWLOC_HAVE_PCIUTILS */ pciaccess = pci_alloc(); pciaccess->error = hwloc_pci_error; pciaccess->warning = hwloc_pci_warning; if (setjmp(err_buf)) { pci_cleanup(pciaccess); return -1; } pci_init(pciaccess); pci_scan_bus(pciaccess); #endif /* iterate over devices */ #ifdef HWLOC_HAVE_LIBPCIACCESS for (pcidev = pci_device_next(iter); pcidev; pcidev = pci_device_next(iter)) #else /* HWLOC_HAVE_PCIUTILS */ for (pcidev = pciaccess->devices; pcidev; pcidev = pcidev->next) #endif { const char *vendorname, *devicename, *fullname; unsigned char config_space_cache[CONFIG_SPACE_CACHESIZE]; struct hwloc_obj *obj; unsigned os_index; unsigned domain; unsigned device_class; unsigned short tmp16; char name[128]; unsigned offset; #ifdef HWLOC_HAVE_PCI_FIND_CAP struct pci_cap *cap; #endif /* initialize the config space in case we fail to read it (missing permissions, etc). */ memset(config_space_cache, 0xff, CONFIG_SPACE_CACHESIZE); #ifdef HWLOC_HAVE_LIBPCIACCESS pci_device_probe(pcidev); pci_device_cfg_read(pcidev, config_space_cache, 0, CONFIG_SPACE_CACHESIZE, NULL); #else /* HWLOC_HAVE_PCIUTILS */ pci_read_block(pcidev, 0, config_space_cache, CONFIG_SPACE_CACHESIZE); /* doesn't even tell how much it actually reads */ #endif /* try to read the domain */ #if (defined HWLOC_HAVE_LIBPCIACCESS) || (defined HWLOC_HAVE_PCIDEV_DOMAIN) domain = pcidev->domain; #else domain = 0; /* default domain number */ #endif /* try to read the device_class */ #ifdef HWLOC_HAVE_LIBPCIACCESS device_class = pcidev->device_class >> 8; #else /* HWLOC_HAVE_PCIUTILS */ #ifdef HWLOC_HAVE_PCIDEV_DEVICE_CLASS device_class = pcidev->device_class; #else device_class = config_space_cache[PCI_CLASS_DEVICE] | (config_space_cache[PCI_CLASS_DEVICE+1] << 8); #endif #endif /* might be useful for debugging (note that domain might be truncated) */ os_index = (domain << 20) + (pcidev->bus << 12) + (pcidev->dev << 4) + pcidev->func; obj = hwloc_alloc_setup_object(HWLOC_OBJ_PCI_DEVICE, os_index); obj->attr->pcidev.domain = domain; obj->attr->pcidev.bus = pcidev->bus; obj->attr->pcidev.dev = pcidev->dev; obj->attr->pcidev.func = pcidev->func; obj->attr->pcidev.vendor_id = pcidev->vendor_id; obj->attr->pcidev.device_id = pcidev->device_id; obj->attr->pcidev.class_id = device_class; obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID]; obj->attr->pcidev.linkspeed = 0; /* unknown */ #ifdef HWLOC_HAVE_PCI_FIND_CAP cap = pci_find_cap(pcidev, PCI_CAP_ID_EXP, PCI_CAP_NORMAL); offset = cap ? cap->addr : 0; #else offset = hwloc_pci_find_cap(config_space_cache, PCI_CAP_ID_EXP); #endif /* HWLOC_HAVE_PCI_FIND_CAP */ if (0xffff == pcidev->vendor_id && 0xffff == pcidev->device_id) { /* SR-IOV puts ffff:ffff in Virtual Function config space. * The actual VF device ID is stored at a special (dynamic) location in the Physical Function config space. * VF and PF have the same vendor ID. * * libpciaccess just returns ffff:ffff, needs to be fixed. * linuxpci is OK because sysfs files are already fixed the kernel. * pciutils is OK when it uses those Linux sysfs files. * * Reading these files is an easy way to work around the libpciaccess issue on Linux, * but we have no way to know if this is caused by SR-IOV or not. * * TODO: * If PF has CAP_ID_PCIX or CAP_ID_EXP (offset>0), * look for extended capability PCI_EXT_CAP_ID_SRIOV (need extended config space (more than 256 bytes)), * then read the VF device ID after it (PCI_IOV_DID bytes later). * Needs access to extended config space (needs root on Linux). * TODO: * Add string info attributes in VF and PF objects? */ #ifdef HWLOC_LINUX_SYS /* Workaround for Linux (the kernel returns the VF device/vendor IDs). */ char path[64]; char value[16]; FILE *file; snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/vendor", domain, pcidev->bus, pcidev->dev, pcidev->func); file = fopen(path, "r"); if (file) { fread(value, sizeof(value), 1, file); fclose(file); obj->attr->pcidev.vendor_id = strtoul(value, NULL, 16); } snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/device", domain, pcidev->bus, pcidev->dev, pcidev->func); file = fopen(path, "r"); if (file) { fread(value, sizeof(value), 1, file); fclose(file); obj->attr->pcidev.device_id = strtoul(value, NULL, 16); } #endif } if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE) hwloc_pci_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed); hwloc_pci_prepare_bridge(obj, config_space_cache); if (obj->type == HWLOC_OBJ_PCI_DEVICE) { memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_VENDOR_ID], sizeof(tmp16)); obj->attr->pcidev.subvendor_id = tmp16; memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_ID], sizeof(tmp16)); obj->attr->pcidev.subdevice_id = tmp16; } else { /* TODO: * bridge must lookup PCI_CAP_ID_SSVID and then look at offset+PCI_SSVID_VENDOR/DEVICE_ID * cardbus must look at PCI_CB_SUBSYSTEM_VENDOR_ID and PCI_CB_SUBSYSTEM_ID */ } /* starting from pciutils 2.2, pci_lookup_name() takes a variable number * of arguments, and supports the PCI_LOOKUP_NO_NUMBERS flag. */ /* get the vendor name */ #ifdef HWLOC_HAVE_LIBPCIACCESS vendorname = pci_device_get_vendor_name(pcidev); #else /* HWLOC_HAVE_PCIUTILS */ vendorname = pci_lookup_name(pciaccess, name, sizeof(name), #if HAVE_DECL_PCI_LOOKUP_NO_NUMBERS PCI_LOOKUP_VENDOR|PCI_LOOKUP_NO_NUMBERS, pcidev->vendor_id #else PCI_LOOKUP_VENDOR, pcidev->vendor_id, 0, 0, 0 #endif ); #endif /* HWLOC_HAVE_PCIUTILS */ if (vendorname && *vendorname) hwloc_obj_add_info(obj, "PCIVendor", vendorname); /* get the device name */ #ifdef HWLOC_HAVE_LIBPCIACCESS devicename = pci_device_get_device_name(pcidev); #else /* HWLOC_HAVE_PCIUTILS */ devicename = pci_lookup_name(pciaccess, name, sizeof(name), #if HAVE_DECL_PCI_LOOKUP_NO_NUMBERS PCI_LOOKUP_DEVICE|PCI_LOOKUP_NO_NUMBERS, pcidev->vendor_id, pcidev->device_id #else PCI_LOOKUP_DEVICE, pcidev->vendor_id, pcidev->device_id, 0, 0 #endif ); #endif /* HWLOC_HAVE_PCIUTILS */ if (devicename && *devicename) hwloc_obj_add_info(obj, "PCIDevice", devicename); /* generate or get the fullname */ #ifdef HWLOC_HAVE_LIBPCIACCESS snprintf(name, sizeof(name), "%s%s%s", vendorname ? vendorname : "", vendorname && devicename ? " " : "", devicename ? devicename : ""); fullname = name; if (*name) obj->name = strdup(name); #else /* HWLOC_HAVE_PCIUTILS */ fullname = pci_lookup_name(pciaccess, name, sizeof(name), #if HAVE_DECL_PCI_LOOKUP_NO_NUMBERS PCI_LOOKUP_VENDOR|PCI_LOOKUP_DEVICE|PCI_LOOKUP_NO_NUMBERS, pcidev->vendor_id, pcidev->device_id #else PCI_LOOKUP_VENDOR|PCI_LOOKUP_DEVICE, pcidev->vendor_id, pcidev->device_id, 0, 0 #endif ); if (fullname && *fullname) obj->name = strdup(fullname); #endif /* HWLOC_HAVE_PCIUTILS */ hwloc_debug(" %04x:%02x:%02x.%01x %04x %04x:%04x %s\n", domain, pcidev->bus, pcidev->dev, pcidev->func, device_class, pcidev->vendor_id, pcidev->device_id, fullname && *fullname ? fullname : "??"); /* queue the object for now */ if (first_obj) last_obj->next_sibling = obj; else first_obj = obj; last_obj = obj; } /* finalize device scanning */ #ifdef HWLOC_HAVE_LIBPCIACCESS pci_iterator_destroy(iter); pci_system_cleanup(); #else /* HWLOC_HAVE_PCIUTILS */ pci_cleanup(pciaccess); #endif return hwloc_insert_pci_device_list(backend, first_obj); }
void hwloc_look_hpux(struct hwloc_topology *topology) { int has_numa = sysconf(_SC_CCNUMA_SUPPORT) == 1; hwloc_obj_t *nodes = NULL, obj; spu_t currentcpu; ldom_t currentnode; int i, nbnodes = 0; #ifdef HAVE__SC_LARGE_PAGESIZE topology->levels[0][0]->attr->machine.huge_page_size_kB = sysconf(_SC_LARGE_PAGESIZE); #endif if (has_numa) { nbnodes = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ? MPC_GETNUMLDOMS_SYS : MPC_GETNUMLDOMS, 0, 0); hwloc_debug("%d nodes\n", nbnodes); nodes = malloc(nbnodes * sizeof(*nodes)); i = 0; currentnode = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ? MPC_GETFIRSTLDOM_SYS : MPC_GETFIRSTLDOM, 0, 0); while (currentnode != -1 && i < nbnodes) { hwloc_debug("node %d is %d\n", i, currentnode); nodes[i] = obj = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, currentnode); obj->cpuset = hwloc_bitmap_alloc(); obj->nodeset = hwloc_bitmap_alloc(); hwloc_bitmap_set(obj->nodeset, currentnode); /* TODO: obj->attr->node.memory_kB */ /* TODO: obj->attr->node.huge_page_free */ currentnode = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ? MPC_GETNEXTLDOM_SYS : MPC_GETNEXTLDOM, currentnode, 0); i++; } } i = 0; currentcpu = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ? MPC_GETFIRSTSPU_SYS : MPC_GETFIRSTSPU, 0,0); while (currentcpu != -1) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, currentcpu); obj->cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_set(obj->cpuset, currentcpu); hwloc_debug("cpu %d\n", currentcpu); if (nodes) { /* Add this cpu to its node */ currentnode = mpctl(MPC_SPUTOLDOM, currentcpu, 0); if ((ldom_t) nodes[i]->os_index != currentnode) for (i = 0; i < nbnodes; i++) if ((ldom_t) nodes[i]->os_index == currentnode) break; if (i < nbnodes) { hwloc_bitmap_set(nodes[i]->cpuset, currentcpu); hwloc_debug("is in node %d\n", i); } else { hwloc_debug("%s", "is in no node?!\n"); } } /* Add cpu */ hwloc_insert_object_by_cpuset(topology, obj); currentcpu = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ? MPC_GETNEXTSPU_SYS : MPC_GETNEXTSPU, currentcpu, 0); } if (nodes) { /* Add nodes */ for (i = 0 ; i < nbnodes ; i++) hwloc_insert_object_by_cpuset(topology, nodes[i]); free(nodes); } topology->support.discovery->pu = 1; hwloc_obj_add_info(topology->levels[0][0], "Backend", "HP-UX"); }
int hwloc_look_hardwired_fujitsu_fx100(struct hwloc_topology *topology) { /* FIXME: what if a broken core is disabled? */ unsigned i; hwloc_obj_t obj; hwloc_bitmap_t set; for(i=0; i<34; i++) { set = hwloc_bitmap_alloc(); hwloc_bitmap_set(set, i); if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_L1ICACHE, -1); obj->cpuset = hwloc_bitmap_dup(set); obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; obj->attr->cache.depth = 1; obj->attr->cache.size = 64*1024; obj->attr->cache.linesize = 256; obj->attr->cache.associativity = 4; hwloc_insert_object_by_cpuset(topology, obj); } if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1CACHE)) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_L1CACHE, -1); obj->cpuset = hwloc_bitmap_dup(set); obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; obj->attr->cache.depth = 1; obj->attr->cache.size = 64*1024; obj->attr->cache.linesize = 256; obj->attr->cache.associativity = 4; hwloc_insert_object_by_cpuset(topology, obj); } if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i); obj->cpuset = set; hwloc_insert_object_by_cpuset(topology, obj); } else hwloc_bitmap_free(set); } if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L2CACHE)) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_L2CACHE, -1); obj->cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_set_range(obj->cpuset, 0, 15); hwloc_bitmap_set(obj->cpuset, 32); obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; obj->attr->cache.depth = 2; obj->attr->cache.size = 12*1024*1024; obj->attr->cache.linesize = 256; obj->attr->cache.associativity = 24; hwloc_insert_object_by_cpuset(topology, obj); obj = hwloc_alloc_setup_object(HWLOC_OBJ_L2CACHE, -1); obj->cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_set_range(obj->cpuset, 16, 31); hwloc_bitmap_set(obj->cpuset, 33); obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; obj->attr->cache.depth = 2; obj->attr->cache.size = 12*1024*1024; obj->attr->cache.linesize = 256; obj->attr->cache.associativity = 24; hwloc_insert_object_by_cpuset(topology, obj); } if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, 0); obj->cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_set_range(obj->cpuset, 0, 33); hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu"); hwloc_obj_add_info(obj, "CPUModel", "SPARC64 XIfx"); hwloc_insert_object_by_cpuset(topology, obj); } hwloc_setup_pu_level(topology, 34); return 0; }
static int hwloc_look_osf(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; cpu_cursor_t cursor; unsigned nbnodes; radid_t radid, radid2; radset_t radset, radset2; cpuid_t cpuid; cpuset_t cpuset; struct hwloc_obj *obj; unsigned distance; if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return 0; hwloc_alloc_obj_cpusets(topology->levels[0][0]); nbnodes = rad_get_num(); cpusetcreate(&cpuset); radsetcreate(&radset); radsetcreate(&radset2); { hwloc_obj_t *nodes = calloc(nbnodes, sizeof(hwloc_obj_t)); unsigned *indexes = calloc(nbnodes, sizeof(unsigned)); float *distances = calloc(nbnodes*nbnodes, sizeof(float)); unsigned nfound; numa_attr_t attr; attr.nattr_type = R_RAD; attr.nattr_descr.rd_radset = radset; attr.nattr_flags = 0; for (radid = 0; radid < (radid_t) nbnodes; radid++) { rademptyset(radset); radaddset(radset, radid); cpuemptyset(cpuset); if (rad_get_cpus(radid, cpuset)==-1) { fprintf(stderr,"rad_get_cpus(%d) failed: %s\n",radid,strerror(errno)); continue; } indexes[radid] = radid; nodes[radid] = obj = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, radid); obj->cpuset = hwloc_bitmap_alloc(); obj->memory.local_memory = rad_get_physmem(radid) * hwloc_getpagesize(); obj->memory.page_types_len = 2; obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types)); memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types)); obj->memory.page_types[0].size = hwloc_getpagesize(); #ifdef HAVE__SC_LARGE_PAGESIZE obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif cursor = SET_CURSOR_INIT; while((cpuid = cpu_foreach(cpuset, 0, &cursor)) != CPU_NONE) hwloc_bitmap_set(obj->cpuset, cpuid); hwloc_debug_1arg_bitmap("node %d has cpuset %s\n", radid, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); nfound = 0; for (radid2 = 0; radid2 < (radid_t) nbnodes; radid2++) distances[radid*nbnodes+radid2] = RAD_DIST_REMOTE; for (distance = RAD_DIST_LOCAL; distance < RAD_DIST_REMOTE; distance++) { attr.nattr_distance = distance; /* get set of NUMA nodes at distance <= DISTANCE */ if (nloc(&attr, radset2)) { fprintf(stderr,"nloc failed: %s\n", strerror(errno)); continue; } cursor = SET_CURSOR_INIT; while ((radid2 = rad_foreach(radset2, 0, &cursor)) != RAD_NONE) { if (distances[radid*nbnodes+radid2] == RAD_DIST_REMOTE) { distances[radid*nbnodes+radid2] = (float) distance; nfound++; } } if (nfound == nbnodes) /* Finished finding distances, no need to go up to RAD_DIST_REMOTE */ break; } } hwloc_distances_set(topology, HWLOC_OBJ_NODE, nbnodes, indexes, nodes, distances, 0 /* OS cannot force */); } radsetdestroy(&radset2); radsetdestroy(&radset); cpusetdestroy(&cpuset); /* add PU objects */ hwloc_setup_pu_level(topology, hwloc_fallback_nbprocessors(topology)); hwloc_obj_add_info(topology->levels[0][0], "Backend", "OSF"); if (topology->is_thissystem) hwloc_add_uname_info(topology); return 1; }
int main(void) { hwloc_topology_t topology1, topology2; char *xmlbuf; int xmlbuflen; char xmlfile[] = "hwloc_backends.tmpxml.XXXXXX"; char env[64]; int xmlbufok = 0, xmlfileok = 0, xmlfilefd; const char *orig_backend_name; putenv("HWLOC_LIBXML_CLEANUP=1"); printf("trying to export topology to XML buffer and file for later...\n"); hwloc_topology_init(&topology1); hwloc_topology_load(topology1); orig_backend_name = get_backend_name(topology1); hwloc_obj_add_info(hwloc_get_root_obj(topology1), "Foo", "Bar"); assert(hwloc_topology_is_thissystem(topology1)); if (hwloc_topology_export_xmlbuffer(topology1, &xmlbuf, &xmlbuflen) < 0) printf("XML buffer export failed (%s), ignoring\n", strerror(errno)); else xmlbufok = 1; xmlfilefd = mkstemp(xmlfile); if (xmlfilefd < 0 || hwloc_topology_export_xml(topology1, xmlfile) < 0) printf("XML file export failed (%s), ignoring\n", strerror(errno)); else xmlfileok = 1; /* init+config+destroy without loading */ printf("init...\n"); hwloc_topology_init(&topology2); if (xmlfileok) { printf("switching to xml...\n"); assert(!hwloc_topology_set_xml(topology2, xmlfile)); } if (xmlbufok) { printf("switching to xmlbuffer...\n"); assert(!hwloc_topology_set_xmlbuffer(topology2, xmlbuf, xmlbuflen)); } printf("switching to synthetic...\n"); hwloc_topology_set_synthetic(topology2, "machine:2 node:3 l1:2 pu:4"); hwloc_topology_destroy(topology2); /* init+xml+load+destroy */ if (xmlfileok) { printf("switching to xml and loading...\n"); hwloc_topology_init(&topology2); assert(!hwloc_topology_set_xml(topology2, xmlfile)); hwloc_topology_load(topology2); assert_backend_name(topology2, orig_backend_name); assert_foo_bar(topology2, 1); hwloc_topology_check(topology2); assert(!hwloc_topology_is_thissystem(topology2)); hwloc_topology_destroy(topology2); } /* init+xmlbuf+load+destroy */ if (xmlbufok) { printf("switching to xmlbuffer and loading...\n"); hwloc_topology_init(&topology2); assert(!hwloc_topology_set_xmlbuffer(topology2, xmlbuf, xmlbuflen)); hwloc_topology_load(topology2); assert_backend_name(topology2, orig_backend_name); assert_foo_bar(topology2, 1); hwloc_topology_check(topology2); assert(!hwloc_topology_is_thissystem(topology2)); hwloc_topology_destroy(topology2); } /* init+synthetic+load+destroy */ printf("switching to synthetic and loading...\n"); hwloc_topology_init(&topology2); hwloc_topology_set_synthetic(topology2, "machine:2 node:3 l3i:2 pu:4"); hwloc_topology_load(topology2); assert_backend_name(topology2, "Synthetic"); assert_foo_bar(topology2, 0); assert(hwloc_get_nbobjs_by_type(topology2, HWLOC_OBJ_PU) == 2*3*2*4); hwloc_topology_check(topology2); assert(!hwloc_topology_is_thissystem(topology2)); hwloc_topology_destroy(topology2); /* xmlenv+init+load+destroy */ if (xmlfileok) { printf("switching to xml by env and loading...\n"); snprintf(env, sizeof(env), "HWLOC_XMLFILE=%s", xmlfile); putenv(env); hwloc_topology_init(&topology2); hwloc_topology_load(topology2); assert_backend_name(topology2, orig_backend_name); assert_foo_bar(topology2, 1); hwloc_topology_check(topology2); assert(!hwloc_topology_is_thissystem(topology2)); hwloc_topology_destroy(topology2); } /* syntheticenv+init+load+destroy, synthetic env overrides xml */ printf("switching to synthetic by env and loading...\n"); putenv("HWLOC_SYNTHETIC=node:3 pu:3"); hwloc_topology_init(&topology2); hwloc_topology_load(topology2); assert_backend_name(topology2, "Synthetic"); assert_foo_bar(topology2, 0); assert(hwloc_get_nbobjs_by_type(topology2, HWLOC_OBJ_PU) == 3*3); hwloc_topology_check(topology2); assert(!hwloc_topology_is_thissystem(topology2)); hwloc_topology_destroy(topology2); /* componentsenv+init+load+destroy for testing defaults, overrides synthetic/xml/fsroot envs */ printf("switching to default components by env and loading...\n"); putenv("HWLOC_COMPONENTS=,"); /* don't set to empty since it means 'unset' on windows */ hwloc_topology_init(&topology2); hwloc_topology_load(topology2); assert_backend_name(topology2, orig_backend_name); assert_foo_bar(topology2, 0); hwloc_topology_check(topology2); assert(hwloc_topology_is_thissystem(topology2)); hwloc_topology_destroy(topology2); if (xmlbufok) hwloc_free_xmlbuffer(topology1, xmlbuf); if (xmlfilefd >= 0) { unlink(xmlfile); close(xmlfilefd); } hwloc_topology_destroy(topology1); return 0; }
static int hwloc_cuda_discover(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; enum hwloc_type_filter_e filter; cudaError_t cures; int nb, i; hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter); if (filter == HWLOC_TYPE_FILTER_KEEP_NONE) return 0; cures = cudaGetDeviceCount(&nb); if (cures) return -1; for (i = 0; i < nb; i++) { int domain, bus, dev; char cuda_name[32]; char number[32]; struct cudaDeviceProp prop; hwloc_obj_t cuda_device, parent; unsigned cores; cuda_device = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1); snprintf(cuda_name, sizeof(cuda_name), "cuda%d", i); cuda_device->name = strdup(cuda_name); cuda_device->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN; cuda_device->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC; cuda_device->subtype = strdup("CUDA"); hwloc_obj_add_info(cuda_device, "Backend", "CUDA"); hwloc_obj_add_info(cuda_device, "GPUVendor", "NVIDIA Corporation"); cures = cudaGetDeviceProperties(&prop, i); if (!cures) hwloc_obj_add_info(cuda_device, "GPUModel", prop.name); snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.totalGlobalMem) >> 10); hwloc_obj_add_info(cuda_device, "CUDAGlobalMemorySize", number); snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.l2CacheSize) >> 10); hwloc_obj_add_info(cuda_device, "CUDAL2CacheSize", number); snprintf(number, sizeof(number), "%d", prop.multiProcessorCount); hwloc_obj_add_info(cuda_device, "CUDAMultiProcessors", number); cores = hwloc_cuda_cores_per_MP(prop.major, prop.minor); if (cores) { snprintf(number, sizeof(number), "%u", cores); hwloc_obj_add_info(cuda_device, "CUDACoresPerMP", number); } snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.sharedMemPerBlock) >> 10); hwloc_obj_add_info(cuda_device, "CUDASharedMemorySizePerMP", number); parent = NULL; if (hwloc_cudart_get_device_pci_ids(NULL /* topology unused */, i, &domain, &bus, &dev) == 0) { parent = hwloc_pci_belowroot_find_by_busid(topology, domain, bus, dev, 0); if (!parent) parent = hwloc_pci_find_busid_parent(topology, domain, bus, dev, 0); } if (!parent) parent = hwloc_get_root_obj(topology); hwloc_insert_object_by_parent(topology, parent, cuda_device); } return 0; }
int hwloc_look_hardwired_fujitsu_fx10(struct hwloc_topology *topology) { /* If a broken core gets disabled, its bit disappears and other core bits are NOT shifted towards 0. * Node is not given to user job, not need to handle that case properly. */ unsigned i; hwloc_obj_t obj; hwloc_bitmap_t set; for(i=0; i<16; i++) { set = hwloc_bitmap_alloc(); hwloc_bitmap_set(set, i); if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) { obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1ICACHE, -1); obj->cpuset = hwloc_bitmap_dup(set); obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; obj->attr->cache.depth = 1; obj->attr->cache.size = 32*1024; obj->attr->cache.linesize = 128; obj->attr->cache.associativity = 2; hwloc_insert_object_by_cpuset(topology, obj); } if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1CACHE)) { obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1CACHE, -1); obj->cpuset = hwloc_bitmap_dup(set); obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; obj->attr->cache.depth = 1; obj->attr->cache.size = 32*1024; obj->attr->cache.linesize = 128; obj->attr->cache.associativity = 2; hwloc_insert_object_by_cpuset(topology, obj); } if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) { obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, i); obj->cpuset = set; hwloc_insert_object_by_cpuset(topology, obj); } else hwloc_bitmap_free(set); } set = hwloc_bitmap_alloc(); hwloc_bitmap_set_range(set, 0, 15); if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L2CACHE)) { obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L2CACHE, -1); obj->cpuset = hwloc_bitmap_dup(set); obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; obj->attr->cache.depth = 2; obj->attr->cache.size = 12*1024*1024; obj->attr->cache.linesize = 128; obj->attr->cache.associativity = 24; hwloc_insert_object_by_cpuset(topology, obj); } if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) { obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, 0); obj->cpuset = set; hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu"); hwloc_obj_add_info(obj, "CPUModel", "SPARC64 IXfx"); hwloc_insert_object_by_cpuset(topology, obj); } else hwloc_bitmap_free(set); hwloc_setup_pu_level(topology, 16); return 0; }
static int hwloc_gl_discover(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; unsigned i, res = 0; int err; if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno GL detection (not thissystem)\n"); return 0; } for (i = 0; i < HWLOC_GL_SERVER_MAX; ++i) { Display* display; char displayName[10]; int opcode, event, error; unsigned j; /* open X server */ snprintf(displayName, sizeof(displayName), ":%u", i); display = XOpenDisplay(displayName); if (!display) continue; /* Check for NV-CONTROL extension (it's per server) */ if(!XQueryExtension(display, "NV-CONTROL", &opcode, &event, &error)) { XCloseDisplay(display); continue; } for (j = 0; j < (unsigned) ScreenCount(display) && j < HWLOC_GL_SCREEN_MAX; j++) { hwloc_obj_t osdev, parent; const int screen = j; unsigned int *ptr_binary_data; int data_length; int gpu_number; int nv_ctrl_pci_bus; int nv_ctrl_pci_device; int nv_ctrl_pci_domain; int nv_ctrl_pci_func; char *productname; char name[64]; /* the server supports NV-CONTROL but it may contain non-NVIDIA screen that don't support it */ if (!XNVCTRLIsNvScreen(display, screen)) continue; /* Gets the GPU number attached to the default screen. */ /* For further details, see the <NVCtrl/NVCtrlLib.h> */ err = XNVCTRLQueryTargetBinaryData (display, NV_CTRL_TARGET_TYPE_X_SCREEN, screen, 0, NV_CTRL_BINARY_DATA_GPUS_USED_BY_XSCREEN, (unsigned char **) &ptr_binary_data, &data_length); if (!err) continue; gpu_number = ptr_binary_data[1]; free(ptr_binary_data); #ifdef NV_CTRL_PCI_DOMAIN /* Gets the ID's of the GPU defined by gpu_number * For further details, see the <NVCtrl/NVCtrlLib.h> */ err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0, NV_CTRL_PCI_DOMAIN, &nv_ctrl_pci_domain); if (!err) continue; #else nv_ctrl_pci_domain = 0; #endif err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0, NV_CTRL_PCI_BUS, &nv_ctrl_pci_bus); if (!err) continue; err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0, NV_CTRL_PCI_DEVICE, &nv_ctrl_pci_device); if (!err) continue; err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0, NV_CTRL_PCI_FUNCTION, &nv_ctrl_pci_func); if (!err) continue; productname = NULL; err = XNVCTRLQueryTargetStringAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0, NV_CTRL_STRING_PRODUCT_NAME, &productname); snprintf(name, sizeof(name), ":%u.%u", i, j); osdev = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1); osdev->name = strdup(name); osdev->logical_index = -1; osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU; hwloc_obj_add_info(osdev, "Backend", "GL"); hwloc_obj_add_info(osdev, "GPUVendor", "NVIDIA Corporation"); if (productname) hwloc_obj_add_info(osdev, "GPUModel", productname); parent = hwloc_pci_belowroot_find_by_busid(topology, nv_ctrl_pci_domain, nv_ctrl_pci_bus, nv_ctrl_pci_device, nv_ctrl_pci_func); if (!parent) parent = hwloc_pci_find_busid_parent(topology, nv_ctrl_pci_domain, nv_ctrl_pci_bus, nv_ctrl_pci_device, nv_ctrl_pci_func); if (!parent) parent = hwloc_get_root_obj(topology); hwloc_insert_object_by_parent(topology, parent, osdev); hwloc_debug("GL device %s (product %s) on PCI 0000:%02x:%02x.%u\n", name, productname, nv_ctrl_pci_domain, nv_ctrl_pci_bus, nv_ctrl_pci_device, nv_ctrl_pci_func); res++; } XCloseDisplay(display); } return res; }
void hwloc_look_darwin(struct hwloc_topology *topology) { int64_t _nprocs; unsigned nprocs; int64_t _npackages; unsigned i, j, cpu; struct hwloc_obj *obj; size_t size; int64_t l1cachesize; int64_t cacheways[2]; int64_t l2cachesize; int64_t cachelinesize; int64_t memsize; if (hwloc_get_sysctlbyname("hw.ncpu", &_nprocs) || _nprocs <= 0) return; nprocs = _nprocs; topology->support.discovery->pu = 1; hwloc_debug("%u procs\n", nprocs); if (!hwloc_get_sysctlbyname("hw.packages", &_npackages) && _npackages > 0) { unsigned npackages = _npackages; int64_t _cores_per_package; int64_t _logical_per_package; unsigned logical_per_package; hwloc_debug("%u packages\n", npackages); if (!hwloc_get_sysctlbyname("machdep.cpu.logical_per_package", &_logical_per_package) && _logical_per_package > 0) logical_per_package = _logical_per_package; else /* Assume the trivia. */ logical_per_package = nprocs / npackages; hwloc_debug("%u threads per package\n", logical_per_package); if (nprocs == npackages * logical_per_package) for (i = 0; i < npackages; i++) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_SOCKET, i); obj->cpuset = hwloc_bitmap_alloc(); for (cpu = i*logical_per_package; cpu < (i+1)*logical_per_package; cpu++) hwloc_bitmap_set(obj->cpuset, cpu); hwloc_debug_1arg_bitmap("package %u has cpuset %s\n", i, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); } if (!hwloc_get_sysctlbyname("machdep.cpu.cores_per_package", &_cores_per_package) && _cores_per_package > 0) { unsigned cores_per_package = _cores_per_package; hwloc_debug("%u cores per package\n", cores_per_package); if (!(logical_per_package % cores_per_package)) for (i = 0; i < npackages * cores_per_package; i++) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i); obj->cpuset = hwloc_bitmap_alloc(); for (cpu = i*(logical_per_package/cores_per_package); cpu < (i+1)*(logical_per_package/cores_per_package); cpu++) hwloc_bitmap_set(obj->cpuset, cpu); hwloc_debug_1arg_bitmap("core %u has cpuset %s\n", i, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); } } } if (hwloc_get_sysctlbyname("hw.l1dcachesize", &l1cachesize)) l1cachesize = 0; if (hwloc_get_sysctlbyname("hw.l2cachesize", &l2cachesize)) l2cachesize = 0; if (hwloc_get_sysctlbyname("machdep.cpu.cache.L1_associativity", &cacheways[0])) cacheways[0] = 0; else if (cacheways[0] == 0xff) cacheways[0] = -1; if (hwloc_get_sysctlbyname("machdep.cpu.cache.L2_associativity", &cacheways[1])) cacheways[1] = 0; else if (cacheways[1] == 0xff) cacheways[1] = -1; if (hwloc_get_sysctlbyname("hw.cachelinesize", &cachelinesize)) cachelinesize = 0; if (hwloc_get_sysctlbyname("hw.memsize", &memsize)) memsize = 0; if (!sysctlbyname("hw.cacheconfig", NULL, &size, NULL, 0)) { unsigned n = size / sizeof(uint32_t); uint64_t *cacheconfig = NULL; uint64_t *cachesize = NULL; uint32_t *cacheconfig32 = NULL; cacheconfig = malloc(sizeof(uint64_t) * n); if (NULL == cacheconfig) { goto out; } cachesize = malloc(sizeof(uint64_t) * n); if (NULL == cachesize) { goto out; } cacheconfig32 = malloc(sizeof(uint32_t) * n); if (NULL == cacheconfig32) { goto out; } if ((!sysctlbyname("hw.cacheconfig", cacheconfig, &size, NULL, 0))) { /* Yeech. Darwin seemingly has changed from 32bit to 64bit integers for * cacheconfig, with apparently no way for detection. Assume the machine * won't have more than 4 billion cpus */ if (cacheconfig[0] > 0xFFFFFFFFUL) { memcpy(cacheconfig32, cacheconfig, size); for (i = 0 ; i < size / sizeof(uint32_t); i++) cacheconfig[i] = cacheconfig32[i]; } memset(cachesize, 0, sizeof(uint64_t) * n); size = sizeof(uint64_t) * n; if (sysctlbyname("hw.cachesize", cachesize, &size, NULL, 0)) { if (n > 0) cachesize[0] = memsize; if (n > 1) cachesize[1] = l1cachesize; if (n > 2) cachesize[2] = l2cachesize; } hwloc_debug("%s", "caches"); for (i = 0; i < n && cacheconfig[i]; i++) hwloc_debug(" %"PRIu64"(%"PRIu64"kB)", cacheconfig[i], cachesize[i] / 1024); cacheconfig[i] = cacheconfig32[i]; /* Now we know how many caches there are */ n = i; hwloc_debug("\n%u cache levels\n", n - 1); /* For each cache level (0 is memory) */ for (i = 0; i < n; i++) { /* cacheconfig tells us how many cpus share it, let's iterate on each cache */ for (j = 0; j < (nprocs / cacheconfig[i]); j++) { obj = hwloc_alloc_setup_object(i?HWLOC_OBJ_CACHE:HWLOC_OBJ_NODE, j); if (!i) { obj->nodeset = hwloc_bitmap_alloc(); hwloc_bitmap_set(obj->nodeset, j); } obj->cpuset = hwloc_bitmap_alloc(); for (cpu = j*cacheconfig[i]; cpu < ((j+1)*cacheconfig[i]); cpu++) hwloc_bitmap_set(obj->cpuset, cpu); if (i) { hwloc_debug_2args_bitmap("L%ucache %u has cpuset %s\n", i, j, obj->cpuset); obj->attr->cache.depth = i; obj->attr->cache.size = cachesize[i]; obj->attr->cache.linesize = cachelinesize; if (i <= sizeof(cacheways) / sizeof(cacheways[0])) obj->attr->cache.associativity = cacheways[i-1]; else obj->attr->cache.associativity = 0; } else { hwloc_debug_1arg_bitmap("node %u has cpuset %s\n", j, obj->cpuset); obj->memory.local_memory = cachesize[i]; obj->memory.page_types_len = 2; obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types)); memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types)); obj->memory.page_types[0].size = getpagesize(); #ifdef HAVE__SC_LARGE_PAGESIZE obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif } hwloc_insert_object_by_cpuset(topology, obj); } } } out: if (NULL != cacheconfig) { free(cacheconfig); } if (NULL != cachesize) { free(cachesize); } if (NULL != cacheconfig32) { free(cacheconfig32); } } /* add PU objects */ hwloc_setup_pu_level(topology, nprocs); hwloc_obj_add_info(topology->levels[0][0], "Backend", "Darwin"); }
static int hwloc_look_kstat(struct hwloc_topology *topology) { /* FIXME this assumes that all packages are identical */ char *CPUType = hwloc_solaris_get_chip_type(); char *CPUModel = hwloc_solaris_get_chip_model(); kstat_ctl_t *kc = kstat_open(); kstat_t *ksp; kstat_named_t *stat; unsigned look_cores = 1, look_chips = 1; unsigned Pproc_max = 0; unsigned Pproc_alloc = 256; struct hwloc_solaris_Pproc { unsigned Lpkg, Ppkg, Lcore, Lproc; } * Pproc = malloc(Pproc_alloc * sizeof(*Pproc)); unsigned Lproc_num = 0; unsigned Lproc_alloc = 256; struct hwloc_solaris_Lproc { unsigned Pproc; } * Lproc = malloc(Lproc_alloc * sizeof(*Lproc)); unsigned Lcore_num = 0; unsigned Lcore_alloc = 256; struct hwloc_solaris_Lcore { unsigned Pcore, Ppkg; } * Lcore = malloc(Lcore_alloc * sizeof(*Lcore)); unsigned Lpkg_num = 0; unsigned Lpkg_alloc = 256; struct hwloc_solaris_Lpkg { unsigned Ppkg; } * Lpkg = malloc(Lpkg_alloc * sizeof(*Lpkg)); unsigned pkgid, coreid, cpuid; unsigned i; for (i = 0; i < Pproc_alloc; i++) { Pproc[i].Lproc = -1; Pproc[i].Lpkg = -1; Pproc[i].Ppkg = -1; Pproc[i].Lcore = -1; } if (!kc) { hwloc_debug("kstat_open failed: %s\n", strerror(errno)); free(Pproc); free(Lproc); free(Lcore); free(Lpkg); return 0; } for (ksp = kc->kc_chain; ksp; ksp = ksp->ks_next) { if (strncmp("cpu_info", ksp->ks_module, 8)) continue; cpuid = ksp->ks_instance; if (kstat_read(kc, ksp, NULL) == -1) { fprintf(stderr, "kstat_read failed for CPU%u: %s\n", cpuid, strerror(errno)); continue; } hwloc_debug("cpu%u\n", cpuid); if (cpuid >= Pproc_alloc) { struct hwloc_solaris_Pproc *tmp = realloc(Pproc, 2*Pproc_alloc * sizeof(*Pproc)); if (!tmp) goto err; Pproc = tmp; Pproc_alloc *= 2; for(i = Pproc_alloc/2; i < Pproc_alloc; i++) { Pproc[i].Lproc = -1; Pproc[i].Lpkg = -1; Pproc[i].Ppkg = -1; Pproc[i].Lcore = -1; } } Pproc[cpuid].Lproc = Lproc_num; if (Lproc_num >= Lproc_alloc) { struct hwloc_solaris_Lproc *tmp = realloc(Lproc, 2*Lproc_alloc * sizeof(*Lproc)); if (!tmp) goto err; Lproc = tmp; Lproc_alloc *= 2; } Lproc[Lproc_num].Pproc = cpuid; Lproc_num++; if (cpuid >= Pproc_max) Pproc_max = cpuid + 1; stat = (kstat_named_t *) kstat_data_lookup(ksp, "state"); if (!stat) hwloc_debug("could not read state for CPU%u: %s\n", cpuid, strerror(errno)); else if (stat->data_type != KSTAT_DATA_CHAR) hwloc_debug("unknown kstat type %d for cpu state\n", stat->data_type); else { hwloc_debug("cpu%u's state is %s\n", cpuid, stat->value.c); if (strcmp(stat->value.c, "on-line")) /* not online */ hwloc_bitmap_clr(topology->levels[0][0]->online_cpuset, cpuid); } if (look_chips) do { /* Get Chip ID */ stat = (kstat_named_t *) kstat_data_lookup(ksp, "chip_id"); if (!stat) { if (Lpkg_num) fprintf(stderr, "could not read package id for CPU%u: %s\n", cpuid, strerror(errno)); else hwloc_debug("could not read package id for CPU%u: %s\n", cpuid, strerror(errno)); look_chips = 0; continue; } switch (stat->data_type) { case KSTAT_DATA_INT32: pkgid = stat->value.i32; break; case KSTAT_DATA_UINT32: pkgid = stat->value.ui32; break; #ifdef _INT64_TYPE case KSTAT_DATA_UINT64: pkgid = stat->value.ui64; break; case KSTAT_DATA_INT64: pkgid = stat->value.i64; break; #endif default: fprintf(stderr, "chip_id type %d unknown\n", stat->data_type); look_chips = 0; continue; } Pproc[cpuid].Ppkg = pkgid; for (i = 0; i < Lpkg_num; i++) if (pkgid == Lpkg[i].Ppkg) break; Pproc[cpuid].Lpkg = i; hwloc_debug("%u on package %u (%u)\n", cpuid, i, pkgid); if (i == Lpkg_num) { if (Lpkg_num == Lpkg_alloc) { struct hwloc_solaris_Lpkg *tmp = realloc(Lpkg, 2*Lpkg_alloc * sizeof(*Lpkg)); if (!tmp) goto err; Lpkg = tmp; Lpkg_alloc *= 2; } Lpkg[Lpkg_num++].Ppkg = pkgid; } } while(0); if (look_cores) do { /* Get Core ID */ stat = (kstat_named_t *) kstat_data_lookup(ksp, "core_id"); if (!stat) { if (Lcore_num) fprintf(stderr, "could not read core id for CPU%u: %s\n", cpuid, strerror(errno)); else hwloc_debug("could not read core id for CPU%u: %s\n", cpuid, strerror(errno)); look_cores = 0; continue; } switch (stat->data_type) { case KSTAT_DATA_INT32: coreid = stat->value.i32; break; case KSTAT_DATA_UINT32: coreid = stat->value.ui32; break; #ifdef _INT64_TYPE case KSTAT_DATA_UINT64: coreid = stat->value.ui64; break; case KSTAT_DATA_INT64: coreid = stat->value.i64; break; #endif default: fprintf(stderr, "core_id type %d unknown\n", stat->data_type); look_cores = 0; continue; } for (i = 0; i < Lcore_num; i++) if (coreid == Lcore[i].Pcore && Pproc[cpuid].Ppkg == Lcore[i].Ppkg) break; Pproc[cpuid].Lcore = i; hwloc_debug("%u on core %u (%u)\n", cpuid, i, coreid); if (i == Lcore_num) { if (Lcore_num == Lcore_alloc) { struct hwloc_solaris_Lcore *tmp = realloc(Lcore, 2*Lcore_alloc * sizeof(*Lcore)); if (!tmp) goto err; Lcore = tmp; Lcore_alloc *= 2; } Lcore[Lcore_num].Ppkg = Pproc[cpuid].Ppkg; Lcore[Lcore_num++].Pcore = coreid; } } while(0); /* Note: there is also clog_id for the Thread ID (not unique) and * pkg_core_id for the core ID (not unique). They are not useful to us * however. */ } if (look_chips) { struct hwloc_obj *obj; unsigned j,k; hwloc_debug("%d Packages\n", Lpkg_num); for (j = 0; j < Lpkg_num; j++) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, Lpkg[j].Ppkg); if (CPUType) hwloc_obj_add_info(obj, "CPUType", CPUType); if (CPUModel) hwloc_obj_add_info(obj, "CPUModel", CPUModel); obj->cpuset = hwloc_bitmap_alloc(); for(k=0; k<Pproc_max; k++) if (Pproc[k].Lpkg == j) hwloc_bitmap_set(obj->cpuset, k); hwloc_debug_1arg_bitmap("Package %d has cpuset %s\n", j, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); } hwloc_debug("%s", "\n"); } if (look_cores) { struct hwloc_obj *obj; unsigned j,k; hwloc_debug("%d Cores\n", Lcore_num); for (j = 0; j < Lcore_num; j++) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, Lcore[j].Pcore); obj->cpuset = hwloc_bitmap_alloc(); for(k=0; k<Pproc_max; k++) if (Pproc[k].Lcore == j) hwloc_bitmap_set(obj->cpuset, k); hwloc_debug_1arg_bitmap("Core %d has cpuset %s\n", j, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); } hwloc_debug("%s", "\n"); } if (Lproc_num) { struct hwloc_obj *obj; unsigned j,k; hwloc_debug("%d PUs\n", Lproc_num); for (j = 0; j < Lproc_num; j++) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, Lproc[j].Pproc); obj->cpuset = hwloc_bitmap_alloc(); for(k=0; k<Pproc_max; k++) if (Pproc[k].Lproc == j) hwloc_bitmap_set(obj->cpuset, k); hwloc_debug_1arg_bitmap("PU %d has cpuset %s\n", j, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); } hwloc_debug("%s", "\n"); } kstat_close(kc); free(Pproc); free(Lproc); free(Lcore); free(Lpkg); return Lproc_num > 0; err: kstat_close(kc); free(Pproc); free(Lproc); free(Lcore); free(Lpkg); return 0; }
static int hwloc_cuda_backend_notify_new_object(struct hwloc_backend *backend, struct hwloc_obj *pcidev) { struct hwloc_topology *topology = backend->topology; struct hwloc_cuda_backend_data_s *data = backend->private_data; unsigned i; if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno CUDA detection (not thissystem)\n"); return 0; } if (HWLOC_OBJ_PCI_DEVICE != pcidev->type) return 0; if (data->nr_devices == (unsigned) -1) { /* first call, lookup all devices */ hwloc_cuda_query_devices(data); /* if it fails, data->nr_devices = 0 so we won't do anything below and in next callbacks */ } if (!data->nr_devices) /* found no devices */ return 0; for(i=0; i<data->nr_devices; i++) { struct hwloc_cuda_device_info_s *info = &data->devices[i]; char cuda_name[32]; char number[32]; struct cudaDeviceProp prop; hwloc_obj_t cuda_device; cudaError_t cures; unsigned cores; if (info->pcidomain != pcidev->attr->pcidev.domain) continue; if (info->pcibus != pcidev->attr->pcidev.bus) continue; if (info->pcidev != pcidev->attr->pcidev.dev) continue; if (info->pcifunc != pcidev->attr->pcidev.func) continue; cuda_device = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1); snprintf(cuda_name, sizeof(cuda_name), "cuda%d", info->idx); cuda_device->name = strdup(cuda_name); cuda_device->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN; cuda_device->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC; hwloc_obj_add_info(cuda_device, "CoProcType", "CUDA"); hwloc_obj_add_info(cuda_device, "Backend", "CUDA"); hwloc_obj_add_info(cuda_device, "GPUVendor", "NVIDIA Corporation"); cures = cudaGetDeviceProperties(&prop, info->idx); if (!cures) hwloc_obj_add_info(cuda_device, "GPUModel", prop.name); snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.totalGlobalMem) >> 10); hwloc_obj_add_info(cuda_device, "CUDAGlobalMemorySize", number); snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.l2CacheSize) >> 10); hwloc_obj_add_info(cuda_device, "CUDAL2CacheSize", number); snprintf(number, sizeof(number), "%d", prop.multiProcessorCount); hwloc_obj_add_info(cuda_device, "CUDAMultiProcessors", number); cores = hwloc_cuda_cores_per_MP(prop.major, prop.minor); if (cores) { snprintf(number, sizeof(number), "%u", cores); hwloc_obj_add_info(cuda_device, "CUDACoresPerMP", number); } snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.sharedMemPerBlock) >> 10); hwloc_obj_add_info(cuda_device, "CUDASharedMemorySizePerMP", number); hwloc_insert_object_by_parent(topology, pcidev, cuda_device); return 1; } return 0; }
static int hwloc_look_darwin(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; int64_t _nprocs; unsigned nprocs; int64_t _npackages; unsigned i, j, cpu; struct hwloc_obj *obj; size_t size; int64_t l1dcachesize, l1icachesize; int64_t cacheways[2]; int64_t l2cachesize; int64_t cachelinesize; int64_t memsize; char cpumodel[64]; if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; hwloc_alloc_obj_cpusets(topology->levels[0][0]); if (hwloc_get_sysctlbyname("hw.ncpu", &_nprocs) || _nprocs <= 0) return -1; nprocs = _nprocs; topology->support.discovery->pu = 1; hwloc_debug("%u procs\n", nprocs); size = sizeof(cpumodel); if (sysctlbyname("machdep.cpu.brand_string", cpumodel, &size, NULL, 0)) cpumodel[0] = '\0'; if (!hwloc_get_sysctlbyname("hw.packages", &_npackages) && _npackages > 0) { unsigned npackages = _npackages; int64_t _cores_per_package; int64_t _logical_per_package; unsigned logical_per_package; hwloc_debug("%u packages\n", npackages); if (!hwloc_get_sysctlbyname("machdep.cpu.logical_per_package", &_logical_per_package) && _logical_per_package > 0) logical_per_package = _logical_per_package; else /* Assume the trivia. */ logical_per_package = nprocs / npackages; hwloc_debug("%u threads per package\n", logical_per_package); if (nprocs == npackages * logical_per_package && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) for (i = 0; i < npackages; i++) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, i); obj->cpuset = hwloc_bitmap_alloc(); for (cpu = i*logical_per_package; cpu < (i+1)*logical_per_package; cpu++) hwloc_bitmap_set(obj->cpuset, cpu); hwloc_debug_1arg_bitmap("package %u has cpuset %s\n", i, obj->cpuset); if (cpumodel[0] != '\0') hwloc_obj_add_info(obj, "CPUModel", cpumodel); hwloc_insert_object_by_cpuset(topology, obj); } else if (cpumodel[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel); if (!hwloc_get_sysctlbyname("machdep.cpu.cores_per_package", &_cores_per_package) && _cores_per_package > 0 && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) { unsigned cores_per_package = _cores_per_package; hwloc_debug("%u cores per package\n", cores_per_package); if (!(logical_per_package % cores_per_package)) for (i = 0; i < npackages * cores_per_package; i++) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i); obj->cpuset = hwloc_bitmap_alloc(); for (cpu = i*(logical_per_package/cores_per_package); cpu < (i+1)*(logical_per_package/cores_per_package); cpu++) hwloc_bitmap_set(obj->cpuset, cpu); hwloc_debug_1arg_bitmap("core %u has cpuset %s\n", i, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); } } } else if (cpumodel[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel); if (hwloc_get_sysctlbyname("hw.l1dcachesize", &l1dcachesize)) l1dcachesize = 0; if (hwloc_get_sysctlbyname("hw.l1icachesize", &l1icachesize)) l1icachesize = 0; if (hwloc_get_sysctlbyname("hw.l2cachesize", &l2cachesize)) l2cachesize = 0; if (hwloc_get_sysctlbyname("machdep.cpu.cache.L1_associativity", &cacheways[0])) cacheways[0] = 0; else if (cacheways[0] == 0xff) cacheways[0] = -1; if (hwloc_get_sysctlbyname("machdep.cpu.cache.L2_associativity", &cacheways[1])) cacheways[1] = 0; else if (cacheways[1] == 0xff) cacheways[1] = -1; if (hwloc_get_sysctlbyname("hw.cachelinesize", &cachelinesize)) cachelinesize = 0; if (hwloc_get_sysctlbyname("hw.memsize", &memsize)) memsize = 0; if (!sysctlbyname("hw.cacheconfig", NULL, &size, NULL, 0)) { unsigned n = size / sizeof(uint32_t); uint64_t *cacheconfig = NULL; uint64_t *cachesize = NULL; uint32_t *cacheconfig32 = NULL; cacheconfig = malloc(sizeof(uint64_t) * n); if (NULL == cacheconfig) { goto out; } cachesize = malloc(sizeof(uint64_t) * n); if (NULL == cachesize) { goto out; } cacheconfig32 = malloc(sizeof(uint32_t) * n); if (NULL == cacheconfig32) { goto out; } if ((!sysctlbyname("hw.cacheconfig", cacheconfig, &size, NULL, 0))) { /* Yeech. Darwin seemingly has changed from 32bit to 64bit integers for * cacheconfig, with apparently no way for detection. Assume the machine * won't have more than 4 billion cpus */ if (cacheconfig[0] > 0xFFFFFFFFUL) { memcpy(cacheconfig32, cacheconfig, size); for (i = 0 ; i < size / sizeof(uint32_t); i++) cacheconfig[i] = cacheconfig32[i]; } memset(cachesize, 0, sizeof(uint64_t) * n); size = sizeof(uint64_t) * n; if (sysctlbyname("hw.cachesize", cachesize, &size, NULL, 0)) { if (n > 0) cachesize[0] = memsize; if (n > 1) cachesize[1] = l1dcachesize; if (n > 2) cachesize[2] = l2cachesize; } hwloc_debug("%s", "caches"); for (i = 0; i < n && cacheconfig[i]; i++) hwloc_debug(" %"PRIu64"(%"PRIu64"kB)", cacheconfig[i], cachesize[i] / 1024); /* Now we know how many caches there are */ n = i; hwloc_debug("\n%u cache levels\n", n - 1); /* For each cache level (0 is memory) */ for (i = 0; i < n; i++) { /* cacheconfig tells us how many cpus share it, let's iterate on each cache */ for (j = 0; j < (nprocs / cacheconfig[i]); j++) { if (!i) { obj = hwloc_alloc_setup_object(HWLOC_OBJ_NUMANODE, j); obj->nodeset = hwloc_bitmap_alloc(); hwloc_bitmap_set(obj->nodeset, j); } else { obj = hwloc_alloc_setup_object(HWLOC_OBJ_L1CACHE+i-1, -1); } obj->cpuset = hwloc_bitmap_alloc(); for (cpu = j*cacheconfig[i]; cpu < ((j+1)*cacheconfig[i]); cpu++) hwloc_bitmap_set(obj->cpuset, cpu); if (i == 1 && l1icachesize && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) { /* FIXME assuming that L1i and L1d are shared the same way. Darwin * does not yet provide a way to know. */ hwloc_obj_t l1i = hwloc_alloc_setup_object(HWLOC_OBJ_L1ICACHE, -1); l1i->cpuset = hwloc_bitmap_dup(obj->cpuset); hwloc_debug_1arg_bitmap("L1icache %u has cpuset %s\n", j, l1i->cpuset); l1i->attr->cache.depth = i; l1i->attr->cache.size = l1icachesize; l1i->attr->cache.linesize = cachelinesize; l1i->attr->cache.associativity = 0; l1i->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; hwloc_insert_object_by_cpuset(topology, l1i); } if (i) { hwloc_debug_2args_bitmap("L%ucache %u has cpuset %s\n", i, j, obj->cpuset); obj->attr->cache.depth = i; obj->attr->cache.size = cachesize[i]; obj->attr->cache.linesize = cachelinesize; if (i <= sizeof(cacheways) / sizeof(cacheways[0])) obj->attr->cache.associativity = cacheways[i-1]; else obj->attr->cache.associativity = 0; if (i == 1 && l1icachesize) obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; else obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; } else { hwloc_debug_1arg_bitmap("node %u has cpuset %s\n", j, obj->cpuset); obj->memory.local_memory = cachesize[i]; obj->memory.page_types_len = 2; obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types)); memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types)); obj->memory.page_types[0].size = hwloc_getpagesize(); #ifdef HAVE__SC_LARGE_PAGESIZE obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif } if (hwloc_filter_check_keep_object_type(topology, obj->type)) hwloc_insert_object_by_cpuset(topology, obj); else hwloc_free_unlinked_object(obj); /* FIXME: don't built at all, just build the cpuset in case l1i needs it */ } } } out: free(cacheconfig); free(cachesize); free(cacheconfig32); } /* add PU objects */ hwloc_setup_pu_level(topology, nprocs); hwloc_obj_add_info(topology->levels[0][0], "Backend", "Darwin"); hwloc_add_uname_info(topology, NULL); return 0; }
static int hwloc_look_pci(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; enum hwloc_type_filter_e pfilter, bfilter; struct hwloc_obj *tree = NULL, *tmp; int ret; struct pci_device_iterator *iter; struct pci_device *pcidev; hwloc_topology_get_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, &pfilter); hwloc_topology_get_type_filter(topology, HWLOC_OBJ_BRIDGE, &bfilter); if (bfilter == HWLOC_TYPE_FILTER_KEEP_NONE && pfilter == HWLOC_TYPE_FILTER_KEEP_NONE) return 0; /* don't do anything if another backend attached PCI already * (they are attached to root until later in the core discovery) */ tmp = hwloc_get_root_obj(topology)->io_first_child; while (tmp) { if (tmp->type == HWLOC_OBJ_PCI_DEVICE || (tmp->type == HWLOC_OBJ_BRIDGE && tmp->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI)) { hwloc_debug("%s", "PCI objects already added, ignoring linuxpci backend.\n"); return 0; } tmp = tmp->next_sibling; } hwloc_debug("%s", "\nScanning PCI buses...\n"); /* initialize PCI scanning */ ret = pci_system_init(); if (ret) { hwloc_debug("%s", "Can not initialize libpciaccess\n"); return -1; } iter = pci_slot_match_iterator_create(NULL); /* iterate over devices */ for (pcidev = pci_device_next(iter); pcidev; pcidev = pci_device_next(iter)) { const char *vendorname, *devicename; unsigned char config_space_cache[CONFIG_SPACE_CACHESIZE]; hwloc_obj_type_t type; struct hwloc_obj *obj; unsigned domain; unsigned device_class; unsigned short tmp16; unsigned offset; /* initialize the config space in case we fail to read it (missing permissions, etc). */ memset(config_space_cache, 0xff, CONFIG_SPACE_CACHESIZE); pci_device_probe(pcidev); pci_device_cfg_read(pcidev, config_space_cache, 0, CONFIG_SPACE_CACHESIZE, NULL); /* try to read the domain */ domain = pcidev->domain; /* try to read the device_class */ device_class = pcidev->device_class >> 8; /* bridge or pci dev? */ type = hwloc_pcidisc_check_bridge_type(device_class, config_space_cache); /* filtered? */ if (type == HWLOC_OBJ_PCI_DEVICE) { enum hwloc_type_filter_e filter; hwloc_topology_get_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, &filter); if (filter == HWLOC_TYPE_FILTER_KEEP_NONE) continue; if (filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT && !hwloc_filter_check_pcidev_subtype_important(device_class)) continue; } else if (type == HWLOC_OBJ_BRIDGE) { enum hwloc_type_filter_e filter; hwloc_topology_get_type_filter(topology, HWLOC_OBJ_BRIDGE, &filter); if (filter == HWLOC_TYPE_FILTER_KEEP_NONE) continue; /* HWLOC_TYPE_FILTER_KEEP_IMPORTANT filtered later in the core */ } /* fixup SR-IOV buggy VF device/vendor IDs */ if (0xffff == pcidev->vendor_id && 0xffff == pcidev->device_id) { /* SR-IOV puts ffff:ffff in Virtual Function config space. * The actual VF device ID is stored at a special (dynamic) location in the Physical Function config space. * VF and PF have the same vendor ID. * * libpciaccess just returns ffff:ffff, needs to be fixed. * linuxpci is OK because sysfs files are already fixed in the kernel. * (pciutils is OK when it uses those Linux sysfs files.) * * Reading these files is an easy way to work around the libpciaccess issue on Linux, * but we have no way to know if this is caused by SR-IOV or not. * * TODO: * If PF has CAP_ID_PCIX or CAP_ID_EXP (offset>0), * look for extended capability PCI_EXT_CAP_ID_SRIOV (need extended config space (more than 256 bytes)), * then read the VF device ID after it (PCI_IOV_DID bytes later). * Needs access to extended config space (needs root on Linux). * TODO: * Add string info attributes in VF and PF objects? */ #ifdef HWLOC_LINUX_SYS /* Workaround for Linux (the kernel returns the VF device/vendor IDs). */ char path[64]; char value[16]; FILE *file; size_t read; snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/vendor", domain, pcidev->bus, pcidev->dev, pcidev->func); file = fopen(path, "r"); if (file) { read = fread(value, 1, sizeof(value), file); fclose(file); if (read) /* fixup the pciaccess struct so that pci_device_get_vendor_name() is correct later. */ pcidev->vendor_id = strtoul(value, NULL, 16); } snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/device", domain, pcidev->bus, pcidev->dev, pcidev->func); file = fopen(path, "r"); if (file) { read = fread(value, 1, sizeof(value), file); fclose(file); if (read) /* fixup the pciaccess struct so that pci_device_get_device_name() is correct later. */ pcidev->device_id = strtoul(value, NULL, 16); } #endif } obj = hwloc_alloc_setup_object(topology, type, HWLOC_UNKNOWN_INDEX); obj->attr->pcidev.domain = domain; obj->attr->pcidev.bus = pcidev->bus; obj->attr->pcidev.dev = pcidev->dev; obj->attr->pcidev.func = pcidev->func; obj->attr->pcidev.vendor_id = pcidev->vendor_id; obj->attr->pcidev.device_id = pcidev->device_id; obj->attr->pcidev.class_id = device_class; obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID]; obj->attr->pcidev.linkspeed = 0; /* unknown */ offset = hwloc_pcidisc_find_cap(config_space_cache, PCI_CAP_ID_EXP); if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE) hwloc_pcidisc_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed); if (type == HWLOC_OBJ_BRIDGE) { if (hwloc_pcidisc_setup_bridge_attr(obj, config_space_cache) < 0) continue; } if (obj->type == HWLOC_OBJ_PCI_DEVICE) { memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_VENDOR_ID], sizeof(tmp16)); obj->attr->pcidev.subvendor_id = tmp16; memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_ID], sizeof(tmp16)); obj->attr->pcidev.subdevice_id = tmp16; } else { /* TODO: * bridge must lookup PCI_CAP_ID_SSVID and then look at offset+PCI_SSVID_VENDOR/DEVICE_ID * cardbus must look at PCI_CB_SUBSYSTEM_VENDOR_ID and PCI_CB_SUBSYSTEM_ID */ } /* get the vendor name */ vendorname = pci_device_get_vendor_name(pcidev); if (vendorname && *vendorname) hwloc_obj_add_info(obj, "PCIVendor", vendorname); /* get the device name */ devicename = pci_device_get_device_name(pcidev); if (devicename && *devicename) hwloc_obj_add_info(obj, "PCIDevice", devicename); hwloc_debug(" %04x:%02x:%02x.%01x %04x %04x:%04x %s %s\n", domain, pcidev->bus, pcidev->dev, pcidev->func, device_class, pcidev->vendor_id, pcidev->device_id, vendorname && *vendorname ? vendorname : "??", devicename && *devicename ? devicename : "??"); hwloc_pcidisc_tree_insert_by_busid(&tree, obj); } /* finalize device scanning */ pci_iterator_destroy(iter); pci_system_cleanup(); hwloc_pcidisc_tree_attach(topology, tree); return 0; }
static int hwloc_look_pci(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; struct hwloc_obj *first_obj = NULL, *last_obj = NULL; int ret; struct pci_device_iterator *iter; struct pci_device *pcidev; #ifdef HWLOC_LINUX_SYS DIR *dir; #endif if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (hwloc_get_next_pcidev(topology, NULL)) { hwloc_debug("%s", "PCI objects already added, ignoring pci backend.\n"); return 0; } if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno PCI detection (not thissystem)\n"); return 0; } hwloc_debug("%s", "\nScanning PCI buses...\n"); /* initialize PCI scanning */ ret = pci_system_init(); if (ret) { hwloc_debug("%s", "Can not initialize libpciaccess\n"); return -1; } iter = pci_slot_match_iterator_create(NULL); /* iterate over devices */ for (pcidev = pci_device_next(iter); pcidev; pcidev = pci_device_next(iter)) { const char *vendorname, *devicename, *fullname; unsigned char config_space_cache[CONFIG_SPACE_CACHESIZE]; struct hwloc_obj *obj; unsigned os_index; unsigned domain; unsigned device_class; unsigned short tmp16; char name[128]; unsigned offset; /* initialize the config space in case we fail to read it (missing permissions, etc). */ memset(config_space_cache, 0xff, CONFIG_SPACE_CACHESIZE); pci_device_probe(pcidev); pci_device_cfg_read(pcidev, config_space_cache, 0, CONFIG_SPACE_CACHESIZE, NULL); /* try to read the domain */ domain = pcidev->domain; /* try to read the device_class */ device_class = pcidev->device_class >> 8; /* fixup SR-IOV buggy VF device/vendor IDs */ if (0xffff == pcidev->vendor_id && 0xffff == pcidev->device_id) { /* SR-IOV puts ffff:ffff in Virtual Function config space. * The actual VF device ID is stored at a special (dynamic) location in the Physical Function config space. * VF and PF have the same vendor ID. * * libpciaccess just returns ffff:ffff, needs to be fixed. * linuxpci is OK because sysfs files are already fixed the kernel. * (pciutils is OK when it uses those Linux sysfs files.) * * Reading these files is an easy way to work around the libpciaccess issue on Linux, * but we have no way to know if this is caused by SR-IOV or not. * * TODO: * If PF has CAP_ID_PCIX or CAP_ID_EXP (offset>0), * look for extended capability PCI_EXT_CAP_ID_SRIOV (need extended config space (more than 256 bytes)), * then read the VF device ID after it (PCI_IOV_DID bytes later). * Needs access to extended config space (needs root on Linux). * TODO: * Add string info attributes in VF and PF objects? */ #ifdef HWLOC_LINUX_SYS /* Workaround for Linux (the kernel returns the VF device/vendor IDs). */ char path[64]; char value[16]; FILE *file; size_t read; snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/vendor", domain, pcidev->bus, pcidev->dev, pcidev->func); file = fopen(path, "r"); if (file) { read = fread(value, 1, sizeof(value), file); fclose(file); if (read) /* fixup the pciaccess struct so that pci_device_get_vendor_name() is correct later. */ pcidev->vendor_id = strtoul(value, NULL, 16); } snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/device", domain, pcidev->bus, pcidev->dev, pcidev->func); file = fopen(path, "r"); if (file) { read = fread(value, 1, sizeof(value), file); fclose(file); if (read) /* fixup the pciaccess struct so that pci_device_get_device_name() is correct later. */ pcidev->device_id = strtoul(value, NULL, 16); } #endif } /* might be useful for debugging (note that domain might be truncated) */ os_index = (domain << 20) + (pcidev->bus << 12) + (pcidev->dev << 4) + pcidev->func; obj = hwloc_alloc_setup_object(HWLOC_OBJ_PCI_DEVICE, os_index); obj->attr->pcidev.domain = domain; obj->attr->pcidev.bus = pcidev->bus; obj->attr->pcidev.dev = pcidev->dev; obj->attr->pcidev.func = pcidev->func; obj->attr->pcidev.vendor_id = pcidev->vendor_id; obj->attr->pcidev.device_id = pcidev->device_id; obj->attr->pcidev.class_id = device_class; obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID]; obj->attr->pcidev.linkspeed = 0; /* unknown */ offset = hwloc_pci_find_cap(config_space_cache, PCI_CAP_ID_EXP); if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE) hwloc_pci_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed); hwloc_pci_prepare_bridge(obj, config_space_cache); if (obj->type == HWLOC_OBJ_PCI_DEVICE) { memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_VENDOR_ID], sizeof(tmp16)); obj->attr->pcidev.subvendor_id = tmp16; memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_ID], sizeof(tmp16)); obj->attr->pcidev.subdevice_id = tmp16; } else { /* TODO: * bridge must lookup PCI_CAP_ID_SSVID and then look at offset+PCI_SSVID_VENDOR/DEVICE_ID * cardbus must look at PCI_CB_SUBSYSTEM_VENDOR_ID and PCI_CB_SUBSYSTEM_ID */ } /* get the vendor name */ vendorname = pci_device_get_vendor_name(pcidev); if (vendorname && *vendorname) hwloc_obj_add_info(obj, "PCIVendor", vendorname); /* get the device name */ devicename = pci_device_get_device_name(pcidev); if (devicename && *devicename) hwloc_obj_add_info(obj, "PCIDevice", devicename); /* generate or get the fullname */ snprintf(name, sizeof(name), "%s%s%s", vendorname ? vendorname : "", vendorname && devicename ? " " : "", devicename ? devicename : ""); fullname = name; if (*name) obj->name = strdup(name); hwloc_debug(" %04x:%02x:%02x.%01x %04x %04x:%04x %s\n", domain, pcidev->bus, pcidev->dev, pcidev->func, device_class, pcidev->vendor_id, pcidev->device_id, fullname && *fullname ? fullname : "??"); /* queue the object for now */ if (first_obj) last_obj->next_sibling = obj; else first_obj = obj; last_obj = obj; } /* finalize device scanning */ pci_iterator_destroy(iter); pci_system_cleanup(); #ifdef HWLOC_LINUX_SYS dir = opendir("/sys/bus/pci/slots/"); if (dir) { struct dirent *dirent; while ((dirent = readdir(dir)) != NULL) { char path[64]; FILE *file; if (dirent->d_name[0] == '.') continue; snprintf(path, sizeof(path), "/sys/bus/pci/slots/%s/address", dirent->d_name); file = fopen(path, "r"); if (file) { unsigned domain, bus, dev; if (fscanf(file, "%x:%x:%x", &domain, &bus, &dev) == 3) { hwloc_obj_t obj = first_obj; while (obj) { if (obj->attr->pcidev.domain == domain && obj->attr->pcidev.bus == bus && obj->attr->pcidev.dev == dev && obj->attr->pcidev.func == 0) { hwloc_obj_add_info(obj, "PCISlot", dirent->d_name); break; } obj = obj->next_sibling; } } fclose(file); } } closedir(dir); } #endif return hwloc_insert_pci_device_list(backend, first_obj); }
static int hwloc_look_hpux(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; int has_numa = sysconf(_SC_CCNUMA_SUPPORT) == 1; hwloc_obj_t *nodes = NULL, obj; spu_t currentcpu; ldom_t currentnode; int i, nbnodes = 0; if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; hwloc_alloc_obj_cpusets(topology->levels[0][0]); if (has_numa) { nbnodes = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ? MPC_GETNUMLDOMS_SYS : MPC_GETNUMLDOMS, 0, 0); hwloc_debug("%d nodes\n", nbnodes); nodes = malloc(nbnodes * sizeof(*nodes)); i = 0; currentnode = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ? MPC_GETFIRSTLDOM_SYS : MPC_GETFIRSTLDOM, 0, 0); while (currentnode != -1 && i < nbnodes) { hwloc_debug("node %d is %d\n", i, currentnode); nodes[i] = obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, currentnode); obj->cpuset = hwloc_bitmap_alloc(); obj->nodeset = hwloc_bitmap_alloc(); hwloc_bitmap_set(obj->nodeset, currentnode); /* TODO: obj->attr->node.memory_kB */ /* TODO: obj->attr->node.huge_page_free */ currentnode = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ? MPC_GETNEXTLDOM_SYS : MPC_GETNEXTLDOM, currentnode, 0); i++; } } i = 0; currentcpu = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ? MPC_GETFIRSTSPU_SYS : MPC_GETFIRSTSPU, 0,0); while (currentcpu != -1) { obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, currentcpu); obj->cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_set(obj->cpuset, currentcpu); hwloc_debug("cpu %d\n", currentcpu); if (nodes) { /* Add this cpu to its node */ currentnode = mpctl(MPC_SPUTOLDOM, currentcpu, 0); /* Hopefully it's just the same as previous cpu */ if (i >= nbnodes || (ldom_t) nodes[i]->os_index != currentnode) for (i = 0; i < nbnodes; i++) if ((ldom_t) nodes[i]->os_index == currentnode) break; if (i < nbnodes) { hwloc_bitmap_set(nodes[i]->cpuset, currentcpu); hwloc_debug("is in node %d\n", i); } else { hwloc_debug("%s", "is in no node?!\n"); } } /* Add cpu */ hwloc_insert_object_by_cpuset(topology, obj); currentcpu = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ? MPC_GETNEXTSPU_SYS : MPC_GETNEXTSPU, currentcpu, 0); } if (nodes) { /* Add nodes */ for (i = 0 ; i < nbnodes ; i++) hwloc_insert_object_by_cpuset(topology, nodes[i]); free(nodes); } topology->support.discovery->pu = 1; hwloc_obj_add_info(topology->levels[0][0], "Backend", "HP-UX"); hwloc_add_uname_info(topology, NULL); return 0; }
static int hwloc_look_aix(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; int i; if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return 0; hwloc_alloc_obj_cpusets(topology->levels[0][0]); /* TODO: R_LGPGDEF/R_LGPGFREE for large pages */ hwloc_debug("Note: SMPSDL is at %d\n", rs_getinfo(NULL, R_SMPSDL, 0)); #ifdef R_REF1SDL hwloc_debug("Note: REF1SDL is at %d\n", rs_getinfo(NULL, R_REF1SDL, 0)); #endif for (i=0; i<=rs_getinfo(NULL, R_MAXSDL, 0); i++) { int known = 0; #if 0 if (i == rs_getinfo(NULL, R_SMPSDL, 0)) /* Not enabled for now because I'm not sure what it corresponds to. On * decrypthon it contains all the cpus. Is it a "machine" or a "system" * level ? */ { hwloc_debug("looking AIX \"SMP\" sdl %d\n", i); look_rset(i, HWLOC_OBJ_MACHINE, topology, i); known = 1; } #endif if (i == rs_getinfo(NULL, R_MCMSDL, 0)) { hwloc_debug("looking AIX node sdl %d\n", i); look_rset(i, HWLOC_OBJ_NODE, topology, i); known = 1; } # ifdef R_L2CSDL if (i == rs_getinfo(NULL, R_L2CSDL, 0)) { hwloc_debug("looking AIX L2 sdl %d\n", i); look_rset(i, HWLOC_OBJ_CACHE, topology, i); known = 1; } # endif # ifdef R_PCORESDL if (i == rs_getinfo(NULL, R_PCORESDL, 0)) { hwloc_debug("looking AIX core sdl %d\n", i); look_rset(i, HWLOC_OBJ_CORE, topology, i); known = 1; } # endif if (i == rs_getinfo(NULL, R_MAXSDL, 0)) { hwloc_debug("looking AIX max sdl %d\n", i); look_rset(i, HWLOC_OBJ_PU, topology, i); known = 1; topology->support.discovery->pu = 1; } /* Don't know how it should be rendered, make a misc object for it. */ if (!known) { hwloc_debug("looking AIX unknown sdl %d\n", i); look_rset(i, HWLOC_OBJ_GROUP, topology, i); } } hwloc_obj_add_info(topology->levels[0][0], "Backend", "AIX"); if (topology->is_thissystem) hwloc_add_uname_info(topology); return 1; }
static int hwloc_look_darwin(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; int64_t _nprocs; unsigned nprocs; int64_t _npackages; unsigned i, j, cpu; struct hwloc_obj *obj; size_t size; int64_t l1dcachesize, l1icachesize; int64_t cacheways[2]; int64_t l2cachesize; int64_t l3cachesize; int64_t cachelinesize; int64_t memsize; int64_t _tmp; char cpumodel[64]; char cpuvendor[64]; char cpufamilynumber[20], cpumodelnumber[20], cpustepping[20]; int gotnuma = 0; int gotnumamemory = 0; if (topology->levels[0][0]->cpuset) /* somebody discovered things */ return -1; hwloc_alloc_root_sets(topology->levels[0][0]); /* Don't use hwloc_fallback_nbprocessors() because it would return online cpus only, * while we need all cpus when computing logical_per_package, etc below. * We don't know which CPUs are offline, but Darwin doesn't support binding anyway. * * TODO: try hw.logicalcpu_max */ if (hwloc_get_sysctlbyname("hw.logicalcpu", &_nprocs) || _nprocs <= 0) /* fallback to deprecated way */ if (hwloc_get_sysctlbyname("hw.ncpu", &_nprocs) || _nprocs <= 0) return -1; nprocs = _nprocs; topology->support.discovery->pu = 1; hwloc_debug("%u procs\n", nprocs); size = sizeof(cpuvendor); if (sysctlbyname("machdep.cpu.vendor", cpuvendor, &size, NULL, 0)) cpuvendor[0] = '\0'; size = sizeof(cpumodel); if (sysctlbyname("machdep.cpu.brand_string", cpumodel, &size, NULL, 0)) cpumodel[0] = '\0'; if (hwloc_get_sysctlbyname("machdep.cpu.family", &_tmp)) cpufamilynumber[0] = '\0'; else snprintf(cpufamilynumber, sizeof(cpufamilynumber), "%lld", (long long) _tmp); if (hwloc_get_sysctlbyname("machdep.cpu.model", &_tmp)) cpumodelnumber[0] = '\0'; else snprintf(cpumodelnumber, sizeof(cpumodelnumber), "%lld", (long long) _tmp); /* .extfamily and .extmodel are already added to .family and .model */ if (hwloc_get_sysctlbyname("machdep.cpu.stepping", &_tmp)) cpustepping[0] = '\0'; else snprintf(cpustepping, sizeof(cpustepping), "%lld", (long long) _tmp); if (!hwloc_get_sysctlbyname("hw.packages", &_npackages) && _npackages > 0) { unsigned npackages = _npackages; int64_t _cores_per_package; unsigned cores_per_package; int64_t _logical_per_package; unsigned logical_per_package; hwloc_debug("%u packages\n", npackages); if (!hwloc_get_sysctlbyname("machdep.cpu.thread_count", &_logical_per_package) && _logical_per_package > 0) /* official/modern way */ logical_per_package = _logical_per_package; else if (!hwloc_get_sysctlbyname("machdep.cpu.logical_per_package", &_logical_per_package) && _logical_per_package > 0) /* old way, gives the max supported by this "kind" of processor, * can be larger than the actual number for this model. */ logical_per_package = _logical_per_package; else /* Assume the trivia. */ logical_per_package = nprocs / npackages; hwloc_debug("%u threads per package\n", logical_per_package); if (nprocs == npackages * logical_per_package && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) for (i = 0; i < npackages; i++) { obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, i); obj->cpuset = hwloc_bitmap_alloc(); for (cpu = i*logical_per_package; cpu < (i+1)*logical_per_package; cpu++) hwloc_bitmap_set(obj->cpuset, cpu); hwloc_debug_1arg_bitmap("package %u has cpuset %s\n", i, obj->cpuset); if (cpuvendor[0] != '\0') hwloc_obj_add_info(obj, "CPUVendor", cpuvendor); if (cpumodel[0] != '\0') hwloc_obj_add_info(obj, "CPUModel", cpumodel); if (cpufamilynumber[0] != '\0') hwloc_obj_add_info(obj, "CPUFamilyNumber", cpufamilynumber); if (cpumodelnumber[0] != '\0') hwloc_obj_add_info(obj, "CPUModelNumber", cpumodelnumber); if (cpustepping[0] != '\0') hwloc_obj_add_info(obj, "CPUStepping", cpustepping); hwloc_insert_object_by_cpuset(topology, obj); } else { if (cpuvendor[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUVendor", cpuvendor); if (cpumodel[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel); if (cpufamilynumber[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUFamilyNumber", cpufamilynumber); if (cpumodelnumber[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUModelNumber", cpumodelnumber); if (cpustepping[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUStepping", cpustepping); } if (!hwloc_get_sysctlbyname("machdep.cpu.core_count", &_cores_per_package) && _cores_per_package > 0) /* official/modern way */ cores_per_package = _cores_per_package; else if (!hwloc_get_sysctlbyname("machdep.cpu.cores_per_package", &_cores_per_package) && _cores_per_package > 0) /* old way, gives the max supported by this "kind" of processor, * can be larger than the actual number for this model. */ cores_per_package = _cores_per_package; else /* no idea */ cores_per_package = 0; if (cores_per_package > 0 && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) { hwloc_debug("%u cores per package\n", cores_per_package); if (!(logical_per_package % cores_per_package)) for (i = 0; i < npackages * cores_per_package; i++) { obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, i); obj->cpuset = hwloc_bitmap_alloc(); for (cpu = i*(logical_per_package/cores_per_package); cpu < (i+1)*(logical_per_package/cores_per_package); cpu++) hwloc_bitmap_set(obj->cpuset, cpu); hwloc_debug_1arg_bitmap("core %u has cpuset %s\n", i, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); } } } else { if (cpuvendor[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUVendor", cpuvendor); if (cpumodel[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel); if (cpufamilynumber[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUFamilyNumber", cpufamilynumber); if (cpumodelnumber[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUModelNumber", cpumodelnumber); if (cpustepping[0] != '\0') hwloc_obj_add_info(topology->levels[0][0], "CPUStepping", cpustepping); } if (hwloc_get_sysctlbyname("hw.l1dcachesize", &l1dcachesize)) l1dcachesize = 0; if (hwloc_get_sysctlbyname("hw.l1icachesize", &l1icachesize)) l1icachesize = 0; if (hwloc_get_sysctlbyname("hw.l2cachesize", &l2cachesize)) l2cachesize = 0; if (hwloc_get_sysctlbyname("hw.l3cachesize", &l3cachesize)) l3cachesize = 0; if (hwloc_get_sysctlbyname("machdep.cpu.cache.L1_associativity", &cacheways[0])) cacheways[0] = 0; else if (cacheways[0] == 0xff) cacheways[0] = -1; if (hwloc_get_sysctlbyname("machdep.cpu.cache.L2_associativity", &cacheways[1])) cacheways[1] = 0; else if (cacheways[1] == 0xff) cacheways[1] = -1; if (hwloc_get_sysctlbyname("hw.cachelinesize", &cachelinesize)) cachelinesize = 0; if (hwloc_get_sysctlbyname("hw.memsize", &memsize)) memsize = 0; if (!sysctlbyname("hw.cacheconfig", NULL, &size, NULL, 0)) { unsigned n = size / sizeof(uint32_t); uint64_t cacheconfig[n]; uint64_t cachesize[n]; uint32_t cacheconfig32[n]; if ((!sysctlbyname("hw.cacheconfig", cacheconfig, &size, NULL, 0))) { /* Yeech. Darwin seemingly has changed from 32bit to 64bit integers for * cacheconfig, with apparently no way for detection. Assume the machine * won't have more than 4 billion cpus */ if (cacheconfig[0] > 0xFFFFFFFFUL) { memcpy(cacheconfig32, cacheconfig, size); for (i = 0 ; i < size / sizeof(uint32_t); i++) cacheconfig[i] = cacheconfig32[i]; } memset(cachesize, 0, sizeof(uint64_t) * n); size = sizeof(uint64_t) * n; if (sysctlbyname("hw.cachesize", cachesize, &size, NULL, 0)) { if (n > 0) cachesize[0] = memsize; if (n > 1) cachesize[1] = l1dcachesize; if (n > 2) cachesize[2] = l2cachesize; if (n > 3) cachesize[3] = l3cachesize; } hwloc_debug("%s", "caches"); for (i = 0; i < n && cacheconfig[i]; i++) hwloc_debug(" %"PRIu64"(%"PRIu64"kB)", cacheconfig[i], cachesize[i] / 1024); /* Now we know how many caches there are */ n = i; hwloc_debug("\n%u cache levels\n", n - 1); /* For each cache level (0 is memory) */ for (i = 0; i < n; i++) { /* cacheconfig tells us how many cpus share it, let's iterate on each cache */ for (j = 0; j < (nprocs / cacheconfig[i]); j++) { if (!i) { obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, j); obj->nodeset = hwloc_bitmap_alloc(); hwloc_bitmap_set(obj->nodeset, j); gotnuma++; } else { obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1CACHE+i-1, HWLOC_UNKNOWN_INDEX); } obj->cpuset = hwloc_bitmap_alloc(); for (cpu = j*cacheconfig[i]; cpu < ((j+1)*cacheconfig[i]); cpu++) hwloc_bitmap_set(obj->cpuset, cpu); if (i == 1 && l1icachesize && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) { /* FIXME assuming that L1i and L1d are shared the same way. Darwin * does not yet provide a way to know. */ hwloc_obj_t l1i = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1ICACHE, HWLOC_UNKNOWN_INDEX); l1i->cpuset = hwloc_bitmap_dup(obj->cpuset); hwloc_debug_1arg_bitmap("L1icache %u has cpuset %s\n", j, l1i->cpuset); l1i->attr->cache.depth = i; l1i->attr->cache.size = l1icachesize; l1i->attr->cache.linesize = cachelinesize; l1i->attr->cache.associativity = 0; l1i->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION; hwloc_insert_object_by_cpuset(topology, l1i); } if (i) { hwloc_debug_2args_bitmap("L%ucache %u has cpuset %s\n", i, j, obj->cpuset); obj->attr->cache.depth = i; obj->attr->cache.size = cachesize[i]; obj->attr->cache.linesize = cachelinesize; if (i <= sizeof(cacheways) / sizeof(cacheways[0])) obj->attr->cache.associativity = cacheways[i-1]; else obj->attr->cache.associativity = 0; if (i == 1 && l1icachesize) obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA; else obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; } else { hwloc_debug_1arg_bitmap("node %u has cpuset %s\n", j, obj->cpuset); if (cachesize[i]) { obj->attr->numanode.local_memory = cachesize[i]; gotnumamemory++; } obj->attr->numanode.page_types_len = 2; obj->attr->numanode.page_types = malloc(2*sizeof(*obj->attr->numanode.page_types)); memset(obj->attr->numanode.page_types, 0, 2*sizeof(*obj->attr->numanode.page_types)); obj->attr->numanode.page_types[0].size = hwloc_getpagesize(); #if HAVE_DECL__SC_LARGE_PAGESIZE obj->attr->numanode.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE); #endif } if (hwloc_filter_check_keep_object_type(topology, obj->type)) hwloc_insert_object_by_cpuset(topology, obj); else hwloc_free_unlinked_object(obj); /* FIXME: don't built at all, just build the cpuset in case l1i needs it */ } } } } if (gotnuma) topology->support.discovery->numa = 1; if (gotnumamemory) topology->support.discovery->numa = 1; /* add PU objects */ hwloc_setup_pu_level(topology, nprocs); hwloc_obj_add_info(topology->levels[0][0], "Backend", "Darwin"); hwloc_add_uname_info(topology, NULL); return 0; }