/* * Get the number of net cards in a NUMA node */ int hw_get_nnetcards(int NUMAnode) { int count, net_NUMAnode, error; hwloc_obj_t obj, obj_anc; count = 0; hwloc_topology_t topo_net; error = hwloc_topology_init(&topo_net); hwloc_topology_set_flags(topo_net, HWLOC_TOPOLOGY_FLAG_IO_DEVICES); if (!error){ hwloc_topology_load(topo_net); for (obj = hwloc_get_obj_by_type(topo_net, HWLOC_OBJ_OS_DEVICE, 0); obj; obj = hwloc_get_next_osdev(topo_net,obj)) if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK || obj->attr->osdev.type == HWLOC_OBJ_OSDEV_OPENFABRICS){ obj_anc = hwloc_get_non_io_ancestor_obj(topo_net,obj); net_NUMAnode = hwloc_bitmap_first(obj_anc->nodeset); //only if the MPI NUMA node is equal to the found net card if(NUMAnode == net_NUMAnode) count++; } } hwloc_topology_destroy(topo_net); return count; }
/*
 * Look up the hwloc OS device that sits on the same PCI device as an NVML
 * device.
 *
 * topology: hwloc topology to search; it must describe the current system,
 *           otherwise errno is set to EINVAL and NULL is returned.
 * device:   NVML device handle whose PCI location is queried.
 *
 * Returns the first OS device whose name starts with "card" and whose parent
 * is a PCI device with the same domain, bus and device number as the NVML
 * device and with PCI function 0. Returns NULL when the PCI query fails or
 * when no such OS device exists.
 */
hwloc_obj_t Machine::get_non_nvml_device(hwloc_topology_t topology, nvmlDevice_t device)
{
  if (!hwloc_topology_is_thissystem(topology)) {
    errno = EINVAL;
    return NULL;
  }

  nvmlPciInfo_t pci_info;
  if (nvmlDeviceGetPciInfo(device, &pci_info) != NVML_SUCCESS)
    return NULL;

  for (hwloc_obj_t candidate = hwloc_get_next_osdev(topology, NULL);
       candidate != NULL;
       candidate = hwloc_get_next_osdev(topology, candidate)) {
    /* Only OS devices named "card..." are of interest. */
    if (strncmp(candidate->name, "card", 4) != 0)
      continue;

    hwloc_obj_t holder = candidate->parent;
    if (holder == NULL || holder->type != HWLOC_OBJ_PCI_DEVICE)
      continue;

    /* Same PCI location as the NVML device, function 0 only. */
    bool same_slot = holder->attr->pcidev.domain == pci_info.domain
                  && holder->attr->pcidev.bus == pci_info.bus
                  && holder->attr->pcidev.dev == pci_info.device
                  && holder->attr->pcidev.func == 0;
    if (same_slot)
      return candidate;
  }

  return NULL;
}
int main(void) { hwloc_topology_t topology; hwloc_obj_t obj; hwloc_topology_init(&topology); hwloc_topology_set_io_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_ALL); hwloc_topology_load(topology); printf("Found %d bridges\n", hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_BRIDGE)); obj = NULL; while ((obj = hwloc_get_next_bridge(topology, obj)) != NULL) { assert(obj->type == HWLOC_OBJ_BRIDGE); /* only host->pci and pci->pci bridge supported so far */ if (obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) { assert(obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI); printf(" Found host->PCI bridge for domain %04x bus %02x-%02x\n", obj->attr->bridge.downstream.pci.domain, obj->attr->bridge.downstream.pci.secondary_bus, obj->attr->bridge.downstream.pci.subordinate_bus); } else { assert(obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI); assert(obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI); printf(" Found PCI->PCI bridge [%04x:%04x] for domain %04x bus %02x-%02x\n", obj->attr->bridge.upstream.pci.vendor_id, obj->attr->bridge.upstream.pci.device_id, obj->attr->bridge.downstream.pci.domain, obj->attr->bridge.downstream.pci.secondary_bus, obj->attr->bridge.downstream.pci.subordinate_bus); } } printf("Found %d PCI devices\n", hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PCI_DEVICE)); obj = NULL; while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) { assert(obj->type == HWLOC_OBJ_PCI_DEVICE); printf(" Found PCI device class %04x vendor %04x model %04x\n", obj->attr->pcidev.class_id, obj->attr->pcidev.vendor_id, obj->attr->pcidev.device_id); } printf("Found %d OS devices\n", hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_OS_DEVICE)); obj = NULL; while ((obj = hwloc_get_next_osdev(topology, obj)) != NULL) { assert(obj->type == HWLOC_OBJ_OS_DEVICE); printf(" Found OS device %s subtype %d\n", obj->name, obj->attr->osdev.type); } assert(HWLOC_TYPE_DEPTH_BRIDGE == hwloc_get_type_depth(topology, HWLOC_OBJ_BRIDGE)); 
assert(HWLOC_TYPE_DEPTH_PCI_DEVICE == hwloc_get_type_depth(topology, HWLOC_OBJ_PCI_DEVICE)); assert(HWLOC_TYPE_DEPTH_OS_DEVICE == hwloc_get_type_depth(topology, HWLOC_OBJ_OS_DEVICE)); assert(hwloc_compare_types(HWLOC_OBJ_BRIDGE, HWLOC_OBJ_PCI_DEVICE) < 0); assert(hwloc_compare_types(HWLOC_OBJ_BRIDGE, HWLOC_OBJ_OS_DEVICE) < 0); assert(hwloc_compare_types(HWLOC_OBJ_PCI_DEVICE, HWLOC_OBJ_OS_DEVICE) < 0); hwloc_topology_destroy(topology); return 0; }
/* * Returns the network card of a MPI process * */ int hw_my_netCard(int core, int myRank) { int card=0, error, nnetcards, *netcards, my_local_cards=0; int NUMAnode = hw_get_myNode(core), net_NUMAnode; hwloc_obj_t obj, obj_anc; nnetcards = hw_get_nnetcards(NUMAnode); netcards = malloc(nnetcards*sizeof(int)); hwloc_topology_t topo_net; error = hwloc_topology_init(&topo_net); hwloc_topology_set_flags(topo_net, HWLOC_TOPOLOGY_FLAG_IO_DEVICES); if (!error){ hwloc_topology_load(topo_net); my_local_cards = 0; for (obj = hwloc_get_obj_by_type(topo_net, HWLOC_OBJ_OS_DEVICE, 0); obj ; obj = hwloc_get_next_osdev(topo_net,obj)) if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK || obj->attr->osdev.type == HWLOC_OBJ_OSDEV_OPENFABRICS) { obj_anc = hwloc_get_non_io_ancestor_obj(topo_net,obj); net_NUMAnode = hwloc_bitmap_first(obj_anc->nodeset); if ( net_NUMAnode == NUMAnode ) { netcards[my_local_cards] = obj->os_index; my_local_cards++;} } } hwloc_topology_destroy(topo_net); if (!my_local_cards) return -1; //no net affinity else return netcards[myRank%nnetcards]; }
//Initializes HWLOC and load the machine architecture int hw_topology_init (struct arch_topology *topo) { hwloc_obj_t obj, core1, core2; int count, i, j, error; //Create the machine representation error = hwloc_topology_init(&topology); //Go throught the topology only if HWLOC is //successifully initialized if(!error) { hwloc_topology_load(topology); local_topo = malloc(sizeof(struct arch_topology)); #if defined (__DBCSR_ACC) || defined (__PW_CUDA) int nDev; ma_get_ndevices_cu(&nDev); #endif //Extract number of NUMA nodes if (hwloc_get_type_depth (topology, HWLOC_OBJ_NODE)) topo->nnodes = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_NODE)); else topo->nnodes = 0; //Get number of cores, sockets and processing units topo->ncores = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_CORE)); topo->nsockets = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_SOCKET)); topo->npus = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_PU)); //Compute number of memory controlers per socket //basically the number of NUMA nodes per socket if (topo->nnodes > topo->nsockets) topo->nmemcontroller = topo->nnodes/topo->nsockets; else topo->nmemcontroller = 1; count = 0; topo->nshared_caches = 0; //Get derivate information - get number of cache per PU for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,0); obj; obj = obj->parent) { if (obj->type == HWLOC_OBJ_CACHE) { if (obj->arity>1) topo->nshared_caches++; else { count++; topo->ncaches = count; } } } //Number of direct siblings //Siblings cores are the ones that share at least one component //level of the architecture count = 0; core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 0); core2 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 1); obj = hwloc_get_common_ancestor_obj(topology, core1, core2); if (obj) topo->nsiblings = obj->arity; //Machine node and core representation machine_nodes = (struct node*) 
malloc (topo->nnodes*sizeof(struct node)); machine_cores = (struct core*) malloc (topo->ncores*sizeof(struct core)); phys_cpus = malloc (topo->ncores*sizeof(int)); get_phys_id(topology, topo->ncores, 0); //Get the caches sizes and other information for each core for (i = 0; i < topo->ncores ; i++) { machine_cores[i].caches = malloc (topo->ncaches*sizeof(size_t)); machine_cores[i].shared_caches = malloc (topo->ncaches*sizeof(int)); for (j = 0; j < topo->ncaches; j++) machine_cores[i].shared_caches[j] = 0; for (j = topo->ncaches ; j > topo->ncaches - topo->nshared_caches; j--) machine_cores[i].shared_caches[j-1] = 1; machine_cores[i].nsiblings = topo->nsiblings; machine_cores[i].siblings_id = malloc (topo->nsiblings*sizeof(unsigned)); if(topo->ncores == topo->npus){ core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, i); machine_cores[i].id = core1->os_index; count = 0; for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i); obj; obj = obj->parent) { if (obj->type == HWLOC_OBJ_CACHE){ machine_cores[i].caches[count] = obj->attr->cache.size / 1024; count++; } if (obj->type == HWLOC_OBJ_NODE) machine_cores[i].numaNode = obj->logical_index; } } else{ core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, i); machine_cores[i].id = core1->os_index; count = 0; for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_CORE,i); obj; obj = obj->parent) { if (obj->type == HWLOC_OBJ_CACHE) { machine_cores[i].caches[count] = obj->attr->cache.size / 1024; count++; } if (obj->type == HWLOC_OBJ_NODE) machine_cores[i].numaNode = obj->logical_index; } } } //Get siblings id - so each core knows its siblings for (i = 0; i < topo->ncores ; i++) { if(topo->ncores == topo->npus){ core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, i); set_phys_siblings(i,machine_cores[i].id,core1,topo->ncores,topo->nsiblings,HWLOC_OBJ_PU); } else{ core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, i); set_phys_siblings(i,machine_cores[i].id,core1,topo->ncores,topo->nsiblings,HWLOC_OBJ_CORE); } } int 
ncore_node = topo->ncores/topo->nnodes; int count_cores; //Get the information for each NUMAnode for (i = 0; i < topo->nnodes ; i++) { obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, i); machine_nodes[i].id = obj->os_index; machine_nodes[i].memory = obj->memory.total_memory; machine_nodes[i].ncores = ncore_node; machine_nodes[i].mycores = malloc (ncore_node*sizeof(unsigned)); //Get the cores id of each NUMAnode count_cores = 0; set_node_cores(topology, obj, i, &count_cores); //GPU support #if defined (__DBCSR_ACC) || defined (__PW_CUDA) int *devIds; devIds = malloc (nDev*sizeof(int)); topo->ngpus = nDev; ma_get_cu(i,devIds); machine_nodes[i].mygpus = devIds; #endif } //counting network cards count = 0; hwloc_topology_t topo_net; error = hwloc_topology_init(&topo_net); hwloc_topology_set_flags(topo_net, HWLOC_TOPOLOGY_FLAG_IO_DEVICES); if (!error){ hwloc_topology_load(topo_net); for (obj = hwloc_get_obj_by_type(topo_net, HWLOC_OBJ_OS_DEVICE, 0); obj; obj = hwloc_get_next_osdev(topo_net,obj)) if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK || obj->attr->osdev.type == HWLOC_OBJ_OSDEV_OPENFABRICS) count++; topo->nnetcards = count; } else //if can not load I/O devices topo->nnetcards = 0; hwloc_topology_destroy(topo_net); /*Local copy of the machine topology components*/ local_topo->nnodes = topo->nnodes; local_topo->nsockets = topo->nsockets; local_topo->ncores = topo->ncores; local_topo->npus = topo->npus; local_topo->ngpus = topo->ngpus; local_topo->ncaches = topo->ncaches; local_topo->nshared_caches = topo->nshared_caches; local_topo->nsiblings = topo->nsiblings; local_topo->nmemcontroller = topo->nmemcontroller; local_topo->nnetcards = topo->nnetcards; } return error; }
int main(void) { hwloc_topology_t topology; hwloc_obj_t obj; hwloc_topology_init(&topology); hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_IO); assert(-1 == hwloc_topology_ignore_type(topology, HWLOC_OBJ_PCI_DEVICE)); assert(-1 == hwloc_topology_ignore_type_keep_structure(topology, HWLOC_OBJ_BRIDGE)); assert(-1 == hwloc_topology_ignore_type(topology, HWLOC_OBJ_OS_DEVICE)); hwloc_topology_load(topology); printf("Found %d bridges\n", hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_BRIDGE)); obj = NULL; while ((obj = hwloc_get_next_bridge(topology, obj)) != NULL) { assert(obj->type == HWLOC_OBJ_BRIDGE); /* only host->pci and pci->pci bridge supported so far */ if (obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST) { assert(obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI); printf(" Found host->PCI bridge for domain %04x bus %02x-%02x\n", obj->attr->bridge.downstream.pci.domain, obj->attr->bridge.downstream.pci.secondary_bus, obj->attr->bridge.downstream.pci.subordinate_bus); } else { assert(obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI); assert(obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI); printf(" Found PCI->PCI bridge [%04x:%04x] for domain %04x bus %02x-%02x\n", obj->attr->bridge.upstream.pci.vendor_id, obj->attr->bridge.upstream.pci.device_id, obj->attr->bridge.downstream.pci.domain, obj->attr->bridge.downstream.pci.secondary_bus, obj->attr->bridge.downstream.pci.subordinate_bus); } } printf("Found %d PCI devices\n", hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PCI_DEVICE)); obj = NULL; while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) { assert(obj->type == HWLOC_OBJ_PCI_DEVICE); printf(" Found PCI device class %04x vendor %04x model %04x\n", obj->attr->pcidev.class_id, obj->attr->pcidev.vendor_id, obj->attr->pcidev.device_id); } printf("Found %d OS devices\n", hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_OS_DEVICE)); obj = NULL; while ((obj = hwloc_get_next_osdev(topology, obj)) != NULL) { 
assert(obj->type == HWLOC_OBJ_OS_DEVICE); printf(" Found OS device %s subtype %d\n", obj->name, obj->attr->osdev.type); } assert(HWLOC_TYPE_DEPTH_BRIDGE == hwloc_get_type_depth(topology, HWLOC_OBJ_BRIDGE)); assert(HWLOC_TYPE_DEPTH_PCI_DEVICE == hwloc_get_type_depth(topology, HWLOC_OBJ_PCI_DEVICE)); assert(HWLOC_TYPE_DEPTH_OS_DEVICE == hwloc_get_type_depth(topology, HWLOC_OBJ_OS_DEVICE)); assert(hwloc_compare_types(HWLOC_OBJ_BRIDGE, HWLOC_OBJ_PCI_DEVICE) < 0); assert(hwloc_compare_types(HWLOC_OBJ_BRIDGE, HWLOC_OBJ_OS_DEVICE) < 0); assert(hwloc_compare_types(HWLOC_OBJ_PCI_DEVICE, HWLOC_OBJ_OS_DEVICE) < 0); /* check that hwloc_get_hostbridge_by_pcibus() and hwloc_get_non_io_ancestor_obj work fine */ obj = NULL; while ((obj = hwloc_get_next_pcidev(topology, obj)) != NULL) { assert(hwloc_get_hostbridge_by_pcibus(topology, obj->attr->pcidev.domain, obj->attr->pcidev.bus)->parent == hwloc_get_non_io_ancestor_obj(topology, obj)); } hwloc_topology_destroy(topology); return 0; }