/*
 * Write a synthetic description of the topology ("Type:arity Type:arity ...")
 * followed by a newline.
 *
 * topology:  topology whose root object is the starting point.
 * filename:  passed to open_output() together with `overwrite` to obtain the
 *            destination stream.
 * logical, legend, verbose_mode: unused here.
 *
 * The function refuses to run (message on stderr, nothing written) when the
 * root object is not flagged symmetric_subtree, and reports on stderr when the
 * output stream cannot be opened.
 */
void output_synthetic(hwloc_topology_t topology, const char *filename, int overwrite, int logical __hwloc_attribute_unused, int legend __hwloc_attribute_unused, int verbose_mode __hwloc_attribute_unused)
{
  FILE *output;
  hwloc_obj_t obj = hwloc_get_root_obj(topology);
  /* Unsigned so that the value matches the "%u" conversion used below. */
  unsigned arity;

  if (!obj->symmetric_subtree) {
    fprintf(stderr, "Cannot output assymetric topology in synthetic format.\n");
    fprintf(stderr, "Adding --no-io may help making the topology symmetric.\n");
    return;
  }

  output = open_output(filename, overwrite);
  if (!output) {
    fprintf(stderr, "Failed to open %s for writing (%s)\n", filename, strerror(errno));
    return;
  }

  /* Follow the first child at every level: one "type:arity" entry per level,
   * where arity is the number of children of the parent level. */
  for (arity = obj->arity; arity; arity = obj->arity) {
    char types[64];

    obj = obj->first_child;
    hwloc_obj_type_snprintf(types, sizeof(types), obj, 1);
    fprintf(output, "%s:%u ", types, arity);
  }
  fprintf(output, "\n");

  /* The stream may be stdout itself, which must stay open. */
  if (output != stdout)
    fclose(output);
}
/*
 * Print a one-line textual description of object `l` to `output`.
 *
 * When lstopo_show_cpuset < 2 the type (or the name, for named Misc objects),
 * the index, the attributes and possibly the quoted name are printed.
 * When the object has a cpuset and lstopo_show_cpuset is non-zero, the cpuset
 * follows (taskset format if lstopo_show_taskset is set).
 * For PUs in verbose_mode >= 2 an (offline)/(forbidden)/(running) annotation
 * is appended.
 *
 * All text goes to `output`; the annotations used to go to stdout through
 * printf(), which split the line when `output` was a file.
 */
static void
output_console_obj (hwloc_topology_t topology, hwloc_obj_t l, FILE *output, int logical, int verbose_mode)
{
  unsigned idx = logical ? l->logical_index : l->os_index;
  const char *indexprefix = logical ? " L#" : " P#";

  if (lstopo_show_cpuset < 2) {
    char type[32], phys[32] = "";
    char *attr = NULL;
    const char *attrtext = "";
    int len;

    if (l->type == HWLOC_OBJ_MISC && l->name)
      fprintf(output, "%s", l->name);
    else {
      hwloc_obj_type_snprintf (type, sizeof(type), l, verbose_mode-1);
      fprintf(output, "%s", type);
    }

    if (l->depth != 0 && idx != (unsigned)-1
        && l->type != HWLOC_OBJ_PCI_DEVICE
        && (l->type != HWLOC_OBJ_BRIDGE || l->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST))
      fprintf(output, "%s%u", indexprefix, idx);

    if (logical && l->os_index != (unsigned) -1 &&
        (verbose_mode >= 2 || l->type == HWLOC_OBJ_PU || l->type == HWLOC_OBJ_NODE))
      snprintf(phys, sizeof(phys), "P#%u", l->os_index);

    /* First call only measures the attribute string, second call fills it.
     * A failed measurement or allocation is treated as "no attributes"
     * instead of dereferencing a NULL pointer. */
    len = hwloc_obj_attr_snprintf (NULL, 0, l, " ", verbose_mode-1);
    if (len > 0)
      attr = malloc((size_t)len + 1);
    if (attr) {
      *attr = '\0';
      hwloc_obj_attr_snprintf (attr, len+1, l, " ", verbose_mode-1);
      attrtext = attr;
    }
    if (*phys || *attrtext) {
      const char *separator = (*phys != '\0' && *attrtext != '\0') ? " " : "";
      fprintf(output, " (%s%s%s)", phys, separator, attrtext);
    }
    free(attr);

    if ((l->type == HWLOC_OBJ_OS_DEVICE || verbose_mode >= 2) && l->name && l->type != HWLOC_OBJ_MISC)
      fprintf(output, " \"%s\"", l->name);
  }

  if (!l->cpuset)
    return;

  if (lstopo_show_cpuset == 1)
    fprintf(output, " cpuset=");
  if (lstopo_show_cpuset) {
    char *cpusetstr = NULL;

    if (lstopo_show_taskset)
      hwloc_bitmap_taskset_asprintf(&cpusetstr, l->cpuset);
    else
      hwloc_bitmap_asprintf(&cpusetstr, l->cpuset);
    /* cpusetstr stays NULL if the conversion could not allocate */
    if (cpusetstr)
      fprintf(output, "%s", cpusetstr);
    free(cpusetstr);
  }

  /* annotate if the PU is forbidden/offline/running */
  if (l->type == HWLOC_OBJ_PU && verbose_mode >= 2) {
    if (lstopo_pu_offline(l))
      fprintf(output, " (offline)");
    else if (lstopo_pu_forbidden(l))
      fprintf(output, " (forbidden)");
    else if (lstopo_pu_running(topology, l))
      fprintf(output, " (running)");
  }
}
/*
 * Format the label of `obj` into `text` (at most `textlen` bytes) and return
 * the value of the final snprintf() call.
 *
 * - Named OS devices, Misc and Group objects are labelled by their name only.
 * - Otherwise the label is "<type><index> (<attrs>)<total memory>", where
 *   <type> is the subtype when one is set, the index is shown only when
 *   loutput->show_indexes[] allows it for this type, and attributes / root
 *   total memory only when loutput->show_attrs[] allows it.
 */
static int
lstopo_obj_snprintf(struct lstopo_output *loutput, char *text, size_t textlen, hwloc_obj_t obj)
{
  char type_buf[32];
  char index_buf[32] = "";
  char attr_buf[256];
  char totmem_buf[64] = "";
  int nattr = 0;
  unsigned number;
  const char *number_prefix;

  if (loutput->logical) {
    number = obj->logical_index;
    number_prefix = " L#";
  } else {
    number = obj->os_index;
    number_prefix = " P#";
  }

  /* For OSDev, Misc and Group, the name stands for type+index+attrs */
  if (obj->name) {
    switch (obj->type) {
    case HWLOC_OBJ_OS_DEVICE:
    case HWLOC_OBJ_MISC:
    case HWLOC_OBJ_GROUP:
      return snprintf(text, textlen, "%s", obj->name);
    default:
      break;
    }
  }

  /* a subtype, when present, is used instead of the basic type name */
  if (!obj->subtype)
    hwloc_obj_type_snprintf(type_buf, sizeof(type_buf), obj, 0);
  else
    snprintf(type_buf, sizeof(type_buf), "%s", obj->subtype);

  /* index: never for the root, PCI devices or non-host bridges */
  if (loutput->show_indexes[obj->type]
      && number != (unsigned)-1
      && obj->depth != 0
      && obj->type != HWLOC_OBJ_PCI_DEVICE) {
    int is_bridge = (obj->type == HWLOC_OBJ_BRIDGE);

    if (!is_bridge || obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST)
      snprintf(index_buf, sizeof(index_buf), "%s%u", number_prefix, number);
  }

  if (loutput->show_attrs[obj->type]) {
    nattr = hwloc_obj_attr_snprintf(attr_buf, sizeof(attr_buf), obj, " ", 0);
    /* total memory is reported on the parentless (root) object only */
    if (!obj->parent && obj->total_memory)
      snprintf(totmem_buf, sizeof(totmem_buf), " (%lu%s total)",
               (unsigned long) hwloc_memory_size_printf_value(obj->total_memory, 0),
               hwloc_memory_size_printf_unit(obj->total_memory, 0));
  }

  if (nattr <= 0)
    return snprintf(text, textlen, "%s%s%s", type_buf, index_buf, totmem_buf);

  return snprintf(text, textlen, "%s%s (%s)%s", type_buf, index_buf, attr_buf, totmem_buf);
}
/*
 * Print `obj` and, recursively, all of its children to stdout, one object per
 * line, indented by two columns per level of `depth`.
 * Each line holds the type, "#<os_index>" when the OS index is not
 * (unsigned)-1, and the attribute string in parentheses when it is non-empty.
 */
static void print_children(hwloc_topology_t topology, hwloc_obj_t obj, int depth)
{
    char type_name[32];
    char attributes[1024];
    unsigned child = 0;
    int indent = 2 * depth;

    hwloc_obj_type_snprintf(type_name, sizeof(type_name), obj, 0);
    printf("%*s%s", indent, "", type_name);

    if ((unsigned) -1 != obj->os_index) {
        printf("#%u", obj->os_index);
    }

    hwloc_obj_attr_snprintf(attributes, sizeof(attributes), obj, " ", 0);
    if (attributes[0] != '\0') {
        printf("(%s)", attributes);
    }
    printf("\n");

    /* depth-first walk over the children array */
    while (child < obj->arity) {
        print_children(topology, obj->children[child], depth + 1);
        child++;
    }
}
/*
 * Appends the memory hierarchy of the machine to console_output.
 *
 * Recursive function that walks the topology below `obj`. Machine and NUMA
 * node objects, and caches with at most one child, get a full description
 * line; caches with more than one child get their type followed by their
 * size in MB. Other object types produce no text but are still traversed.
 *
 *  topology: the hwloc topology
 *  obj:      the current object
 *  depth:    number of blank columns printed in front of the line
 *
 * Text is formatted with snprintf() so that a long description or a large
 * `depth` gets truncated instead of overflowing the local buffer.
 * NOTE(review): console_output is declared elsewhere and is appended to with
 * strcat(); its capacity is not visible here and is not checked.
 */
void print_children_mem(hwloc_topology_t topology, hwloc_obj_t obj, int depth)
{
  char string[128], out_string[128];
  unsigned i;

  if (obj->type == HWLOC_OBJ_MACHINE || obj->type == HWLOC_OBJ_NODE ||
      (obj->type == HWLOC_OBJ_CACHE && obj->arity <= 1)) {
    hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
    snprintf(out_string, sizeof(out_string), "%*s%s\n", depth, "", string);
    strcat(console_output, out_string);
  }
  else if (obj->type == HWLOC_OBJ_CACHE && obj->arity > 1) {
    hwloc_obj_type_snprintf(string, sizeof(string), obj, 0);
    snprintf(out_string, sizeof(out_string), "%*s%s", depth, "", string);
    strcat(console_output, out_string);
    /* the size expression is wider than int: convert explicitly and use a
     * matching conversion specifier */
    snprintf(out_string, sizeof(out_string), " (%lluMB)\n",
             (unsigned long long)(obj->attr->cache.size / (1024*1024)));
    strcat(console_output, out_string);
  }

  for (i = 0; i < obj->arity; i++) {
    print_children_mem(topology, obj->children[i], depth + 1);
  }
}
/*
 * Build the label of `obj` in `text` (at most `textlen` bytes) and return the
 * value of the final snprintf() call.
 *
 * Named OS devices, Misc and Group objects are labelled with their name alone.
 * Any other object gets "<type><index> (<attrs>)<total memory>":
 *  - <type> is the "Type" info value when the object has one,
 *  - <index> is the logical or OS index depending on `logical`,
 *  - the total memory part only appears on the parentless object when its
 *    total memory exceeds its local memory.
 */
static int
lstopo_obj_snprintf(char *text, size_t textlen, hwloc_obj_t obj, int logical)
{
  char kind[32];
  char index_part[32] = "";
  char attrs[256];
  char total_part[64] = "";
  const char *custom_type;
  int attrs_len;
  int name_only_type = (obj->type == HWLOC_OBJ_OS_DEVICE
                        || obj->type == HWLOC_OBJ_MISC
                        || obj->type == HWLOC_OBJ_GROUP);

  /* For OSDev, Misc and Group, name replaces type+index+attrs */
  if (name_only_type && obj->name)
    return snprintf(text, textlen, "%s", obj->name);

  /* A "Type" info entry takes precedence over the basic type name */
  custom_type = hwloc_obj_get_info_by_name(obj, "Type");
  if (custom_type == NULL)
    hwloc_obj_type_snprintf(kind, sizeof(kind), obj, 0);
  else
    snprintf(kind, sizeof(kind), "%s", custom_type);

  /* No index for the root, for PCI devices and for non-host bridges */
  if (obj->depth != 0 && obj->type != HWLOC_OBJ_PCI_DEVICE) {
    unsigned number = logical ? obj->logical_index : obj->os_index;

    if (number != (unsigned)-1
        && (obj->type != HWLOC_OBJ_BRIDGE
            || obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST))
      snprintf(index_part, sizeof(index_part), "%s%u", logical ? " L#" : " P#", number);
  }

  attrs_len = hwloc_obj_attr_snprintf(attrs, sizeof(attrs), obj, " ", 0);

  /* root total_memory, shown only when it differs from the local_memory */
  if (obj->parent == NULL && obj->memory.total_memory > obj->memory.local_memory)
    snprintf(total_part, sizeof(total_part), " (%lu%s total)",
             (unsigned long) hwloc_memory_size_printf_value(obj->memory.total_memory, 0),
             hwloc_memory_size_printf_unit(obj->memory.total_memory, 0));

  if (attrs_len > 0)
    return snprintf(text, textlen, "%s%s (%s)%s", kind, index_part, attrs, total_part);

  return snprintf(text, textlen, "%s%s%s", kind, index_part, total_part);
}
/*
 * Print one line describing a task on stdout:
 *   "<pid>\t<binding>\t\t<name>[\t<pidoutput>]\n"
 * with a leading space when `thread` is non-zero.
 *
 * The binding is either the cpuset string (when show_cpuset is set) or a
 * space-separated list of the largest objects covering `cpuset`, each printed
 * as "type" or "type:index" (logical or OS index depending on `logical`).
 *
 * Allocation failures and a cpuset that no object covers no longer lead to a
 * NULL dereference: the corresponding part of the line is simply left out.
 */
static void print_task(hwloc_topology_t topology, long pid_number, const char *name, hwloc_bitmap_t cpuset, char *pidoutput, int thread)
{
  printf("%s%ld\t", thread ? " " : "", pid_number);

  if (show_cpuset) {
    char *cpuset_str = NULL;

    hwloc_bitmap_asprintf(&cpuset_str, cpuset);
    if (cpuset_str)
      printf("%s", cpuset_str);
    free(cpuset_str);
  } else {
    hwloc_bitmap_t remaining = hwloc_bitmap_dup(cpuset);
    int first = 1;

    while (remaining && !hwloc_bitmap_iszero(remaining)) {
      char type[64];
      unsigned idx;
      hwloc_obj_t obj = hwloc_get_first_largest_obj_inside_cpuset(topology, remaining);

      /* nothing in the topology lies inside what is left: stop here rather
       * than dereferencing NULL */
      if (!obj)
        break;

      /* don't show a cache if there's something equivalent and nicer */
      while (hwloc_obj_type_is_cache(obj->type) && obj->arity == 1)
        obj = obj->first_child;

      hwloc_obj_type_snprintf(type, sizeof(type), obj, 1);
      idx = logical ? obj->logical_index : obj->os_index;
      if (idx == (unsigned) -1)
        printf("%s%s", first ? "" : " ", type);
      else
        printf("%s%s:%u", first ? "" : " ", type, idx);

      /* remove what this object covers and look at the rest */
      hwloc_bitmap_andnot(remaining, remaining, obj->cpuset);
      first = 0;
    }
    if (remaining)
      hwloc_bitmap_free(remaining);
  }

  printf("\t\t%s%s%s\n", name, pidoutput ? "\t" : "", pidoutput ? pidoutput : "");
}
/*
 * Append a textual description of `obj` and of all its descendants to the
 * heap string *output.
 *
 * output: in/out; *output may be NULL on entry. On return it points to a
 *         newly allocated string holding the previous content followed by the
 *         new text; the previous string has been freed.
 * prefix: text placed in front of the object's first line (NULL means none);
 *         children are printed with prefix + "\t".
 * topo:   topology, used to query the binding support flags that are added
 *         for Machine objects.
 * obj:    object to describe.
 *
 * Every formatting call that writes into the local buffer is now bounded by
 * sizeof(string) rather than by an unrelated constant, so the buffer cannot
 * be overrun if that constant is larger than the buffer.
 * NOTE(review): the asprintf() results are still not checked.
 */
static void print_hwloc_obj(char **output, char *prefix, hwloc_topology_t topo, hwloc_obj_t obj)
{
    hwloc_obj_t obj2;
    char string[1024], *tmp, *tmp2, *pfx;
    unsigned i;
    struct hwloc_topology_support *support;

    /* print the object type */
    hwloc_obj_type_snprintf(string, sizeof(string), obj, 1);
    asprintf(&pfx, "\n%s\t", (NULL == prefix) ? "" : prefix);
    asprintf(&tmp, "%sType: %s Number of child objects: %u%sName=%s",
             (NULL == prefix) ? "" : prefix, string, obj->arity,
             pfx, (NULL == obj->name) ? "NULL" : obj->name);
    if (0 < hwloc_obj_attr_snprintf(string, sizeof(string), obj, pfx, 1)) {
        /* print the attributes */
        asprintf(&tmp2, "%s%s%s", tmp, pfx, string);
        free(tmp);
        tmp = tmp2;
    }
    /* print the cpusets - apparently, some new HWLOC types don't
     * have cpusets, so protect ourselves here */
    if (NULL != obj->cpuset) {
        hwloc_bitmap_snprintf(string, sizeof(string), obj->cpuset);
        asprintf(&tmp2, "%s%sCpuset: %s", tmp, pfx, string);
        free(tmp);
        tmp = tmp2;
    }
    if (NULL != obj->online_cpuset) {
        hwloc_bitmap_snprintf(string, sizeof(string), obj->online_cpuset);
        asprintf(&tmp2, "%s%sOnline: %s", tmp, pfx, string);
        free(tmp);
        tmp = tmp2;
    }
    if (NULL != obj->allowed_cpuset) {
        hwloc_bitmap_snprintf(string, sizeof(string), obj->allowed_cpuset);
        asprintf(&tmp2, "%s%sAllowed: %s", tmp, pfx, string);
        free(tmp);
        tmp = tmp2;
    }
    if (HWLOC_OBJ_MACHINE == obj->type) {
        /* root level object - add support values */
        support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo);
        asprintf(&tmp2, "%s%sBind CPU proc: %s%sBind CPU thread: %s", tmp, pfx,
                 (support->cpubind->set_thisproc_cpubind) ? "TRUE" : "FALSE", pfx,
                 (support->cpubind->set_thisthread_cpubind) ? "TRUE" : "FALSE");
        free(tmp);
        tmp = tmp2;
        asprintf(&tmp2, "%s%sBind MEM proc: %s%sBind MEM thread: %s", tmp, pfx,
                 (support->membind->set_thisproc_membind) ? "TRUE" : "FALSE", pfx,
                 (support->membind->set_thisthread_membind) ? "TRUE" : "FALSE");
        free(tmp);
        tmp = tmp2;
    }

    /* tmp2 becomes the accumulated text: previous output + this object */
    asprintf(&tmp2, "%s%s\n", (NULL == *output) ? "" : *output, tmp);
    free(tmp);
    free(pfx);

    /* children are printed one tab deeper and appended to tmp2 */
    asprintf(&pfx, "%s\t", (NULL == prefix) ? "" : prefix);
    for (i=0; i < obj->arity; i++) {
        obj2 = obj->children[i];
        /* print the object */
        print_hwloc_obj(&tmp2, pfx, topo, obj2);
    }
    free(pfx);

    if (NULL != *output) {
        free(*output);
    }
    *output = tmp2;
}
/*
 * Print a one-line textual description of object `l` to `output`.
 *
 * When lstopo_show_cpuset < 2: type (with the "Type" info value in
 * parentheses if any), index, name of Misc/Group objects, attributes, root
 * total memory (verbose_mode == 1 only) and the quoted name.
 * When the object has a cpuset and lstopo_show_cpuset is non-zero, the cpuset
 * follows (taskset format if lstopo_show_taskset is set).
 * PUs get a (forbidden)/(running) annotation when verbose_mode >= 2.
 *
 * collapsed: when non-zero and `l` is a PCI device, the indexes are shown as
 *            "[collapsed]" instead of numbers.
 *
 * Allocation failures are tolerated: the attribute or cpuset text is left out
 * instead of dereferencing a NULL or uninitialised pointer.
 */
static void
output_console_obj (hwloc_topology_t topology, hwloc_obj_t l, FILE *output, int logical, int verbose_mode, int collapsed)
{
  unsigned idx = logical ? l->logical_index : l->os_index;
  const char *value;
  char pidxstr[16] = "P#[collapsed]";
  char lidxstr[16] = "L#[collapsed]";

  if (!collapsed || l->type != HWLOC_OBJ_PCI_DEVICE) {
    snprintf(pidxstr, sizeof(pidxstr), "P#%u", l->os_index);
    snprintf(lidxstr, sizeof(lidxstr), "L#%u", l->logical_index);
  }

  if (lstopo_show_cpuset < 2) {
    char type[64], phys[32] = "";
    char *attr = NULL;
    const char *attrtext = "";
    int len;

    value = hwloc_obj_get_info_by_name(l, "Type");
    hwloc_obj_type_snprintf (type, sizeof(type), l, verbose_mode-1);
    if (value)
      fprintf(output, "%s(%s)", type, value);
    else
      fprintf(output, "%s", type);

    if (l->depth != 0 && idx != (unsigned)-1
        && l->type != HWLOC_OBJ_MISC
        && l->type != HWLOC_OBJ_PCI_DEVICE
        && (l->type != HWLOC_OBJ_BRIDGE || l->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_HOST))
      fprintf(output, " %s", logical ? lidxstr : pidxstr);

    if (l->name && (l->type == HWLOC_OBJ_MISC || l->type == HWLOC_OBJ_GROUP))
      fprintf(output, " %s", l->name);

    if (logical && l->os_index != (unsigned) -1 &&
        (verbose_mode >= 2 || l->type == HWLOC_OBJ_PU || l->type == HWLOC_OBJ_NUMANODE))
      snprintf(phys, sizeof(phys), "%s", pidxstr);

    /* display attributes: measure first, then format into a buffer of the
     * right size; on failure behave as if there were no attributes */
    len = hwloc_obj_attr_snprintf (NULL, 0, l, " ", verbose_mode-1);
    if (len > 0)
      attr = malloc((size_t)len + 1);
    if (attr) {
      *attr = '\0';
      hwloc_obj_attr_snprintf (attr, len+1, l, " ", verbose_mode-1);
      attrtext = attr;
    }
    if (*phys || *attrtext) {
      const char *separator = (*phys != '\0' && *attrtext != '\0') ? " " : "";
      fprintf(output, " (%s%s%s)", phys, separator, attrtext);
    }
    free(attr);

    /* display the root total_memory if not verbose (already shown)
     * and different from the local_memory (already shown) */
    if (verbose_mode == 1 && !l->parent && l->memory.total_memory > l->memory.local_memory)
      fprintf(output, " (%lu%s total)",
              (unsigned long) hwloc_memory_size_printf_value(l->memory.total_memory, 0),
              hwloc_memory_size_printf_unit(l->memory.total_memory, 0));

    /* append the name */
    if (l->name && (l->type == HWLOC_OBJ_OS_DEVICE || verbose_mode >= 2)
        && l->type != HWLOC_OBJ_MISC && l->type != HWLOC_OBJ_GROUP)
      fprintf(output, " \"%s\"", l->name);
  }

  if (!l->cpuset)
    return;

  if (lstopo_show_cpuset == 1)
    fprintf(output, " cpuset=");
  if (lstopo_show_cpuset) {
    char *cpusetstr = NULL;

    if (lstopo_show_taskset)
      hwloc_bitmap_taskset_asprintf(&cpusetstr, l->cpuset);
    else
      hwloc_bitmap_asprintf(&cpusetstr, l->cpuset);
    /* cpusetstr stays NULL if the conversion could not allocate */
    if (cpusetstr)
      fprintf(output, "%s", cpusetstr);
    free(cpusetstr);
  }

  /* annotate if the PU is forbidden/running */
  if (l->type == HWLOC_OBJ_PU && verbose_mode >= 2) {
    if (lstopo_pu_forbidden(l))
      fprintf(output, " (forbidden)");
    else if (lstopo_pu_running(topology, l))
      fprintf(output, " (running)");
  }
}
/* recursively climb the topology, pruning procs beyond that allowed
 * by the given ppr
 *
 * jobid, app_idx: only procs of this job and app context are counted and
 *                 are candidates for removal
 * node:           node whose proc array (node->procs) is inspected and edited
 * level:          in/out - level currently being pruned; it is decremented
 *                 before each recursive call, so the caller sees the last
 *                 level that was handled
 * nmapped:        in/out - decremented once for every proc removed
 *
 * The per-level limits are read from ppr[] and the hwloc object types from
 * opal_hwloc_levels[], both defined outside this function.
 *
 * On an attribute lookup problem the function logs ORTE_ERR_NOT_FOUND and
 * returns immediately; when no proc can be selected for removal it reports
 * "INFINITE LOOP" and returns.
 */
static void prune(orte_jobid_t jobid, orte_app_idx_t app_idx, orte_node_t *node, opal_hwloc_level_t *level, orte_vpid_t *nmapped)
{
    hwloc_obj_t obj, top;
    unsigned int i, nobjs;
    hwloc_obj_type_t lvl;
    unsigned cache_level = 0, k;
    int nprocs;
    hwloc_cpuset_t avail, cpus, childcpus;
    int n, limit, nmax, nunder, idx, idxmax = 0;
    orte_proc_t *proc, *pptr, *procmax;
    opal_hwloc_level_t ll;
    char dang[64];
    hwloc_obj_t locale;

    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:ppr: pruning level %d", *level);

    /* convenience: local copy of the level, unaffected by later --(*level) */
    ll = *level;

    /* convenience: object type and proc limit for this level */
    lvl = opal_hwloc_levels[ll];
    limit = ppr[ll];

    if (0 == limit) {
        /* no limit at this level, so move up if necessary */
        if (0 == ll) {
            /* done */
            return;
        }
        --(*level);
        prune(jobid, app_idx, node, level, nmapped);
        return;
    }

    /* handle the darn cache thing again: the cache levels additionally
     * need the cache depth; it stays 0 for every other level */
    if (OPAL_HWLOC_L3CACHE_LEVEL == ll) {
        cache_level = 3;
    } else if (OPAL_HWLOC_L2CACHE_LEVEL == ll) {
        cache_level = 2;
    } else if (OPAL_HWLOC_L1CACHE_LEVEL == ll) {
        cache_level = 1;
    }

    /* get the number of resources at this level on this node */
    nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology,
                                               lvl, cache_level,
                                               OPAL_HWLOC_AVAILABLE);

    /* for each resource, compute the number of procs sitting
     * underneath it and check against the limit */
    for (i=0; i < nobjs; i++) {
        obj = opal_hwloc_base_get_obj_by_type(node->topology,
                                              lvl, cache_level,
                                              i, OPAL_HWLOC_AVAILABLE);
        /* get the available cpuset */
        avail = opal_hwloc_base_get_available_cpus(node->topology, obj);

        /* look at the intersection of this object's cpuset and that
         * of each proc in the job/app - if they intersect, then count this proc
         * against the limit */
        nprocs = 0;
        for (n=0; n < node->procs->size; n++) {
            /* the pointer array may contain empty (NULL) slots */
            if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, n))) {
                continue;
            }
            if (proc->name.jobid != jobid ||
                proc->app_idx != app_idx) {
                continue;
            }
            locale = NULL;
            /* NOTE(review): the error path is taken when orte_get_attribute()
             * returns non-zero - confirm against its definition that non-zero
             * really means "attribute not found" and not "found" */
            if (orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                return;
            }
            cpus = opal_hwloc_base_get_available_cpus(node->topology, locale);
            if (hwloc_bitmap_intersects(avail, cpus)) {
                nprocs++;
            }
        }
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:ppr: found %d procs limit %d",
                            nprocs, limit);

        /* check against the limit: each pass removes exactly one proc */
        while (limit < nprocs) {
            /* need to remove procs - do this in a semi-intelligent
             * manner to provide a little load balancing by cycling
             * across the objects beneath this one, removing procs
             * in a round-robin fashion until the limit is satisfied
             *
             * NOTE: I'm sure someone more knowledgeable with hwloc
             * will come up with a more efficient way to do this, so
             * consider this is a starting point
             */

            /* find the first level that has more than
             * one child beneath it - if all levels
             * have only one child, then return this
             * object
             */
            top = find_split(node->topology, obj);
            hwloc_obj_type_snprintf(dang, 64, top, 1);
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:ppr: SPLIT AT LEVEL %s", dang);

            /* cycle across the children of this object */
            nmax = 0;
            procmax = NULL;
            idx = 0;
            /* find the child with the most procs underneath it */
            for (k=0; k < top->arity && limit < nprocs; k++) {
                /* get this object's available cpuset */
                childcpus = opal_hwloc_base_get_available_cpus(node->topology, top->children[k]);
                nunder = 0;
                pptr = NULL;
                for (n=0; n < node->procs->size; n++) {
                    if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, n))) {
                        continue;
                    }
                    if (proc->name.jobid != jobid ||
                        proc->app_idx != app_idx) {
                        continue;
                    }
                    locale = NULL;
                    /* NOTE(review): same return-value question as in the
                     * counting loop above */
                    if (orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
                        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                        return;
                    }
                    cpus = opal_hwloc_base_get_available_cpus(node->topology, locale);
                    if (hwloc_bitmap_intersects(childcpus, cpus)) {
                        nunder++;
                        if (NULL == pptr) {
                            /* save the location of the first proc under this object */
                            pptr = proc;
                            idx = n;
                        }
                    }
                }
                /* strict comparison: on a tie the earlier child is kept */
                if (nmax < nunder) {
                    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                        "mca:rmaps:ppr: PROCS UNDER CHILD %d %d MAX %d",
                                        k, nunder, nmax);
                    nmax = nunder;
                    procmax = pptr;
                    idxmax = idx;
                }
            }
            if (NULL == procmax) {
                /* can't find anything to remove - error out */
                goto error;
            }
            /* remove it: clear its slot, fix the node accounting (never
             * letting slots_inuse go negative), then drop the reference */
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:ppr: removing proc at posn %d",
                                idxmax);
            opal_pointer_array_set_item(node->procs, idxmax, NULL);
            node->num_procs--;
            node->slots_inuse--;
            if (node->slots_inuse < 0) {
                node->slots_inuse = 0;
            }
            nprocs--;
            *nmapped -= 1;
            OBJ_RELEASE(procmax);
        }
    }
    /* finished with this level - move up if necessary */
    if (0 == ll) {
        return;
    }
    --(*level);
    prune(jobid, app_idx, node, level, nmapped);
    return;

 error:
    /* reached when the limit is exceeded but no child holds a removable proc;
     * returning here keeps the while loop above from spinning forever */
    opal_output(0, "INFINITE LOOP");
}
int main(void) { hwloc_topology_t topology; hwloc_obj_t obj; unsigned n, i; int devid, platformid; const char *dev; /* Allocate, initialize and load topology object. */ hwloc_topology_init(&topology); hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_IO_DEVICES); hwloc_topology_load(topology); /* Find CUDA devices through the corresponding OS devices */ n = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_OS_DEVICE); for (i = 0; i < n ; i++) { const char *s; obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_OS_DEVICE, i); printf("%s:\n", obj->name); s = hwloc_obj_get_info_by_name(obj, "Backend"); if (s && !strcmp(s, "CUDA")) { /* This is a CUDA device */ assert(!strncmp(obj->name, "cuda", 4)); devid = atoi(obj->name + 4); printf("CUDA device %d\n", devid); s = hwloc_obj_get_info_by_name(obj, "GPUModel"); if (s) printf("Model: %s\n", s); s = hwloc_obj_get_info_by_name(obj, "CUDAGlobalMemorySize"); if (s) printf("Memory: %s\n", s); s = hwloc_obj_get_info_by_name(obj, "CUDAMultiProcessors"); if (s) { int mp = atoi(s); s = hwloc_obj_get_info_by_name(obj, "CUDACoresPerMP"); if (s) { int mp_cores = atoi(s); printf("Cores: %d\n", mp * mp_cores); } } } if (s && !strcmp(s, "OpenCL")) { /* This is an OpenCL device */ assert(!strncmp(obj->name, "opencl", 6)); platformid = atoi(obj->name + 6); printf("OpenCL platform %d\n", platformid); dev = strchr(obj->name + 6, 'd'); devid = atoi(dev + 1); printf("OpenCL device %d\n", devid); s = hwloc_obj_get_info_by_name(obj, "GPUModel"); if (s) printf("Model: %s\n", s); s = hwloc_obj_get_info_by_name(obj, "OpenCLGlobalMemorySize"); if (s) printf("Memory: %s\n", s); } /* One can also use helpers from hwloc/cuda.h, hwloc/cudart.h, * hwloc/opencl.h */ /* Find out cpuset this is connected to */ while (obj && (!obj->cpuset || hwloc_bitmap_iszero(obj->cpuset))) obj = obj->parent; if (obj) { char *cpuset_string; char name[16]; hwloc_obj_type_snprintf(name, sizeof(name), obj, 0); hwloc_bitmap_asprintf(&cpuset_string, obj->cpuset); 
printf("Location: %s P#%d\n", name, obj->os_index); printf("Cpuset: %s\n", cpuset_string); } printf("\n"); } /* Destroy topology object. */ hwloc_topology_destroy(topology); return 0; }
/*
 * Appends one branch of the machine topology to console_output.
 *
 * Recursive function that walks the topology below `obj`. The Machine object
 * itself produces no text. When the first child of an object is a PU, or has
 * the type given by `obj_type`, only that first child is followed; otherwise
 * all children are visited.
 *
 *  topology: the hwloc topology that represents the machine
 *  obj:      the current object
 *  depth:    number of blank columns printed in front of the line
 *  obj_type: object type at which only the first child is followed
 *
 * Text is formatted with snprintf() so that long descriptions get truncated
 * instead of overflowing the local buffer, and a childless non-PU object no
 * longer causes a NULL dereference.
 * NOTE(review): console_output is declared elsewhere and is appended to with
 * strcat(); its capacity is not visible here and is not checked.
 */
void print_machine_branch(hwloc_topology_t topology, hwloc_obj_t obj, int depth, int obj_type)
{
  char string[128], out_string[128];
  unsigned i, arity;
  int *devIds, devId, countDev;

  if (obj->type != HWLOC_OBJ_MACHINE) {
    if (obj->type == HWLOC_OBJ_NODE) {
      hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
      snprintf(out_string, sizeof(out_string), "%*s%s\n", depth, "", string);
      strcat(console_output, out_string);
#if defined (__DBCSR_ACC) || defined (__PW_CUDA)
      if ((local_topo->ngpus > 0) && (local_topo->ngpus < local_topo->ncores)) {
        ma_get_nDevcu(obj->logical_index, &countDev);
        /* NOTE(review): assumes ma_get_cu() only fills the array and does not
         * keep the pointer, so it can be released below - confirm */
        devIds = (countDev > 0) ? malloc(countDev * sizeof(int)) : NULL;
        if (devIds) {
          ma_get_cu(obj->logical_index, devIds);
          strcat(console_output, " Shared GPUS: ");
          for (i = 0; i < (unsigned) countDev; i++) {
            devId = devIds[i];
            snprintf(out_string, sizeof(out_string), "#%d ", devId);
            strcat(console_output, out_string);
          }
          strcat(console_output, "\n");
          free(devIds);
        }
      }
#endif
    }
    else if (obj->type == HWLOC_OBJ_SOCKET) {
      hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
      snprintf(out_string, sizeof(out_string), "%*s%s\n", depth, "", string);
      strcat(console_output, out_string);
    }
    else {
      hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
      if (obj->type == HWLOC_OBJ_PU) {
#if defined (__DBCSR_ACC) || defined (__PW_CUDA)
        snprintf(out_string, sizeof(out_string), "%*s%s\t", depth, "", string);
        strcat(console_output, out_string);
        if (local_topo->ngpus > 0 && local_topo->ngpus == local_topo->ncores) {
          ma_get_core_cu(obj->logical_index, &devId);
          strcat(console_output, " GPU: ");
          snprintf(out_string, sizeof(out_string), "%d ", devId);
          strcat(console_output, out_string);
        }
        strcat(console_output, "\n");
#else
        snprintf(out_string, sizeof(out_string), "%*s%s\n", depth, "", string);
        strcat(console_output, out_string);
#endif
      }
      else if (obj->type == HWLOC_OBJ_CACHE && obj->arity > 1) {
        hwloc_obj_type_snprintf(string, sizeof(string), obj, 0);
        snprintf(out_string, sizeof(out_string), "%*s%s", depth, "", string);
        strcat(console_output, out_string);
        /* the size expression is wider than int: convert explicitly and use
         * a matching conversion specifier */
        snprintf(out_string, sizeof(out_string), " (%lluMB)\n",
                 (unsigned long long)(obj->attr->cache.size / (1024*1024)));
        strcat(console_output, out_string);
      }
      else {
        snprintf(out_string, sizeof(out_string), "%*s%s\t", depth, "", string);
        strcat(console_output, out_string);
      }
    }
  }

  if (obj->type != HWLOC_OBJ_PU) { /* it is not a PU */
    /* first_child is tested once for both comparisons, so a childless object
     * simply falls through to its own arity */
    if (obj->first_child &&
        (obj->first_child->type == HWLOC_OBJ_PU || obj->first_child->type == obj_type))
      arity = 1; /* number of children */
    else
      arity = obj->arity;

    for (i = 0; i < arity; i++)
      print_machine_branch(topology, obj->children[i], depth+1, obj_type);
  }
}
/*
 * Appends the machine hierarchy to console_output.
 *
 * Recursive function that walks the topology below `obj` and writes one
 * piece of text per object. When the first child of an object is a PU only
 * that child is followed; otherwise all children are visited.
 *
 *  topo:  the hwloc topology
 *  obj:   the current object in the topology
 *  depth: number of blank columns printed in front of the line
 *
 * Changes compared to the previous version:
 *  - the `topo` parameter is used for the hwloc calls (the body referred to
 *    an identifier `topology` that is not a parameter of this function);
 *  - the OS-device attribute is only read on OS device objects; PCI devices
 *    and bridges still produce no text;
 *  - text is formatted with snprintf() so it is truncated instead of
 *    overflowing the local buffers.
 * NOTE(review): console_output is declared elsewhere and is appended to with
 * strcat(); its capacity is not visible here and is not checked.
 */
void print_machine(hwloc_topology_t topo, hwloc_obj_t obj, int depth)
{
  char string[256], out_string[256];
  unsigned i, arity;
  int *devIds, devId, countDev;

  if (obj->type == HWLOC_OBJ_SOCKET || obj->type == HWLOC_OBJ_MACHINE) {
    hwloc_obj_snprintf(string, sizeof(string), topo, obj, "#", 0);
    snprintf(out_string, sizeof(out_string), "%*s%s\n", depth, "", string);
    strcat(console_output, out_string);
  }
  else if (obj->type == HWLOC_OBJ_NODE) {
    hwloc_obj_snprintf(string, sizeof(string), topo, obj, "#", 0);
    snprintf(out_string, sizeof(out_string), "%*s%s\n", depth, "", string);
    strcat(console_output, out_string);
    /* if the machine has shared GPUs */
#if defined (__DBCSR_ACC) || defined (__PW_CUDA)
    if ((local_topo->ngpus > 0) && (local_topo->ngpus < local_topo->ncores)) {
      ma_get_nDevcu(obj->logical_index, &countDev);
      /* NOTE(review): assumes ma_get_cu() only fills the array and does not
       * keep the pointer, so it can be released below - confirm */
      devIds = (countDev > 0) ? malloc(countDev * sizeof(int)) : NULL;
      if (devIds) {
        ma_get_cu(obj->logical_index, devIds);
        strcat(console_output, " Shared GPUS: ");
        for (i = 0; i < (unsigned) countDev; i++) {
          devId = devIds[i];
          snprintf(out_string, sizeof(out_string), "#%d ", devId);
          strcat(console_output, out_string);
        }
        strcat(console_output, "\n");
        free(devIds);
      }
    }
#endif
  }
  else {
    hwloc_obj_snprintf(string, sizeof(string), topo, obj, "#", 0);
    if (obj->type == HWLOC_OBJ_PU) {
#if defined (__DBCSR_ACC) || defined (__PW_CUDA)
      snprintf(out_string, sizeof(out_string), "%*s%s\t", depth, "", string);
      strcat(console_output, out_string);
      if (local_topo->ngpus > 0 && local_topo->ngpus == local_topo->ncores) {
        ma_get_core_cu(obj->logical_index, &devId);
        strcat(console_output, " GPU: ");
        snprintf(out_string, sizeof(out_string), "%d ", devId);
        strcat(console_output, out_string);
      }
      strcat(console_output, "\n");
#else
      snprintf(out_string, sizeof(out_string), "%*s%s\n", depth, "", string);
      strcat(console_output, out_string);
#endif
    }
    else if (obj->type == HWLOC_OBJ_CACHE && obj->arity > 1) {
      hwloc_obj_type_snprintf(string, sizeof(string), obj, 0);
      snprintf(out_string, sizeof(out_string), "%*s%s", depth, "", string);
      strcat(console_output, out_string);
      /* the size expression is wider than int: convert explicitly and use a
       * matching conversion specifier */
      snprintf(out_string, sizeof(out_string), " (%lluMB)\n",
               (unsigned long long)(obj->attr->cache.size / (1024*1024)));
      strcat(console_output, out_string);
    }
    else if (obj->type == HWLOC_OBJ_OS_DEVICE ||
             obj->type == HWLOC_OBJ_PCI_DEVICE ||
             obj->type == HWLOC_OBJ_BRIDGE) {
      /* attr->osdev is only read when the object is an OS device */
      if (obj->type == HWLOC_OBJ_OS_DEVICE &&
          obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK) {
        snprintf(out_string, sizeof(out_string), "%*s%s\n", depth, "--", "Network Card");
        strcat(console_output, out_string);
      }
    }
    else if (obj->type == HWLOC_OBJ_CORE) {
      /* cores are labelled with their logical index */
      snprintf(string, sizeof(string), "Core#%u", obj->logical_index);
      snprintf(out_string, sizeof(out_string), "%*s%s\t", depth, "", string);
      strcat(console_output, out_string);
    }
    else {
      snprintf(out_string, sizeof(out_string), "%*s%s\t", depth, "", string);
      strcat(console_output, out_string);
    }
  }

  if (obj->type != HWLOC_OBJ_PU) { /* it is not a PU */
    if (obj->first_child && obj->first_child->type == HWLOC_OBJ_PU)
      arity = 1; /* number of children */
    else
      arity = obj->arity;

    for (i = 0; i < arity; i++)
      print_machine(topo, obj->children[i], depth+1);
  }
}
int main(void) { int depth; unsigned i, n; unsigned long size; int levels; char string[128]; int topodepth; void *m; hwloc_topology_t topology; hwloc_cpuset_t cpuset; hwloc_obj_t obj; /* Allocate and initialize topology object. */ hwloc_topology_init(&topology); /* ... Optionally, put detection configuration here to ignore some objects types, define a synthetic topology, etc.... The default is to detect all the objects of the machine that the caller is allowed to access. See Configure Topology Detection. */ /* Perform the topology detection. */ hwloc_topology_load(topology); /* Optionally, get some additional topology information in case we need the topology depth later. */ topodepth = hwloc_topology_get_depth(topology); /***************************************************************** * First example: * Walk the topology with an array style, from level 0 (always * the system level) to the lowest level (always the proc level). *****************************************************************/ for (depth = 0; depth < topodepth; depth++) { printf("*** Objects at level %d\n", depth); for (i = 0; i < hwloc_get_nbobjs_by_depth(topology, depth); i++) { hwloc_obj_type_snprintf(string, sizeof(string), hwloc_get_obj_by_depth(topology, depth, i), 0); printf("Index %u: %s\n", i, string); } } /***************************************************************** * Second example: * Walk the topology with a tree style. *****************************************************************/ printf("*** Printing overall tree\n"); print_children(topology, hwloc_get_root_obj(topology), 0); /***************************************************************** * Third example: * Print the number of packages. 
*****************************************************************/ depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PACKAGE); if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) { printf("*** The number of packages is unknown\n"); } else { printf("*** %u package(s)\n", hwloc_get_nbobjs_by_depth(topology, depth)); } /***************************************************************** * Fourth example: * Compute the amount of cache that the first logical processor * has above it. *****************************************************************/ levels = 0; size = 0; for (obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0); obj; obj = obj->parent) if (obj->type == HWLOC_OBJ_CACHE) { levels++; size += obj->attr->cache.size; } printf("*** Logical processor 0 has %d caches totaling %luKB\n", levels, size / 1024); /***************************************************************** * Fifth example: * Bind to only one thread of the last core of the machine. * * First find out where cores are, or else smaller sets of CPUs if * the OS doesn't have the notion of a "core". *****************************************************************/ depth = hwloc_get_type_or_below_depth(topology, HWLOC_OBJ_CORE); /* Get last core. */ obj = hwloc_get_obj_by_depth(topology, depth, hwloc_get_nbobjs_by_depth(topology, depth) - 1); if (obj) { /* Get a copy of its cpuset that we may modify. */ cpuset = hwloc_bitmap_dup(obj->cpuset); /* Get only one logical processor (in case the core is SMT/hyper-threaded). */ hwloc_bitmap_singlify(cpuset); /* And try to bind ourself there. */ if (hwloc_set_cpubind(topology, cpuset, 0)) { char *str; int error = errno; hwloc_bitmap_asprintf(&str, obj->cpuset); printf("Couldn't bind to cpuset %s: %s\n", str, strerror(error)); free(str); } /* Free our cpuset copy */ hwloc_bitmap_free(cpuset); } /***************************************************************** * Sixth example: * Allocate some memory on the last NUMA node, bind some existing * memory to the last NUMA node. 
*****************************************************************/ /* Get last node. There's always at least one. */ n = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE); obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, n - 1); size = 1024*1024; m = hwloc_alloc_membind_nodeset(topology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0); hwloc_free(topology, m, size); m = malloc(size); hwloc_set_area_membind_nodeset(topology, m, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0); free(m); /* Destroy topology object. */ hwloc_topology_destroy(topology); return 0; }
int main(void) { hwloc_topology_t topology; hwloc_bitmap_t set, set2; hwloc_const_bitmap_t cset_available, cset_all; hwloc_obj_t obj; char *buffer; char type[64]; unsigned i; int err; /* create a topology */ err = hwloc_topology_init(&topology); if (err < 0) { fprintf(stderr, "failed to initialize the topology\n"); return EXIT_FAILURE; } err = hwloc_topology_load(topology); if (err < 0) { fprintf(stderr, "failed to load the topology\n"); hwloc_topology_destroy(topology); return EXIT_FAILURE; } /* retrieve the entire set of available PUs */ cset_available = hwloc_topology_get_topology_cpuset(topology); /* retrieve the CPU binding of the current entire process */ set = hwloc_bitmap_alloc(); if (!set) { fprintf(stderr, "failed to allocate a bitmap\n"); hwloc_topology_destroy(topology); return EXIT_FAILURE; } err = hwloc_get_cpubind(topology, set, HWLOC_CPUBIND_PROCESS); if (err < 0) { fprintf(stderr, "failed to get cpu binding\n"); hwloc_bitmap_free(set); hwloc_topology_destroy(topology); } /* display the processing units that cannot be used by this process */ if (hwloc_bitmap_isequal(set, cset_available)) { printf("this process can use all available processing units in the system\n"); } else { /* compute the set where we currently cannot run. 
* we can't modify cset_available because it's a system read-only one, * so we do set = available &~ set */ hwloc_bitmap_andnot(set, cset_available, set); hwloc_bitmap_asprintf(&buffer, set); printf("process cannot use %d process units (%s) among %u in the system\n", hwloc_bitmap_weight(set), buffer, hwloc_bitmap_weight(cset_available)); free(buffer); /* restore set where it was before the &~ operation above */ hwloc_bitmap_andnot(set, cset_available, set); } /* print the smallest object covering the current process binding */ obj = hwloc_get_obj_covering_cpuset(topology, set); hwloc_obj_type_snprintf(type, sizeof(type), obj, 0); printf("process is bound within object %s logical index %u\n", type, obj->logical_index); /* retrieve the single PU where the current thread actually runs within this process binding */ set2 = hwloc_bitmap_alloc(); if (!set2) { fprintf(stderr, "failed to allocate a bitmap\n"); hwloc_bitmap_free(set); hwloc_topology_destroy(topology); return EXIT_FAILURE; } err = hwloc_get_last_cpu_location(topology, set2, HWLOC_CPUBIND_THREAD); if (err < 0) { fprintf(stderr, "failed to get last cpu location\n"); hwloc_bitmap_free(set); hwloc_bitmap_free(set2); hwloc_topology_destroy(topology); } /* sanity checks that are not actually needed but help the reader */ /* this thread runs within the process binding */ assert(hwloc_bitmap_isincluded(set2, set)); /* this thread runs on a single PU at a time */ assert(hwloc_bitmap_weight(set2) == 1); /* print the logical number of the PU where that thread runs */ /* extract the PU OS index from the bitmap */ i = hwloc_bitmap_first(set2); obj = hwloc_get_pu_obj_by_os_index(topology, i); printf("thread is now running on PU logical index %u (OS/physical index %u)\n", obj->logical_index, i); /* migrate this single thread to where other PUs within the current binding */ hwloc_bitmap_andnot(set2, set, set2); err = hwloc_set_cpubind(topology, set2, HWLOC_CPUBIND_THREAD); if (err < 0) { fprintf(stderr, "failed to set thread 
binding\n"); hwloc_bitmap_free(set); hwloc_bitmap_free(set2); hwloc_topology_destroy(topology); } /* reprint the PU where that thread runs */ err = hwloc_get_last_cpu_location(topology, set2, HWLOC_CPUBIND_THREAD); if (err < 0) { fprintf(stderr, "failed to get last cpu location\n"); hwloc_bitmap_free(set); hwloc_bitmap_free(set2); hwloc_topology_destroy(topology); } /* print the logical number of the PU where that thread runs */ /* extract the PU OS index from the bitmap */ i = hwloc_bitmap_first(set2); obj = hwloc_get_pu_obj_by_os_index(topology, i); printf("thread is running on PU logical index %u (OS/physical index %u)\n", obj->logical_index, i); hwloc_bitmap_free(set); hwloc_bitmap_free(set2); /* retrieve the entire set of all PUs */ cset_all = hwloc_topology_get_complete_cpuset(topology); if (hwloc_bitmap_isequal(cset_all, cset_available)) { printf("all hardware PUs are available\n"); } else { printf("only %d hardware PUs are available in the machine among %d\n", hwloc_bitmap_weight(cset_available), hwloc_bitmap_weight(cset_all)); } hwloc_topology_destroy(topology); return EXIT_SUCCESS; }