int PlatformTopology::num_domain(int domain_type) const
{
    int result = 0;
    try {
        result = hwloc_get_nbobjs_by_type(m_topo, hwloc_domain(domain_type));
    }
    catch (const Exception &ex) {
        if (ex.err_value() != GEOPM_ERROR_INVALID) {
            throw ex;
        }
        if (domain_type == GEOPM_DOMAIN_TILE) {
            /// @todo This assumes that tiles are just below
            /// package in hwloc hierarchy. If tiles are
            /// at L2 cache, but processor has an L3 cache,
            /// this may not be correct.
            int depth = hwloc_get_type_depth(m_topo, hwloc_domain(GEOPM_DOMAIN_PACKAGE)) + 1;
            result = hwloc_get_nbobjs_by_depth(m_topo, depth);
        }
        else {
            throw ex;
        }
        if (result == 0) {
            throw ex;
        }
    }
    return result;
}
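/* A minimal standalone sketch (plain C, outside of GEOPM) of the pattern the method
 * above relies on: hwloc_get_nbobjs_by_type() only gives a direct answer when the type
 * maps to exactly one depth, so callers may need a depth-based fallback.  The function
 * name and the fallback_depth parameter are illustrative, not part of any library API. */
#include <hwloc.h>

static int count_objs_or_fallback(hwloc_topology_t topo, hwloc_obj_type_t type, int fallback_depth)
{
    int n = hwloc_get_nbobjs_by_type(topo, type);
    if (n > 0)
        return n;                       /* the type exists at a single depth */
    /* 0 means the type is absent, -1 means it exists at multiple depths;
     * in both cases count at the caller-provided depth instead. */
    return hwloc_get_nbobjs_by_depth(topo, fallback_depth);
}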
static int hwloc_solaris_get_sth_cpubind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused) { processorid_t binding; int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); int n; int i; if (depth < 0) { errno = ENOSYS; return -1; } /* first check if processor_bind() was used to bind to a single processor rather than to an lgroup */ if ( processor_bind(idtype, id, PBIND_QUERY, &binding) == 0 && binding != PBIND_NONE ) { hwloc_bitmap_only(hwloc_set, binding); return 0; } /* if not, check lgroups */ hwloc_bitmap_zero(hwloc_set); n = hwloc_get_nbobjs_by_depth(topology, depth); for (i = 0; i < n; i++) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i); lgrp_affinity_t aff = lgrp_affinity_get(idtype, id, obj->os_index); if (aff == LGRP_AFF_STRONG) hwloc_bitmap_or(hwloc_set, hwloc_set, obj->cpuset); } if (hwloc_bitmap_iszero(hwloc_set)) hwloc_bitmap_copy(hwloc_set, hwloc_topology_get_complete_cpuset(topology)); return 0; }
thread_pool_t *create_threads(){ hwloc_topology_t topology; int i; local_thread_t *local; int nb_threads; unsigned int nb_cores; int depth; verbose_level = tm_get_verbose_level(); /*Get number of cores: set 1 thread per core*/ /* Allocate and initialize topology object. */ hwloc_topology_init(&topology); /* Only keep relevant levels hwloc_topology_ignore_all_keep_structure(topology);*/ /* Perform the topology detection. */ hwloc_topology_load(topology); depth = hwloc_topology_get_depth(topology); if (depth == -1 ) { if(verbose_level>=CRITICAL) fprintf(stderr,"Error: HWLOC unable to find the depth of the topology of this node!\n"); exit(-1); } /* at depth 'depth' it is necessary a PU/core where we can execute things*/ nb_cores = hwloc_get_nbobjs_by_depth(topology, depth-1); nb_threads = MIN(nb_cores, max_nb_threads); if(verbose_level>=INFO) printf("nb_threads = %d\n",nb_threads); pool = (thread_pool_t*) MALLOC(sizeof(thread_pool_t)); pool -> topology = topology; pool -> nb_threads = nb_threads; pool -> thread_list = (pthread_t*)MALLOC(sizeof(pthread_t)*nb_threads); pool -> working_list = (work_t*)CALLOC(nb_threads,sizeof(work_t)); pool -> cond_var = (pthread_cond_t*)MALLOC(sizeof(pthread_cond_t)*nb_threads); pool -> list_lock = (pthread_mutex_t*)MALLOC(sizeof(pthread_mutex_t)*nb_threads); local=(local_thread_t*)MALLOC(sizeof(local_thread_t)*nb_threads); pool->local = local; for (i=0;i<nb_threads;i++){ local[i].topology = topology; local[i].id = i; local[i].working_list = &pool->working_list[i]; pthread_cond_init(pool->cond_var +i, NULL); local[i].cond_var = pool->cond_var +i; pthread_mutex_init(pool->list_lock +i, NULL); local[i].list_lock = pool->list_lock+i; if (pthread_create (pool->thread_list+i, NULL, thread_loop, local+i) < 0) { if(verbose_level>=CRITICAL) fprintf(stderr, "pthread_create error for exec thread %d\n",i); return NULL; } } return pool; }
static void output_compute_pu_min_textwidth(struct lstopo_output *output) { unsigned fontsize = output->fontsize; char text[64]; int n; hwloc_topology_t topology = output->topology; hwloc_obj_t lastpu; if (!output->methods->textsize) { output->min_pu_textwidth = 0; return; } if (output->logical) { int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU); lastpu = hwloc_get_obj_by_depth(topology, depth, hwloc_get_nbobjs_by_depth(topology, depth)-1); } else { unsigned lastidx = hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(topology)); lastpu = hwloc_get_pu_obj_by_os_index(topology, lastidx); } n = lstopo_obj_snprintf(output, text, sizeof(text), lastpu); output->min_pu_textwidth = get_textwidth(output, text, n, fontsize); }
static int hwloc_solaris_get_sth_membind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_nodeset_t nodeset, hwloc_membind_policy_t *policy, int flags __hwloc_attribute_unused) { int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); int n; int i; if (depth < 0) { errno = ENOSYS; return -1; } hwloc_bitmap_zero(nodeset); n = hwloc_get_nbobjs_by_depth(topology, depth); for (i = 0; i < n; i++) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i); lgrp_affinity_t aff = lgrp_affinity_get(idtype, id, obj->os_index); if (aff == LGRP_AFF_STRONG) hwloc_bitmap_set(nodeset, obj->os_index); } if (hwloc_bitmap_iszero(nodeset)) hwloc_bitmap_copy(nodeset, hwloc_topology_get_complete_nodeset(topology)); *policy = HWLOC_MEMBIND_BIND; return 0; }
int main (void) { hwloc_topology_t topology; unsigned depth; hwloc_obj_t last; hwloc_obj_t *closest; unsigned found; int err; unsigned numprocs; hwloc_obj_t ancestor; err = hwloc_topology_init (&topology); if (err) return EXIT_FAILURE; hwloc_topology_set_synthetic (topology, "2 3 4 5"); err = hwloc_topology_load (topology); if (err) return EXIT_FAILURE; depth = hwloc_topology_get_depth(topology); /* get the last object of last level */ numprocs = hwloc_get_nbobjs_by_depth(topology, depth-1); last = hwloc_get_obj_by_depth(topology, depth-1, numprocs-1); /* allocate the array of closest objects */ closest = malloc(numprocs * sizeof(*closest)); assert(closest); /* get closest levels */ found = hwloc_get_closest_objs (topology, last, closest, numprocs); printf("looked for %u closest entries, found %u\n", numprocs, found); assert(found == numprocs-1); /* check first found is closest */ assert(closest[0] == hwloc_get_obj_by_depth(topology, depth-1, numprocs-5 /* arity is 5 on last level */)); /* check some other expected positions */ assert(closest[found-1] == hwloc_get_obj_by_depth(topology, depth-1, 1*3*4*5-1 /* last of first half */)); assert(closest[found/2-1] == hwloc_get_obj_by_depth(topology, depth-1, 1*3*4*5+2*4*5-1 /* last of second third of second half */)); assert(closest[found/2/3-1] == hwloc_get_obj_by_depth(topology, depth-1, 1*3*4*5+2*4*5+3*5-1 /* last of third quarter of third third of second half */)); /* get ancestor of last and less close object */ ancestor = hwloc_get_common_ancestor_obj(topology, last, closest[found-1]); assert(hwloc_obj_is_in_subtree(topology, last, ancestor)); assert(hwloc_obj_is_in_subtree(topology, closest[found-1], ancestor)); assert(ancestor == hwloc_get_root_obj(topology)->first_child); printf("ancestor type %u depth %u number %u is system level\n", ancestor->type, ancestor->depth, ancestor->logical_index); free(closest); hwloc_topology_destroy (topology); return EXIT_SUCCESS; }
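/* A small sketch of hwloc_get_closest_objs() on the detected machine instead of the
 * synthetic topology used in the test above: list the PUs closest to PU #0.  The array
 * bound of 64 is an arbitrary illustrative limit, not something required by hwloc. */
#include <stdio.h>
#include <hwloc.h>

int main(void)
{
    hwloc_topology_t topo;
    hwloc_obj_t pu0, closest[64];
    unsigned i, found;

    hwloc_topology_init(&topo);
    hwloc_topology_load(topo);

    pu0 = hwloc_get_obj_by_type(topo, HWLOC_OBJ_PU, 0);
    found = hwloc_get_closest_objs(topo, pu0, closest, 64);
    for (i = 0; i < found; i++)
        printf("PU L#%u is close to PU L#0\n", closest[i]->logical_index);

    hwloc_topology_destroy(topo);
    return 0;
}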
tm_topology_t* get_local_topo_with_hwloc(void) { hwloc_topology_t topology; tm_topology_t *res = NULL; hwloc_obj_t *objs = NULL; unsigned topodepth,depth; int nb_nodes,i; /* Build the topology */ hwloc_topology_init(&topology); hwloc_topology_ignore_all_keep_structure(topology); hwloc_topology_load(topology); /* Test if symetric */ if(!symetric(topology)){ if(get_verbose_level() >= CRITICAL) fprintf(stderr,"Local toplogy not symetric!\n"); exit(-1); } /* work on depth */ topodepth = hwloc_topology_get_depth(topology); res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t)); res->nb_levels = topodepth; res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels); res->nb_nodes = (int*)MALLOC(sizeof(int)*res->nb_levels); res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels); /* Build TreeMatch topology */ for( depth = 0 ; depth < topodepth ; depth++ ){ nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth); res->nb_nodes[depth] = nb_nodes; res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes); objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes); objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL); hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1); res->arity[depth] = objs[0]->arity; /* printf("%d:",res->arity[depth]); */ /* Build process id tab */ for (i = 0; i < nb_nodes; i++){ res->node_id[depth][i] = objs[i]->os_index; /* if(depth==topodepth-1) */ } FREE(objs); } /* Destroy HWLOC topology object. */ hwloc_topology_destroy(topology); /* printf("\n"); */ return res; }
int compute_context_nbr(int * ctxnbr, int threadnbr, int verbose)
{
#if (defined WITH_STARPU && defined STARPU_CONTEXT)
  if (*ctxnbr == -1)
    {
      int depth;
      unsigned i, n;
      int ncpu_per_socket, nsocket, ncpu;
      hwloc_topology_t topology;
      hwloc_obj_t obj;

      /* Allocate and initialize topology object. */
      hwloc_topology_init(&topology);

      /* ... Optionally, put detection configuration here to ignore
         some objects types, define a synthetic topology, etc....

         The default is to detect all the objects of the machine that
         the caller is allowed to access.  See Configure Topology
         Detection. */

      /* Perform the topology detection. */
      hwloc_topology_load(topology);

      depth = hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET);
      if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
        {
          /* The number of sockets is unknown; assume quad-core sockets... */
          ncpu_per_socket = 4;
        }
      else
        {
          ncpu    = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE);
          nsocket = hwloc_get_nbobjs_by_depth(topology, depth);
          ncpu_per_socket = ncpu/nsocket;
        }

      if (verbose > API_VERBOSE_NO)
        fprintf(stdout, "ncpu_per_socket %d\n", ncpu_per_socket);

      nsocket = threadnbr/ncpu_per_socket;
      if (threadnbr%ncpu_per_socket)
        nsocket++;
      *ctxnbr = nsocket + 1;
      hwloc_topology_destroy(topology);
    }

  if (threadnbr + 1 < *ctxnbr)
    *ctxnbr = threadnbr+1;
  if (*ctxnbr == 1)
    *ctxnbr = 2;

  /* can't have more than STARPU_NMAX_SCHED_CTXS CTX */
  {
    PASTIX_INT nctx_bot = *ctxnbr - 1;
    while (*ctxnbr > STARPU_NMAX_SCHED_CTXS)
      {
        nctx_bot /= 2;
        *ctxnbr  = nctx_bot + 1;
      }
  }
#endif /* WITH_STARPU */
  return 0;
}
static void check_level(hwloc_topology_t topology, unsigned depth, unsigned width, unsigned arity) { unsigned j; assert(hwloc_get_nbobjs_by_depth(topology, depth) == width); for(j=0; j<width; j++) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, j); assert(obj); assert(obj->arity == arity); } }
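/* Hypothetical driver for the check_level() helper above, in the same test file,
 * using the synthetic topology "2 3 4": one root of arity 2, then 2 objects of
 * arity 3, 6 of arity 4, and 24 leaves of arity 0. */
#include <hwloc.h>

static void check_synthetic_234(void)
{
    hwloc_topology_t topo;
    hwloc_topology_init(&topo);
    hwloc_topology_set_synthetic(topo, "2 3 4");
    hwloc_topology_load(topo);

    check_level(topo, 0, 1, 2);
    check_level(topo, 1, 2, 3);
    check_level(topo, 2, 6, 4);
    check_level(topo, 3, 24, 0);

    hwloc_topology_destroy(topo);
}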
magma_int_t magma_get_parallel_numthreads()
{
    // query number of cores
    magma_int_t ncores = 0;

#ifdef HAVE_HWLOC
    // hwloc gives physical cores, not hyperthreads
    // from http://stackoverflow.com/questions/12483399/getting-number-of-cores-not-ht-threads
    hwloc_topology_t topology;
    hwloc_topology_init( &topology );
    hwloc_topology_load( topology );
    magma_int_t depth = hwloc_get_type_depth( topology, HWLOC_OBJ_CORE );
    if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
        ncores = hwloc_get_nbobjs_by_depth( topology, depth );
    }
    hwloc_topology_destroy( topology );
#endif

    if ( ncores == 0 ) {
#ifdef _MSC_VER  // Windows
        SYSTEM_INFO sysinfo;
        GetSystemInfo( &sysinfo );
        ncores = sysinfo.dwNumberOfProcessors;
#else
        ncores = sysconf( _SC_NPROCESSORS_ONLN );
#endif
    }

    // query MAGMA_NUM_THREADS or OpenMP
    const char *threads_str = getenv("MAGMA_NUM_THREADS");
    magma_int_t threads = 0;
    if ( threads_str != NULL ) {
        char* endptr;
        threads = strtol( threads_str, &endptr, 10 );
        if ( threads < 1 || *endptr != '\0' ) {
            threads = 1;
            fprintf( stderr, "$MAGMA_NUM_THREADS='%s' is an invalid number; using %d thread.\n",
                     threads_str, (int) threads );
        }
    }
    else {
#if defined(_OPENMP)
        #pragma omp parallel
        {
            threads = omp_get_num_threads();
        }
#else
        threads = ncores;
#endif
    }

    // limit to range [1, number of cores]
    threads = max( 1, min( ncores, threads ));
    return threads;
}
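/* A compact sketch of the distinction the routine above depends on: HWLOC_OBJ_CORE
 * counts physical cores while HWLOC_OBJ_PU counts hardware threads, so the two values
 * differ on SMT machines.  Plain C, independent of MAGMA. */
#include <stdio.h>
#include <hwloc.h>

static void print_core_vs_pu_count(void)
{
    hwloc_topology_t topo;
    int ncores, npus;

    hwloc_topology_init(&topo);
    hwloc_topology_load(topo);
    ncores = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_CORE);
    npus   = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
    printf("%d physical cores, %d hardware threads\n", ncores, npus);
    hwloc_topology_destroy(topo);
}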
/******************* FUNCTION *********************/
int TopoHwloc::getNbEntities ( int level, int depth ) const
{
    //get real depth
    int absDepth = getAbsDepth(level, depth);
    allocAssume(absDepth != -1, "Cannot find the depth for the requested level, depth couple in hwloc.");  //level,depth

    //get number of objs
    int res = hwloc_get_nbobjs_by_depth(topology, absDepth);
    allocAssume(res > 0, "Invalid nbobjs_by_depth in hwloc for absolute depth ....");  //absDepth

    return res;
}
static void check(hwloc_topology_t topology) { unsigned depth; unsigned i,j; depth = hwloc_topology_get_depth(topology); for(i=0; i<depth; i++) { for(j=0; j<hwloc_get_nbobjs_by_depth(topology, i); j++) { assert(hwloc_get_obj_by_depth(topology, i, j)->userdata == NULL); } } }
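/* Hypothetical driver for the check() helper above, in the same test file: every
 * object's userdata pointer is expected to still be NULL right after a fresh load,
 * before any annotation pass has run. */
#include <hwloc.h>

static void check_fresh_topology(void)
{
    hwloc_topology_t topo;
    hwloc_topology_init(&topo);
    hwloc_topology_load(topo);
    check(topo);                 /* all userdata fields should still be NULL */
    hwloc_topology_destroy(topo);
}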
static int hwloc_aix_get_sth_membind(hwloc_topology_t topology, rstype_t what, rsid_t who, hwloc_bitmap_t nodeset, hwloc_membind_policy_t *policy, int flags __hwloc_attribute_unused) { hwloc_bitmap_t hwloc_set; rsethandle_t rset; unsigned cpu, maxcpus; int res = -1; int depth, n, i; depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); if (depth < 0) { errno = EXDEV; return -1; } n = hwloc_get_nbobjs_by_depth(topology, depth); rset = rs_alloc(RS_EMPTY); if (ra_getrset(what, who, 0, rset) == -1) goto out; hwloc_set = hwloc_bitmap_alloc(); maxcpus = rs_getinfo(rset, R_MAXPROCS, 0); for (cpu = 0; cpu < maxcpus; cpu++) if (rs_op(RS_TESTRESOURCE, rset, NULL, R_PROCS, cpu) == 1) hwloc_bitmap_set(hwloc_set, cpu); hwloc_bitmap_and(hwloc_set, hwloc_set, hwloc_topology_get_complete_cpuset(topology)); hwloc_bitmap_zero(nodeset); for (i = 0; i < n; i++) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i); if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set)) hwloc_bitmap_set(nodeset, obj->os_index); } hwloc_bitmap_free(hwloc_set); *policy = HWLOC_MEMBIND_BIND; res = 0; out: rs_free(rset); return res; }
static int hwloc_solaris_set_sth_membind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) { int depth; int n, i; switch (policy) { case HWLOC_MEMBIND_DEFAULT: case HWLOC_MEMBIND_BIND: break; default: errno = ENOSYS; return -1; } if (flags & HWLOC_MEMBIND_NOCPUBIND) { errno = ENOSYS; return -1; } depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); if (depth < 0) { errno = EXDEV; return -1; } n = hwloc_get_nbobjs_by_depth(topology, depth); for (i = 0; i < n; i++) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i); if (hwloc_bitmap_isset(nodeset, obj->os_index)) { lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_STRONG); } else { if (flags & HWLOC_CPUBIND_STRICT) lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE); else lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_WEAK); } } return 0; }
int symetric(hwloc_topology_t topology) { int depth,i,topodepth = hwloc_topology_get_depth(topology); unsigned int arity; hwloc_obj_t obj; for ( depth = 0; depth < topodepth-1 ; depth++ ) { int N = hwloc_get_nbobjs_by_depth(topology, depth); obj = hwloc_get_next_obj_by_depth (topology,depth,NULL); arity = obj->arity; /* printf("Depth=%d, N=%d, Arity:%d\n",depth,N,arity); */ for (i = 1; i < N; i++ ){ obj = hwloc_get_next_obj_by_depth (topology,depth,obj); if( obj->arity != arity){ /* printf("[%d]: obj->arity=%d, arity=%d\n",i,obj->arity,arity); */ return 0; } } } return 1; }
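/* Hedged usage sketch for symetric() above: a topology counts as symmetric here when
 * every object of a level has the same arity, which is what the tree-mapping code
 * relies on.  Only the printing wrapper is new. */
#include <stdio.h>
#include <hwloc.h>

static void report_symmetry(void)
{
    hwloc_topology_t topo;
    hwloc_topology_init(&topo);
    hwloc_topology_load(topo);
    if (symetric(topo))
        printf("local topology is symmetric\n");
    else
        printf("local topology is NOT symmetric\n");
    hwloc_topology_destroy(topo);
}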
void Hwloc::getNumSockets(unsigned int &allowedNodes, int &numSockets, unsigned int &hwThreads)
{
#ifdef HWLOC
   numSockets = 0;
   // Nodes that can be seen by hwloc
   allowedNodes = 0;
   // Hardware threads
   hwThreads = 0;

   int depth = hwloc_get_type_depth( _hwlocTopology, HWLOC_OBJ_NODE );

   // If there are NUMA nodes in this machine
   if ( depth != HWLOC_TYPE_DEPTH_UNKNOWN ) {
      //hwloc_const_cpuset_t cpuset = hwloc_topology_get_online_cpuset( _hwlocTopology );
      //allowedNodes = hwloc_get_nbobjs_inside_cpuset_by_type( _hwlocTopology, cpuset, HWLOC_OBJ_NODE );
      //hwThreads = hwloc_get_nbobjs_inside_cpuset_by_type( _hwlocTopology, cpuset, HWLOC_OBJ_PU );
      unsigned nodes = hwloc_get_nbobjs_by_depth( _hwlocTopology, depth );
      //hwloc_cpuset_t set = i

      // For each node, count how many hardware threads there are below.
      for ( unsigned nodeIdx = 0; nodeIdx < nodes; ++nodeIdx ) {
         hwloc_obj_t node = hwloc_get_obj_by_depth( _hwlocTopology, depth, nodeIdx );
         int localThreads = hwloc_get_nbobjs_inside_cpuset_by_type( _hwlocTopology, node->cpuset, HWLOC_OBJ_PU );
         // Increase hw thread count
         hwThreads += localThreads;
         // If this node has hw threads beneath, increase the number of viewable nodes
         if ( localThreads > 0 ) ++allowedNodes;
      }
      numSockets = nodes;
   }
   // Otherwise, set it to 1
   else {
      allowedNodes = 1;
      numSockets = 1;
   }
#else
   numSockets = 0;
   allowedNodes = 0;
#endif
}
int bind_myself_to_core(hwloc_topology_t topology, int id){ hwloc_cpuset_t cpuset; hwloc_obj_t obj; char *str; int binding_res; int depth = hwloc_topology_get_depth(topology); int nb_cores = hwloc_get_nbobjs_by_depth(topology, depth-1); int my_core; int nb_threads = get_nb_threads(); /* printf("depth=%d\n",depth); */ switch (mapping_policy){ case SCATTER: my_core = id*(nb_cores/nb_threads); break; default: if(verbose_level>=WARNING){ printf("Wrong scheduling policy. Using COMPACT\n"); } case COMPACT: my_core = id%nb_cores; } if(verbose_level>=INFO){ printf("Mapping thread %d on core %d\n",id,my_core); } /* Get my core. */ obj = hwloc_get_obj_by_depth(topology, depth-1, my_core); if (obj) { /* Get a copy of its cpuset that we may modify. */ cpuset = hwloc_bitmap_dup(obj->cpuset); /* Get only one logical processor (in case the core is SMT/hyperthreaded). */ hwloc_bitmap_singlify(cpuset); /*hwloc_bitmap_asprintf(&str, cpuset); printf("Binding thread %d to cpuset %s\n", my_core,str); FREE(str); */ /* And try to bind ourself there. */ binding_res = hwloc_set_cpubind(topology, cpuset, HWLOC_CPUBIND_THREAD); if (binding_res == -1){ int error = errno; hwloc_bitmap_asprintf(&str, obj->cpuset); if(verbose_level>=WARNING) printf("Thread %d couldn't bind to cpuset %s: %s.\n This thread is not bound to any core...\n", my_core, str, strerror(error)); free(str); /* str is allocated by hlwoc, free it normally*/ return 0; } /* FREE our cpuset copy */ hwloc_bitmap_free(cpuset); return 1; }else{ if(verbose_level>=WARNING) printf("No valid object for core id %d!\n",my_core); return 0; } }
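/* A minimal distillation of the binding pattern used above, without the scatter/compact
 * policy logic: bind the calling thread to one hardware thread of a given core index.
 * Returns 0 on success, -1 on failure; the helper name is illustrative only. */
#include <hwloc.h>

static int bind_to_core_index(hwloc_topology_t topo, unsigned core_idx)
{
    hwloc_obj_t core = hwloc_get_obj_by_type(topo, HWLOC_OBJ_CORE, core_idx);
    hwloc_cpuset_t set;
    int err;

    if (!core)
        return -1;
    set = hwloc_bitmap_dup(core->cpuset);       /* copy we are allowed to modify */
    hwloc_bitmap_singlify(set);                 /* keep a single PU in case of SMT */
    err = hwloc_set_cpubind(topo, set, HWLOC_CPUBIND_THREAD);
    hwloc_bitmap_free(set);
    return err;
}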
int main(void) { hwloc_topology_t topology; #ifdef HWLOC_HAVE_CPU_SET unsigned depth; hwloc_bitmap_t hwlocset; cpu_set_t schedset; hwloc_obj_t obj; int err; #endif /* HWLOC_HAVE_CPU_SET */ hwloc_topology_init(&topology); hwloc_topology_load(topology); #ifdef HWLOC_HAVE_CPU_SET depth = hwloc_topology_get_depth(topology); hwlocset = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset(topology)); hwloc_cpuset_to_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset)); #ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY err = sched_setaffinity(0, sizeof(schedset)); #else err = sched_setaffinity(0, sizeof(schedset), &schedset); #endif assert(!err); hwloc_bitmap_free(hwlocset); #ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY err = sched_getaffinity(0, sizeof(schedset)); #else err = sched_getaffinity(0, sizeof(schedset), &schedset); #endif assert(!err); hwlocset = hwloc_bitmap_alloc(); hwloc_cpuset_from_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset)); assert(hwloc_bitmap_isincluded(hwlocset, hwloc_topology_get_complete_cpuset(topology))); hwloc_bitmap_andnot(hwlocset, hwlocset, hwloc_topology_get_online_cpuset(topology)); hwloc_bitmap_andnot(hwlocset, hwlocset, hwloc_topology_get_allowed_cpuset(topology)); assert(hwloc_bitmap_iszero(hwlocset)); hwloc_bitmap_free(hwlocset); obj = hwloc_get_obj_by_depth(topology, depth-1, hwloc_get_nbobjs_by_depth(topology, depth-1) - 1); assert(obj); assert(obj->type == HWLOC_OBJ_PU); hwlocset = hwloc_bitmap_dup(obj->cpuset); hwloc_cpuset_to_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset)); #ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY err = sched_setaffinity(0, sizeof(schedset)); #else err = sched_setaffinity(0, sizeof(schedset), &schedset); #endif assert(!err); hwloc_bitmap_free(hwlocset); #ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY err = sched_getaffinity(0, sizeof(schedset)); #else err = sched_getaffinity(0, sizeof(schedset), &schedset); #endif assert(!err); hwlocset = hwloc_bitmap_alloc(); hwloc_cpuset_from_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset)); assert(hwloc_bitmap_isequal(hwlocset, obj->cpuset)); hwloc_bitmap_free(hwlocset); #endif /* HWLOC_HAVE_CPU_SET */ hwloc_topology_destroy(topology); return 0; }
extern int get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads, uint16_t *p_block_map_size, uint16_t **p_block_map, uint16_t **p_block_map_inv) { enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 }; hwloc_topology_t topology; hwloc_obj_t obj; hwloc_obj_type_t objtype[LAST_OBJ]; unsigned idx[LAST_OBJ]; int nobj[LAST_OBJ]; int actual_cpus; int macid; int absid; int actual_boards = 1, depth; int i; debug2("hwloc_topology_init"); if (hwloc_topology_init(&topology)) { /* error in initialize hwloc library */ debug("hwloc_topology_init() failed."); return 1; } /* parse all system */ hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM); /* ignores cache, misc */ hwloc_topology_ignore_type (topology, HWLOC_OBJ_CACHE); hwloc_topology_ignore_type (topology, HWLOC_OBJ_MISC); /* load topology */ debug2("hwloc_topology_load"); if (hwloc_topology_load(topology)) { /* error in load hardware topology */ debug("hwloc_topology_load() failed."); hwloc_topology_destroy(topology); return 2; } /* At least on a temporary basis, one could map AMD Bulldozer entities * onto the entities that Slurm does optimize placement for today (e.g. * map each Bulldozer core to a thread and each Bulldozer module to a * Slurm core, alternately map the Bulldozer module to a Slurm socket * and the Bulldozer socket to a Slurm board). Perhaps not ideal, but * it would achieve the desired locality. */ if ( hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) > hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET) ) { /* One socket contains multiple NUMA-nodes * like AMD Opteron 6000 series etc. * In such case, use NUMA-node instead of socket. */ objtype[SOCKET] = HWLOC_OBJ_NODE; objtype[CORE] = HWLOC_OBJ_CORE; objtype[PU] = HWLOC_OBJ_PU; } else { objtype[SOCKET] = HWLOC_OBJ_SOCKET; objtype[CORE] = HWLOC_OBJ_CORE; objtype[PU] = HWLOC_OBJ_PU; } /* number of objects */ depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP); if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth), 1); } nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology, objtype[SOCKET]); nobj[CORE] = hwloc_get_nbobjs_by_type(topology, objtype[CORE]); actual_cpus = hwloc_get_nbobjs_by_type(topology, objtype[PU]); nobj[PU] = actual_cpus/nobj[CORE]; /* threads per core */ nobj[CORE] /= nobj[SOCKET]; /* cores per socket */ debug("CPUs:%d Boards:%u Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d", actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]); /* allocate block_map */ *p_block_map_size = (uint16_t)actual_cpus; if (p_block_map && p_block_map_inv) { *p_block_map = xmalloc(actual_cpus * sizeof(uint16_t)); *p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t)); /* initialize default as linear mapping */ for (i = 0; i < actual_cpus; i++) { (*p_block_map)[i] = i; (*p_block_map_inv)[i] = i; } /* create map with hwloc */ for (idx[SOCKET]=0; idx[SOCKET]<nobj[SOCKET]; ++idx[SOCKET]) { for (idx[CORE]=0; idx[CORE]<nobj[CORE]; ++idx[CORE]) { for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) { /* get hwloc_obj by indexes */ obj=hwloc_get_obj_below_array_by_type( topology, 3, objtype, idx); if (!obj) continue; macid = obj->os_index; absid = idx[SOCKET]*nobj[CORE]*nobj[PU] + idx[CORE]*nobj[PU] + idx[PU]; if ((macid >= actual_cpus) || (absid >= actual_cpus)) { /* physical or logical ID are * out of range */ continue; } debug4("CPU map[%d]=>%d", absid, macid); (*p_block_map)[absid] = macid; (*p_block_map_inv)[macid] = absid; } } } } hwloc_topology_destroy(topology); /* update 
output parameters */
	*p_cpus    = actual_cpus;
	*p_boards  = actual_boards;
	*p_sockets = nobj[SOCKET];
	*p_cores   = nobj[CORE];
	*p_threads = nobj[PU];

#if DEBUG_DETAIL
	/*** Display raw data ***/
	debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u",
	      *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads);

	/* Display the mapping tables */
	if (p_block_map && p_block_map_inv) {
		debug("------");
		debug("Abstract -> Machine logical CPU ID block mapping:");
		debug("AbstractId PhysicalId Inverse");
		for (i = 0; i < *p_cpus; i++) {
			debug3("   %4d      %4u       %4u",
			       i, (*p_block_map)[i], (*p_block_map_inv)[i]);
		}
		debug("------");
	}
#endif
	return 0;
}
int main(void) { hwloc_topology_t local, global; hwloc_obj_t sw1, sw2, sw11, sw12, sw21, sw22, root; int err; printf("Loading the local topology...\n"); hwloc_topology_init(&local); hwloc_topology_set_synthetic(local, "n:2 s:2 ca:1 core:2 ca:2 pu:2"); hwloc_topology_load(local); printf("Try to create an empty custom topology...\n"); hwloc_topology_init(&global); hwloc_topology_set_custom(global); err = hwloc_topology_load(global); assert(err == -1); assert(errno == EINVAL); hwloc_topology_destroy(global); printf("Creating a custom topology...\n"); hwloc_topology_init(&global); hwloc_topology_set_custom(global); printf("Inserting the local topology into the global one...\n"); root = hwloc_get_root_obj(global); sw1 = hwloc_custom_insert_group_object_by_parent(global, root, 0); sw11 = hwloc_custom_insert_group_object_by_parent(global, sw1, 1); hwloc_custom_insert_topology(global, sw11, local, NULL); hwloc_custom_insert_topology(global, sw11, local, NULL); sw12 = hwloc_custom_insert_group_object_by_parent(global, sw1, 1); hwloc_custom_insert_topology(global, sw12, local, NULL); hwloc_custom_insert_topology(global, sw12, local, NULL); sw2 = hwloc_custom_insert_group_object_by_parent(global, root, 0); sw21 = hwloc_custom_insert_group_object_by_parent(global, sw2, 1); hwloc_custom_insert_topology(global, sw21, local, NULL); hwloc_custom_insert_topology(global, sw21, local, NULL); hwloc_custom_insert_topology(global, sw21, local, NULL); sw22 = hwloc_custom_insert_group_object_by_parent(global, sw2, 1); hwloc_custom_insert_topology(global, sw22, local, NULL); /* only one to check that it won't get merged */ hwloc_topology_destroy(local); printf("Building the global topology...\n"); hwloc_topology_load(global); hwloc_topology_check(global); assert(hwloc_topology_get_depth(global) == 10); assert(hwloc_get_depth_type(global, 0) == HWLOC_OBJ_SYSTEM); assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_SYSTEM) == 1); assert(hwloc_get_depth_type(global, 1) == HWLOC_OBJ_GROUP); assert(hwloc_get_nbobjs_by_depth(global, 1) == 2); assert(hwloc_get_depth_type(global, 2) == HWLOC_OBJ_GROUP); assert(hwloc_get_nbobjs_by_depth(global, 2) == 4); /* the last group of this level shouldn't be merged */ assert(hwloc_get_depth_type(global, 3) == HWLOC_OBJ_MACHINE); assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_MACHINE) == 8); assert(hwloc_get_depth_type(global, 4) == HWLOC_OBJ_NODE); assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_NODE) == 16); assert(hwloc_get_depth_type(global, 5) == HWLOC_OBJ_SOCKET); assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_SOCKET) == 32); assert(hwloc_get_depth_type(global, 6) == HWLOC_OBJ_CACHE); assert(hwloc_get_nbobjs_by_depth(global, 6) == 32); assert(hwloc_get_depth_type(global, 7) == HWLOC_OBJ_CORE); assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_CORE) == 64); assert(hwloc_get_depth_type(global, 8) == HWLOC_OBJ_CACHE); assert(hwloc_get_nbobjs_by_depth(global, 8) == 128); assert(hwloc_get_depth_type(global, 9) == HWLOC_OBJ_PU); assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_PU) == 256); hwloc_topology_destroy(global); return 0; }
static HYD_status handle_bitmap_binding(const char *binding, const char *mapping) { int i, j, k, bind_count, map_count, cache_depth = 0, bind_depth = 0, map_depth = 0; int total_map_objs, total_bind_objs, num_pus_in_map_domain, num_pus_in_bind_domain, total_map_domains; hwloc_obj_t map_obj, bind_obj, *start_pu; hwloc_cpuset_t *map_domains; char *bind_str, *map_str; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); /* split out the count fields */ status = split_count_field(binding, &bind_str, &bind_count); HYDU_ERR_POP(status, "error splitting count field\n"); status = split_count_field(mapping, &map_str, &map_count); HYDU_ERR_POP(status, "error splitting count field\n"); /* get the binding object */ if (!strcmp(bind_str, "board")) bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_MACHINE); else if (!strcmp(bind_str, "numa")) bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_NODE); else if (!strcmp(bind_str, "socket")) bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_SOCKET); else if (!strcmp(bind_str, "core")) bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_CORE); else if (!strcmp(bind_str, "hwthread")) bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_PU); else { /* check if it's in the l*cache format */ cache_depth = parse_cache_string(bind_str); if (!cache_depth) { HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "unrecognized binding string \"%s\"\n", binding); } bind_depth = hwloc_get_cache_type_depth(topology, cache_depth, -1); } /* get the mapping */ if (!strcmp(map_str, "board")) map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_MACHINE); else if (!strcmp(map_str, "numa")) map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_NODE); else if (!strcmp(map_str, "socket")) map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_SOCKET); else if (!strcmp(map_str, "core")) map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_CORE); else if (!strcmp(map_str, "hwthread")) map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_PU); else { cache_depth = parse_cache_string(map_str); if (!cache_depth) { HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR, "unrecognized mapping string \"%s\"\n", mapping); } map_depth = hwloc_get_cache_type_depth(topology, cache_depth, -1); } /* * Process Affinity Algorithm: * * The code below works in 3 stages. The end result is an array of all the possible * binding bitmaps for a system, based on the options specified. * * 1. Define all possible mapping "domains" in a system. A mapping domain is a group * of hardware elements found by traversing the topology. Each traversal skips the * number of elements the user specified in the mapping string. The traversal ends * when the next mapping domain == the first mapping domain. Note that if the * mapping string defines a domain that is larger than the system size, we exit * with an error. * * 2. Define the number of possible binding domains within a mapping domain. This * process is similar to step 1, in that we traverse the mapping domain finding * all possible bind combinations, stopping when a duplicate of the first binding * is reached. If a binding is larger (in # of PUs) than the mapping domain, * the number of possible bindings for that domain is 1. In this stage, we also * locate the first PU in each mapping domain for use later during binding. * * 3. Create the binding bitmaps. We allocate an array of bitmaps and fill them in * with all possible bindings. 
The starting PU in each mapping domain is advanced * if and when we wrap around to the beginning of the mapping domains. This ensures * that we do not repeat. * */ /* calculate the number of map domains */ total_map_objs = hwloc_get_nbobjs_by_depth(topology, map_depth); num_pus_in_map_domain = (HYDT_topo_hwloc_info.total_num_pus / total_map_objs) * map_count; HYDU_ERR_CHKANDJUMP(status, num_pus_in_map_domain > HYDT_topo_hwloc_info.total_num_pus, HYD_INTERNAL_ERROR, "mapping option \"%s\" larger than total system size\n", mapping); /* The number of total_map_domains should be large enough to * contain all contiguous map object collections of length * map_count. For example, if the map object is "socket" and the * map_count is 3, on a system with 4 sockets, the following map * domains should be included: (0,1,2), (3,0,1), (2,3,0), (1,2,3). * We do this by finding how many times we need to replicate the * list of the map objects so that an integral number of map * domains can map to them. In the above case, the list of map * objects is replicated 3 times. */ for (i = 1; (i * total_map_objs) % map_count; i++); total_map_domains = (i * total_map_objs) / map_count; /* initialize the map domains */ HYDU_MALLOC_OR_JUMP(map_domains, hwloc_bitmap_t *, total_map_domains * sizeof(hwloc_bitmap_t), status); HYDU_MALLOC_OR_JUMP(start_pu, hwloc_obj_t *, total_map_domains * sizeof(hwloc_obj_t), status); /* For each map domain, find the next map object (first map object * for the first map domain) and add the following "map_count" * number of contiguous map objects, wrapping to the first one if * needed, to the map domain. Store the first PU in the first map * object of the map domain as "start_pu". This is needed later * for the actual binding. */ map_obj = NULL; for (i = 0; i < total_map_domains; i++) { map_domains[i] = hwloc_bitmap_alloc(); hwloc_bitmap_zero(map_domains[i]); for (j = 0; j < map_count; j++) { map_obj = hwloc_get_next_obj_by_depth(topology, map_depth, map_obj); /* map_obj will be NULL if it reaches the end. call again to wrap around */ if (!map_obj) map_obj = hwloc_get_next_obj_by_depth(topology, map_depth, map_obj); if (j == 0) start_pu[i] = hwloc_get_obj_inside_cpuset_by_type(topology, map_obj->cpuset, HWLOC_OBJ_PU, 0); hwloc_bitmap_or(map_domains[i], map_domains[i], map_obj->cpuset); } } /* Find the possible binding domains is similar to that of map * domains. But if a binding domain is larger (in # of PUs) than * the mapping domain, the number of possible bindings for that * domain is 1. 
*/ /* calculate the number of possible bindings and allocate bitmaps for them */ total_bind_objs = hwloc_get_nbobjs_by_depth(topology, bind_depth); num_pus_in_bind_domain = (HYDT_topo_hwloc_info.total_num_pus / total_bind_objs) * bind_count; if (num_pus_in_bind_domain < num_pus_in_map_domain) { for (i = 1; (i * num_pus_in_map_domain) % num_pus_in_bind_domain; i++); HYDT_topo_hwloc_info.num_bitmaps = (i * num_pus_in_map_domain * total_map_domains) / num_pus_in_bind_domain; } else { HYDT_topo_hwloc_info.num_bitmaps = total_map_domains; } /* initialize bitmaps */ HYDU_MALLOC_OR_JUMP(HYDT_topo_hwloc_info.bitmap, hwloc_bitmap_t *, HYDT_topo_hwloc_info.num_bitmaps * sizeof(hwloc_bitmap_t), status); for (i = 0; i < HYDT_topo_hwloc_info.num_bitmaps; i++) { HYDT_topo_hwloc_info.bitmap[i] = hwloc_bitmap_alloc(); hwloc_bitmap_zero(HYDT_topo_hwloc_info.bitmap[i]); } /* do bindings */ i = 0; while (i < HYDT_topo_hwloc_info.num_bitmaps) { for (j = 0; j < total_map_domains; j++) { bind_obj = hwloc_get_ancestor_obj_by_depth(topology, bind_depth, start_pu[j]); for (k = 0; k < bind_count; k++) { hwloc_bitmap_or(HYDT_topo_hwloc_info.bitmap[i], HYDT_topo_hwloc_info.bitmap[i], bind_obj->cpuset); /* if the binding is smaller than the mapping domain, wrap around inside that domain */ if (num_pus_in_bind_domain < num_pus_in_map_domain) { bind_obj = hwloc_get_next_obj_inside_cpuset_by_depth(topology, map_domains[j], bind_depth, bind_obj); if (!bind_obj) bind_obj = hwloc_get_next_obj_inside_cpuset_by_depth(topology, map_domains[j], bind_depth, bind_obj); } else { bind_obj = hwloc_get_next_obj_by_depth(topology, bind_depth, bind_obj); if (!bind_obj) bind_obj = hwloc_get_next_obj_by_depth(topology, bind_depth, bind_obj); } } i++; /* advance the starting position for this map domain, if needed */ if (num_pus_in_bind_domain < num_pus_in_map_domain) { for (k = 0; k < num_pus_in_bind_domain; k++) { start_pu[j] = hwloc_get_next_obj_inside_cpuset_by_type(topology, map_domains[j], HWLOC_OBJ_PU, start_pu[j]); if (!start_pu[j]) start_pu[j] = hwloc_get_next_obj_inside_cpuset_by_type(topology, map_domains[j], HWLOC_OBJ_PU, start_pu[j]); } } } } /* free temporary memory */ MPL_free(map_domains); MPL_free(start_pu); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
extern int get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards, uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads, uint16_t *p_block_map_size, uint16_t **p_block_map, uint16_t **p_block_map_inv) { enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 }; hwloc_topology_t topology; hwloc_obj_t obj; hwloc_obj_type_t objtype[LAST_OBJ]; unsigned idx[LAST_OBJ]; int nobj[LAST_OBJ]; int actual_cpus; int macid; int absid; int actual_boards = 1, depth; int i; debug2("hwloc_topology_init"); if (hwloc_topology_init(&topology)) { /* error in initialize hwloc library */ debug("hwloc_topology_init() failed."); return 1; } /* parse all system */ hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM); /* ignores cache, misc */ hwloc_topology_ignore_type (topology, HWLOC_OBJ_CACHE); hwloc_topology_ignore_type (topology, HWLOC_OBJ_MISC); /* load topology */ debug2("hwloc_topology_load"); if (hwloc_topology_load(topology)) { /* error in load hardware topology */ debug("hwloc_topology_load() failed."); hwloc_topology_destroy(topology); return 2; } /* Some processors (e.g. AMD Opteron 6000 series) contain multiple * NUMA nodes per socket. This is a configuration which does not map * into the hardware entities that Slurm optimizes resource allocation * for (PU/thread, core, socket, baseboard, node and network switch). * In order to optimize resource allocations on such hardware, Slurm * will consider each NUMA node within the socket as a separate socket. * You can disable this configuring "SchedulerParameters=Ignore_NUMA", * in which case Slurm will report the correct socket count on the node, * but not be able to optimize resource allocations on the NUMA nodes. */ objtype[SOCKET] = HWLOC_OBJ_SOCKET; objtype[CORE] = HWLOC_OBJ_CORE; objtype[PU] = HWLOC_OBJ_PU; if (hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) > hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET)) { char *sched_params = slurm_get_sched_params(); if (sched_params && strcasestr(sched_params, "Ignore_NUMA")) { info("Ignoring NUMA nodes within a socket"); } else { info("Considering each NUMA node as a socket"); objtype[SOCKET] = HWLOC_OBJ_NODE; } xfree(sched_params); } /* number of objects */ depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP); if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth), 1); } nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology, objtype[SOCKET]); nobj[CORE] = hwloc_get_nbobjs_by_type(topology, objtype[CORE]); /* * Workaround for hwloc * hwloc_get_nbobjs_by_type() returns 0 on some architectures. 
*/ if ( nobj[SOCKET] == 0 ) { debug("get_cpuinfo() fudging nobj[SOCKET] from 0 to 1"); nobj[SOCKET] = 1; } if ( nobj[CORE] == 0 ) { debug("get_cpuinfo() fudging nobj[CORE] from 0 to 1"); nobj[CORE] = 1; } if ( nobj[SOCKET] == -1 ) fatal("get_cpuinfo() can not handle nobj[SOCKET] = -1"); if ( nobj[CORE] == -1 ) fatal("get_cpuinfo() can not handle nobj[CORE] = -1"); actual_cpus = hwloc_get_nbobjs_by_type(topology, objtype[PU]); #if 0 /* Used to find workaround above */ info("CORE = %d SOCKET = %d actual_cpus = %d nobj[CORE] = %d", CORE, SOCKET, actual_cpus, nobj[CORE]); #endif nobj[PU] = actual_cpus/nobj[CORE]; /* threads per core */ nobj[CORE] /= nobj[SOCKET]; /* cores per socket */ debug("CPUs:%d Boards:%u Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d", actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]); /* allocate block_map */ *p_block_map_size = (uint16_t)actual_cpus; if (p_block_map && p_block_map_inv) { *p_block_map = xmalloc(actual_cpus * sizeof(uint16_t)); *p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t)); /* initialize default as linear mapping */ for (i = 0; i < actual_cpus; i++) { (*p_block_map)[i] = i; (*p_block_map_inv)[i] = i; } /* create map with hwloc */ for (idx[SOCKET]=0; idx[SOCKET]<nobj[SOCKET]; ++idx[SOCKET]) { for (idx[CORE]=0; idx[CORE]<nobj[CORE]; ++idx[CORE]) { for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) { /* get hwloc_obj by indexes */ obj=hwloc_get_obj_below_array_by_type( topology, 3, objtype, idx); if (!obj) continue; macid = obj->os_index; absid = idx[SOCKET]*nobj[CORE]*nobj[PU] + idx[CORE]*nobj[PU] + idx[PU]; if ((macid >= actual_cpus) || (absid >= actual_cpus)) { /* physical or logical ID are * out of range */ continue; } debug4("CPU map[%d]=>%d", absid, macid); (*p_block_map)[absid] = macid; (*p_block_map_inv)[macid] = absid; } } } } hwloc_topology_destroy(topology); /* update output parameters */ *p_cpus = actual_cpus; *p_boards = actual_boards; *p_sockets = nobj[SOCKET]; *p_cores = nobj[CORE]; *p_threads = nobj[PU]; #if DEBUG_DETAIL /*** Display raw data ***/ debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u", *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads); /* Display the mapping tables */ if (p_block_map && p_block_map_inv) { debug("------"); debug("Abstract -> Machine logical CPU ID block mapping:"); debug("AbstractId PhysicalId Inverse"); for (i = 0; i < *p_cpus; i++) { debug3(" %4d %4u %4u", i, (*p_block_map)[i], (*p_block_map_inv)[i]); } debug("------"); } #endif return 0; }
//Initializes HWLOC and load the machine architecture int hw_topology_init (struct arch_topology *topo) { hwloc_obj_t obj, core1, core2; int count, i, j, error; //Create the machine representation error = hwloc_topology_init(&topology); //Go throught the topology only if HWLOC is //successifully initialized if(!error) { hwloc_topology_load(topology); local_topo = malloc(sizeof(struct arch_topology)); #if defined (__DBCSR_ACC) || defined (__PW_CUDA) int nDev; ma_get_ndevices_cu(&nDev); #endif //Extract number of NUMA nodes if (hwloc_get_type_depth (topology, HWLOC_OBJ_NODE)) topo->nnodes = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_NODE)); else topo->nnodes = 0; //Get number of cores, sockets and processing units topo->ncores = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_CORE)); topo->nsockets = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_SOCKET)); topo->npus = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_PU)); //Compute number of memory controlers per socket //basically the number of NUMA nodes per socket if (topo->nnodes > topo->nsockets) topo->nmemcontroller = topo->nnodes/topo->nsockets; else topo->nmemcontroller = 1; count = 0; topo->nshared_caches = 0; //Get derivate information - get number of cache per PU for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,0); obj; obj = obj->parent) { if (obj->type == HWLOC_OBJ_CACHE) { if (obj->arity>1) topo->nshared_caches++; else { count++; topo->ncaches = count; } } } //Number of direct siblings //Siblings cores are the ones that share at least one component //level of the architecture count = 0; core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 0); core2 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 1); obj = hwloc_get_common_ancestor_obj(topology, core1, core2); if (obj) topo->nsiblings = obj->arity; //Machine node and core representation machine_nodes = (struct node*) malloc (topo->nnodes*sizeof(struct node)); machine_cores = (struct core*) malloc (topo->ncores*sizeof(struct core)); phys_cpus = malloc (topo->ncores*sizeof(int)); get_phys_id(topology, topo->ncores, 0); //Get the caches sizes and other information for each core for (i = 0; i < topo->ncores ; i++) { machine_cores[i].caches = malloc (topo->ncaches*sizeof(size_t)); machine_cores[i].shared_caches = malloc (topo->ncaches*sizeof(int)); for (j = 0; j < topo->ncaches; j++) machine_cores[i].shared_caches[j] = 0; for (j = topo->ncaches ; j > topo->ncaches - topo->nshared_caches; j--) machine_cores[i].shared_caches[j-1] = 1; machine_cores[i].nsiblings = topo->nsiblings; machine_cores[i].siblings_id = malloc (topo->nsiblings*sizeof(unsigned)); if(topo->ncores == topo->npus){ core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, i); machine_cores[i].id = core1->os_index; count = 0; for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i); obj; obj = obj->parent) { if (obj->type == HWLOC_OBJ_CACHE){ machine_cores[i].caches[count] = obj->attr->cache.size / 1024; count++; } if (obj->type == HWLOC_OBJ_NODE) machine_cores[i].numaNode = obj->logical_index; } } else{ core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, i); machine_cores[i].id = core1->os_index; count = 0; for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_CORE,i); obj; obj = obj->parent) { if (obj->type == HWLOC_OBJ_CACHE) { machine_cores[i].caches[count] = obj->attr->cache.size / 1024; count++; } if (obj->type == HWLOC_OBJ_NODE) machine_cores[i].numaNode = obj->logical_index; 
} } } //Get siblings id - so each core knows its siblings for (i = 0; i < topo->ncores ; i++) { if(topo->ncores == topo->npus){ core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, i); set_phys_siblings(i,machine_cores[i].id,core1,topo->ncores,topo->nsiblings,HWLOC_OBJ_PU); } else{ core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, i); set_phys_siblings(i,machine_cores[i].id,core1,topo->ncores,topo->nsiblings,HWLOC_OBJ_CORE); } } int ncore_node = topo->ncores/topo->nnodes; int count_cores; //Get the information for each NUMAnode for (i = 0; i < topo->nnodes ; i++) { obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, i); machine_nodes[i].id = obj->os_index; machine_nodes[i].memory = obj->memory.total_memory; machine_nodes[i].ncores = ncore_node; machine_nodes[i].mycores = malloc (ncore_node*sizeof(unsigned)); //Get the cores id of each NUMAnode count_cores = 0; set_node_cores(topology, obj, i, &count_cores); //GPU support #if defined (__DBCSR_ACC) || defined (__PW_CUDA) int *devIds; devIds = malloc (nDev*sizeof(int)); topo->ngpus = nDev; ma_get_cu(i,devIds); machine_nodes[i].mygpus = devIds; #endif } //counting network cards count = 0; hwloc_topology_t topo_net; error = hwloc_topology_init(&topo_net); hwloc_topology_set_flags(topo_net, HWLOC_TOPOLOGY_FLAG_IO_DEVICES); if (!error){ hwloc_topology_load(topo_net); for (obj = hwloc_get_obj_by_type(topo_net, HWLOC_OBJ_OS_DEVICE, 0); obj; obj = hwloc_get_next_osdev(topo_net,obj)) if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK || obj->attr->osdev.type == HWLOC_OBJ_OSDEV_OPENFABRICS) count++; topo->nnetcards = count; } else //if can not load I/O devices topo->nnetcards = 0; hwloc_topology_destroy(topo_net); /*Local copy of the machine topology components*/ local_topo->nnodes = topo->nnodes; local_topo->nsockets = topo->nsockets; local_topo->ncores = topo->ncores; local_topo->npus = topo->npus; local_topo->ngpus = topo->ngpus; local_topo->ncaches = topo->ncaches; local_topo->nshared_caches = topo->nshared_caches; local_topo->nsiblings = topo->nsiblings; local_topo->nmemcontroller = topo->nmemcontroller; local_topo->nnetcards = topo->nnetcards; } return error; }
int main(void) { int depth; unsigned i, n; unsigned long size; int levels; char string[128]; int topodepth; void *m; hwloc_topology_t topology; hwloc_cpuset_t cpuset; hwloc_obj_t obj; /* Allocate and initialize topology object. */ hwloc_topology_init(&topology); /* ... Optionally, put detection configuration here to ignore some objects types, define a synthetic topology, etc.... The default is to detect all the objects of the machine that the caller is allowed to access. See Configure Topology Detection. */ /* Perform the topology detection. */ hwloc_topology_load(topology); /* Optionally, get some additional topology information in case we need the topology depth later. */ topodepth = hwloc_topology_get_depth(topology); /***************************************************************** * First example: * Walk the topology with an array style, from level 0 (always * the system level) to the lowest level (always the proc level). *****************************************************************/ for (depth = 0; depth < topodepth; depth++) { printf("*** Objects at level %d\n", depth); for (i = 0; i < hwloc_get_nbobjs_by_depth(topology, depth); i++) { hwloc_obj_type_snprintf(string, sizeof(string), hwloc_get_obj_by_depth(topology, depth, i), 0); printf("Index %u: %s\n", i, string); } } /***************************************************************** * Second example: * Walk the topology with a tree style. *****************************************************************/ printf("*** Printing overall tree\n"); print_children(topology, hwloc_get_root_obj(topology), 0); /***************************************************************** * Third example: * Print the number of packages. *****************************************************************/ depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PACKAGE); if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) { printf("*** The number of packages is unknown\n"); } else { printf("*** %u package(s)\n", hwloc_get_nbobjs_by_depth(topology, depth)); } /***************************************************************** * Fourth example: * Compute the amount of cache that the first logical processor * has above it. *****************************************************************/ levels = 0; size = 0; for (obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0); obj; obj = obj->parent) if (obj->type == HWLOC_OBJ_CACHE) { levels++; size += obj->attr->cache.size; } printf("*** Logical processor 0 has %d caches totaling %luKB\n", levels, size / 1024); /***************************************************************** * Fifth example: * Bind to only one thread of the last core of the machine. * * First find out where cores are, or else smaller sets of CPUs if * the OS doesn't have the notion of a "core". *****************************************************************/ depth = hwloc_get_type_or_below_depth(topology, HWLOC_OBJ_CORE); /* Get last core. */ obj = hwloc_get_obj_by_depth(topology, depth, hwloc_get_nbobjs_by_depth(topology, depth) - 1); if (obj) { /* Get a copy of its cpuset that we may modify. */ cpuset = hwloc_bitmap_dup(obj->cpuset); /* Get only one logical processor (in case the core is SMT/hyper-threaded). */ hwloc_bitmap_singlify(cpuset); /* And try to bind ourself there. 
*/ if (hwloc_set_cpubind(topology, cpuset, 0)) { char *str; int error = errno; hwloc_bitmap_asprintf(&str, obj->cpuset); printf("Couldn't bind to cpuset %s: %s\n", str, strerror(error)); free(str); } /* Free our cpuset copy */ hwloc_bitmap_free(cpuset); } /***************************************************************** * Sixth example: * Allocate some memory on the last NUMA node, bind some existing * memory to the last NUMA node. *****************************************************************/ /* Get last node. There's always at least one. */ n = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE); obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, n - 1); size = 1024*1024; m = hwloc_alloc_membind_nodeset(topology, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0); hwloc_free(topology, m, size); m = malloc(size); hwloc_set_area_membind_nodeset(topology, m, size, obj->nodeset, HWLOC_MEMBIND_BIND, 0); free(m); /* Destroy topology object. */ hwloc_topology_destroy(topology); return 0; }
int main(void) { hwloc_topology_t topology; unsigned depth; hwloc_obj_t objs[OBJ_MAX]; hwloc_obj_t obj; hwloc_bitmap_t set; int ret; hwloc_topology_init(&topology); hwloc_topology_set_synthetic(topology, SYNTHETIC_TOPOLOGY_DESCRIPTION); hwloc_topology_load(topology); depth = hwloc_topology_get_depth(topology); /* just get the system object */ obj = hwloc_get_root_obj(topology); ret = hwloc_get_largest_objs_inside_cpuset(topology, obj->cpuset, objs, 1); assert(ret == 1); assert(objs[0] == obj); objs[0] = hwloc_get_first_largest_obj_inside_cpuset(topology, obj->cpuset); assert(objs[0] == obj); /* just get the very last object */ obj = hwloc_get_obj_by_depth(topology, depth-1, hwloc_get_nbobjs_by_depth(topology, depth-1)-1); ret = hwloc_get_largest_objs_inside_cpuset(topology, obj->cpuset, objs, 1); assert(ret == 1); assert(objs[0] == obj); /* try an empty one */ set = hwloc_bitmap_alloc(); ret = hwloc_get_largest_objs_inside_cpuset(topology, set, objs, 1); assert(ret == 0); objs[0] = hwloc_get_first_largest_obj_inside_cpuset(topology, set); assert(objs[0] == NULL); hwloc_bitmap_free(set); /* try an impossible one */ set = hwloc_bitmap_alloc(); hwloc_bitmap_sscanf(set, GIVEN_TOOLARGE_CPUSET_STRING); ret = hwloc_get_largest_objs_inside_cpuset(topology, set, objs, 1); assert(ret == -1); objs[0] = hwloc_get_first_largest_obj_inside_cpuset(topology, set); assert(objs[0] == NULL); hwloc_bitmap_free(set); /* try a harder one with 1 obj instead of 2 needed */ set = hwloc_bitmap_alloc(); hwloc_bitmap_sscanf(set, GIVEN_LARGESPLIT_CPUSET_STRING); ret = hwloc_get_largest_objs_inside_cpuset(topology, set, objs, 1); assert(ret == 1); assert(objs[0] == hwloc_get_obj_by_depth(topology, depth-1, 0)); objs[0] = hwloc_get_first_largest_obj_inside_cpuset(topology, set); assert(objs[0] == hwloc_get_obj_by_depth(topology, depth-1, 0)); /* try a harder one with lots of objs instead of 2 needed */ ret = hwloc_get_largest_objs_inside_cpuset(topology, set, objs, 2); assert(ret == 2); assert(objs[0] == hwloc_get_obj_by_depth(topology, depth-1, 0)); assert(objs[1] == hwloc_get_obj_by_depth(topology, depth-1, hwloc_get_nbobjs_by_depth(topology, depth-1)-1)); objs[0] = hwloc_get_first_largest_obj_inside_cpuset(topology, set); hwloc_bitmap_andnot(set, set, objs[0]->cpuset); objs[1] = hwloc_get_first_largest_obj_inside_cpuset(topology, set); hwloc_bitmap_andnot(set, set, objs[1]->cpuset); objs[2] = hwloc_get_first_largest_obj_inside_cpuset(topology, set); assert(objs[0] == hwloc_get_obj_by_depth(topology, depth-1, 0)); assert(objs[1] == hwloc_get_obj_by_depth(topology, depth-1, hwloc_get_nbobjs_by_depth(topology, depth-1)-1)); assert(objs[2] == NULL); assert(hwloc_bitmap_iszero(set)); hwloc_bitmap_free(set); /* try a very hard one */ set = hwloc_bitmap_alloc(); hwloc_bitmap_sscanf(set, GIVEN_HARD_CPUSET_STRING); ret = hwloc_get_largest_objs_inside_cpuset(topology, set, objs, OBJ_MAX); assert(objs[0] == hwloc_get_obj_by_depth(topology, 5, 29)); assert(objs[1] == hwloc_get_obj_by_depth(topology, 3, 5)); assert(objs[2] == hwloc_get_obj_by_depth(topology, 3, 6)); assert(objs[3] == hwloc_get_obj_by_depth(topology, 3, 7)); assert(objs[4] == hwloc_get_obj_by_depth(topology, 2, 2)); assert(objs[5] == hwloc_get_obj_by_depth(topology, 4, 36)); assert(objs[6] == hwloc_get_obj_by_depth(topology, 5, 74)); hwloc_bitmap_free(set); hwloc_topology_destroy(topology); return EXIT_SUCCESS; }
int main(void) { hwloc_topology_t topology; unsigned depth; unsigned i,j, width; char buffer[1024]; int err; /* check a synthetic topology */ hwloc_topology_init(&topology); err = hwloc_topology_set_synthetic(topology, "2 3 4 5 6"); assert(!err); hwloc_topology_load(topology); /* internal checks */ hwloc_topology_check(topology); /* local checks */ depth = hwloc_topology_get_depth(topology); assert(depth == 6); width = 1; for(i=0; i<6; i++) { /* check arities */ assert(hwloc_get_nbobjs_by_depth(topology, i) == width); for(j=0; j<width; j++) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, i, j); assert(obj); assert(obj->arity == (i<5 ? i+2 : 0)); } width *= i+2; } err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0); assert(err == 75); err = strcmp("Package:2 NUMANode:3(memory=1073741824) L2Cache:4(size=4194304) Core:5 PU:6", buffer); assert(!err); err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES|HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS); assert(err == 42); err = strcmp("Package:2 NUMANode:3 L2Cache:4 Core:5 PU:6", buffer); assert(!err); hwloc_topology_destroy(topology); hwloc_topology_init(&topology); err = hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L1ICACHE, HWLOC_TYPE_FILTER_KEEP_ALL); err = hwloc_topology_set_synthetic(topology, "pack:2(indexes=3,5) numa:2(memory=256GB indexes=pack) l3u:1(size=20mb) l2:2 l1i:1(size=16kB) l1dcache:2 core:1 pu:2(indexes=l2)"); assert(!err); hwloc_topology_load(topology); err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0); assert(err == 181); err = strcmp("Package:2 NUMANode:2(memory=274877906944 indexes=2*2:1*2) L3Cache:1(size=20971520) L2Cache:2(size=4194304) L1iCache:1(size=16384) L1dCache:2(size=32768) Core:1 PU:2(indexes=4*8:1*4)", buffer); assert(!err); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PACKAGE, 1)->os_index == 5); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 1)->os_index == 2); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 12)->os_index == 3); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 13)->os_index == 11); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 14)->os_index == 19); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 15)->os_index == 27); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 16)->os_index == 4); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 17)->os_index == 12); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 18)->os_index == 20); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 19)->os_index == 28); hwloc_topology_destroy(topology); hwloc_topology_init(&topology); err = hwloc_topology_set_synthetic(topology, "pack:2 core:2 pu:2(indexes=0,4,2,6,1,5,3,7)"); assert(!err); hwloc_topology_load(topology); err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0); assert(err == 72); err = strcmp("NUMANode:1(memory=1073741824) Package:2 Core:2 PU:2(indexes=4*2:2*2:1*2)", buffer); assert(!err); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0)->os_index == 0); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 1)->os_index == 4); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 2)->os_index == 2); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 3)->os_index == 6); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 4)->os_index == 1); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 5)->os_index == 5); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 6)->os_index == 3); 
assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 7)->os_index == 7); hwloc_topology_destroy(topology); hwloc_topology_init(&topology); err = hwloc_topology_set_synthetic(topology, "pack:2 numa:2 core:1 pu:2(indexes=0,4,2,6,1,3,5,7)"); assert(!err); hwloc_topology_load(topology); err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0); assert(err == 76); err = strcmp("Package:2 NUMANode:2(memory=1073741824) Core:1 PU:2(indexes=0,4,2,6,1,3,5,7)", buffer); assert(!err); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0)->os_index == 0); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 1)->os_index == 4); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 2)->os_index == 2); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 3)->os_index == 6); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 4)->os_index == 1); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 5)->os_index == 3); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 6)->os_index == 5); assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 7)->os_index == 7); hwloc_topology_destroy(topology); return 0; }
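/* A minimal sketch of the synthetic round trip exercised above: describe a
 * machine as a string, load it, and export the normalized description back.
 * The "pack:2 core:4 pu:2" string is only an example. */
#include <stdio.h>
#include <hwloc.h>

int main(void)
{
  hwloc_topology_t topology;
  char buffer[1024];
  hwloc_topology_init(&topology);
  hwloc_topology_set_synthetic(topology, "pack:2 core:4 pu:2");
  hwloc_topology_load(topology);
  if (hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0) >= 0)
    printf("%s\n", buffer); /* prints the canonical form, with default attributes filled in */
  hwloc_topology_destroy(topology);
  return 0;
}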
int pocl_topology_detect_device_info(cl_device_id device) { hwloc_topology_t pocl_topology; int ret = 0; #ifdef HWLOC_API_2 if (hwloc_get_api_version () < 0x20000) POCL_MSG_ERR ("pocl was compiled against libhwloc 2.x but is " "actually running against libhwloc 1.x \n"); #else if (hwloc_get_api_version () >= 0x20000) POCL_MSG_ERR ("pocl was compiled against libhwloc 1.x but is " "actually running against libhwloc 2.x \n"); #endif /* * hwloc's OpenCL backend causes problems at the initialization stage * because it reloads libpocl.so via the ICD loader. * * See: https://github.com/pocl/pocl/issues/261 * * The only trick to stop hwloc from initializing the OpenCL plugin * I could find is to point the plugin search path to a place where there * are no plugins to be found. */ setenv ("HWLOC_PLUGINS_PATH", "/dev/null", 1); ret = hwloc_topology_init (&pocl_topology); if (ret == -1) { POCL_MSG_ERR ("Cannot initialize the topology.\n"); return ret; } #ifdef HWLOC_API_2 hwloc_topology_set_io_types_filter(pocl_topology, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_SYSTEM, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_GROUP, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_BRIDGE, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_MISC, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_PCI_DEVICE, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_OS_DEVICE, HWLOC_TYPE_FILTER_KEEP_NONE); #else hwloc_topology_ignore_type (pocl_topology, HWLOC_TOPOLOGY_FLAG_WHOLE_IO); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_SYSTEM); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_GROUP); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_BRIDGE); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_MISC); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_PCI_DEVICE); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_OS_DEVICE); #endif ret = hwloc_topology_load (pocl_topology); if (ret == -1) { POCL_MSG_ERR ("Cannot load the topology.\n"); goto exit_destroy; } #ifdef HWLOC_API_2 device->global_mem_size = hwloc_get_root_obj(pocl_topology)->total_memory; #else device->global_mem_size = hwloc_get_root_obj(pocl_topology)->memory.total_memory; #endif /* Try to get the number of CPU cores from topology */ int depth = hwloc_get_type_depth(pocl_topology, HWLOC_OBJ_PU); if(depth != HWLOC_TYPE_DEPTH_UNKNOWN) device->max_compute_units = hwloc_get_nbobjs_by_depth(pocl_topology, depth); /* Find information about global memory cache by looking at the first * cache covering the first PU */ do { size_t cache_size = 0, cacheline_size = 0; hwloc_obj_t core = hwloc_get_next_obj_by_type (pocl_topology, HWLOC_OBJ_CORE, NULL); if (core) { hwloc_obj_t cache = hwloc_get_shared_cache_covering_obj (pocl_topology, core); if ((cache) && (cache->attr)) { cacheline_size = cache->attr->cache.linesize; cache_size = cache->attr->cache.size; } else core = NULL; /* fallback to L1 cache size */ } hwloc_obj_t pu = hwloc_get_next_obj_by_type (pocl_topology, HWLOC_OBJ_PU, NULL); if (!core && pu) { hwloc_obj_t cache = hwloc_get_shared_cache_covering_obj (pocl_topology, pu); if ((cache) && (cache->attr)) { cacheline_size = cache->attr->cache.linesize; cache_size = cache->attr->cache.size; } } if (!cache_size || !cacheline_size) break; device->global_mem_cache_type = 0x2; /* CL_READ_WRITE_CACHE, without including all of CL/cl.h */
device->global_mem_cacheline_size = cacheline_size; device->global_mem_cache_size = cache_size; } while (0); /* Destroy topology object and return */ exit_destroy: hwloc_topology_destroy (pocl_topology); return ret; }
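/* A minimal sketch of the same safety check using hwloc's stock
 * HWLOC_API_VERSION macro instead of pocl's own HWLOC_API_2 define:
 * compare the compile-time and runtime major versions. */
#include <stdio.h>
#include <hwloc.h>

int main(void)
{
  unsigned compiled = HWLOC_API_VERSION;        /* version the headers were built against */
  unsigned runtime  = hwloc_get_api_version();  /* version of the library actually loaded */
  if ((compiled >> 16) != (runtime >> 16))
    fprintf(stderr, "hwloc ABI mismatch: compiled 0x%x, running 0x%x\n", compiled, runtime);
  return 0;
}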
static int hwloc_solaris_set_sth_cpubind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_const_bitmap_t hwloc_set, int flags) { unsigned target_cpu; /* The resulting binding is always strict */ if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology))) { if (processor_bind(idtype, id, PBIND_NONE, NULL) != 0) return -1; #ifdef HAVE_LIBLGRP if (!(flags & HWLOC_CPUBIND_NOMEMBIND)) { int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); if (depth >= 0) { int n = hwloc_get_nbobjs_by_depth(topology, depth); int i; for (i = 0; i < n; i++) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i); lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE); } } } #endif /* HAVE_LIBLGRP */ return 0; } #ifdef HAVE_LIBLGRP if (!(flags & HWLOC_CPUBIND_NOMEMBIND)) { int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); if (depth >= 0) { int n = hwloc_get_nbobjs_by_depth(topology, depth); int i; int ok; hwloc_bitmap_t target = hwloc_bitmap_alloc(); for (i = 0; i < n; i++) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i); if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set)) hwloc_bitmap_or(target, target, obj->cpuset); } ok = hwloc_bitmap_isequal(target, hwloc_set); hwloc_bitmap_free(target); if (ok) { /* Ok, managed to achieve hwloc_set by just combining NUMA nodes */ for (i = 0; i < n; i++) { hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i); if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set)) { lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_STRONG); } else { if (flags & HWLOC_CPUBIND_STRICT) lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE); else lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_WEAK); } } return 0; } } } #endif /* HAVE_LIBLGRP */ if (hwloc_bitmap_weight(hwloc_set) != 1) { errno = EXDEV; return -1; } target_cpu = hwloc_bitmap_first(hwloc_set); if (processor_bind(idtype, id, (processorid_t) (target_cpu), NULL) != 0) return -1; return 0; }
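/* A minimal sketch of how such a backend is normally reached from user code:
 * bind the current process to its first PU through the portable
 * hwloc_set_cpubind() entry point.  Error handling is deliberately terse. */
#include <stdio.h>
#include <hwloc.h>

int main(void)
{
  hwloc_topology_t topology;
  hwloc_obj_t pu;
  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);
  pu = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0);
  if (pu && hwloc_set_cpubind(topology, pu->cpuset, HWLOC_CPUBIND_PROCESS) < 0)
    perror("hwloc_set_cpubind");
  hwloc_topology_destroy(topology);
  return 0;
}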
int main(void) { hwloc_topology_t topology; hwloc_obj_t obj; unsigned indexes[5]; float distances[5*5]; unsigned depth; unsigned width; /* group 3 numa nodes as 1 group of 2 and 1 on the side */ hwloc_topology_init(&topology); hwloc_topology_set_synthetic(topology, "node:3 pu:1"); indexes[0] = 0; indexes[1] = 1; indexes[2] = 2; distances[0] = 1; distances[1] = 4; distances[2] = 4; distances[3] = 4; distances[4] = 1; distances[5] = 2; distances[6] = 4; distances[7] = 2; distances[8] = 1; hwloc_topology_set_distance_matrix(topology, HWLOC_OBJ_PU, 3, indexes, distances); hwloc_topology_load(topology); /* 1 group at depth 1 */ depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP); assert(depth == 1); width = hwloc_get_nbobjs_by_depth(topology, depth); assert(width == 1); /* 3 nodes at depth 2 */ depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); assert(depth == 2); width = hwloc_get_nbobjs_by_depth(topology, depth); assert(width == 3); /* find the root obj */ obj = hwloc_get_root_obj(topology); assert(obj->arity == 2); /* check its children */ assert(obj->children[0]->type == HWLOC_OBJ_NUMANODE); assert(obj->children[0]->depth == 2); assert(obj->children[0]->arity == 1); assert(obj->children[1]->type == HWLOC_OBJ_GROUP); assert(obj->children[1]->depth == 1); assert(obj->children[1]->arity == 2); hwloc_topology_destroy(topology); /* group 5 packages as 2 groups of 2 and 1 on the side, all of them below a common node object */ hwloc_topology_init(&topology); hwloc_topology_set_synthetic(topology, "node:1 pack:5 pu:1"); indexes[0] = 0; indexes[1] = 1; indexes[2] = 2; indexes[3] = 3; indexes[4] = 4; distances[ 0] = 1; distances[ 1] = 2; distances[ 2] = 4; distances[ 3] = 4; distances[ 4] = 4; distances[ 5] = 2; distances[ 6] = 1; distances[ 7] = 4; distances[ 8] = 4; distances[ 9] = 4; distances[10] = 4; distances[11] = 4; distances[12] = 1; distances[13] = 4; distances[14] = 4; distances[15] = 4; distances[16] = 4; distances[17] = 4; distances[18] = 1; distances[19] = 2; distances[20] = 4; distances[21] = 4; distances[22] = 4; distances[23] = 2; distances[24] = 1; hwloc_topology_set_distance_matrix(topology, HWLOC_OBJ_PACKAGE, 5, indexes, distances); hwloc_topology_load(topology); /* 1 node at depth 1 */ depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE); assert(depth == 1); width = hwloc_get_nbobjs_by_depth(topology, depth); assert(width == 1); /* 2 groups at depth 2 */ depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP); assert(depth == 2); width = hwloc_get_nbobjs_by_depth(topology, depth); assert(width == 2); /* 5 packages at depth 3 */ depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PACKAGE); assert(depth == 3); width = hwloc_get_nbobjs_by_depth(topology, depth); assert(width == 5); /* find the node obj */ obj = hwloc_get_root_obj(topology); assert(obj->arity == 1); obj = obj->children[0]; assert(obj->type == HWLOC_OBJ_NUMANODE); assert(obj->arity == 3); /* check its children */ assert(obj->children[0]->type == HWLOC_OBJ_GROUP); assert(obj->children[0]->depth == 2); assert(obj->children[0]->arity == 2); assert(obj->children[1]->type == HWLOC_OBJ_PACKAGE); assert(obj->children[1]->depth == 3); assert(obj->children[1]->arity == 1); assert(obj->children[2]->type == HWLOC_OBJ_GROUP); assert(obj->children[2]->depth == 2); assert(obj->children[2]->arity == 2); hwloc_topology_destroy(topology); return 0; }
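/* A minimal sketch of inspecting the grouping that a distance matrix like the
 * first one above produces: rebuild the small "node:3 pu:1" case and ask, for
 * each NUMA node, whether a Group object became one of its ancestors.  It uses
 * the same hwloc 1.x-era hwloc_topology_set_distance_matrix() call as the test. */
#include <stdio.h>
#include <hwloc.h>

int main(void)
{
  hwloc_topology_t topology;
  unsigned indexes[3] = { 0, 1, 2 };
  float distances[9] = { 1, 4, 4,   4, 1, 2,   4, 2, 1 }; /* nodes 1 and 2 are close */
  unsigned i;
  hwloc_topology_init(&topology);
  hwloc_topology_set_synthetic(topology, "node:3 pu:1");
  hwloc_topology_set_distance_matrix(topology, HWLOC_OBJ_NUMANODE, 3, indexes, distances);
  hwloc_topology_load(topology);
  for (i = 0; i < 3; i++) {
    hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
    hwloc_obj_t group = hwloc_get_ancestor_obj_by_type(topology, HWLOC_OBJ_GROUP, node);
    printf("NUMANode L#%u: %s\n", i, group ? "inside a Group" : "no Group ancestor");
  }
  hwloc_topology_destroy(topology);
  return 0;
}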
tm_topology_t* hwloc_to_tm(char *filename,double **pcost) { hwloc_topology_t topology; tm_topology_t *res = NULL; hwloc_obj_t *objs = NULL; unsigned topodepth,depth; int nb_nodes,i; double *cost; int err; /* Build the topology */ hwloc_topology_init(&topology); err = hwloc_topology_set_xml(topology,filename); if(err == -1){ if(get_verbose_level() >= CRITICAL) fprintf(stderr,"Error: %s is a bad xml topology file!\n",filename); exit(-1); } hwloc_topology_ignore_all_keep_structure(topology); hwloc_topology_load(topology); /* Test if symetric */ if(!symetric(topology)){ if(get_verbose_level() >= CRITICAL) fprintf(stderr,"%s not symetric!\n",filename); exit(-1); } /* work on depth */ topodepth = hwloc_topology_get_depth(topology); res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t)); res->nb_levels = topodepth; res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels); res->nb_nodes = (int*)MALLOC(sizeof(int)*res->nb_levels); res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels); if(get_verbose_level() >= INFO) printf("topodepth = %d\n",topodepth); /* Build TreeMatch topology */ for( depth = 0 ; depth < topodepth ; depth++ ){ nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth); res->nb_nodes[depth] = nb_nodes; res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes); objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes); objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL); hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1); res->arity[depth] = objs[0]->arity; if(get_verbose_level() >= INFO) printf("%d(%d):",res->arity[depth],nb_nodes); /* Build process id tab */ for (i = 0; i < nb_nodes; i++){ res->node_id[depth][i] = objs[i]->os_index; /* if(depth==topodepth-1) */ } FREE(objs); } cost = (double*)CALLOC(res->nb_levels,sizeof(double)); for(i=0; i<res->nb_levels; i++){ cost[i] = speed(i); } *pcost = cost; /* Destroy topology object. */ hwloc_topology_destroy(topology); if(get_verbose_level() >= INFO) printf("\n"); return res; }
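/* A hypothetical driver for the converter above: load an XML topology dump and
 * print the arity and width of each TreeMatch level.  The header name
 * "tm_hwloc.h" is a guess for where tm_topology_t and hwloc_to_tm() are
 * declared; freeing the returned structure is left out since the matching
 * tm_* free routine is not shown in this file. */
#include <stdio.h>
#include "tm_hwloc.h" /* assumed header; adjust to the actual TreeMatch header */

int main(int argc, char *argv[])
{
  tm_topology_t *topo;
  double *cost = NULL;
  int i;
  if (argc < 2) {
    fprintf(stderr, "usage: %s topology.xml\n", argv[0]);
    return 1;
  }
  topo = hwloc_to_tm(argv[1], &cost);
  for (i = 0; i < topo->nb_levels; i++)
    printf("level %d: arity %d, %d nodes\n", i, topo->arity[i], topo->nb_nodes[i]);
  return 0;
}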