Example #1
    int PlatformTopology::num_domain(int domain_type) const
    {
        int result = 0;

        try {
            result = hwloc_get_nbobjs_by_type(m_topo, hwloc_domain(domain_type));
        }
        catch (const Exception &ex) {
            if (ex.err_value() != GEOPM_ERROR_INVALID) {
                throw ex;
            }
            if (domain_type == GEOPM_DOMAIN_TILE) {
                /// @todo  This assumes that tiles are just below
                ///        package in hwloc hierarchy.  If tiles are
                ///        at L2 cache, but processor has an L3 cache,
                ///        this may not be correct.
                int depth = hwloc_get_type_depth(m_topo, hwloc_domain(GEOPM_DOMAIN_PACKAGE)) + 1;
                result = hwloc_get_nbobjs_by_depth(m_topo, depth);
            }
            else {
                throw ex;
            }
            if (result == 0) {
                throw ex;
            }
        }
        return result;
    }
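The fallback above is plain depth arithmetic: count the objects one hwloc level below the package. A minimal standalone sketch of that pattern (an illustration, not GEOPM code; it assumes hwloc 1.x naming, where the package level is HWLOC_OBJ_SOCKET):

#include <hwloc.h>
#include <stdio.h>

int main(void)
{
    hwloc_topology_t topo;

    hwloc_topology_init(&topo);
    hwloc_topology_load(topo);

    /* Count the objects one level below the package/socket level,
       mirroring the tile fallback in the example above. */
    int pkg_depth = hwloc_get_type_depth(topo, HWLOC_OBJ_SOCKET);
    if (pkg_depth >= 0)
        printf("objects below package level: %u\n",
               hwloc_get_nbobjs_by_depth(topo, pkg_depth + 1));

    hwloc_topology_destroy(topo);
    return 0;
}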
Example #2
static int
hwloc_solaris_get_sth_cpubind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_bitmap_t hwloc_set, int flags __hwloc_attribute_unused)
{
  processorid_t binding;
  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
  int n;
  int i;

  if (depth < 0) {
    errno = ENOSYS;
    return -1;
  }

  /* first check if processor_bind() was used to bind to a single processor rather than to an lgroup */
  if ( processor_bind(idtype, id, PBIND_QUERY, &binding) == 0 && binding != PBIND_NONE ) {
    hwloc_bitmap_only(hwloc_set, binding);
    return 0;
  }

  /* if not, check lgroups */
  hwloc_bitmap_zero(hwloc_set);
  n = hwloc_get_nbobjs_by_depth(topology, depth);
  for (i = 0; i < n; i++) {
    hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
    lgrp_affinity_t aff = lgrp_affinity_get(idtype, id, obj->os_index);

    if (aff == LGRP_AFF_STRONG)
      hwloc_bitmap_or(hwloc_set, hwloc_set, obj->cpuset);
  }

  if (hwloc_bitmap_iszero(hwloc_set))
    hwloc_bitmap_copy(hwloc_set, hwloc_topology_get_complete_cpuset(topology));

  return 0;
}
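Stripped of the Solaris lgroup calls, the second half of this function is a generic accumulation pattern: visit every object at a depth and OR together the cpusets of the ones that match. A platform-neutral sketch, where want_obj() is a hypothetical predicate standing in for the lgrp_affinity_get() test:

/* Sketch: OR together the cpusets of selected objects at one depth. */
static void collect_cpusets(hwloc_topology_t topology, int depth,
                            hwloc_bitmap_t out,
                            int (*want_obj)(hwloc_obj_t))
{
  unsigned i, n = hwloc_get_nbobjs_by_depth(topology, depth);

  hwloc_bitmap_zero(out);
  for (i = 0; i < n; i++) {
    hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
    if (want_obj(obj))
      hwloc_bitmap_or(out, out, obj->cpuset);
  }
}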
Example #3
thread_pool_t *create_threads(){
  hwloc_topology_t topology;
  int i;
  local_thread_t *local;
  int nb_threads;
  unsigned int nb_cores;
  int depth;

  verbose_level = tm_get_verbose_level();

  /* Get number of cores: set 1 thread per core */
  /* Allocate and initialize topology object. */
  hwloc_topology_init(&topology);
  /* Only keep relevant levels
     hwloc_topology_ignore_all_keep_structure(topology);*/
  /* Perform the topology detection. */
  hwloc_topology_load(topology);
  depth = hwloc_topology_get_depth(topology);
  if (depth == -1 ) {
    if(verbose_level>=CRITICAL)
      fprintf(stderr,"Error: HWLOC unable to find the depth of the topology of this node!\n");
    exit(-1);
  }



  /* The deepest level (depth-1) holds the PUs/cores on which we can execute work. */
  nb_cores = hwloc_get_nbobjs_by_depth(topology, depth-1);
  nb_threads = MIN(nb_cores,  max_nb_threads);

  if(verbose_level>=INFO)
    printf("nb_threads = %d\n",nb_threads);

  pool = (thread_pool_t*) MALLOC(sizeof(thread_pool_t));
  pool -> topology = topology;
  pool -> nb_threads = nb_threads;
  pool -> thread_list = (pthread_t*)MALLOC(sizeof(pthread_t)*nb_threads);
  pool -> working_list = (work_t*)CALLOC(nb_threads,sizeof(work_t));
  pool -> cond_var = (pthread_cond_t*)MALLOC(sizeof(pthread_cond_t)*nb_threads);
  pool -> list_lock = (pthread_mutex_t*)MALLOC(sizeof(pthread_mutex_t)*nb_threads);

  local=(local_thread_t*)MALLOC(sizeof(local_thread_t)*nb_threads);
  pool->local = local;

  for (i=0;i<nb_threads;i++){
    local[i].topology = topology;
    local[i].id = i;
    local[i].working_list = &pool->working_list[i];
    pthread_cond_init(pool->cond_var +i, NULL);
    local[i].cond_var = pool->cond_var +i;
    pthread_mutex_init(pool->list_lock +i, NULL);
    local[i].list_lock = pool->list_lock+i;
    if (pthread_create (pool->thread_list+i, NULL, thread_loop, local+i) < 0) {
      if(verbose_level>=CRITICAL)
	fprintf(stderr, "pthread_create error for exec thread %d\n",i);
      return NULL;
    }
  }
  return pool;
}
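Counting cores as hwloc_get_nbobjs_by_depth(topology, depth-1) assumes the deepest level holds one object per core; on an SMT machine it actually holds one object per hardware thread. A sketch of a more explicit variant using the hwloc helper hwloc_get_type_or_below_depth(), which falls back to the nearest deeper level when no core level is reported:

/* Sketch: one thread per core, using the core level when available. */
static unsigned count_cores(hwloc_topology_t topology)
{
  int depth = hwloc_get_type_or_below_depth(topology, HWLOC_OBJ_CORE);
  return hwloc_get_nbobjs_by_depth(topology, depth);
}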
Example #4
static void
output_compute_pu_min_textwidth(struct lstopo_output *output)
{
  unsigned fontsize = output->fontsize;
  char text[64];
  int n;
  hwloc_topology_t topology = output->topology;
  hwloc_obj_t lastpu;

  if (!output->methods->textsize) {
    output->min_pu_textwidth = 0;
    return;
  }

  if (output->logical) {
    int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU);
    lastpu = hwloc_get_obj_by_depth(topology, depth, hwloc_get_nbobjs_by_depth(topology, depth)-1);
  } else {
    unsigned lastidx = hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(topology));
    lastpu = hwloc_get_pu_obj_by_os_index(topology, lastidx);
  }

  n = lstopo_obj_snprintf(output, text, sizeof(text), lastpu);
  output->min_pu_textwidth = get_textwidth(output, text, n, fontsize);
}
Example #5
static int
hwloc_solaris_get_sth_membind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_nodeset_t nodeset, hwloc_membind_policy_t *policy, int flags __hwloc_attribute_unused)
{
  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
  int n;
  int i;

  if (depth < 0) {
    errno = ENOSYS;
    return -1;
  }

  hwloc_bitmap_zero(nodeset);
  n = hwloc_get_nbobjs_by_depth(topology, depth);

  for (i = 0; i < n; i++) {
    hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
    lgrp_affinity_t aff = lgrp_affinity_get(idtype, id, obj->os_index);

    if (aff == LGRP_AFF_STRONG)
      hwloc_bitmap_set(nodeset, obj->os_index);
  }

  if (hwloc_bitmap_iszero(nodeset))
    hwloc_bitmap_copy(nodeset, hwloc_topology_get_complete_nodeset(topology));

  *policy = HWLOC_MEMBIND_BIND;
  return 0;
}
Example #6
int
main (void)
{
  hwloc_topology_t topology;
  unsigned depth;
  hwloc_obj_t last;
  hwloc_obj_t *closest;
  unsigned found;
  int err;
  unsigned numprocs;
  hwloc_obj_t ancestor;

  err = hwloc_topology_init (&topology);
  if (err)
    return EXIT_FAILURE;

  hwloc_topology_set_synthetic (topology, "2 3 4 5");

  err = hwloc_topology_load (topology);
  if (err)
    return EXIT_FAILURE;

  depth = hwloc_topology_get_depth(topology);

  /* get the last object of last level */
  numprocs =  hwloc_get_nbobjs_by_depth(topology, depth-1);
  last = hwloc_get_obj_by_depth(topology, depth-1, numprocs-1);

  /* allocate the array of closest objects */
  closest = malloc(numprocs * sizeof(*closest));
  assert(closest);

  /* get closest levels */
  found = hwloc_get_closest_objs (topology, last, closest, numprocs);
  printf("looked for %u closest entries, found %u\n", numprocs, found);
  assert(found == numprocs-1);

  /* check first found is closest */
  assert(closest[0] == hwloc_get_obj_by_depth(topology, depth-1, numprocs-5 /* arity is 5 on last level */));
  /* check some other expected positions */
  assert(closest[found-1] == hwloc_get_obj_by_depth(topology, depth-1, 1*3*4*5-1 /* last of first half */));
  assert(closest[found/2-1] == hwloc_get_obj_by_depth(topology, depth-1, 1*3*4*5+2*4*5-1 /* last of second third of second half */));
  assert(closest[found/2/3-1] == hwloc_get_obj_by_depth(topology, depth-1, 1*3*4*5+2*4*5+3*5-1 /* last of third quarter of third third of second half */));

  /* get ancestor of last and less close object */
  ancestor = hwloc_get_common_ancestor_obj(topology, last, closest[found-1]);
  assert(hwloc_obj_is_in_subtree(topology, last, ancestor));
  assert(hwloc_obj_is_in_subtree(topology, closest[found-1], ancestor));
  assert(ancestor == hwloc_get_root_obj(topology)->first_child);
  printf("ancestor type %u depth %u number %u is system level\n",
	 ancestor->type, ancestor->depth, ancestor->logical_index);

  free(closest);
  hwloc_topology_destroy (topology);

  return EXIT_SUCCESS;
}
Example #7
tm_topology_t* get_local_topo_with_hwloc(void)
{
  hwloc_topology_t topology;
  tm_topology_t *res = NULL;
  hwloc_obj_t *objs = NULL;
  unsigned topodepth,depth;
  int nb_nodes,i;

  /* Build the topology */
  hwloc_topology_init(&topology);
  hwloc_topology_ignore_all_keep_structure(topology);
  hwloc_topology_load(topology);

  /* Test if symmetric */
  if(!symetric(topology)){
    if(get_verbose_level() >= CRITICAL)
      fprintf(stderr,"Local topology not symmetric!\n");
    exit(-1);
  }

  /* work on depth */
  topodepth = hwloc_topology_get_depth(topology);

  res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
  res->nb_levels = topodepth;
  res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
  res->nb_nodes = (int*)MALLOC(sizeof(int)*res->nb_levels);
  res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);

  /* Build TreeMatch topology */
  for( depth = 0 ; depth < topodepth ; depth++ ){
    nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
    res->nb_nodes[depth] = nb_nodes;
    res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);

    objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
    objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
    hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
    res->arity[depth] = objs[0]->arity;

    /* printf("%d:",res->arity[depth]); */

    /* Build process id tab */
    for (i = 0; i < nb_nodes; i++){
      res->node_id[depth][i] = objs[i]->os_index;
      /* if(depth==topodepth-1) */
    }
    FREE(objs);
  }

  /* Destroy HWLOC topology object. */
  hwloc_topology_destroy(topology);

  /* printf("\n"); */
  return res;
}
Example #8
int compute_context_nbr(int * ctxnbr, int threadnbr, int verbose)
{
#if (defined WITH_STARPU && defined STARPU_CONTEXT)
  if (*ctxnbr == -1)
    {
      int depth;
      unsigned i, n;
      int ncpu_per_socket, nsocket, ncpu;
      hwloc_topology_t topology;
      hwloc_obj_t obj;
      /* Allocate and initialize topology object. */
      hwloc_topology_init(&topology);
      /* ... Optionally, put detection configuration here to ignore
         some objects types, define a synthetic topology, etc....
         The default is to detect all the objects of the machine that
         the caller is allowed to access.  See Configure Topology
         Detection. */
      /* Perform the topology detection. */
      hwloc_topology_load(topology);

      depth = hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET);
      if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
        /* number of sockets is unknown; assume quad-core sockets... */
        ncpu_per_socket = 4;
      } else {
        ncpu    = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE);
        nsocket = hwloc_get_nbobjs_by_depth(topology, depth);
        ncpu_per_socket = ncpu/nsocket;
      }
      if (verbose > API_VERBOSE_NO)
        fprintf(stdout, "ncpu_per_socket %d\n", ncpu_per_socket);
      nsocket = threadnbr/ncpu_per_socket;
      if (threadnbr%ncpu_per_socket)
        nsocket ++;
      *ctxnbr = nsocket + 1;

      hwloc_topology_destroy(topology);
    }
  if (threadnbr + 1 < *ctxnbr)
    *ctxnbr = threadnbr+1;

  if (*ctxnbr == 1)
    *ctxnbr = 2;

  /* can't have more than STARPU_NMAX_SCHED_CTXS CTX */
  {
    PASTIX_INT nctx_bot = *ctxnbr - 1;
    while (*ctxnbr > STARPU_NMAX_SCHED_CTXS)
      {
        nctx_bot /= 2;
        *ctxnbr = nctx_bot + 1;
      }
  }
#endif /* WITH_STARPU */
  return 0;
}
Example #9
static void check_level(hwloc_topology_t topology, unsigned depth, unsigned width, unsigned arity)
{
  unsigned j;
  assert(hwloc_get_nbobjs_by_depth(topology, depth) == width);
  for(j=0; j<width; j++) {
    hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, j);
    assert(obj);
    assert(obj->arity == arity);
  }
}
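A plausible caller, pairing check_level() with a synthetic topology so the expected widths and arities are known in advance (illustrative only; check_synthetic_levels is a hypothetical wrapper):

static void check_synthetic_levels(void)
{
  hwloc_topology_t topology;

  hwloc_topology_init(&topology);
  hwloc_topology_set_synthetic(topology, "2 3 4");
  hwloc_topology_load(topology);
  check_level(topology, 0, 1, 2);   /* 1 root of arity 2 */
  check_level(topology, 1, 2, 3);   /* 2 objects of arity 3 */
  check_level(topology, 2, 6, 4);   /* 6 objects of arity 4 */
  check_level(topology, 3, 24, 0);  /* 24 leaves, no children */
  hwloc_topology_destroy(topology);
}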
Example #10
magma_int_t magma_get_parallel_numthreads()
{
    // query number of cores
    magma_int_t ncores = 0;

#ifdef HAVE_HWLOC
    // hwloc gives physical cores, not hyperthreads
    // from http://stackoverflow.com/questions/12483399/getting-number-of-cores-not-ht-threads
    hwloc_topology_t topology;
    hwloc_topology_init( &topology );
    hwloc_topology_load( topology );
    magma_int_t depth = hwloc_get_type_depth( topology, HWLOC_OBJ_CORE );
    if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
        ncores = hwloc_get_nbobjs_by_depth( topology, depth );
    }
    hwloc_topology_destroy( topology );
#endif

    if ( ncores == 0 ) {
        #ifdef _MSC_VER  // Windows
        SYSTEM_INFO sysinfo;
        GetSystemInfo( &sysinfo );
        ncores = sysinfo.dwNumberOfProcessors;
        #else
        ncores = sysconf( _SC_NPROCESSORS_ONLN );
        #endif
    }

    // query MAGMA_NUM_THREADS or OpenMP
    const char *threads_str = getenv("MAGMA_NUM_THREADS");
    magma_int_t threads = 0;
    if ( threads_str != NULL ) {
        char* endptr;
        threads = strtol( threads_str, &endptr, 10 );
        if ( threads < 1 || *endptr != '\0' ) {
            threads = 1;
            fprintf( stderr, "$MAGMA_NUM_THREADS='%s' is an invalid number; using %d thread.\n",
                     threads_str, (int) threads );
        }
    }
    else {
        #if defined(_OPENMP)
        #pragma omp parallel
        {
            threads = omp_get_num_threads();
        }
        #else
            threads = ncores;
        #endif
    }

    // limit to range [1, number of cores]
    threads = max( 1, min( ncores, threads ));
    return threads;
}
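The comment above is the crux: HWLOC_OBJ_CORE counts physical cores while HWLOC_OBJ_PU counts hardware threads. A short sketch contrasting the two counts on an already-loaded topology:

/* Sketch: physical cores vs. hardware threads on the same topology. */
int core_depth = hwloc_get_type_depth( topology, HWLOC_OBJ_CORE );
int pu_depth   = hwloc_get_type_depth( topology, HWLOC_OBJ_PU );
if (core_depth != HWLOC_TYPE_DEPTH_UNKNOWN && pu_depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
    printf( "%u cores, %u hardware threads\n",
            hwloc_get_nbobjs_by_depth( topology, core_depth ),
            hwloc_get_nbobjs_by_depth( topology, pu_depth ) );
}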
Example #11
/*******************  FUNCTION  *********************/
int TopoHwloc::getNbEntities ( int level, int depth ) const
{
	//get real depth
	int absDepth = getAbsDepth(level,depth);
	allocAssume(absDepth != -1,"Cannot find the depth for the requested level, depth couple in hwloc.");//level,depth
	
	//get number of objs
	int res = hwloc_get_nbobjs_by_depth(topology, absDepth);
	allocAssume(res > 0, "Invalid nbobjs_by_depth in hwloc for absolute depth ....");//absDepth
	return res;
}
Example #12
static void check(hwloc_topology_t topology)
{
  unsigned depth;
  unsigned i,j;

  depth = hwloc_topology_get_depth(topology);
  for(i=0; i<depth; i++) {
    for(j=0; j<hwloc_get_nbobjs_by_depth(topology, i); j++) {
      assert(hwloc_get_obj_by_depth(topology, i, j)->userdata == NULL);
    }
  }
}
Example #13
static int
hwloc_aix_get_sth_membind(hwloc_topology_t topology, rstype_t what, rsid_t who, hwloc_bitmap_t nodeset, hwloc_membind_policy_t *policy, int flags __hwloc_attribute_unused)
{
  hwloc_bitmap_t hwloc_set;
  rsethandle_t rset;
  unsigned cpu, maxcpus;
  int res = -1;
  int depth, n, i;

  depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
  if (depth < 0) {
    errno = EXDEV;
    return -1;
  }
  n = hwloc_get_nbobjs_by_depth(topology, depth);

  rset = rs_alloc(RS_EMPTY);

  if (ra_getrset(what, who, 0, rset) == -1)
    goto out;

  hwloc_set = hwloc_bitmap_alloc();

  maxcpus = rs_getinfo(rset, R_MAXPROCS, 0);
  for (cpu = 0; cpu < maxcpus; cpu++)
    if (rs_op(RS_TESTRESOURCE, rset, NULL, R_PROCS, cpu) == 1)
      hwloc_bitmap_set(hwloc_set, cpu);
  hwloc_bitmap_and(hwloc_set, hwloc_set, hwloc_topology_get_complete_cpuset(topology));

  hwloc_bitmap_zero(nodeset);
  for (i = 0; i < n; i++) {
    hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
    if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set))
      hwloc_bitmap_set(nodeset, obj->os_index);
  }

  hwloc_bitmap_free(hwloc_set);

  *policy = HWLOC_MEMBIND_BIND;
  res = 0;

out:
  rs_free(rset);
  return res;
}
Example #14
static int
hwloc_solaris_set_sth_membind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
{
  int depth;
  int n, i;

  switch (policy) {
    case HWLOC_MEMBIND_DEFAULT:
    case HWLOC_MEMBIND_BIND:
      break;
    default:
      errno = ENOSYS;
      return -1;
  }

  if (flags & HWLOC_MEMBIND_NOCPUBIND) {
    errno = ENOSYS;
    return -1;
  }

  depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
  if (depth < 0) {
    errno = EXDEV;
    return -1;
  }
  n = hwloc_get_nbobjs_by_depth(topology, depth);

  for (i = 0; i < n; i++) {
    hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
    if (hwloc_bitmap_isset(nodeset, obj->os_index)) {
      lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_STRONG);
    } else {
      if (flags & HWLOC_CPUBIND_STRICT)
	lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE);
      else
	lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_WEAK);
    }
  }

  return 0;
}
Example #15
int symetric(hwloc_topology_t topology)
{
   int depth,i,topodepth = hwloc_topology_get_depth(topology);
   unsigned int arity;
   hwloc_obj_t obj;
   for ( depth = 0; depth < topodepth-1 ; depth++ ) {
    int N = hwloc_get_nbobjs_by_depth(topology, depth);
    obj = hwloc_get_next_obj_by_depth (topology,depth,NULL);
    arity = obj->arity;

    /* printf("Depth=%d, N=%d, Arity:%d\n",depth,N,arity); */
    for (i = 1; i < N; i++ ){
      obj = hwloc_get_next_obj_by_depth (topology,depth,obj);
      if( obj->arity != arity){
	/* printf("[%d]: obj->arity=%d, arity=%d\n",i,obj->arity,arity); */
	return 0;
      }
    }
   }
   return 1;
}
Example #16
void Hwloc::getNumSockets(unsigned int &allowedNodes, int &numSockets, unsigned int &hwThreads) {
#ifdef HWLOC
   numSockets = 0;
   // Nodes that can be seen by hwloc
   allowedNodes = 0;
   // Hardware threads
   hwThreads = 0;

   int depth = hwloc_get_type_depth( _hwlocTopology, HWLOC_OBJ_NODE );
   // If there are NUMA nodes in this machine
   if ( depth != HWLOC_TYPE_DEPTH_UNKNOWN ) {
      //hwloc_const_cpuset_t cpuset = hwloc_topology_get_online_cpuset( _hwlocTopology );
      //allowedNodes = hwloc_get_nbobjs_inside_cpuset_by_type( _hwlocTopology, cpuset, HWLOC_OBJ_NODE );
      //hwThreads = hwloc_get_nbobjs_inside_cpuset_by_type( _hwlocTopology, cpuset, HWLOC_OBJ_PU );
      unsigned nodes = hwloc_get_nbobjs_by_depth( _hwlocTopology, depth );
      //hwloc_cpuset_t set = i

      // For each node, count how many hardware threads there are below.
      for ( unsigned nodeIdx = 0; nodeIdx < nodes; ++nodeIdx )
      {
         hwloc_obj_t node = hwloc_get_obj_by_depth( _hwlocTopology, depth, nodeIdx );
         int localThreads = hwloc_get_nbobjs_inside_cpuset_by_type( _hwlocTopology, node->cpuset, HWLOC_OBJ_PU );
         // Increase hw thread count
         hwThreads += localThreads;
         // If this node has hw threads beneath, increase the number of viewable nodes
         if ( localThreads > 0 ) ++allowedNodes;
      }
      numSockets = nodes;
   }
   // Otherwise, set it to 1
   else {
      allowedNodes = 1; 
      numSockets = 1;
   }
#else
   numSockets = 0;
   allowedNodes = 0;
#endif
}
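The commented-out lines hint at a shorter route: with a cpuset in hand, hwloc can count objects inside it directly. A sketch, under the assumption that the topology's allowed cpuset is the set of interest:

/* Sketch: count the PUs in the topology's allowed cpuset. */
hwloc_const_cpuset_t allowed = hwloc_topology_get_allowed_cpuset( _hwlocTopology );
int pus = hwloc_get_nbobjs_inside_cpuset_by_type( _hwlocTopology, allowed,
                                                  HWLOC_OBJ_PU );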
Example #17
int bind_myself_to_core(hwloc_topology_t topology, int id){
  hwloc_cpuset_t cpuset;
  hwloc_obj_t obj;
  char *str;
  int binding_res;
  int depth = hwloc_topology_get_depth(topology);
  int nb_cores = hwloc_get_nbobjs_by_depth(topology, depth-1);
  int my_core;
  int nb_threads = get_nb_threads();
  /* printf("depth=%d\n",depth); */

  switch (mapping_policy){
  case SCATTER:
    my_core = id*(nb_cores/nb_threads);
    break;
  default:
    if(verbose_level>=WARNING){
      printf("Wrong scheduling policy. Using COMPACT\n");
    }
  case COMPACT:
    my_core = id%nb_cores;
  }

    if(verbose_level>=INFO){
       printf("Mapping thread %d on core %d\n",id,my_core);
   }

    /* Get my core. */
    obj = hwloc_get_obj_by_depth(topology, depth-1, my_core);
    if (obj) {
      /* Get a copy of its cpuset that we may modify. */
      cpuset = hwloc_bitmap_dup(obj->cpuset);

      /* Get only one logical processor (in case the core is
	 SMT/hyperthreaded). */
      hwloc_bitmap_singlify(cpuset);


      /*hwloc_bitmap_asprintf(&str, cpuset);
      printf("Binding thread %d to cpuset %s\n", my_core,str);
      FREE(str);
      */

      /* And try to bind ourselves there. */
      binding_res = hwloc_set_cpubind(topology, cpuset, HWLOC_CPUBIND_THREAD);
      if (binding_res == -1){
	int error = errno;
	hwloc_bitmap_asprintf(&str, obj->cpuset);
	if(verbose_level>=WARNING)
	  printf("Thread %d couldn't bind to cpuset %s: %s.\n This thread is not bound to any core...\n", my_core, str, strerror(error));
	free(str); /* str is allocated by hlwoc, free it normally*/
	return 0;
      }
      /* FREE our cpuset copy */
      hwloc_bitmap_free(cpuset);
      return 1;
    }else{
      if(verbose_level>=WARNING)
	printf("No valid object for core id %d!\n",my_core);
      return 0;
    }
}
Example #18
int main(void)
{
  hwloc_topology_t topology;
#ifdef HWLOC_HAVE_CPU_SET
  unsigned depth;
  hwloc_bitmap_t hwlocset;
  cpu_set_t schedset;
  hwloc_obj_t obj;
  int err;
#endif /* HWLOC_HAVE_CPU_SET */

  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);

#ifdef HWLOC_HAVE_CPU_SET

  depth = hwloc_topology_get_depth(topology);

  hwlocset = hwloc_bitmap_dup(hwloc_topology_get_complete_cpuset(topology));
  hwloc_cpuset_to_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_setaffinity(0, sizeof(schedset));
#else
  err = sched_setaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwloc_bitmap_free(hwlocset);

#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_getaffinity(0, sizeof(schedset));
#else
  err = sched_getaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwlocset = hwloc_bitmap_alloc();
  hwloc_cpuset_from_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
  assert(hwloc_bitmap_isincluded(hwlocset, hwloc_topology_get_complete_cpuset(topology)));
  hwloc_bitmap_andnot(hwlocset, hwlocset, hwloc_topology_get_online_cpuset(topology));
  hwloc_bitmap_andnot(hwlocset, hwlocset, hwloc_topology_get_allowed_cpuset(topology));
  assert(hwloc_bitmap_iszero(hwlocset));
  hwloc_bitmap_free(hwlocset);

  obj = hwloc_get_obj_by_depth(topology, depth-1, hwloc_get_nbobjs_by_depth(topology, depth-1) - 1);
  assert(obj);
  assert(obj->type == HWLOC_OBJ_PU);

  hwlocset = hwloc_bitmap_dup(obj->cpuset);
  hwloc_cpuset_to_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_setaffinity(0, sizeof(schedset));
#else
  err = sched_setaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwloc_bitmap_free(hwlocset);

#ifdef HWLOC_HAVE_OLD_SCHED_SETAFFINITY
  err = sched_getaffinity(0, sizeof(schedset));
#else
  err = sched_getaffinity(0, sizeof(schedset), &schedset);
#endif
  assert(!err);
  hwlocset = hwloc_bitmap_alloc();
  hwloc_cpuset_from_glibc_sched_affinity(topology, hwlocset, &schedset, sizeof(schedset));
  assert(hwloc_bitmap_isequal(hwlocset, obj->cpuset));
  hwloc_bitmap_free(hwlocset);

#endif /* HWLOC_HAVE_CPU_SET */

  hwloc_topology_destroy(topology);
  return 0;
}
Example #19
extern int
get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
	    uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads,
	    uint16_t *p_block_map_size,
	    uint16_t **p_block_map, uint16_t **p_block_map_inv)
{
	enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 };
	hwloc_topology_t topology;
	hwloc_obj_t obj;
	hwloc_obj_type_t objtype[LAST_OBJ];
	unsigned idx[LAST_OBJ];
	int nobj[LAST_OBJ];
	int actual_cpus;
	int macid;
	int absid;
	int actual_boards = 1, depth;
	int i;

	debug2("hwloc_topology_init");
	if (hwloc_topology_init(&topology)) {
		/* error initializing the hwloc library */
		debug("hwloc_topology_init() failed.");
		return 1;
	}

	/* parse the whole system */
	hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);

	/* ignore cache and misc objects */
	hwloc_topology_ignore_type (topology, HWLOC_OBJ_CACHE);
	hwloc_topology_ignore_type (topology, HWLOC_OBJ_MISC);

	/* load topology */
	debug2("hwloc_topology_load");
	if (hwloc_topology_load(topology)) {
		/* error loading the hardware topology */
		debug("hwloc_topology_load() failed.");
		hwloc_topology_destroy(topology);
		return 2;
	}

	/* At least on a temporary basis, one could map AMD Bulldozer entities
	 * onto the entities that Slurm does optimize placement for today (e.g.
	 * map each Bulldozer core to a thread and each Bulldozer module to a
	 * Slurm core, alternately map the Bulldozer module to a Slurm socket
	 * and the Bulldozer socket to a Slurm board). Perhaps not ideal, but
	 * it would achieve the desired locality. */

	if ( hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
	     hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET) ) {
		/* One socket contains multiple NUMA nodes,
		 * as on the AMD Opteron 6000 series etc.
		 * In such cases, use the NUMA node instead of the socket. */
		objtype[SOCKET] = HWLOC_OBJ_NODE;
		objtype[CORE]   = HWLOC_OBJ_CORE;
		objtype[PU]     = HWLOC_OBJ_PU;
	} else {
		objtype[SOCKET] = HWLOC_OBJ_SOCKET;
		objtype[CORE]   = HWLOC_OBJ_CORE;
		objtype[PU]     = HWLOC_OBJ_PU;
	}

	/* number of objects */
	depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP);
	if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
		actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth),
				    1);
	}
	nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology, objtype[SOCKET]);
	nobj[CORE]   = hwloc_get_nbobjs_by_type(topology, objtype[CORE]);
	actual_cpus  = hwloc_get_nbobjs_by_type(topology, objtype[PU]);
	nobj[PU]     = actual_cpus/nobj[CORE];  /* threads per core */
	nobj[CORE]  /= nobj[SOCKET];            /* cores per socket */

	debug("CPUs:%d Boards:%u Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d",
	      actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]);

	/* allocate block_map */
	*p_block_map_size = (uint16_t)actual_cpus;
	if (p_block_map && p_block_map_inv) {
		*p_block_map     = xmalloc(actual_cpus * sizeof(uint16_t));
		*p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t));

		/* initialize default as linear mapping */
		for (i = 0; i < actual_cpus; i++) {
			(*p_block_map)[i]     = i;
			(*p_block_map_inv)[i] = i;
		}
		
		/* create map with hwloc */
		for (idx[SOCKET]=0; idx[SOCKET]<nobj[SOCKET]; ++idx[SOCKET]) {
			for (idx[CORE]=0; idx[CORE]<nobj[CORE]; ++idx[CORE]) {
				for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) {
					/* get hwloc_obj by indexes */
					obj=hwloc_get_obj_below_array_by_type(
					            topology, 3, objtype, idx);
					if (!obj)
						continue;
					macid = obj->os_index;
					absid = idx[SOCKET]*nobj[CORE]*nobj[PU]
					      + idx[CORE]*nobj[PU]
					      + idx[PU];

					if ((macid >= actual_cpus) ||
					    (absid >= actual_cpus)) {
						/* physical or logical ID is
						 * out of range */
						continue;
					}
					debug4("CPU map[%d]=>%d", absid, macid);
					(*p_block_map)[absid]     = macid;
					(*p_block_map_inv)[macid] = absid;
				}
			 }
		}
	}

	hwloc_topology_destroy(topology);

	/* update output parameters */
	*p_cpus    = actual_cpus;
	*p_boards  = actual_boards;
	*p_sockets = nobj[SOCKET];
	*p_cores   = nobj[CORE];
	*p_threads = nobj[PU];

#if DEBUG_DETAIL
	/*** Display raw data ***/
	debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u",
	      *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads);

	/* Display the mapping tables */
	if (p_block_map && p_block_map_inv) {
		debug("------");
		debug("Abstract -> Machine logical CPU ID block mapping:");
		debug("AbstractId PhysicalId Inverse");
		for (i = 0; i < *p_cpus; i++) {
			debug3("   %4d      %4u       %4u",
				i, (*p_block_map)[i], (*p_block_map_inv)[i]);
		}
		debug("------");
	}
#endif
	return 0;

}
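The block-map loop above resolves a (socket, core, PU) logical index triple to a PU object with hwloc_get_obj_below_array_by_type(). In isolation, that lookup might be sketched as follows (debug2 is the Slurm logging macro already used in the example):

/* Sketch: find the first PU of the second core of the first socket. */
hwloc_obj_type_t typev[3] = { HWLOC_OBJ_SOCKET, HWLOC_OBJ_CORE, HWLOC_OBJ_PU };
unsigned idxv[3] = { 0, 1, 0 };
hwloc_obj_t pu = hwloc_get_obj_below_array_by_type(topology, 3, typev, idxv);
if (pu)
	debug2("PU os_index %u", pu->os_index);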
Example #20
int main(void)
{
  hwloc_topology_t local, global;
  hwloc_obj_t sw1, sw2, sw11, sw12, sw21, sw22, root;
  int err;

  printf("Loading the local topology...\n");
  hwloc_topology_init(&local);
  hwloc_topology_set_synthetic(local, "n:2 s:2 ca:1 core:2 ca:2 pu:2");
  hwloc_topology_load(local);

  printf("Try to create an empty custom topology...\n");
  hwloc_topology_init(&global);
  hwloc_topology_set_custom(global);
  err = hwloc_topology_load(global);
  assert(err == -1);
  assert(errno == EINVAL);
  hwloc_topology_destroy(global);

  printf("Creating a custom topology...\n");
  hwloc_topology_init(&global);
  hwloc_topology_set_custom(global);

  printf("Inserting the local topology into the global one...\n");
  root = hwloc_get_root_obj(global);

  sw1 = hwloc_custom_insert_group_object_by_parent(global, root, 0);
  sw11 = hwloc_custom_insert_group_object_by_parent(global, sw1, 1);
  hwloc_custom_insert_topology(global, sw11, local, NULL);
  hwloc_custom_insert_topology(global, sw11, local, NULL);
  sw12 = hwloc_custom_insert_group_object_by_parent(global, sw1, 1);
  hwloc_custom_insert_topology(global, sw12, local, NULL);
  hwloc_custom_insert_topology(global, sw12, local, NULL);

  sw2 = hwloc_custom_insert_group_object_by_parent(global, root, 0);
  sw21 = hwloc_custom_insert_group_object_by_parent(global, sw2, 1);
  hwloc_custom_insert_topology(global, sw21, local, NULL);
  hwloc_custom_insert_topology(global, sw21, local, NULL);
  hwloc_custom_insert_topology(global, sw21, local, NULL);
  sw22 = hwloc_custom_insert_group_object_by_parent(global, sw2, 1);
  hwloc_custom_insert_topology(global, sw22, local, NULL); /* only one to check that it won't get merged */

  hwloc_topology_destroy(local);

  printf("Building the global topology...\n");
  hwloc_topology_load(global);
  hwloc_topology_check(global);

  assert(hwloc_topology_get_depth(global) == 10);
  assert(hwloc_get_depth_type(global, 0) == HWLOC_OBJ_SYSTEM);
  assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_SYSTEM) == 1);
  assert(hwloc_get_depth_type(global, 1) == HWLOC_OBJ_GROUP);
  assert(hwloc_get_nbobjs_by_depth(global, 1) == 2);
  assert(hwloc_get_depth_type(global, 2) == HWLOC_OBJ_GROUP);
  assert(hwloc_get_nbobjs_by_depth(global, 2) == 4); /* the last group of this level shouldn't be merged */
  assert(hwloc_get_depth_type(global, 3) == HWLOC_OBJ_MACHINE);
  assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_MACHINE) == 8);
  assert(hwloc_get_depth_type(global, 4) == HWLOC_OBJ_NODE);
  assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_NODE) == 16);
  assert(hwloc_get_depth_type(global, 5) == HWLOC_OBJ_SOCKET);
  assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_SOCKET) == 32);
  assert(hwloc_get_depth_type(global, 6) == HWLOC_OBJ_CACHE);
  assert(hwloc_get_nbobjs_by_depth(global, 6) == 32);
  assert(hwloc_get_depth_type(global, 7) == HWLOC_OBJ_CORE);
  assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_CORE) == 64);
  assert(hwloc_get_depth_type(global, 8) == HWLOC_OBJ_CACHE);
  assert(hwloc_get_nbobjs_by_depth(global, 8) == 128);
  assert(hwloc_get_depth_type(global, 9) == HWLOC_OBJ_PU);
  assert(hwloc_get_nbobjs_by_type(global, HWLOC_OBJ_PU) == 256);

  hwloc_topology_destroy(global);

  return 0;
}
Example #21
static HYD_status handle_bitmap_binding(const char *binding, const char *mapping)
{
    int i, j, k, bind_count, map_count, cache_depth = 0, bind_depth = 0, map_depth = 0;
    int total_map_objs, total_bind_objs, num_pus_in_map_domain, num_pus_in_bind_domain,
        total_map_domains;
    hwloc_obj_t map_obj, bind_obj, *start_pu;
    hwloc_cpuset_t *map_domains;
    char *bind_str, *map_str;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* split out the count fields */
    status = split_count_field(binding, &bind_str, &bind_count);
    HYDU_ERR_POP(status, "error splitting count field\n");

    status = split_count_field(mapping, &map_str, &map_count);
    HYDU_ERR_POP(status, "error splitting count field\n");


    /* get the binding object */
    if (!strcmp(bind_str, "board"))
        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_MACHINE);
    else if (!strcmp(bind_str, "numa"))
        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_NODE);
    else if (!strcmp(bind_str, "socket"))
        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_SOCKET);
    else if (!strcmp(bind_str, "core"))
        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_CORE);
    else if (!strcmp(bind_str, "hwthread"))
        bind_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_PU);
    else {
        /* check if it's in the l*cache format */
        cache_depth = parse_cache_string(bind_str);
        if (!cache_depth) {
            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                "unrecognized binding string \"%s\"\n", binding);
        }
        bind_depth = hwloc_get_cache_type_depth(topology, cache_depth, -1);
    }

    /* get the mapping */
    if (!strcmp(map_str, "board"))
        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_MACHINE);
    else if (!strcmp(map_str, "numa"))
        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_NODE);
    else if (!strcmp(map_str, "socket"))
        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_SOCKET);
    else if (!strcmp(map_str, "core"))
        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_CORE);
    else if (!strcmp(map_str, "hwthread"))
        map_depth = hwloc_get_type_or_above_depth(topology, HWLOC_OBJ_PU);
    else {
        cache_depth = parse_cache_string(map_str);
        if (!cache_depth) {
            HYDU_ERR_SETANDJUMP(status, HYD_INTERNAL_ERROR,
                                "unrecognized mapping string \"%s\"\n", mapping);
        }
        map_depth = hwloc_get_cache_type_depth(topology, cache_depth, -1);
    }

    /*
     * Process Affinity Algorithm:
     *
     * The code below works in 3 stages. The end result is an array of all the possible
     * binding bitmaps for a system, based on the options specified.
     *
     * 1. Define all possible mapping "domains" in a system. A mapping domain is a group
     *    of hardware elements found by traversing the topology. Each traversal skips the
     *    number of elements the user specified in the mapping string. The traversal ends
     *    when the next mapping domain == the first mapping domain. Note that if the
     *    mapping string defines a domain that is larger than the system size, we exit
     *    with an error.
     *
     * 2. Define the number of possible binding domains within a mapping domain. This
     *    process is similar to step 1, in that we traverse the mapping domain finding
     *    all possible bind combinations, stopping when a duplicate of the first binding
     *    is reached. If a binding is larger (in # of PUs) than the mapping domain,
     *    the number of possible bindings for that domain is 1. In this stage, we also
     *    locate the first PU in each mapping domain for use later during binding.
     *
     * 3. Create the binding bitmaps. We allocate an array of bitmaps and fill them in
     *    with all possible bindings. The starting PU in each mapping domain is advanced
     *    if and when we wrap around to the beginning of the mapping domains. This ensures
     *    that we do not repeat.
     *
     */

    /* calculate the number of map domains */
    total_map_objs = hwloc_get_nbobjs_by_depth(topology, map_depth);
    num_pus_in_map_domain = (HYDT_topo_hwloc_info.total_num_pus / total_map_objs) * map_count;
    HYDU_ERR_CHKANDJUMP(status, num_pus_in_map_domain > HYDT_topo_hwloc_info.total_num_pus,
                        HYD_INTERNAL_ERROR, "mapping option \"%s\" larger than total system size\n",
                        mapping);

    /* The number of total_map_domains should be large enough to
     * contain all contiguous map object collections of length
     * map_count.  For example, if the map object is "socket" and the
     * map_count is 3, on a system with 4 sockets, the following map
     * domains should be included: (0,1,2), (3,0,1), (2,3,0), (1,2,3).
     * We do this by finding how many times we need to replicate the
     * list of the map objects so that an integral number of map
     * domains can map to them.  In the above case, the list of map
     * objects is replicated 3 times. */
    for (i = 1; (i * total_map_objs) % map_count; i++);
    total_map_domains = (i * total_map_objs) / map_count;

    /* initialize the map domains */
    HYDU_MALLOC_OR_JUMP(map_domains, hwloc_bitmap_t *, total_map_domains * sizeof(hwloc_bitmap_t),
                        status);
    HYDU_MALLOC_OR_JUMP(start_pu, hwloc_obj_t *, total_map_domains * sizeof(hwloc_obj_t), status);

    /* For each map domain, find the next map object (first map object
     * for the first map domain) and add the following "map_count"
     * number of contiguous map objects, wrapping to the first one if
     * needed, to the map domain.  Store the first PU in the first map
     * object of the map domain as "start_pu".  This is needed later
     * for the actual binding. */
    map_obj = NULL;
    for (i = 0; i < total_map_domains; i++) {
        map_domains[i] = hwloc_bitmap_alloc();
        hwloc_bitmap_zero(map_domains[i]);

        for (j = 0; j < map_count; j++) {
            map_obj = hwloc_get_next_obj_by_depth(topology, map_depth, map_obj);
            /* map_obj will be NULL if it reaches the end. call again to wrap around */
            if (!map_obj)
                map_obj = hwloc_get_next_obj_by_depth(topology, map_depth, map_obj);

            if (j == 0)
                start_pu[i] =
                    hwloc_get_obj_inside_cpuset_by_type(topology, map_obj->cpuset, HWLOC_OBJ_PU, 0);

            hwloc_bitmap_or(map_domains[i], map_domains[i], map_obj->cpuset);
        }
    }


    /* Finding the possible binding domains is similar to finding the map
     * domains.  But if a binding domain is larger (in # of PUs) than
     * the mapping domain, the number of possible bindings for that
     * domain is 1. */

    /* calculate the number of possible bindings and allocate bitmaps for them */
    total_bind_objs = hwloc_get_nbobjs_by_depth(topology, bind_depth);
    num_pus_in_bind_domain = (HYDT_topo_hwloc_info.total_num_pus / total_bind_objs) * bind_count;

    if (num_pus_in_bind_domain < num_pus_in_map_domain) {
        for (i = 1; (i * num_pus_in_map_domain) % num_pus_in_bind_domain; i++);
        HYDT_topo_hwloc_info.num_bitmaps =
            (i * num_pus_in_map_domain * total_map_domains) / num_pus_in_bind_domain;
    }
    else {
        HYDT_topo_hwloc_info.num_bitmaps = total_map_domains;
    }

    /* initialize bitmaps */
    HYDU_MALLOC_OR_JUMP(HYDT_topo_hwloc_info.bitmap, hwloc_bitmap_t *,
                        HYDT_topo_hwloc_info.num_bitmaps * sizeof(hwloc_bitmap_t), status);

    for (i = 0; i < HYDT_topo_hwloc_info.num_bitmaps; i++) {
        HYDT_topo_hwloc_info.bitmap[i] = hwloc_bitmap_alloc();
        hwloc_bitmap_zero(HYDT_topo_hwloc_info.bitmap[i]);
    }

    /* do bindings */
    i = 0;
    while (i < HYDT_topo_hwloc_info.num_bitmaps) {
        for (j = 0; j < total_map_domains; j++) {
            bind_obj = hwloc_get_ancestor_obj_by_depth(topology, bind_depth, start_pu[j]);

            for (k = 0; k < bind_count; k++) {
                hwloc_bitmap_or(HYDT_topo_hwloc_info.bitmap[i], HYDT_topo_hwloc_info.bitmap[i],
                                bind_obj->cpuset);

                /* if the binding is smaller than the mapping domain, wrap around inside that domain */
                if (num_pus_in_bind_domain < num_pus_in_map_domain) {
                    bind_obj =
                        hwloc_get_next_obj_inside_cpuset_by_depth(topology, map_domains[j],
                                                                  bind_depth, bind_obj);
                    if (!bind_obj)
                        bind_obj =
                            hwloc_get_next_obj_inside_cpuset_by_depth(topology, map_domains[j],
                                                                      bind_depth, bind_obj);
                }
                else {
                    bind_obj = hwloc_get_next_obj_by_depth(topology, bind_depth, bind_obj);
                    if (!bind_obj)
                        bind_obj = hwloc_get_next_obj_by_depth(topology, bind_depth, bind_obj);
                }

            }
            i++;

            /* advance the starting position for this map domain, if needed */
            if (num_pus_in_bind_domain < num_pus_in_map_domain) {
                for (k = 0; k < num_pus_in_bind_domain; k++) {
                    start_pu[j] = hwloc_get_next_obj_inside_cpuset_by_type(topology, map_domains[j],
                                                                           HWLOC_OBJ_PU,
                                                                           start_pu[j]);
                    if (!start_pu[j])
                        start_pu[j] =
                            hwloc_get_next_obj_inside_cpuset_by_type(topology, map_domains[j],
                                                                     HWLOC_OBJ_PU, start_pu[j]);
                }
            }
        }
    }

    /* free temporary memory */
    MPL_free(map_domains);
    MPL_free(start_pu);

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
Example #22
extern int
get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
	    uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads,
	    uint16_t *p_block_map_size,
	    uint16_t **p_block_map, uint16_t **p_block_map_inv)
{
	enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 };
	hwloc_topology_t topology;
	hwloc_obj_t obj;
	hwloc_obj_type_t objtype[LAST_OBJ];
	unsigned idx[LAST_OBJ];
	int nobj[LAST_OBJ];
	int actual_cpus;
	int macid;
	int absid;
	int actual_boards = 1, depth;
	int i;

	debug2("hwloc_topology_init");
	if (hwloc_topology_init(&topology)) {
		/* error initializing the hwloc library */
		debug("hwloc_topology_init() failed.");
		return 1;
	}

	/* parse the whole system */
	hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);

	/* ignore cache and misc objects */
	hwloc_topology_ignore_type (topology, HWLOC_OBJ_CACHE);
	hwloc_topology_ignore_type (topology, HWLOC_OBJ_MISC);

	/* load topology */
	debug2("hwloc_topology_load");
	if (hwloc_topology_load(topology)) {
		/* error loading the hardware topology */
		debug("hwloc_topology_load() failed.");
		hwloc_topology_destroy(topology);
		return 2;
	}

	/* Some processors (e.g. AMD Opteron 6000 series) contain multiple
	 * NUMA nodes per socket. This is a configuration which does not map
	 * into the hardware entities that Slurm optimizes resource allocation
	 * for (PU/thread, core, socket, baseboard, node and network switch).
	 * In order to optimize resource allocations on such hardware, Slurm
	 * will consider each NUMA node within the socket as a separate socket.
	 * You can disable this configuring "SchedulerParameters=Ignore_NUMA",
	 * in which case Slurm will report the correct socket count on the node,
	 * but not be able to optimize resource allocations on the NUMA nodes.
	 */
	objtype[SOCKET] = HWLOC_OBJ_SOCKET;
	objtype[CORE]   = HWLOC_OBJ_CORE;
	objtype[PU]     = HWLOC_OBJ_PU;
	if (hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
	    hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET)) {
		char *sched_params = slurm_get_sched_params();
		if (sched_params &&
		    strcasestr(sched_params, "Ignore_NUMA")) {
			info("Ignoring NUMA nodes within a socket");
		} else {
			info("Considering each NUMA node as a socket");
			objtype[SOCKET] = HWLOC_OBJ_NODE;
		}
		xfree(sched_params);
	}

	/* number of objects */
	depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP);
	if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
		actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth),
				    1);
	}
	nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology, objtype[SOCKET]);
	nobj[CORE]   = hwloc_get_nbobjs_by_type(topology, objtype[CORE]);
	/*
	 * Workaround for hwloc
	 * hwloc_get_nbobjs_by_type() returns 0 on some architectures.
	 */
	if ( nobj[SOCKET] == 0 ) {
		debug("get_cpuinfo() fudging nobj[SOCKET] from 0 to 1");
		nobj[SOCKET] = 1;
	}
	if ( nobj[CORE] == 0 ) {
		debug("get_cpuinfo() fudging nobj[CORE] from 0 to 1");
		nobj[CORE] = 1;
	}
	if ( nobj[SOCKET] == -1 )
		fatal("get_cpuinfo() can not handle nobj[SOCKET] = -1");
	if ( nobj[CORE] == -1 )
		fatal("get_cpuinfo() can not handle nobj[CORE] = -1");
	actual_cpus  = hwloc_get_nbobjs_by_type(topology, objtype[PU]);
#if 0
	/* Used to find workaround above */
	info("CORE = %d SOCKET = %d actual_cpus = %d nobj[CORE] = %d",
	     CORE, SOCKET, actual_cpus, nobj[CORE]);
#endif
	nobj[PU]     = actual_cpus/nobj[CORE];  /* threads per core */
	nobj[CORE]  /= nobj[SOCKET];            /* cores per socket */

	debug("CPUs:%d Boards:%u Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d",
	      actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]);

	/* allocate block_map */
	*p_block_map_size = (uint16_t)actual_cpus;
	if (p_block_map && p_block_map_inv) {
		*p_block_map     = xmalloc(actual_cpus * sizeof(uint16_t));
		*p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t));

		/* initialize default as linear mapping */
		for (i = 0; i < actual_cpus; i++) {
			(*p_block_map)[i]     = i;
			(*p_block_map_inv)[i] = i;
		}
		/* create map with hwloc */
		for (idx[SOCKET]=0; idx[SOCKET]<nobj[SOCKET]; ++idx[SOCKET]) {
			for (idx[CORE]=0; idx[CORE]<nobj[CORE]; ++idx[CORE]) {
				for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) {
					/* get hwloc_obj by indexes */
					obj=hwloc_get_obj_below_array_by_type(
					            topology, 3, objtype, idx);
					if (!obj)
						continue;
					macid = obj->os_index;
					absid = idx[SOCKET]*nobj[CORE]*nobj[PU]
					      + idx[CORE]*nobj[PU]
					      + idx[PU];

					if ((macid >= actual_cpus) ||
					    (absid >= actual_cpus)) {
					/* physical or logical ID is
					 * out of range */
						continue;
					}
					debug4("CPU map[%d]=>%d", absid, macid);
					(*p_block_map)[absid]     = macid;
					(*p_block_map_inv)[macid] = absid;
				}
			 }
		}
	}

	hwloc_topology_destroy(topology);

	/* update output parameters */
	*p_cpus    = actual_cpus;
	*p_boards  = actual_boards;
	*p_sockets = nobj[SOCKET];
	*p_cores   = nobj[CORE];
	*p_threads = nobj[PU];

#if DEBUG_DETAIL
	/*** Display raw data ***/
	debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u",
	      *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads);

	/* Display the mapping tables */
	if (p_block_map && p_block_map_inv) {
		debug("------");
		debug("Abstract -> Machine logical CPU ID block mapping:");
		debug("AbstractId PhysicalId Inverse");
		for (i = 0; i < *p_cpus; i++) {
			debug3("   %4d      %4u       %4u",
				i, (*p_block_map)[i], (*p_block_map_inv)[i]);
		}
		debug("------");
	}
#endif
	return 0;

}
Example #23
//Initializes HWLOC and load the machine architecture
int hw_topology_init (struct arch_topology *topo)
{
  hwloc_obj_t obj, core1, core2;
  int count, i, j, error;


  //Create the machine representation
  error = hwloc_topology_init(&topology);
  //Walk the topology only if HWLOC was
  //successfully initialized
  if(!error)
  {
    hwloc_topology_load(topology);
    local_topo = malloc(sizeof(struct arch_topology));

#if defined  (__DBCSR_ACC) || defined (__PW_CUDA)
  int nDev;
  ma_get_ndevices_cu(&nDev);
#endif

    //Extract number of NUMA nodes
    if (hwloc_get_type_depth (topology, HWLOC_OBJ_NODE))
     topo->nnodes = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_NODE));
    else
     topo->nnodes = 0;

    //Get number of cores, sockets and processing units
    topo->ncores = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_CORE));
    topo->nsockets = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_SOCKET)); 
    topo->npus = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_PU));

   //Compute number of memory controllers per socket
   //basically the number of NUMA nodes per socket
   if (topo->nnodes > topo->nsockets)
    topo->nmemcontroller = topo->nnodes/topo->nsockets;
   else
    topo->nmemcontroller = 1;

   count = 0;
   topo->nshared_caches = 0;
   //Get derived information - number of caches above each PU
   for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,0);
      obj; obj = obj->parent)
   {
     if (obj->type == HWLOC_OBJ_CACHE)
     {
       if (obj->arity > 1)
         topo->nshared_caches++;
       else
       { count++;  topo->ncaches = count; }
     }
   }

  //Number of direct siblings
  //Sibling cores are those that share at least one
  //component level of the architecture
  count = 0;
  core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 0);
  core2 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 1);
  obj   = hwloc_get_common_ancestor_obj(topology, core1, core2);
  if (obj) 
    topo->nsiblings = obj->arity;

  //Machine node and core representation
  machine_nodes = (struct node*) malloc (topo->nnodes*sizeof(struct node));
  machine_cores = (struct core*) malloc (topo->ncores*sizeof(struct core));

  phys_cpus = malloc (topo->ncores*sizeof(int));
  get_phys_id(topology, topo->ncores, 0);
 
  //Get the caches sizes and other information for each core  
  for (i = 0; i < topo->ncores ; i++)
   {
	      machine_cores[i].caches = malloc (topo->ncaches*sizeof(size_t));
        machine_cores[i].shared_caches = malloc (topo->ncaches*sizeof(int));

        for (j = 0; j < topo->ncaches; j++)
         machine_cores[i].shared_caches[j] = 0;
        for (j = topo->ncaches ; j > topo->ncaches - topo->nshared_caches; j--)
         machine_cores[i].shared_caches[j-1] = 1;

        machine_cores[i].nsiblings = topo->nsiblings;
        machine_cores[i].siblings_id = malloc (topo->nsiblings*sizeof(unsigned));
        if(topo->ncores == topo->npus){
          core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, i);
 	         machine_cores[i].id = core1->os_index;
          count = 0;
          for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i);
              obj; obj = obj->parent) {
	          if (obj->type == HWLOC_OBJ_CACHE){
	        	  machine_cores[i].caches[count] = obj->attr->cache.size / 1024;
			        count++;
		        }
            if (obj->type == HWLOC_OBJ_NODE)
              machine_cores[i].numaNode = obj->logical_index;                
 	        }
       }
       else{
	        core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, i);
 	        machine_cores[i].id = core1->os_index;
          count = 0;
          for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_CORE,i);
              obj; obj = obj->parent) {
	          if (obj->type == HWLOC_OBJ_CACHE) {
    	        	machine_cores[i].caches[count] = obj->attr->cache.size / 1024;
			          count++;
		        }
           if (obj->type == HWLOC_OBJ_NODE)
                machine_cores[i].numaNode = obj->logical_index;
 	        }
       }
   }    

  //Get siblings id - so each core knows its siblings
  for (i = 0; i < topo->ncores ; i++)
   {
        if(topo->ncores == topo->npus){
          core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, i);
          set_phys_siblings(i,machine_cores[i].id,core1,topo->ncores,topo->nsiblings,HWLOC_OBJ_PU);
        }
        else{
          core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, i);
          set_phys_siblings(i,machine_cores[i].id,core1,topo->ncores,topo->nsiblings,HWLOC_OBJ_CORE);
        }

  }

   int ncore_node = topo->nnodes ? topo->ncores/topo->nnodes : topo->ncores; /* avoid dividing by zero when no NUMA level was found */
   int count_cores;
  //Get the information for each NUMAnode
  for (i = 0; i < topo->nnodes ; i++)
   {
        obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, i);
        
      	machine_nodes[i].id = obj->os_index;
      	machine_nodes[i].memory = obj->memory.total_memory;
        machine_nodes[i].ncores = ncore_node;
        machine_nodes[i].mycores = malloc (ncore_node*sizeof(unsigned)); 

        //Get the cores id of each NUMAnode
        count_cores = 0;
        set_node_cores(topology, obj, i, &count_cores);

       //GPU support
#if defined  (__DBCSR_ACC) || defined (__PW_CUDA)
       int *devIds;
       devIds = malloc (nDev*sizeof(int));
       topo->ngpus = nDev;
       ma_get_cu(i,devIds); 
       machine_nodes[i].mygpus = devIds;
#endif    	
   }    

  //counting network cards
  count = 0;
  hwloc_topology_t topo_net;
  error = hwloc_topology_init(&topo_net);  
  hwloc_topology_set_flags(topo_net, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);  
  if (!error){
      hwloc_topology_load(topo_net);
      for (obj = hwloc_get_obj_by_type(topo_net, HWLOC_OBJ_OS_DEVICE, 0);
           obj;
           obj = hwloc_get_next_osdev(topo_net,obj))
        if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK ||
            obj->attr->osdev.type == HWLOC_OBJ_OSDEV_OPENFABRICS)
              count++;   
      topo->nnetcards = count;
  } 
  else //if I/O devices cannot be loaded
   topo->nnetcards = 0;  
  hwloc_topology_destroy(topo_net); 
  

 /*Local copy of the machine topology components*/  
 local_topo->nnodes = topo->nnodes;
 local_topo->nsockets = topo->nsockets;
 local_topo->ncores = topo->ncores;
 local_topo->npus = topo->npus;
 local_topo->ngpus = topo->ngpus;
 local_topo->ncaches = topo->ncaches;
 local_topo->nshared_caches = topo->nshared_caches;
 local_topo->nsiblings = topo->nsiblings;
 local_topo->nmemcontroller =  topo->nmemcontroller;
 local_topo->nnetcards = topo->nnetcards;
}

 return error;

}
Example #24
int main(void)
{
    int depth;
    unsigned i, n;
    unsigned long size;
    int levels;
    char string[128];
    int topodepth;
    void *m;
    hwloc_topology_t topology;
    hwloc_cpuset_t cpuset;
    hwloc_obj_t obj;

    /* Allocate and initialize topology object. */
    hwloc_topology_init(&topology);

    /* ... Optionally, put detection configuration here to ignore
       some objects types, define a synthetic topology, etc....  

       The default is to detect all the objects of the machine that
       the caller is allowed to access.  See Configure Topology
       Detection. */

    /* Perform the topology detection. */
    hwloc_topology_load(topology);

    /* Optionally, get some additional topology information
       in case we need the topology depth later. */
    topodepth = hwloc_topology_get_depth(topology);

    /*****************************************************************
     * First example:
     * Walk the topology with an array style, from level 0 (always
     * the system level) to the lowest level (always the proc level).
     *****************************************************************/
    for (depth = 0; depth < topodepth; depth++) {
        printf("*** Objects at level %d\n", depth);
        for (i = 0; i < hwloc_get_nbobjs_by_depth(topology, depth); 
             i++) {
            hwloc_obj_type_snprintf(string, sizeof(string),
				    hwloc_get_obj_by_depth(topology, depth, i), 0);
            printf("Index %u: %s\n", i, string);
        }
    }

    /*****************************************************************
     * Second example:
     * Walk the topology with a tree style.
     *****************************************************************/
    printf("*** Printing overall tree\n");
    print_children(topology, hwloc_get_root_obj(topology), 0);

    /*****************************************************************
     * Third example:
     * Print the number of packages.
     *****************************************************************/
    depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PACKAGE);
    if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
        printf("*** The number of packages is unknown\n");
    } else {
        printf("*** %u package(s)\n",
               hwloc_get_nbobjs_by_depth(topology, depth));
    }

    /*****************************************************************
     * Fourth example:
     * Compute the amount of cache that the first logical processor
     * has above it.
     *****************************************************************/
    levels = 0;
    size = 0;
    for (obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0);
         obj;
         obj = obj->parent)
      if (obj->type == HWLOC_OBJ_CACHE) {
        levels++;
        size += obj->attr->cache.size;
      }
    printf("*** Logical processor 0 has %d caches totaling %luKB\n", 
           levels, size / 1024);

    /*****************************************************************
     * Fifth example:
     * Bind to only one thread of the last core of the machine.
     *
     * First find out where cores are, or else smaller sets of CPUs if
     * the OS doesn't have the notion of a "core".
     *****************************************************************/
    depth = hwloc_get_type_or_below_depth(topology, HWLOC_OBJ_CORE);

    /* Get last core. */
    obj = hwloc_get_obj_by_depth(topology, depth,
                   hwloc_get_nbobjs_by_depth(topology, depth) - 1);
    if (obj) {
        /* Get a copy of its cpuset that we may modify. */
        cpuset = hwloc_bitmap_dup(obj->cpuset);

        /* Get only one logical processor (in case the core is
           SMT/hyper-threaded). */
        hwloc_bitmap_singlify(cpuset);

        /* And try to bind ourselves there. */
        if (hwloc_set_cpubind(topology, cpuset, 0)) {
            char *str;
            int error = errno;
            hwloc_bitmap_asprintf(&str, obj->cpuset);
            printf("Couldn't bind to cpuset %s: %s\n", str, strerror(error));
            free(str);
        }

        /* Free our cpuset copy */
        hwloc_bitmap_free(cpuset);
    }
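    /* Hedged variant: flags 0 above binds the whole current process.
       To bind only the calling thread, one could pass
       HWLOC_CPUBIND_THREAD instead (a sketch only; cpuset has already
       been freed at this point):

       hwloc_set_cpubind(topology, cpuset, HWLOC_CPUBIND_THREAD);
    */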

    /*****************************************************************
     * Sixth example:
     * Allocate some memory on the last NUMA node, bind some existing
     * memory to the last NUMA node.
     *****************************************************************/
    /* Get last node. There's always at least one. */
    n = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE);
    obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, n - 1);

    size = 1024*1024;
    m = hwloc_alloc_membind_nodeset(topology, size, obj->nodeset,
                                    HWLOC_MEMBIND_BIND, 0);
    hwloc_free(topology, m, size);

    m = malloc(size);
    hwloc_set_area_membind_nodeset(topology, m, size, obj->nodeset,
                                   HWLOC_MEMBIND_BIND, 0);
    free(m);
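    /* Error handling is elided above for brevity:
       hwloc_alloc_membind_nodeset() returns NULL on failure and
       hwloc_set_area_membind_nodeset() returns -1 with errno set, both
       of which a more careful sketch would check before relying on the
       binding. */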

    /* Destroy topology object. */
    hwloc_topology_destroy(topology);

    return 0;
}
Example #25
0
int main(void)
{
    hwloc_topology_t topology;
    unsigned depth;
    hwloc_obj_t objs[OBJ_MAX];
    hwloc_obj_t obj;
    hwloc_bitmap_t set;
    int ret;

    hwloc_topology_init(&topology);
    hwloc_topology_set_synthetic(topology, SYNTHETIC_TOPOLOGY_DESCRIPTION);
    hwloc_topology_load(topology);
    depth = hwloc_topology_get_depth(topology);

    /* just get the system object */
    obj = hwloc_get_root_obj(topology);
    ret = hwloc_get_largest_objs_inside_cpuset(topology, obj->cpuset, objs, 1);
    assert(ret == 1);
    assert(objs[0] == obj);
    objs[0] = hwloc_get_first_largest_obj_inside_cpuset(topology, obj->cpuset);
    assert(objs[0] == obj);

    /* just get the very last object */
    obj = hwloc_get_obj_by_depth(topology, depth-1, hwloc_get_nbobjs_by_depth(topology, depth-1)-1);
    ret = hwloc_get_largest_objs_inside_cpuset(topology, obj->cpuset, objs, 1);
    assert(ret == 1);
    assert(objs[0] == obj);

    /* try an empty one */
    set = hwloc_bitmap_alloc();
    ret = hwloc_get_largest_objs_inside_cpuset(topology, set, objs, 1);
    assert(ret == 0);
    objs[0] = hwloc_get_first_largest_obj_inside_cpuset(topology, set);
    assert(objs[0] == NULL);
    hwloc_bitmap_free(set);

    /* try an impossible one */
    set = hwloc_bitmap_alloc();
    hwloc_bitmap_sscanf(set, GIVEN_TOOLARGE_CPUSET_STRING);
    ret = hwloc_get_largest_objs_inside_cpuset(topology, set, objs, 1);
    assert(ret == -1);
    objs[0] = hwloc_get_first_largest_obj_inside_cpuset(topology, set);
    assert(objs[0] == NULL);
    hwloc_bitmap_free(set);

    /* try a harder one with 1 obj instead of 2 needed */
    set = hwloc_bitmap_alloc();
    hwloc_bitmap_sscanf(set, GIVEN_LARGESPLIT_CPUSET_STRING);
    ret = hwloc_get_largest_objs_inside_cpuset(topology, set, objs, 1);
    assert(ret == 1);
    assert(objs[0] == hwloc_get_obj_by_depth(topology, depth-1, 0));
    objs[0] = hwloc_get_first_largest_obj_inside_cpuset(topology, set);
    assert(objs[0] == hwloc_get_obj_by_depth(topology, depth-1, 0));
    /* try a harder one with lots of objs instead of 2 needed */
    ret = hwloc_get_largest_objs_inside_cpuset(topology, set, objs, 2);
    assert(ret == 2);
    assert(objs[0] == hwloc_get_obj_by_depth(topology, depth-1, 0));
    assert(objs[1] == hwloc_get_obj_by_depth(topology, depth-1, hwloc_get_nbobjs_by_depth(topology, depth-1)-1));
    objs[0] = hwloc_get_first_largest_obj_inside_cpuset(topology, set);
    hwloc_bitmap_andnot(set, set, objs[0]->cpuset);
    objs[1] = hwloc_get_first_largest_obj_inside_cpuset(topology, set);
    hwloc_bitmap_andnot(set, set, objs[1]->cpuset);
    objs[2] = hwloc_get_first_largest_obj_inside_cpuset(topology, set);
    assert(objs[0] == hwloc_get_obj_by_depth(topology, depth-1, 0));
    assert(objs[1] == hwloc_get_obj_by_depth(topology, depth-1, hwloc_get_nbobjs_by_depth(topology, depth-1)-1));
    assert(objs[2] == NULL);
    assert(hwloc_bitmap_iszero(set));
    hwloc_bitmap_free(set);

    /* try a very hard one */
    set = hwloc_bitmap_alloc();
    hwloc_bitmap_sscanf(set, GIVEN_HARD_CPUSET_STRING);
    ret = hwloc_get_largest_objs_inside_cpuset(topology, set, objs, OBJ_MAX);
    assert(ret == 7);
    assert(objs[0] == hwloc_get_obj_by_depth(topology, 5, 29));
    assert(objs[1] == hwloc_get_obj_by_depth(topology, 3, 5));
    assert(objs[2] == hwloc_get_obj_by_depth(topology, 3, 6));
    assert(objs[3] == hwloc_get_obj_by_depth(topology, 3, 7));
    assert(objs[4] == hwloc_get_obj_by_depth(topology, 2, 2));
    assert(objs[5] == hwloc_get_obj_by_depth(topology, 4, 36));
    assert(objs[6] == hwloc_get_obj_by_depth(topology, 5, 74));
    hwloc_bitmap_free(set);

    hwloc_topology_destroy(topology);

    return EXIT_SUCCESS;
}
Example #26
0
int main(void)
{
  hwloc_topology_t topology;
  unsigned depth;
  unsigned i,j, width;
  char buffer[1024];
  int err;

  /* check a synthetic topology */
  hwloc_topology_init(&topology);
  err = hwloc_topology_set_synthetic(topology, "2 3 4 5 6");
  assert(!err);
  hwloc_topology_load(topology);

  /* internal checks */

  hwloc_topology_check(topology);

  /* local checks */
  depth = hwloc_topology_get_depth(topology);
  assert(depth == 6);
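  /* depth 6 = the five synthetic levels plus the implicit Machine root
     at depth 0 */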

  width = 1;
  for(i=0; i<6; i++) {
    /* check arities */
    assert(hwloc_get_nbobjs_by_depth(topology, i) == width);
    for(j=0; j<width; j++) {
      hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, i, j);
      assert(obj);
      assert(obj->arity == (i<5 ? i+2 : 0));
    }
    width *= i+2;
  }

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0);
  assert(err == 75);
  err = strcmp("Package:2 NUMANode:3(memory=1073741824) L2Cache:4(size=4194304) Core:5 PU:6", buffer);
  assert(!err);

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES|HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS);
  assert(err == 42);
  err = strcmp("Package:2 NUMANode:3 L2Cache:4 Core:5 PU:6", buffer);
  assert(!err);

  hwloc_topology_destroy(topology);



  hwloc_topology_init(&topology);
  err = hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L1ICACHE, HWLOC_TYPE_FILTER_KEEP_ALL);
  err = hwloc_topology_set_synthetic(topology, "pack:2(indexes=3,5) numa:2(memory=256GB indexes=pack) l3u:1(size=20mb) l2:2 l1i:1(size=16kB) l1dcache:2 core:1 pu:2(indexes=l2)");
  assert(!err);
  hwloc_topology_load(topology);

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0);
  assert(err == 181);
  err = strcmp("Package:2 NUMANode:2(memory=274877906944 indexes=2*2:1*2) L3Cache:1(size=20971520) L2Cache:2(size=4194304) L1iCache:1(size=16384) L1dCache:2(size=32768) Core:1 PU:2(indexes=4*8:1*4)", buffer);
  assert(!err);

  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PACKAGE, 1)->os_index == 5);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 1)->os_index == 2);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 12)->os_index == 3);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 13)->os_index == 11);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 14)->os_index == 19);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 15)->os_index == 27);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 16)->os_index == 4);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 17)->os_index == 12);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 18)->os_index == 20);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 19)->os_index == 28);

  hwloc_topology_destroy(topology);




  hwloc_topology_init(&topology);
  err = hwloc_topology_set_synthetic(topology, "pack:2 core:2 pu:2(indexes=0,4,2,6,1,5,3,7)");
  assert(!err);
  hwloc_topology_load(topology);

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0);
  assert(err == 72);
  err = strcmp("NUMANode:1(memory=1073741824) Package:2 Core:2 PU:2(indexes=4*2:2*2:1*2)", buffer);
  assert(!err);

  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0)->os_index == 0);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 1)->os_index == 4);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 2)->os_index == 2);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 3)->os_index == 6);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 4)->os_index == 1);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 5)->os_index == 5);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 6)->os_index == 3);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 7)->os_index == 7);

  hwloc_topology_destroy(topology);




  hwloc_topology_init(&topology);
  err = hwloc_topology_set_synthetic(topology, "pack:2 numa:2 core:1 pu:2(indexes=0,4,2,6,1,3,5,7)");
  assert(!err);
  hwloc_topology_load(topology);

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0);
  assert(err == 76);
  err = strcmp("Package:2 NUMANode:2(memory=1073741824) Core:1 PU:2(indexes=0,4,2,6,1,3,5,7)", buffer);
  assert(!err);

  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0)->os_index == 0);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 1)->os_index == 4);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 2)->os_index == 2);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 3)->os_index == 6);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 4)->os_index == 1);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 5)->os_index == 3);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 6)->os_index == 5);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 7)->os_index == 7);

  hwloc_topology_destroy(topology);

  return 0;
}
Example #27
0
int
pocl_topology_detect_device_info(cl_device_id device)
{
  hwloc_topology_t pocl_topology;
  int ret = 0;

#ifdef HWLOC_API_2
  if (hwloc_get_api_version () < 0x20000)
    POCL_MSG_ERR ("pocl was compiled against libhwloc 2.x but is "
                  "actually running against libhwloc 1.x\n");
#else
  if (hwloc_get_api_version () >= 0x20000)
    POCL_MSG_ERR ("pocl was compiled against libhwloc 1.x but is "
                  "actually running against libhwloc 2.x\n");
#endif

  /* hwloc's OpenCL backend causes problems at the initialization stage
   * because it reloads libpocl.so via the ICD loader.
   *
   * See: https://github.com/pocl/pocl/issues/261
   *
   * The only trick to stop hwloc from initializing the OpenCL plugin
   * I could find is to point the plugin search path to a place where
   * there are no plugins to be found.
   */
  setenv ("HWLOC_PLUGINS_PATH", "/dev/null", 1);

  ret = hwloc_topology_init (&pocl_topology);
  if (ret == -1)
  {
    POCL_MSG_ERR ("Cannot initialize the topology.\n");
    return ret;
  }

#ifdef HWLOC_API_2
  hwloc_topology_set_io_types_filter(pocl_topology, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_SYSTEM, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_GROUP, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_BRIDGE, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_MISC, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_PCI_DEVICE, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_OS_DEVICE, HWLOC_TYPE_FILTER_KEEP_NONE);
#else
  /* I/O objects are not detected by default with hwloc 1.x, so they do
     not need to be ignored explicitly here. */
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_SYSTEM);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_GROUP);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_BRIDGE);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_MISC);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_PCI_DEVICE);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_OS_DEVICE);
#endif

  ret = hwloc_topology_load (pocl_topology);
  if (ret == -1)
  {
    POCL_MSG_ERR ("Cannot load the topology.\n");
    goto exit_destroy;
  }

#ifdef HWLOC_API_2
  device->global_mem_size =
      hwloc_get_root_obj(pocl_topology)->total_memory;
#else
  device->global_mem_size =
      hwloc_get_root_obj(pocl_topology)->memory.total_memory;
#endif

  // Try to get the number of CPU cores from topology
  int depth = hwloc_get_type_depth(pocl_topology, HWLOC_OBJ_PU);
  if(depth != HWLOC_TYPE_DEPTH_UNKNOWN)
    device->max_compute_units = hwloc_get_nbobjs_by_depth(pocl_topology, depth);
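  /* Hedged equivalent: since PUs always live at a single depth, the
     type-based counter would also work here:
     device->max_compute_units =
         hwloc_get_nbobjs_by_type (pocl_topology, HWLOC_OBJ_PU); */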

  /* Find information about global memory cache by looking at the first
   * cache covering the first PU */
  do {
      size_t cache_size = 0, cacheline_size = 0;

      hwloc_obj_t core
          = hwloc_get_next_obj_by_type (pocl_topology, HWLOC_OBJ_CORE, NULL);
      if (core)
        {
          hwloc_obj_t cache
              = hwloc_get_shared_cache_covering_obj (pocl_topology, core);
          if ((cache) && (cache->attr))
            {
              cacheline_size = cache->attr->cache.linesize;
              cache_size = cache->attr->cache.size;
            }
          else
            core = NULL; /* fallback to L1 cache size */
        }

      hwloc_obj_t pu
          = hwloc_get_next_obj_by_type (pocl_topology, HWLOC_OBJ_PU, NULL);
      if (!core && pu)
        {
          hwloc_obj_t cache
              = hwloc_get_shared_cache_covering_obj (pocl_topology, pu);
          if ((cache) && (cache->attr))
            {
              cacheline_size = cache->attr->cache.linesize;
              cache_size = cache->attr->cache.size;
            }
        }

      if (!cache_size || !cacheline_size)
        break;

      device->global_mem_cache_type
          = 0x2; // CL_READ_WRITE_CACHE, without including all of CL/cl.h
      device->global_mem_cacheline_size = cacheline_size;
      device->global_mem_cache_size = cache_size;
  } while (0);
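  /* The do { ... } while (0) wrapper above only exists so that `break`
     can abandon the cache probing early when no usable cache
     information is found, without a goto; in that case the device
     fields keep their defaults. */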

  // Destroy topology object and return
exit_destroy:
  hwloc_topology_destroy (pocl_topology);
  return ret;
}
Example #28
0
static int
hwloc_solaris_set_sth_cpubind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_const_bitmap_t hwloc_set, int flags)
{
  unsigned target_cpu;

  /* The resulting binding is always strict */

  if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology))) {
    if (processor_bind(idtype, id, PBIND_NONE, NULL) != 0)
      return -1;
#ifdef HAVE_LIBLGRP
    if (!(flags & HWLOC_CPUBIND_NOMEMBIND)) {
      int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
      if (depth >= 0) {
	int n = hwloc_get_nbobjs_by_depth(topology, depth);
	int i;

	for (i = 0; i < n; i++) {
	  hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
	  lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE);
	}
      }
    }
#endif /* HAVE_LIBLGRP */
    return 0;
  }

#ifdef HAVE_LIBLGRP
  if (!(flags & HWLOC_CPUBIND_NOMEMBIND)) {
    int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
    if (depth >= 0) {
      int n = hwloc_get_nbobjs_by_depth(topology, depth);
      int i;
      int ok;
      hwloc_bitmap_t target = hwloc_bitmap_alloc();

      for (i = 0; i < n; i++) {
	hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
        if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set))
          hwloc_bitmap_or(target, target, obj->cpuset);
      }

      ok = hwloc_bitmap_isequal(target, hwloc_set);
      hwloc_bitmap_free(target);

      if (ok) {
        /* Ok, managed to achieve hwloc_set by just combining NUMA nodes */

        for (i = 0; i < n; i++) {
          hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);

          if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set)) {
            lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_STRONG);
          } else {
            if (flags & HWLOC_CPUBIND_STRICT)
              lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE);
            else
              lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_WEAK);
          }
        }

        return 0;
      }
    }
  }
#endif /* HAVE_LIBLGRP */

  if (hwloc_bitmap_weight(hwloc_set) != 1) {
    errno = EXDEV;
    return -1;
  }

  target_cpu = hwloc_bitmap_first(hwloc_set);

  if (processor_bind(idtype, id,
		     (processorid_t) (target_cpu), NULL) != 0)
    return -1;

  return 0;
}
Example #29
0
int main(void)
{
  hwloc_topology_t topology;
  hwloc_obj_t obj;
  unsigned indexes[5];
  float distances[5*5];
  unsigned depth;
  unsigned width;

  /* group 3 numa nodes as 1 group of 2 and 1 on the side */
  hwloc_topology_init(&topology);
  hwloc_topology_set_synthetic(topology, "node:3 pu:1");
  indexes[0] = 0; indexes[1] = 1; indexes[2] = 2;
  distances[0] = 1; distances[1] = 4; distances[2] = 4;
  distances[3] = 4; distances[4] = 1; distances[5] = 2;
  distances[6] = 4; distances[7] = 2; distances[8] = 1;
  hwloc_topology_set_distance_matrix(topology, HWLOC_OBJ_PU, 3, indexes, distances);
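  /* The matrix is row-major: distances[i*3+j] is the distance from
     object indexes[i] to indexes[j]. The diagonal is 1 and the single
     small off-diagonal value (2, between objects 1 and 2) marks the
     pair that hwloc should group. */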
  hwloc_topology_load(topology);
  /* 1 group at depth 1 */
  depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP);
  assert(depth == 1);
  width = hwloc_get_nbobjs_by_depth(topology, depth);
  assert(width == 1);
  /* 3 nodes at depth 2 */
  depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
  assert(depth == 2);
  width = hwloc_get_nbobjs_by_depth(topology, depth);
  assert(width == 3);
  /* find the root obj */
  obj = hwloc_get_root_obj(topology);
  assert(obj->arity == 2);
  /* check its children */
  assert(obj->children[0]->type == HWLOC_OBJ_NUMANODE);
  assert(obj->children[0]->depth == 2);
  assert(obj->children[0]->arity == 1);
  assert(obj->children[1]->type == HWLOC_OBJ_GROUP);
  assert(obj->children[1]->depth == 1);
  assert(obj->children[1]->arity == 2);
  hwloc_topology_destroy(topology);

  /* group 5 packages as 2 groups of 2 and 1 on the side, all of them below a common node object */
  hwloc_topology_init(&topology);
  hwloc_topology_set_synthetic(topology, "node:1 pack:5 pu:1");
  indexes[0] = 0; indexes[1] = 1; indexes[2] = 2; indexes[3] = 3; indexes[4] = 4;
  distances[ 0] = 1; distances[ 1] = 2; distances[ 2] = 4; distances[ 3] = 4; distances[ 4] = 4;
  distances[ 5] = 2; distances[ 6] = 1; distances[ 7] = 4; distances[ 8] = 4; distances[ 9] = 4;
  distances[10] = 4; distances[11] = 4; distances[12] = 1; distances[13] = 4; distances[14] = 4;
  distances[15] = 4; distances[16] = 4; distances[17] = 4; distances[18] = 1; distances[19] = 2;
  distances[20] = 4; distances[21] = 4; distances[22] = 4; distances[23] = 2; distances[24] = 1;
  hwloc_topology_set_distance_matrix(topology, HWLOC_OBJ_PACKAGE, 5, indexes, distances);
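  /* Same row-major layout, now 5x5: pairs (0,1) and (3,4) sit at
     distance 2 and each become a group, while package 2 is at distance
     4 from everything else and stays on its own. */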
  hwloc_topology_load(topology);
  /* 1 node at depth 1 */
  depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
  assert(depth == 1);
  width = hwloc_get_nbobjs_by_depth(topology, depth);
  assert(width == 1);
  /* 2 groups at depth 2 */
  depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP);
  assert(depth == 2);
  width = hwloc_get_nbobjs_by_depth(topology, depth);
  assert(width == 2);
  /* 5 packages at depth 3 */
  depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PACKAGE);
  assert(depth == 3);
  width = hwloc_get_nbobjs_by_depth(topology, depth);
  assert(width == 5);
  /* find the node obj */
  obj = hwloc_get_root_obj(topology);
  assert(obj->arity == 1);
  obj = obj->children[0];
  assert(obj->type == HWLOC_OBJ_NUMANODE);
  assert(obj->arity == 3);
  /* check its children */
  assert(obj->children[0]->type == HWLOC_OBJ_GROUP);
  assert(obj->children[0]->depth == 2);
  assert(obj->children[0]->arity == 2);
  assert(obj->children[1]->type == HWLOC_OBJ_PACKAGE);
  assert(obj->children[1]->depth == 3);
  assert(obj->children[1]->arity == 1);
  assert(obj->children[2]->type == HWLOC_OBJ_GROUP);
  assert(obj->children[2]->depth == 2);
  assert(obj->children[2]->arity == 2);
  hwloc_topology_destroy(topology);

  return 0;
}
Example #30
0
tm_topology_t* hwloc_to_tm(char *filename,double **pcost)
{
  hwloc_topology_t topology;
  tm_topology_t *res = NULL;
  hwloc_obj_t *objs = NULL;
  unsigned topodepth,depth;
  int nb_nodes,i;
  double *cost;
  int err;

  /* Build the topology */
  hwloc_topology_init(&topology);
  err = hwloc_topology_set_xml(topology,filename);
  if(err == -1){
    if(get_verbose_level() >= CRITICAL)
      fprintf(stderr,"Error: %s is a bad xml topology file!\n",filename);
    exit(-1);
  }

  hwloc_topology_ignore_all_keep_structure(topology);
  hwloc_topology_load(topology);


  /* Test if the topology is symmetric */
  if(!symetric(topology)){
    if(get_verbose_level() >= CRITICAL)
      fprintf(stderr,"%s is not symmetric!\n",filename);
    exit(-1);
  }

  /* work on depth */
  topodepth = hwloc_topology_get_depth(topology);

  res = (tm_topology_t*)MALLOC(sizeof(tm_topology_t));
  res->nb_levels = topodepth;
  res->node_id = (int**)MALLOC(sizeof(int*)*res->nb_levels);
  res->nb_nodes = (int*)MALLOC(sizeof(int)*res->nb_levels);
  res->arity = (int*)MALLOC(sizeof(int)*res->nb_levels);

  if(get_verbose_level() >= INFO)
      printf("topodepth = %d\n",topodepth);

  /* Build TreeMatch topology */
  for( depth = 0 ; depth < topodepth ; depth++ ){
    nb_nodes = hwloc_get_nbobjs_by_depth(topology, depth);
    res->nb_nodes[depth] = nb_nodes;
    res->node_id[depth] = (int*)MALLOC(sizeof(int)*nb_nodes);

    objs = (hwloc_obj_t*)MALLOC(sizeof(hwloc_obj_t)*nb_nodes);
    objs[0] = hwloc_get_next_obj_by_depth(topology,depth,NULL);
    hwloc_get_closest_objs(topology,objs[0],objs+1,nb_nodes-1);
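    /* hwloc_get_closest_objs() above fills objs[1..nb_nodes-1] with
       the remaining objects of this depth, ordered from closest to
       farthest from objs[0], so node_id ends up sorted by locality. */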
    res->arity[depth] = objs[0]->arity;

    if(get_verbose_level() >= INFO)
      printf("%d(%d):",res->arity[depth],nb_nodes);

    /* Build process id tab */
    for (i = 0; i < nb_nodes; i++){
      res->node_id[depth][i] = objs[i]->os_index;
      /* if(depth==topodepth-1) */
    }
    FREE(objs);
  }

  cost = (double*)CALLOC(res->nb_levels,sizeof(double));
  for(i=0; i<res->nb_levels; i++){
    cost[i] = speed(i);
  }

  *pcost = cost;


  /* Destroy topology object. */
  hwloc_topology_destroy(topology);
  if(get_verbose_level() >= INFO)
    printf("\n");
  return res;
}