コード例 #1
0
int
main (void)
{
  hwloc_topology_t topology;
  unsigned depth;
  hwloc_obj_t last;
  hwloc_obj_t *closest;
  unsigned found;
  int err;
  unsigned numprocs;
  hwloc_obj_t ancestor;

  err = hwloc_topology_init (&topology);
  if (err)
    return EXIT_FAILURE;

  hwloc_topology_set_synthetic (topology, "2 3 4 5");

  err = hwloc_topology_load (topology);
  if (err)
    return EXIT_FAILURE;

  depth = hwloc_topology_get_depth(topology);

  /* get the last object of last level */
  numprocs =  hwloc_get_nbobjs_by_depth(topology, depth-1);
  last = hwloc_get_obj_by_depth(topology, depth-1, numprocs-1);

  /* allocate the array of closest objects */
  closest = malloc(numprocs * sizeof(*closest));
  assert(closest);

  /* get closest levels */
  found = hwloc_get_closest_objs (topology, last, closest, numprocs);
  printf("looked for %u closest entries, found %u\n", numprocs, found);
  assert(found == numprocs-1);

  /* check first found is closest */
  assert(closest[0] == hwloc_get_obj_by_depth(topology, depth-1, numprocs-5 /* arity is 5 on last level */));
  /* check some other expected positions */
  assert(closest[found-1] == hwloc_get_obj_by_depth(topology, depth-1, 1*3*4*5-1 /* last of first half */));
  assert(closest[found/2-1] == hwloc_get_obj_by_depth(topology, depth-1, 1*3*4*5+2*4*5-1 /* last of second third of second half */));
  assert(closest[found/2/3-1] == hwloc_get_obj_by_depth(topology, depth-1, 1*3*4*5+2*4*5+3*5-1 /* last of third quarter of third third of second half */));

  /* get ancestor of last and less close object */
  ancestor = hwloc_get_common_ancestor_obj(topology, last, closest[found-1]);
  assert(hwloc_obj_is_in_subtree(topology, last, ancestor));
  assert(hwloc_obj_is_in_subtree(topology, closest[found-1], ancestor));
  assert(ancestor == hwloc_get_root_obj(topology)->first_child);
  printf("ancestor type %u depth %u number %u is system level\n",
	 ancestor->type, ancestor->depth, ancestor->logical_index);

  free(closest);
  hwloc_topology_destroy (topology);

  return EXIT_SUCCESS;
}
コード例 #2
0
ファイル: ma_hwloc.c プロジェクト: 12182007/cp2k
/*
*Get the direct siblings of a core - it is allways considering cache sharing
* index:     the logical id of the core
* myid:      the physical id of the core
* obj:       the HWLOC representation of the core
* nsiblings: the number of siblings of the core 
*/
void set_phys_siblings(int index, unsigned myid, hwloc_obj_t obj, int ncores, int nsiblings,int type)
{
  int i,j = 0;
  hwloc_obj_t obj2, ant;

  for(i = 0; i< ncores && nsiblings > 0 ; i++)
   {
         obj2 = hwloc_get_obj_by_type(topology,type,i);
         ant =  hwloc_get_common_ancestor_obj(topology,obj,obj2);       

         if(ant->type == HWLOC_OBJ_CACHE && obj2->os_index != myid)
         {
            machine_cores[index].siblings_id[j] = obj2->os_index;
            j++; nsiblings--;
            ant = NULL;
	 }  
   }
}
コード例 #3
0
ファイル: ma_hwloc.c プロジェクト: 12182007/cp2k
//Initializes HWLOC and load the machine architecture
int hw_topology_init (struct arch_topology *topo)
{
  hwloc_obj_t obj, core1, core2;
  int count, i, j, error;


  //Create the machine representation
  error = hwloc_topology_init(&topology);
  //Go throught the topology only if HWLOC is 
  //successifully initialized
  if(!error)
  {
    hwloc_topology_load(topology);
    local_topo = malloc(sizeof(struct arch_topology));

#if defined  (__DBCSR_ACC) || defined (__PW_CUDA)
  int nDev;
  ma_get_ndevices_cu(&nDev);
#endif

    //Extract number of NUMA nodes
    if (hwloc_get_type_depth (topology, HWLOC_OBJ_NODE))
     topo->nnodes = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_NODE));
    else
     topo->nnodes = 0;

    //Get number of cores, sockets and processing units
    topo->ncores = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_CORE));
    topo->nsockets = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_SOCKET)); 
    topo->npus = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_PU));

   //Compute number of memory controlers per socket
   //basically the number of NUMA nodes per socket
   if (topo->nnodes > topo->nsockets)
    topo->nmemcontroller = topo->nnodes/topo->nsockets;
   else
    topo->nmemcontroller = 1;

   count = 0; 
   topo->nshared_caches = 0;
   //Get derivate information - get number of cache per PU
   for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,0);
      obj; obj = obj->parent)
   {
	   if (obj->type == HWLOC_OBJ_CACHE)
	   { 
       if (obj->arity>1)
        topo->nshared_caches++; 
       else  
       { count++;  topo->ncaches = count;	}  
     } 
   }

  //Number of direct siblings
  //Siblings cores are the ones that share at least one component
  //level of the architecture
  count = 0;
  core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 0);
  core2 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 1);
  obj   = hwloc_get_common_ancestor_obj(topology, core1, core2);
  if (obj) 
    topo->nsiblings = obj->arity;

  //Machine node and core representation
  machine_nodes = (struct node*) malloc (topo->nnodes*sizeof(struct node));
  machine_cores = (struct core*) malloc (topo->ncores*sizeof(struct core));

  phys_cpus = malloc (topo->ncores*sizeof(int));
  get_phys_id(topology, topo->ncores, 0);
 
  //Get the caches sizes and other information for each core  
  for (i = 0; i < topo->ncores ; i++)
   {
	      machine_cores[i].caches = malloc (topo->ncaches*sizeof(size_t));
        machine_cores[i].shared_caches = malloc (topo->ncaches*sizeof(int));

        for (j = 0; j < topo->ncaches; j++)
         machine_cores[i].shared_caches[j] = 0;
        for (j = topo->ncaches ; j > topo->ncaches - topo->nshared_caches; j--)
         machine_cores[i].shared_caches[j-1] = 1;

        machine_cores[i].nsiblings = topo->nsiblings;
        machine_cores[i].siblings_id = malloc (topo->nsiblings*sizeof(unsigned));
        if(topo->ncores == topo->npus){
          core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, i);
 	         machine_cores[i].id = core1->os_index;
          count = 0;
          for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i);
              obj; obj = obj->parent) {
	          if (obj->type == HWLOC_OBJ_CACHE){
	        	  machine_cores[i].caches[count] = obj->attr->cache.size / 1024;
			        count++;
		        }
            if (obj->type == HWLOC_OBJ_NODE)
              machine_cores[i].numaNode = obj->logical_index;                
 	        }
       }
       else{
	        core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, i);
 	        machine_cores[i].id = core1->os_index;
          count = 0;
          for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_CORE,i);
              obj; obj = obj->parent) {
	          if (obj->type == HWLOC_OBJ_CACHE) {
    	        	machine_cores[i].caches[count] = obj->attr->cache.size / 1024;
			          count++;
		        }
           if (obj->type == HWLOC_OBJ_NODE)
                machine_cores[i].numaNode = obj->logical_index;
 	        }
       }
   }    

  //Get siblings id - so each core knows its siblings
  for (i = 0; i < topo->ncores ; i++)
   {
        if(topo->ncores == topo->npus){
          core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, i);
          set_phys_siblings(i,machine_cores[i].id,core1,topo->ncores,topo->nsiblings,HWLOC_OBJ_PU);
        }
        else{
          core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, i);
          set_phys_siblings(i,machine_cores[i].id,core1,topo->ncores,topo->nsiblings,HWLOC_OBJ_CORE);
        }

  }

   int ncore_node = topo->ncores/topo->nnodes;
   int count_cores;
  //Get the information for each NUMAnode
  for (i = 0; i < topo->nnodes ; i++)
   {
        obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, i);
        
      	machine_nodes[i].id = obj->os_index;
      	machine_nodes[i].memory = obj->memory.total_memory;
        machine_nodes[i].ncores = ncore_node;
        machine_nodes[i].mycores = malloc (ncore_node*sizeof(unsigned)); 

        //Get the cores id of each NUMAnode
        count_cores = 0;
        set_node_cores(topology, obj, i, &count_cores);

       //GPU support
#if defined  (__DBCSR_ACC) || defined (__PW_CUDA)
       int *devIds;
       devIds = malloc (nDev*sizeof(int));
       topo->ngpus = nDev;
       ma_get_cu(i,devIds); 
       machine_nodes[i].mygpus = devIds;
#endif    	
   }    

  //counting network cards
  count = 0;
  hwloc_topology_t topo_net;
  error = hwloc_topology_init(&topo_net);  
  hwloc_topology_set_flags(topo_net, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);  
  if (!error){
      hwloc_topology_load(topo_net);
      for (obj = hwloc_get_obj_by_type(topo_net, HWLOC_OBJ_OS_DEVICE, 0);
           obj;
           obj = hwloc_get_next_osdev(topo_net,obj))
        if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK ||
            obj->attr->osdev.type == HWLOC_OBJ_OSDEV_OPENFABRICS)
              count++;   
      topo->nnetcards = count;
  } 
  else //if can not load I/O devices
   topo->nnetcards = 0;  
  hwloc_topology_destroy(topo_net); 
  

 /*Local copy of the machine topology components*/  
 local_topo->nnodes = topo->nnodes;
 local_topo->nsockets = topo->nsockets;
 local_topo->ncores = topo->ncores;
 local_topo->npus = topo->npus;
 local_topo->ngpus = topo->ngpus;
 local_topo->ncaches = topo->ncaches;
 local_topo->nshared_caches = topo->nshared_caches;
 local_topo->nsiblings = topo->nsiblings;
 local_topo->nmemcontroller =  topo->nmemcontroller;
 local_topo->nnetcards = topo->nnetcards;
}

 return error;

}