C++ (Cpp) ma_get_cu 예제들

예제 #1

0

파일 보기

파일: ma_hwloc.c 프로젝트: 12182007/cp2k

/*
*Prints one branch of the machine topology
*Recursive function that goes throught the machine topology object
*an group them into hierarchical groups
* topology: the HWLOC object that represents the machine
* obj: the current object of a level
*/
void print_machine_branch(hwloc_topology_t topology, hwloc_obj_t obj, int depth, int obj_type)
{
    char string[128], out_string[128];
    unsigned i,arity;
    int *devIds,devId,countDev;

    if (obj->type != HWLOC_OBJ_MACHINE ){
     if(obj->type == HWLOC_OBJ_NODE){
       hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
       sprintf(out_string,"%*s%s\n", depth, "", string); 
       strcat(console_output,out_string);      
#if defined  (__DBCSR_ACC) || defined (__PW_CUDA)
       if ((local_topo->ngpus > 0) && (local_topo->ngpus < local_topo->ncores)){
                ma_get_nDevcu(obj->logical_index, &countDev);
                devIds = malloc (countDev*sizeof(int));
                ma_get_cu(obj->logical_index, devIds);
                strcat(console_output," Shared GPUS: ");
                for (i = 0; i<countDev; i++){
                 devId = devIds[i];
                 sprintf(out_string,"#%d ", devId);
                 strcat(console_output,out_string);}
                strcat(console_output,"\n");
       }
#endif
     } 
     else if (obj->type == HWLOC_OBJ_SOCKET){ 
       hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
       sprintf(out_string,"%*s%s\n", depth, "", string); 
       strcat(console_output,out_string);}
     else {
       hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
      if(obj->type == HWLOC_OBJ_PU)  {
#if defined  (__DBCSR_ACC) || defined (__PW_CUDA)
       sprintf(out_string,"%*s%s\t", depth, "", string);
       strcat(console_output,out_string);
       if (local_topo->ngpus > 0 && local_topo->ngpus == local_topo->ncores){
                ma_get_core_cu(obj->logical_index, &devId);
                strcat(console_output," GPU: ");
                sprintf(out_string,"%d ", devId);
                strcat(console_output,out_string);}
      strcat(console_output,"\n");
#else
       sprintf(out_string,"%*s%s\n", depth, "", string);
       strcat(console_output,out_string);
#endif      
      }
      else if (obj->type == HWLOC_OBJ_CACHE && obj->arity>1){
       hwloc_obj_type_snprintf(string, sizeof(string), obj, 0);
       sprintf(out_string,"%*s%s", depth, "", string);
       strcat(console_output,out_string);
       sprintf(out_string," (%dMB)\n", obj->attr->cache.size/(1024*1024));
       strcat(console_output,out_string);
      }
      else {
       sprintf(out_string,"%*s%s\t", depth, "", string); 
       strcat(console_output,out_string);
      }
     }                 
    }  
    if (obj->type != HWLOC_OBJ_PU) {//it is not a PU
      if((obj->first_child && obj->first_child->type == HWLOC_OBJ_PU) ||
          obj->first_child->type == obj_type)
       arity = 1; //number of children
      else
       arity = obj->arity;

    for (i = 0; i < arity; i++) 
        print_machine_branch(topology, obj->children[i],depth+1,obj_type);
   }
}

예제 #2

0

파일 보기

파일: ma_hwloc.c 프로젝트: 12182007/cp2k

//Initializes HWLOC and load the machine architecture
int hw_topology_init (struct arch_topology *topo)
{
  hwloc_obj_t obj, core1, core2;
  int count, i, j, error;


  //Create the machine representation
  error = hwloc_topology_init(&topology);
  //Go throught the topology only if HWLOC is 
  //successifully initialized
  if(!error)
  {
    hwloc_topology_load(topology);
    local_topo = malloc(sizeof(struct arch_topology));

#if defined  (__DBCSR_ACC) || defined (__PW_CUDA)
  int nDev;
  ma_get_ndevices_cu(&nDev);
#endif

    //Extract number of NUMA nodes
    if (hwloc_get_type_depth (topology, HWLOC_OBJ_NODE))
     topo->nnodes = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_NODE));
    else
     topo->nnodes = 0;

    //Get number of cores, sockets and processing units
    topo->ncores = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_CORE));
    topo->nsockets = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_SOCKET)); 
    topo->npus = hwloc_get_nbobjs_by_depth (topology, hwloc_get_type_depth (topology, HWLOC_OBJ_PU));

   //Compute number of memory controlers per socket
   //basically the number of NUMA nodes per socket
   if (topo->nnodes > topo->nsockets)
    topo->nmemcontroller = topo->nnodes/topo->nsockets;
   else
    topo->nmemcontroller = 1;

   count = 0; 
   topo->nshared_caches = 0;
   //Get derivate information - get number of cache per PU
   for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,0);
      obj; obj = obj->parent)
   {
	   if (obj->type == HWLOC_OBJ_CACHE)
	   { 
       if (obj->arity>1)
        topo->nshared_caches++; 
       else  
       { count++;  topo->ncaches = count;	}  
     } 
   }

  //Number of direct siblings
  //Siblings cores are the ones that share at least one component
  //level of the architecture
  count = 0;
  core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 0);
  core2 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 1);
  obj   = hwloc_get_common_ancestor_obj(topology, core1, core2);
  if (obj) 
    topo->nsiblings = obj->arity;

  //Machine node and core representation
  machine_nodes = (struct node*) malloc (topo->nnodes*sizeof(struct node));
  machine_cores = (struct core*) malloc (topo->ncores*sizeof(struct core));

  phys_cpus = malloc (topo->ncores*sizeof(int));
  get_phys_id(topology, topo->ncores, 0);
 
  //Get the caches sizes and other information for each core  
  for (i = 0; i < topo->ncores ; i++)
   {
	      machine_cores[i].caches = malloc (topo->ncaches*sizeof(size_t));
        machine_cores[i].shared_caches = malloc (topo->ncaches*sizeof(int));

        for (j = 0; j < topo->ncaches; j++)
         machine_cores[i].shared_caches[j] = 0;
        for (j = topo->ncaches ; j > topo->ncaches - topo->nshared_caches; j--)
         machine_cores[i].shared_caches[j-1] = 1;

        machine_cores[i].nsiblings = topo->nsiblings;
        machine_cores[i].siblings_id = malloc (topo->nsiblings*sizeof(unsigned));
        if(topo->ncores == topo->npus){
          core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, i);
 	         machine_cores[i].id = core1->os_index;
          count = 0;
          for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_PU,i);
              obj; obj = obj->parent) {
	          if (obj->type == HWLOC_OBJ_CACHE){
	        	  machine_cores[i].caches[count] = obj->attr->cache.size / 1024;
			        count++;
		        }
            if (obj->type == HWLOC_OBJ_NODE)
              machine_cores[i].numaNode = obj->logical_index;                
 	        }
       }
       else{
	        core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, i);
 	        machine_cores[i].id = core1->os_index;
          count = 0;
          for(obj = hwloc_get_obj_by_type(topology,HWLOC_OBJ_CORE,i);
              obj; obj = obj->parent) {
	          if (obj->type == HWLOC_OBJ_CACHE) {
    	        	machine_cores[i].caches[count] = obj->attr->cache.size / 1024;
			          count++;
		        }
           if (obj->type == HWLOC_OBJ_NODE)
                machine_cores[i].numaNode = obj->logical_index;
 	        }
       }
   }    

  //Get siblings id - so each core knows its siblings
  for (i = 0; i < topo->ncores ; i++)
   {
        if(topo->ncores == topo->npus){
          core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, i);
          set_phys_siblings(i,machine_cores[i].id,core1,topo->ncores,topo->nsiblings,HWLOC_OBJ_PU);
        }
        else{
          core1 = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, i);
          set_phys_siblings(i,machine_cores[i].id,core1,topo->ncores,topo->nsiblings,HWLOC_OBJ_CORE);
        }

  }

   int ncore_node = topo->ncores/topo->nnodes;
   int count_cores;
  //Get the information for each NUMAnode
  for (i = 0; i < topo->nnodes ; i++)
   {
        obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, i);
        
      	machine_nodes[i].id = obj->os_index;
      	machine_nodes[i].memory = obj->memory.total_memory;
        machine_nodes[i].ncores = ncore_node;
        machine_nodes[i].mycores = malloc (ncore_node*sizeof(unsigned)); 

        //Get the cores id of each NUMAnode
        count_cores = 0;
        set_node_cores(topology, obj, i, &count_cores);

       //GPU support
#if defined  (__DBCSR_ACC) || defined (__PW_CUDA)
       int *devIds;
       devIds = malloc (nDev*sizeof(int));
       topo->ngpus = nDev;
       ma_get_cu(i,devIds); 
       machine_nodes[i].mygpus = devIds;
#endif    	
   }    

  //counting network cards
  count = 0;
  hwloc_topology_t topo_net;
  error = hwloc_topology_init(&topo_net);  
  hwloc_topology_set_flags(topo_net, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);  
  if (!error){
      hwloc_topology_load(topo_net);
      for (obj = hwloc_get_obj_by_type(topo_net, HWLOC_OBJ_OS_DEVICE, 0);
           obj;
           obj = hwloc_get_next_osdev(topo_net,obj))
        if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK ||
            obj->attr->osdev.type == HWLOC_OBJ_OSDEV_OPENFABRICS)
              count++;   
      topo->nnetcards = count;
  } 
  else //if can not load I/O devices
   topo->nnetcards = 0;  
  hwloc_topology_destroy(topo_net); 
  

 /*Local copy of the machine topology components*/  
 local_topo->nnodes = topo->nnodes;
 local_topo->nsockets = topo->nsockets;
 local_topo->ncores = topo->ncores;
 local_topo->npus = topo->npus;
 local_topo->ngpus = topo->ngpus;
 local_topo->ncaches = topo->ncaches;
 local_topo->nshared_caches = topo->nshared_caches;
 local_topo->nsiblings = topo->nsiblings;
 local_topo->nmemcontroller =  topo->nmemcontroller;
 local_topo->nnetcards = topo->nnetcards;
}

 return error;

}

예제 #3

0

파일 보기

파일: ma_hwloc.c 프로젝트: 12182007/cp2k

/*
*Prints the machine hierarchy 
*Recursive function that goes throught the machine topology object
*an group them into hierarchical groups
* topology: the HWLOC object
* obj: the current object in the topology
* depth: the horizontal level in the machine topology 
*/
void print_machine(hwloc_topology_t topo, hwloc_obj_t obj, int depth)
{
    char string[256], out_string[256];
    unsigned i,arity;
    int *devIds,devId,countDev;

    if(obj->type == HWLOC_OBJ_SOCKET || obj->type == HWLOC_OBJ_MACHINE ){ 
       hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
       sprintf(out_string,"%*s%s\n", depth, "", string);
       strcat(console_output,out_string);
     }
     else if (obj->type == HWLOC_OBJ_NODE){
      hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
      sprintf(out_string,"%*s%s\n", depth, "", string);
      strcat(console_output,out_string); 
//if the machine has shared GPUs
#if defined  (__DBCSR_ACC) || defined (__PW_CUDA)
       if ((local_topo->ngpus > 0) && (local_topo->ngpus < local_topo->ncores)){
                ma_get_nDevcu(obj->logical_index, &countDev);
                devIds = malloc (countDev*sizeof(int));
                ma_get_cu(obj->logical_index, devIds);
		strcat(console_output," Shared GPUS: ");
		for (i = 0; i<countDev; i++){
		 devId = devIds[i];
                 sprintf(out_string,"#%d ", devId); 
       		 strcat(console_output,out_string);}
       		strcat(console_output,"\n");
       }
#endif
     }
     else {
       hwloc_obj_snprintf(string, sizeof(string), topology, obj, "#", 0);
      if(obj->type == HWLOC_OBJ_PU )
      {
#if defined  (__DBCSR_ACC) || defined (__PW_CUDA)
       sprintf(out_string,"%*s%s\t", depth, "", string);
       strcat(console_output,out_string);
       if (local_topo->ngpus > 0 && local_topo->ngpus == local_topo->ncores){
                ma_get_core_cu(obj->logical_index, &devId);
                strcat(console_output," GPU: ");
                sprintf(out_string,"%d ", devId);
                strcat(console_output,out_string);}
      strcat(console_output,"\n");
#else
       sprintf(out_string,"%*s%s\n", depth, "", string);
       strcat(console_output,out_string);	
#endif
      }
      else if (obj->type == HWLOC_OBJ_CACHE && obj->arity>1 ){
           hwloc_obj_type_snprintf(string, sizeof(string), obj, 0);
           sprintf(out_string,"%*s%s", depth, "", string);
           strcat(console_output,out_string);
           sprintf(out_string," (%dMB)\n", obj->attr->cache.size/(1024*1024));
           strcat(console_output,out_string);       
         }
      else if (obj->type == HWLOC_OBJ_OS_DEVICE ||
               obj->type == HWLOC_OBJ_PCI_DEVICE ||
               obj->type == HWLOC_OBJ_BRIDGE){
               if(obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK ){
                sprintf(out_string,"%*s%s\n", depth, "--", "Network Card");
                  strcat(console_output,out_string);}
       }
      else if (obj->type == HWLOC_OBJ_CORE)
       {
          char number[33];
          strcpy(string,"Core#");
          sprintf(number,"%d",obj->logical_index);
          strcat(string,number);
          sprintf(out_string,"%*s%s\t", depth, "", string);
          strcat(console_output,out_string);
       }
      else {
           sprintf(out_string,"%*s%s\t", depth, "", string);
           strcat(console_output,out_string);
        }
     }  
    if (obj->type != HWLOC_OBJ_PU) {//it is not a PU
      if((obj->first_child && obj->first_child->type == HWLOC_OBJ_PU))
       arity = 1; //number of children
      else
       arity = obj->arity;

    for (i = 0; i < arity; i++) 
        print_machine(topo, obj->children[i],depth+1);
   }
}

예제 #4

0

파일 보기

파일: ma_linux.c 프로젝트: Jwonsever/shirleyxas-newt

int linux_topology_init(struct arch_topology *topo)
{
  int count, i, j, error,k,tmpNode;
  
#ifdef  __DBCSR_CUDA
  int nDev;
  ma_get_ndevices_cu(&nDev);
  topo->ngpus = nDev;
#endif

  local_topo = malloc(sizeof(struct arch_topology));

  topo->nnodes = linux_get_nnodes();
  local_topo->nnodes = topo->nnodes;
  topo->ncores = linux_get_ncores();
  local_topo->ncores = topo->ncores;
  topo->npus = topo->ncores;
  local_topo->npus = topo->npus;

  //libnuma has no support for I/O devices
  topo->nnetcards = 0;
  local_topo->nnetcards = 0;
  topo->nsockets = linux_get_nsockets();
  local_topo->nsockets = topo->nsockets;
   //Compute number of memory controlers per socket
   //basically the number of NUMA nodes per socket
  if (topo->nnodes > topo->nsockets)
    topo->nmemcontroller = topo->nnodes/topo->nsockets;
  else
    topo->nmemcontroller = 1;
                  
  topo->ncaches = linux_get_ncaches(); 

   local_topo->nmemcontroller = topo->nmemcontroller;
   local_topo->ncaches = topo->ncaches;

  topo->nshared_caches = linux_get_nshared_caches();
  topo->nsiblings = linux_get_nsiblings();  

  local_topo->nshared_caches =  topo->nshared_caches;
  local_topo->nsiblings = topo->nsiblings;

  //Machine node and core representation
  machine_nodes = (struct node*) malloc (topo->nnodes*sizeof(struct node));

  int ncore_node = topo->ncores/topo->nnodes;

 for (i = 0; i < topo->nnodes ; i++)
   {
        machine_nodes[i].id = i;
        machine_nodes[i].memory = 0;
        machine_nodes[i].ncores = ncore_node;
#ifdef  __DBCSR_CUDA
       ma_get_nDevcu(i,&nDev);
       machine_nodes[i].mygpus = malloc (nDev*sizeof(int));
       ma_get_cu(i,machine_nodes[i].mygpus);
#endif
   }

   if (topo->nnodes == -1 || topo->ncores == -1 || topo->npus == -1 ||
       topo->nsockets == -1)
        return -1;
   else
        return 0;

}