Example #1
c_sublocid_t chpl_topo_getMemLocality(void* p) {
  int flags;
  hwloc_nodeset_t nodeset;
  int node;

  if (!haveTopology) {
    return c_sublocid_any;
  }

  if (!topoSupport->membind->get_area_memlocation) {
    return c_sublocid_any;
  }

  if (p == NULL) {
    return c_sublocid_any;
  }

  CHK_ERR_ERRNO((nodeset = hwloc_bitmap_alloc()) != NULL);

  flags = HWLOC_MEMBIND_BYNODESET;
  CHK_ERR_ERRNO(hwloc_get_area_memlocation(topology, p, 1, nodeset, flags)
                == 0);

  node = hwloc_bitmap_first(nodeset);
  if (!isActualSublocID(node)) {
    node = c_sublocid_any;
  }

  hwloc_bitmap_free(nodeset);

  return node;
}
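For readers outside the Chapel runtime, the same hwloc_get_area_memlocation() + hwloc_bitmap_first() pattern can be exercised in a minimal standalone sketch like the one below (an illustration only; it assumes hwloc >= 2.0 and that the operating system supports the memory-location query, which otherwise fails with ENOSYS):

#include <hwloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
  hwloc_topology_t topology;
  hwloc_nodeset_t nodeset;
  size_t len = 1 << 20;
  void *buf;
  int node = -1;

  hwloc_topology_init(&topology);
  hwloc_topology_load(topology);

  buf = malloc(len);
  memset(buf, 0, len);              /* touch the pages so they get placed somewhere */

  nodeset = hwloc_bitmap_alloc();
  if (hwloc_get_area_memlocation(topology, buf, len, nodeset,
                                 HWLOC_MEMBIND_BYNODESET) == 0)
    node = hwloc_bitmap_first(nodeset);   /* -1 if the nodeset came back empty */

  printf("buffer appears to reside on NUMA node %d\n", node);

  hwloc_bitmap_free(nodeset);
  free(buf);
  hwloc_topology_destroy(topology);
  return 0;
}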
Example #2
File: ma_hwloc.c  Project: 12182007/cp2k
/*
 * Get the number of net cards in a NUMA node
 */
int hw_get_nnetcards(int NUMAnode)
{
  int count, net_NUMAnode, error;
  hwloc_obj_t obj, obj_anc;

  count = 0;
  hwloc_topology_t topo_net;
  error = hwloc_topology_init(&topo_net);
  hwloc_topology_set_flags(topo_net, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);
  if (!error) {
    hwloc_topology_load(topo_net);
    for (obj = hwloc_get_obj_by_type(topo_net, HWLOC_OBJ_OS_DEVICE, 0);
         obj;
         obj = hwloc_get_next_osdev(topo_net, obj))
      if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK ||
          obj->attr->osdev.type == HWLOC_OBJ_OSDEV_OPENFABRICS) {
        obj_anc = hwloc_get_non_io_ancestor_obj(topo_net, obj);
        net_NUMAnode = hwloc_bitmap_first(obj_anc->nodeset);
        // count the card only if it sits on the requested NUMA node
        if (NUMAnode == net_NUMAnode)
          count++;
      }
  }
  hwloc_topology_destroy(topo_net);

  return count;
}
Example #3
c_sublocid_t chpl_topo_getThreadLocality(void) {
  hwloc_cpuset_t cpuset;
  hwloc_nodeset_t nodeset;
  int flags;
  int node;

  if (!haveTopology) {
    return c_sublocid_any;
  }

  if (!topoSupport->cpubind->get_thread_cpubind) {
    return c_sublocid_any;
  }

  CHK_ERR_ERRNO((cpuset = hwloc_bitmap_alloc()) != NULL);
  CHK_ERR_ERRNO((nodeset = hwloc_bitmap_alloc()) != NULL);

  flags = HWLOC_CPUBIND_THREAD;
  CHK_ERR_ERRNO(hwloc_get_cpubind(topology, cpuset, flags) == 0);

  hwloc_cpuset_to_nodeset(topology, cpuset, nodeset);

  node = hwloc_bitmap_first(nodeset);

  hwloc_bitmap_free(nodeset);
  hwloc_bitmap_free(cpuset);

  return node;
}
Example #4
File: ma_hwloc.c  Project: 12182007/cp2k
/*
* Set the node where the current process will run
*/
void hw_set_first_core_node(int node, int proc)
{
  hwloc_nodeset_t nset;
  hwloc_cpuset_t set,newset;

  if (local_topo->nnodes != 0 ){
    nset = hwloc_bitmap_alloc();
    set = hwloc_bitmap_alloc();
    newset = hwloc_bitmap_alloc();
    hwloc_bitmap_zero(set);
    hwloc_bitmap_zero(newset);  
    hwloc_bitmap_zero(nset);
    hwloc_bitmap_set(nset,node);
    hwloc_cpuset_from_nodeset(topology,set,nset);
    int core = hwloc_bitmap_first(set);
    hwloc_bitmap_set(newset,core);
    if (proc) 
       hwloc_set_proc_cpubind (topology,0,newset,HWLOC_CPUBIND_PROCESS);
    else
       hwloc_set_proc_cpubind (topology,0,newset,HWLOC_CPUBIND_THREAD);
    hwloc_bitmap_free(newset);     
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(nset);
  }
}
Example #5
static void *
hwloc_win_alloc_membind(hwloc_topology_t topology __hwloc_attribute_unused, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags) {
  int node;

  switch (policy) {
    case HWLOC_MEMBIND_DEFAULT:
    case HWLOC_MEMBIND_BIND:
      break;
    default:
      errno = ENOSYS;
      return hwloc_alloc_or_fail(topology, len, flags);
  }

  if (flags & HWLOC_MEMBIND_STRICT) {
    errno = ENOSYS;
    return NULL;
  }

  if (hwloc_bitmap_weight(nodeset) != 1) {
    /* Not a single node, can't do this */
    errno = EXDEV;
    return hwloc_alloc_or_fail(topology, len, flags);
  }

  node = hwloc_bitmap_first(nodeset);
  return VirtualAllocExNumaProc(GetCurrentProcess(), NULL, len, MEM_COMMIT|MEM_RESERVE, PAGE_EXECUTE_READWRITE, node);
}
Example #6
File: ma_hwloc.c  Project: 12182007/cp2k
/*
* Get the memory policy of a process 
*/
void hw_get_mempol(int *node, int *mem_pol)
{
  hwloc_nodeset_t nset;
  hwloc_membind_policy_t mempol=-1;

  if (local_topo->nnodes != 0) {
    nset = hwloc_bitmap_alloc();
    hwloc_get_membind_nodeset(topology, nset, &mempol, 0);
    (*node) = hwloc_bitmap_first(nset);
    switch (mempol) {
      case HWLOC_MEMBIND_FIRSTTOUCH:
        (*mem_pol) = OS;
        break;
      case HWLOC_MEMBIND_BIND:
        (*mem_pol) = LOCAL;
        break;
      case HWLOC_MEMBIND_INTERLEAVE:
        (*mem_pol) = INTERLEAVE;
        (*node) = -1;
        break;
      default:
        (*mem_pol) = -1;
        (*node) = -1;
        break;
    }
    hwloc_bitmap_free(nset);
  }
  else
    (*mem_pol) = -1;
}
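hwloc_get_membind_nodeset() above is a hwloc 1.x entry point; with hwloc >= 2.0 the same query is usually written with hwloc_get_membind() and the HWLOC_MEMBIND_BYNODESET flag. A rough sketch of that spelling, assuming the same loaded `topology` as in ma_hwloc.c:

  /* sketch only: hwloc 2.x equivalent of the query in hw_get_mempol() */
  hwloc_nodeset_t nset = hwloc_bitmap_alloc();
  hwloc_membind_policy_t mempol;
  if (hwloc_get_membind(topology, nset, &mempol, HWLOC_MEMBIND_BYNODESET) == 0) {
    int node = hwloc_bitmap_first(nset);  /* first node of the binding, or -1 */
    /* ... map mempol to OS/LOCAL/INTERLEAVE exactly as above ... */
  }
  hwloc_bitmap_free(nset);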
Example #7
inline
void print_bitmap( std::ostream & s , const hwloc_const_bitmap_t bitmap )
{
  s << "{" ;
  for ( int i = hwloc_bitmap_first( bitmap ) ;
        -1 != i ; i = hwloc_bitmap_next( bitmap , i ) ) {
    s << " " << i ;
  }
  s << " }" ;
}
Example #8
static spu_t
hwloc_hpux_find_spu(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_bitmap_t hwloc_set)
{
  spu_t cpu;

  cpu = hwloc_bitmap_first(hwloc_set);
  if (cpu != -1 && hwloc_bitmap_weight(hwloc_set) == 1)
    return cpu;
  return -1;
}
Example #9
/*******************  FUNCTION  *********************/
int TopoHwloc::getFirstBitInBitmap(hwloc_bitmap_t bitmap) const
{
	int last = hwloc_bitmap_last(bitmap);
	int current = hwloc_bitmap_first(bitmap);
	assert(current != -1);
	while (current != last)
	{
		if (hwloc_bitmap_isset(bitmap,current))
			break;
		current = hwloc_bitmap_next(bitmap,current);
	}
	return current;
}
Example #10
/* convert set into index+mask if all set bits are in the same ULONG.
 * otherwise return -1.
 */
static int hwloc_bitmap_to_single_ULONG_PTR(hwloc_const_bitmap_t set, unsigned *index, ULONG_PTR *mask)
{
  unsigned first_ulp, last_ulp;
  if (hwloc_bitmap_weight(set) == -1)
    return -1;
  first_ulp = hwloc_bitmap_first(set) / (sizeof(ULONG_PTR)*8);
  last_ulp = hwloc_bitmap_last(set) / (sizeof(ULONG_PTR)*8);
  if (first_ulp != last_ulp)
    return -1;
  *mask = hwloc_bitmap_to_ith_ULONG_PTR(set, first_ulp);
  *index = first_ulp;
  return 0;
}
Example #11
File: ma_hwloc.c  Project: 12182007/cp2k
/*
* Get the core where the current process is running
* param flag set of cores is possible
* return the core
*/
int hw_get_proc_core(int flag)
{
  int core;

  hwloc_cpuset_t set;
  set = hwloc_bitmap_alloc();
  hwloc_get_proc_cpubind (topology,0,set,HWLOC_CPUBIND_PROCESS);
  core = hwloc_bitmap_first(set);
  hwloc_bitmap_free(set);

  //hwloc can't detect the thread core - use linux based syscall
  if (flag)
     core = sched_getcpu();

  return core;
}
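Note that sched_getcpu() used in the fallback above is Linux-specific; it is declared in <sched.h> and, with glibc, requires _GNU_SOURCE to be defined (both presumably handled elsewhere in ma_hwloc.c).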
Example #12
File: ma_hwloc.c  Project: 12182007/cp2k
/*
* Get the first core of a node
* return the core
*/
int hw_get_firstcore()
{
  int core;
  hwloc_cpuset_t set;

  if (local_topo->nnodes != 0 ){
    set = hwloc_bitmap_alloc();
    hwloc_get_proc_cpubind (topology,0,set,HWLOC_CPUBIND_PROCESS);
    core = hwloc_bitmap_first(set);
    hwloc_bitmap_free(set);
  }
  else
    core = -1;

  return core;
}
Example #13
File: ma_hwloc.c  Project: 12182007/cp2k
/*
* Get the node where the current thread is running
* return the node of the core
*/
int hw_my_node()
{
  int node;
  hwloc_cpuset_t set;
  hwloc_nodeset_t nset;

  if (local_topo->nnodes != 0 ){
    set = hwloc_bitmap_alloc();
    nset = hwloc_bitmap_alloc();
    hwloc_get_cpubind (topology,set,HWLOC_CPUBIND_THREAD);
    hwloc_cpuset_to_nodeset(topology,set,nset);
    node = hwloc_bitmap_first(nset); 	
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(nset);
  }
  else
    node = -1;

  return node;
}
Example #14
File: ma_hwloc.c  Project: 12182007/cp2k
/*
 * Find a NUMA node with network cards
 * */
int hw_get_node_net()
{
  int net_NUMAnode=0, error;
  hwloc_obj_t obj, obj_anc;

  hwloc_topology_t topo_net;
  error = hwloc_topology_init(&topo_net);
  hwloc_topology_set_flags(topo_net, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);
  if (!error) {
    hwloc_topology_load(topo_net);
    obj = hwloc_get_obj_by_type(topo_net, HWLOC_OBJ_OS_DEVICE, 0);
    if (obj != NULL) {
      obj_anc = hwloc_get_non_io_ancestor_obj(topo_net, obj);
      net_NUMAnode = hwloc_bitmap_first(obj_anc->nodeset);
    }
  }
  hwloc_topology_destroy(topo_net);

  return net_NUMAnode;
}
Example #15
//
// p must be page aligned and the page size must evenly divide size
//
static
void chpl_topo_setMemLocalityByPages(unsigned char* p, size_t size,
                                     hwloc_obj_t numaObj) {
  int flags;

  if (!haveTopology) {
    return;
  }

  if (!topoSupport->membind->set_area_membind
      || !do_set_area_membind)
    return;

  _DBG_P("hwloc_set_area_membind_nodeset(%p, %#zx, %d)\n", p, size,
         (int) hwloc_bitmap_first(numaObj->allowed_nodeset));

  flags = HWLOC_MEMBIND_MIGRATE | HWLOC_MEMBIND_STRICT;
  CHK_ERR_ERRNO(hwloc_set_area_membind_nodeset(topology, p, size,
                                               numaObj->allowed_nodeset,
                                               HWLOC_MEMBIND_BIND, flags)
                == 0);
}
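The _nodeset-suffixed call and the allowed_nodeset field are hwloc 1.x constructs; with hwloc >= 2.0 the equivalent binding is typically spelled with hwloc_set_area_membind() plus the HWLOC_MEMBIND_BYNODESET flag. A sketch of that variant, reusing the same names (p, size, numaObj, CHK_ERR_ERRNO) from the excerpt above:

  /* sketch only: hwloc 2.x spelling of the membind call above */
  flags = HWLOC_MEMBIND_MIGRATE | HWLOC_MEMBIND_STRICT | HWLOC_MEMBIND_BYNODESET;
  CHK_ERR_ERRNO(hwloc_set_area_membind(topology, p, size, numaObj->nodeset,
                                       HWLOC_MEMBIND_BIND, flags)
                == 0);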
Example #16
File: chpl-topo.c  Project: bollu/chapel
c_sublocid_t chpl_topo_getThreadLocality(void) {
  hwloc_cpuset_t cpuset;
  hwloc_nodeset_t nodeset;
  int flags;
  int node;

  if (!haveTopology) {
    return c_sublocid_any;
  }

  if (!topoSupport->cpubind->get_thread_cpubind) {
    return c_sublocid_any;
  }

  if ((cpuset = hwloc_bitmap_alloc()) == NULL) {
    report_error("hwloc_bitmap_alloc()", errno);
  }

  if ((nodeset = hwloc_bitmap_alloc()) == NULL) {
    report_error("hwloc_bitmap_alloc()", errno);
  }

  flags = HWLOC_CPUBIND_THREAD;
  if (hwloc_get_cpubind(topology, cpuset, flags)) {
    report_error("hwloc_get_cpubind()", errno);
  }

  hwloc_cpuset_to_nodeset(topology, cpuset, nodeset);

  node = hwloc_bitmap_first(nodeset);

  hwloc_bitmap_free(nodeset);
  hwloc_bitmap_free(cpuset);

  return node;
}
Example #17
File: ma_hwloc.c  Project: 12182007/cp2k
/*
 * Returns the network card of a MPI process
 * */
int hw_my_netCard(int core, int myRank)
{
  int card=0, error, nnetcards, *netcards, my_local_cards=0;
  int NUMAnode = hw_get_myNode(core), net_NUMAnode; 
  hwloc_obj_t obj, obj_anc;

  nnetcards = hw_get_nnetcards(NUMAnode);
  netcards = malloc(nnetcards*sizeof(int));

  hwloc_topology_t topo_net;
  error = hwloc_topology_init(&topo_net);
  hwloc_topology_set_flags(topo_net, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);
  if (!error){
    hwloc_topology_load(topo_net);
    my_local_cards = 0;
    for (obj = hwloc_get_obj_by_type(topo_net, HWLOC_OBJ_OS_DEVICE, 0);
         obj ;
         obj = hwloc_get_next_osdev(topo_net,obj))
         if (obj->attr->osdev.type == HWLOC_OBJ_OSDEV_NETWORK ||
             obj->attr->osdev.type == HWLOC_OBJ_OSDEV_OPENFABRICS)
         {
           obj_anc = hwloc_get_non_io_ancestor_obj(topo_net,obj);
           net_NUMAnode = hwloc_bitmap_first(obj_anc->nodeset);
           if ( net_NUMAnode == NUMAnode ) {
              netcards[my_local_cards] = obj->os_index;
              my_local_cards++;}
         }
  }

  hwloc_topology_destroy(topo_net);

  if (!my_local_cards) {
    free(netcards);
    return -1; // no net affinity
  } else {
    card = netcards[myRank % nnetcards];
    free(netcards);
    return card;
  }
}
Example #18
int main(void)
{
  hwloc_topology_t topology;
  hwloc_bitmap_t set, set2;
  hwloc_const_bitmap_t cset_available, cset_all;
  hwloc_obj_t obj;
  char *buffer;
  char type[64];
  unsigned i;
  int err;

  /* create a topology */
  err = hwloc_topology_init(&topology);
  if (err < 0) {
    fprintf(stderr, "failed to initialize the topology\n");
    return EXIT_FAILURE;
  }
  err = hwloc_topology_load(topology);
  if (err < 0) {
    fprintf(stderr, "failed to load the topology\n");
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }

  /* retrieve the entire set of available PUs */
  cset_available = hwloc_topology_get_topology_cpuset(topology);

  /* retrieve the CPU binding of the current entire process */
  set = hwloc_bitmap_alloc();
  if (!set) {
    fprintf(stderr, "failed to allocate a bitmap\n");
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  err = hwloc_get_cpubind(topology, set, HWLOC_CPUBIND_PROCESS);
  if (err < 0) {
    fprintf(stderr, "failed to get cpu binding\n");
    hwloc_bitmap_free(set);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }

  /* display the processing units that cannot be used by this process */
  if (hwloc_bitmap_isequal(set, cset_available)) {
    printf("this process can use all available processing units in the system\n");
  } else {
    /* compute the set where we currently cannot run.
     * we can't modify cset_available because it's a system read-only one,
     * so we do   set = available &~ set
     */
    hwloc_bitmap_andnot(set, cset_available, set);
    hwloc_bitmap_asprintf(&buffer, set);
    printf("process cannot use %d process units (%s) among %u in the system\n",
	   hwloc_bitmap_weight(set), buffer, hwloc_bitmap_weight(cset_available));
    free(buffer);
    /* restore set where it was before the &~ operation above */
    hwloc_bitmap_andnot(set, cset_available, set);
  }
  /* print the smallest object covering the current process binding */
  obj = hwloc_get_obj_covering_cpuset(topology, set);
  hwloc_obj_type_snprintf(type, sizeof(type), obj, 0);
  printf("process is bound within object %s logical index %u\n", type, obj->logical_index);

  /* retrieve the single PU where the current thread actually runs within this process binding */
  set2 = hwloc_bitmap_alloc();
  if (!set2) {
    fprintf(stderr, "failed to allocate a bitmap\n");
    hwloc_bitmap_free(set);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  err = hwloc_get_last_cpu_location(topology, set2, HWLOC_CPUBIND_THREAD);
  if (err < 0) {
    fprintf(stderr, "failed to get last cpu location\n");
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(set2);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  /* sanity checks that are not actually needed but help the reader */
  /* this thread runs within the process binding */
  assert(hwloc_bitmap_isincluded(set2, set));
  /* this thread runs on a single PU at a time */
  assert(hwloc_bitmap_weight(set2) == 1);

  /* print the logical number of the PU where that thread runs */
  /* extract the PU OS index from the bitmap */
  i = hwloc_bitmap_first(set2);
  obj = hwloc_get_pu_obj_by_os_index(topology, i);
  printf("thread is now running on PU logical index %u (OS/physical index %u)\n",
	 obj->logical_index, i);

  /* migrate this single thread to the other PUs within the current binding */
  hwloc_bitmap_andnot(set2, set, set2);
  err = hwloc_set_cpubind(topology, set2, HWLOC_CPUBIND_THREAD);
  if (err < 0) {
    fprintf(stderr, "failed to set thread binding\n");
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(set2);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  /* reprint the PU where that thread runs */
  err = hwloc_get_last_cpu_location(topology, set2, HWLOC_CPUBIND_THREAD);
  if (err < 0) {
    fprintf(stderr, "failed to get last cpu location\n");
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(set2);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  /* print the logical number of the PU where that thread runs */
  /* extract the PU OS index from the bitmap */
  i = hwloc_bitmap_first(set2);
  obj = hwloc_get_pu_obj_by_os_index(topology, i);
  printf("thread is running on PU logical index %u (OS/physical index %u)\n",
	 obj->logical_index, i);

  hwloc_bitmap_free(set);
  hwloc_bitmap_free(set2);

  /* retrieve the entire set of all PUs */
  cset_all = hwloc_topology_get_complete_cpuset(topology);
  if (hwloc_bitmap_isequal(cset_all, cset_available)) {
    printf("all hardware PUs are available\n");
  } else {
    printf("only %d hardware PUs are available in the machine among %d\n",
	   hwloc_bitmap_weight(cset_available), hwloc_bitmap_weight(cset_all));
  }

  hwloc_topology_destroy(topology);
  return EXIT_SUCCESS;
}
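This listing follows the structure of the hwloc documentation's hello-world style examples; assuming a standard installation that ships hwloc.pc, it can typically be built with something like `cc example.c $(pkg-config --cflags --libs hwloc)` (plus the usual <hwloc.h>, <stdio.h>, <stdlib.h>, and <assert.h> includes, which the excerpt omits).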
Example #19
static void
look_rset(int sdl, hwloc_obj_type_t type, struct hwloc_topology *topology, int level)
{
  rsethandle_t rset, rad;
  int i,maxcpus,j;
  int nbnodes;
  struct hwloc_obj *obj;

  if ((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM))
    rset = rs_alloc(RS_ALL);
  else
    rset = rs_alloc(RS_PARTITION);
  rad = rs_alloc(RS_EMPTY);
  nbnodes = rs_numrads(rset, sdl, 0);
  if (nbnodes == -1) {
    perror("rs_numrads");
    return;
  }

  for (i = 0; i < nbnodes; i++) {
    hwloc_bitmap_t cpuset;
    unsigned os_index = (unsigned) -1; /* no os_index except for PU and NUMANODE below */

    if (rs_getrad(rset, rad, sdl, i, 0)) {
      fprintf(stderr,"rs_getrad(%d) failed: %s\n", i, strerror(errno));
      continue;
    }
    if (!rs_getinfo(rad, R_NUMPROCS, 0))
      continue;

    maxcpus = rs_getinfo(rad, R_MAXPROCS, 0);
    cpuset = hwloc_bitmap_alloc();
    for (j = 0; j < maxcpus; j++) {
      if (rs_op(RS_TESTRESOURCE, rad, NULL, R_PROCS, j))
	hwloc_bitmap_set(cpuset, j);
    }

    if (type == HWLOC_OBJ_PU) {
      os_index = hwloc_bitmap_first(cpuset);
      hwloc_debug("Found PU #%u inside node %d for sdl %d\n", os_index, i, sdl);
      assert(hwloc_bitmap_weight(cpuset) == 1);
    } else if (type == HWLOC_OBJ_NUMANODE) {
      /* NUMA node os_index isn't used for binding, just use the rad number to get unique values.
       * Note that we'll use that fact in hwloc_aix_prepare_membind(). */
      os_index = i;
      hwloc_debug("Using os_index #%u for NUMA node inside node %d for sdl %d\n", os_index, i, sdl);
    }

    obj = hwloc_alloc_setup_object(type, os_index);
    obj->cpuset = cpuset;
    obj->os_level = sdl;

    switch(type) {
      case HWLOC_OBJ_NUMANODE:
	obj->nodeset = hwloc_bitmap_alloc();
	hwloc_bitmap_set(obj->nodeset, i);
	obj->memory.local_memory = 0; /* TODO: odd, rs_getinfo(rad, R_MEMSIZE, 0) << 10 returns the total memory ... */
	obj->memory.page_types_len = 2;
	obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
	memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
	obj->memory.page_types[0].size = hwloc_getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
	obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
	/* TODO: obj->memory.page_types[1].count = rs_getinfo(rset, R_LGPGFREE, 0) / hugepagesize */
	break;
      case HWLOC_OBJ_CACHE:
	obj->attr->cache.size = _system_configuration.L2_cache_size;
	obj->attr->cache.associativity = _system_configuration.L2_cache_asc;

	obj->attr->cache.linesize = 0; /* unknown by default */
	if (__power_pc())
	  if (__power_4() || __power_5() || __power_6() || __power_7())
	    obj->attr->cache.linesize = 128;

	obj->attr->cache.depth = 2;
	obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED; /* OK for power[4567], unknown for others */
	break;
      case HWLOC_OBJ_GROUP:
	obj->attr->group.depth = level;
	break;
      case HWLOC_OBJ_CORE:
      {
	hwloc_obj_t obj2, obj3;
	obj2 = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, i);
	obj2->cpuset = hwloc_bitmap_dup(obj->cpuset);
	obj2->attr->cache.size = _system_configuration.dcache_size;
	obj2->attr->cache.associativity = _system_configuration.dcache_asc;
	obj2->attr->cache.linesize = _system_configuration.dcache_line;
	obj2->attr->cache.depth = 1;
	if (_system_configuration.cache_attrib & (1<<30)) {
	  /* Unified cache */
	  obj2->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
	  hwloc_debug("Adding an L1u cache for core %d\n", i);
	  hwloc_insert_object_by_cpuset(topology, obj2);
	} else {
	  /* Separate Instruction and Data caches */
	  obj2->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
	  hwloc_debug("Adding an L1d cache for core %d\n", i);
	  hwloc_insert_object_by_cpuset(topology, obj2);

	  obj3 = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, i);
	  obj3->cpuset = hwloc_bitmap_dup(obj->cpuset);
	  obj3->attr->cache.size = _system_configuration.icache_size;
	  obj3->attr->cache.associativity = _system_configuration.icache_asc;
	  obj3->attr->cache.linesize = _system_configuration.icache_line;
	  obj3->attr->cache.depth = 1;
	  obj3->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
	  hwloc_debug("Adding an L1i cache for core %d\n", i);
	  hwloc_insert_object_by_cpuset(topology, obj3);
	}
	break;
      }
      default:
	break;
    }
    hwloc_debug_2args_bitmap("%s %d has cpuset %s\n",
	       hwloc_obj_type_string(type),
	       i, obj->cpuset);
    hwloc_insert_object_by_cpuset(topology, obj);
  }

  rs_free(rset);
  rs_free(rad);
}
Example #20
int main(void)
{
    hwloc_bitmap_t set;

    /* check an empty bitmap */
    set = hwloc_bitmap_alloc();
    assert(hwloc_bitmap_to_ulong(set) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);
    /* check a non-empty bitmap */
    hwloc_bitmap_from_ith_ulong(set, 4, 0xff);
    assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
    assert(hwloc_bitmap_to_ulong(set) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);
    /* check a zeroed bitmap */
    hwloc_bitmap_zero(set);
    assert(hwloc_bitmap_to_ulong(set) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);
    hwloc_bitmap_free(set);

    /* check a full bitmap */
    set = hwloc_bitmap_alloc_full();
    assert(hwloc_bitmap_to_ulong(set) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == ~0UL);
    /* check an almost full bitmap */
    hwloc_bitmap_set_ith_ulong(set, 4, 0xff);
    assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
    assert(hwloc_bitmap_to_ulong(set) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == ~0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == ~0UL);
    /* check an almost empty bitmap */
    hwloc_bitmap_from_ith_ulong(set, 4, 0xff);
    assert(hwloc_bitmap_to_ith_ulong(set, 4) == 0xff);
    assert(hwloc_bitmap_to_ulong(set) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 0) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 1) == 0UL);
    assert(hwloc_bitmap_to_ith_ulong(set, 23) == 0UL);
    hwloc_bitmap_free(set);

    /* check ranges */
    set = hwloc_bitmap_alloc();
    assert(hwloc_bitmap_weight(set) == 0);
    /* 23-45 */
    hwloc_bitmap_set_range(set, 23, 45);
    assert(hwloc_bitmap_weight(set) == 23);
    /* 23-45,78- */
    hwloc_bitmap_set_range(set, 78, -1);
    assert(hwloc_bitmap_weight(set) == -1);
    /* 23- */
    hwloc_bitmap_set_range(set, 44, 79);
    assert(hwloc_bitmap_weight(set) == -1);
    assert(hwloc_bitmap_first(set) == 23);
    assert(!hwloc_bitmap_isfull(set));
    /* 0- */
    hwloc_bitmap_set_range(set, 0, 22);
    assert(hwloc_bitmap_weight(set) == -1);
    assert(hwloc_bitmap_isfull(set));
    /* 0-34,57- */
    hwloc_bitmap_clr_range(set, 35, 56);
    assert(hwloc_bitmap_weight(set) == -1);
    assert(!hwloc_bitmap_isfull(set));
    /* 0-34,57 */
    hwloc_bitmap_clr_range(set, 58, -1);
    assert(hwloc_bitmap_weight(set) == 36);
    assert(hwloc_bitmap_last(set) == 57);
    assert(hwloc_bitmap_next(set, 34) == 57);
    /* 0-34 */
    hwloc_bitmap_clr(set, 57);
    assert(hwloc_bitmap_weight(set) == 35);
    assert(hwloc_bitmap_last(set) == 34);
    /* empty */
    hwloc_bitmap_clr_range(set, 0, 34);
    assert(hwloc_bitmap_weight(set) == 0);
    assert(hwloc_bitmap_first(set) == -1);
    hwloc_bitmap_free(set);

    return 0;
}
Example #21
        void computeCPUOMP(int threadId, expression_type * expr, im_type * im, element_iterator * elt_it, std::vector<std::pair<element_iterator, element_iterator> > * elts)
        {
            char * a;
            int cid;
            std::ostringstream oss;

#if 0
            hwloc_cpuset_t set = nullptr;

            /* get a cpuset object */
            set = hwloc_bitmap_alloc();

            /* Get the cpu thread affinity info of the current process/thread */
            hwloc_get_cpubind(Environment::getHwlocTopology(), set, 0);
            hwloc_bitmap_asprintf(&a, set);
            oss << a;
            free(a); 
            
            cid = hwloc_bitmap_first(set);
            oss << "(";
            while(cid != -1)
            {
                oss << cid << " ";
                cid = hwloc_bitmap_next(set, cid);
            }
            oss << ")|";
            std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl;

            /* Get the latest core location of the current process/thread */
            hwloc_get_last_cpu_location(Environment::getHwlocTopology(), set, 0);
            hwloc_bitmap_asprintf(&a, set);
            oss << a;
            free(a);

            cid = hwloc_bitmap_first(set);
            oss << "(";
            while(cid != -1)
            {
                oss << cid << " ";
                cid = hwloc_bitmap_next(set, cid);
            }
            oss << ");";
            std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl;
#endif

#if defined(FEELPP_HAS_HARTS)
            perf_mng.init("cpu") ;
            perf_mng.start("cpu") ;
            perf_mng.init("1.1") ;
            perf_mng.init("1.2") ;
            perf_mng.init("2.1") ;
            perf_mng.init("2.2") ;
            perf_mng.init("3") ;
#endif
            
            //M_gm((*elt_it)->gm());
            gm_ptrtype gm = (*elt_it)->gm();
            //M_geopc(new typename eval::gmpc_type( M_gm, im->points() ));
            typename eval::gmpc_ptrtype __geopc( new typename eval::gmpc_type(gm, im->points()) );
            //M_c(new gmc_type( M_gm, *(*elt_it), M_geopc ));
            gmc_ptrtype __c( new gmc_type( gm, *(*elt_it), __geopc ) );
            //M_expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) );
            eval_expr_type __expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( __c ) ) );


            for (int i = 0; i < elts->size(); i++)
            {
                /*
                std::cout << Environment::worldComm().rank() <<  " nbItems: " << elts->size() 
                          << " nbElts " << std::distance(elts->at(i), elts->at(i+1))
                          << " 1st id " << elts->at(i)->id() << std::endl;
                */

                //std::cout << Environment::worldComm().rank() << "|" << theadId << " fid=" elts.at(i).first.id() << std::endl;
                for ( auto _elt = elts->at(i).first; _elt != elts->at(i).second; ++_elt )
                {
                    //perf_mng.start("1.1") ;
                    __c->update( *_elt );
                    //perf_mng.stop("1.1") ;
                    //perf_mng.start("1.2") ;
                    map_gmc_type mapgmc( fusion::make_pair<vf::detail::gmc<0> >( __c ) );
                    //perf_mng.stop("1.2") ;

                    //perf_mng.start("2.1") ;
                    __expr.update( mapgmc );
                    //perf_mng.stop("2.1") ;
                    //perf_mng.start("2.2") ;
                    im->update( *__c );
                    //perf_mng.stop("2.2") ;

                    //perf_mng.start("3") ;
                    for ( uint16_type c1 = 0; c1 < eval::shape::M; ++c1 )
                    {
                        for ( uint16_type c2 = 0; c2 < eval::shape::N; ++c2 )
                        {
                            M_ret( c1,c2 ) += (*im)( __expr, c1, c2 );
                        }
                    }
                    //perf_mng.stop("3") ;
                }
            }

#if defined(FEELPP_HAS_HARTS)
            perf_mng.stop("cpu") ;
            M_cpuTime = perf_mng.getValueInSeconds("cpu");
#endif
        }
Example #22
static int _task_cgroup_cpuset_dist_cyclic(
	hwloc_topology_t topology, hwloc_obj_type_t hwtype,
	hwloc_obj_type_t req_hwtype, stepd_step_rec_t *job, int bind_verbose,
	hwloc_bitmap_t cpuset)
{
	hwloc_obj_t obj;
	uint32_t  s_ix;		/* socket index */
	uint32_t *c_ixc;	/* core index by socket (current taskid) */
	uint32_t *c_ixn;	/* core index by socket (next taskid) */
	uint32_t *t_ix;		/* thread index by core by socket */
	uint16_t npus = 0, nboards = 0, nthreads = 0, ncores = 0, nsockets = 0;
	uint32_t taskid = job->envtp->localid;
	int spec_thread_cnt = 0;
	bitstr_t *spec_threads = NULL;
	uint32_t obj_idxs[3], cps, tpc, i, j, sock_loop, ntskip, npdist;
	bool core_cyclic, core_fcyclic, sock_fcyclic;
	bool hwloc_success = true;

	/*
	 * We can't trust the slurmd_conf_t *conf here as we need actual
	 * hardware instead of whatever is possibly configured.  So we need to
	 * look it up again.
	 */
	if (get_cpuinfo(&npus, &nboards, &nsockets, &ncores, &nthreads,
			NULL, NULL, NULL) != SLURM_SUCCESS) {
		/*
		 * Fall back to use allocated resources, but this may result
		 * in incorrect layout due to a uneven task distribution
		 * (e.g. 4 cores on socket 0 and 3 cores on socket 1)
		 */
		nsockets = (uint16_t) hwloc_get_nbobjs_by_type(topology,
							HWLOC_OBJ_SOCKET);
		ncores = (uint16_t) hwloc_get_nbobjs_by_type(topology,
							HWLOC_OBJ_CORE);
		nthreads = (uint16_t) hwloc_get_nbobjs_by_type(topology,
							HWLOC_OBJ_PU);
		npus = (uint16_t) hwloc_get_nbobjs_by_type(topology,
							   HWLOC_OBJ_PU);
	} else {
		/* Translate cores-per-socket to total core count, etc. */
		nsockets *= nboards;
		ncores *= nsockets;
		nthreads *= ncores;
	}

	if ((nsockets == 0) || (ncores == 0))
		return XCGROUP_ERROR;
	cps = (ncores + nsockets - 1) / nsockets;
	tpc = (nthreads + ncores - 1) / ncores;

	sock_fcyclic = (job->task_dist & SLURM_DIST_SOCKMASK) ==
		SLURM_DIST_SOCKCFULL ? true : false;
	core_cyclic = (job->task_dist & SLURM_DIST_COREMASK) ==
		SLURM_DIST_CORECYCLIC ? true : false;
	core_fcyclic = (job->task_dist & SLURM_DIST_COREMASK) ==
		SLURM_DIST_CORECFULL ? true : false;

	if (bind_verbose) {
		info("task/cgroup: task[%u] using %s distribution "
		     "(task_dist=0x%x)", taskid,
		     format_task_dist_states(job->task_dist), job->task_dist);
	}

	t_ix = xmalloc(ncores * sizeof(uint32_t));
	c_ixc = xmalloc(nsockets * sizeof(uint32_t));
	c_ixn = xmalloc(nsockets * sizeof(uint32_t));

	if (hwloc_compare_types(hwtype, HWLOC_OBJ_CORE) >= 0) {
		/* cores or threads granularity */
		ntskip = taskid;
		npdist = job->cpus_per_task;
	} else {
		/* sockets or ldoms granularity */
		ntskip = taskid;
		npdist = 1;
	}
	if ((job->job_core_spec != NO_VAL16) &&
	    (job->job_core_spec &  CORE_SPEC_THREAD)  &&
	    (job->job_core_spec != CORE_SPEC_THREAD)) {
		/* Skip specialized threads as needed */
		int i, t, c, s;
		int cores = (ncores + nsockets - 1) / nsockets;
		int threads = (npus + cores - 1) / cores;
		spec_thread_cnt = job->job_core_spec & (~CORE_SPEC_THREAD);
		spec_threads = bit_alloc(npus);
		for (t = threads - 1;
		     ((t >= 0) && (spec_thread_cnt > 0)); t--) {
			for (c = cores - 1;
			     ((c >= 0) && (spec_thread_cnt > 0)); c--) {
				for (s = nsockets - 1;
				     ((s >= 0) && (spec_thread_cnt > 0)); s--) {
					i = s * cores + c;
					i = (i * threads) + t;
					bit_set(spec_threads, i);
					spec_thread_cnt--;
				}
			}
		}
		if (hwtype == HWLOC_OBJ_PU) {
			for (i = 0; i <= ntskip && i < npus; i++) {
				if (bit_test(spec_threads, i))
					ntskip++;
			};
		}
	}

	/* skip objs for lower taskids, then add them to the
	   current task cpuset. To prevent infinite loop, check
	   that we do not loop more than npdist times around the available
	   sockets, which is the worst scenario we should afford here. */
	i = j = s_ix = sock_loop = 0;
	while (i < ntskip + 1 && (sock_loop/tpc) < npdist + 1) {
		/* fill one or multiple sockets using block mode, unless
		   otherwise stated in the job->task_dist field */
		while ((s_ix < nsockets) && (j < npdist)) {
			obj = hwloc_get_obj_below_by_type(
				topology, HWLOC_OBJ_SOCKET, s_ix,
				hwtype, c_ixc[s_ix]);
			if ((obj == NULL) && (s_ix == 0) && (c_ixc[s_ix] == 0))
				hwloc_success = false;	/* Complete failure */
			if ((obj != NULL) &&
			    (hwloc_bitmap_first(obj->allowed_cpuset) != -1)) {
				if (hwloc_compare_types(hwtype, HWLOC_OBJ_PU)
									>= 0) {
					/* granularity is thread */
					obj_idxs[0]=s_ix;
					obj_idxs[1]=c_ixc[s_ix];
					obj_idxs[2]=t_ix[(s_ix*cps)+c_ixc[s_ix]];
					obj = hwloc_get_obj_below_array_by_type(
						topology, 3, obj_types, obj_idxs);
					if ((obj != NULL) &&
					    (hwloc_bitmap_first(
					     obj->allowed_cpuset) != -1)) {
						t_ix[(s_ix*cps)+c_ixc[s_ix]]++;
						j++;
						if (i == ntskip)
							_add_hwloc_cpuset(hwtype,
							req_hwtype, obj, taskid,
							bind_verbose, cpuset);
						if (j < npdist) {
							if (core_cyclic) {
								c_ixn[s_ix] =
								c_ixc[s_ix] + 1;
							} else if (core_fcyclic){
								c_ixc[s_ix]++;
								c_ixn[s_ix] =
								c_ixc[s_ix];
							}
							if (sock_fcyclic)
								s_ix++;
						}
					} else {
						c_ixc[s_ix]++;
						if (c_ixc[s_ix] == cps)
							s_ix++;
					}
				} else {
					/* granularity is core or larger */
					c_ixc[s_ix]++;
					j++;
					if (i == ntskip)
						_add_hwloc_cpuset(hwtype,
							req_hwtype, obj, taskid,
						  	bind_verbose, cpuset);
					if ((j < npdist) && (sock_fcyclic))
						s_ix++;
				}
			} else
				s_ix++;
		}
		/* if it succeeds, switch to the next task, starting
		 * with the next available socket, otherwise, loop back
		 * from the first socket trying to find available slots. */
		if (j == npdist) {
			i++;
			j = 0;
			s_ix++; // no validity check, handled by the while
			sock_loop = 0;
		} else {
			sock_loop++;
			s_ix = 0;
		}
	}
	xfree(t_ix);
	xfree(c_ixc);
	xfree(c_ixn);

	if (spec_threads) {
		for (i = 0; i < npus; i++) {
			if (bit_test(spec_threads, i)) {
				hwloc_bitmap_clr(cpuset, i);
			}
		};
		FREE_NULL_BITMAP(spec_threads);
	}

	/* should never happen in normal scenario */
	if ((sock_loop > npdist) && !hwloc_success) {
		/* hwloc_get_obj_below_by_type() fails if no CPU set
		 * configured, see hwloc documentation for details */
		error("task/cgroup: hwloc_get_obj_below_by_type() failing, "
		      "task/affinity plugin may be required to address bug "
		      "fixed in HWLOC version 1.11.5");
		return XCGROUP_ERROR;
	} else if (sock_loop > npdist) {
		char buf[128] = "";
		hwloc_bitmap_snprintf(buf, sizeof(buf), cpuset);
		error("task/cgroup: task[%u] infinite loop broken while trying "
		      "to provision compute elements using %s (bitmap:%s)",
		      taskid, format_task_dist_states(job->task_dist), buf);
		return XCGROUP_ERROR;
	} else
		return XCGROUP_SUCCESS;
}
示例#23
0
int main(int argc, char **argv)
{
    int rank;
    int num_ranks;
    MPI_Status status;
    hwloc_topology_t topology;
    hwloc_cpuset_t set;
    int pu_rank = -1;
    char name[1024];
    int resultlen;

    MPI_Init(&argc,&argv);

    if (argc != 2) {
        fprintf(stderr, "Usage: %s <output file>\n", argv[0]);
        exit(1);
    }

    FILE *output = fopen(argv[1], "w");

    if (!output) {
        perror("fopen");
        exit(2);
    }

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);

    hwloc_topology_init(&topology);
    hwloc_topology_load(topology);
    set = hwloc_bitmap_alloc();
    hwloc_get_cpubind(topology, set, 0);
    pu_rank = hwloc_bitmap_first(set);

    MPI_Get_processor_name(name, &resultlen);
    resultlen++;

    if (rank == 0) {
        node_t *nodes = NULL;
        node_t *node;

        /* Rank 0 info */
        /* Find node */
        HASH_FIND_PTR(nodes, &name, node);
        /* If node does not exist yet, create it */
        if (!node) {
            node = (node_t *)malloc(sizeof(node_t));
            node->name = name;
            utarray_new(node->slots, &ut_int_icd);
            utarray_new(node->ranks, &ut_int_icd);
            HASH_ADD_KEYPTR(hh, nodes, node->name, strlen(node->name), node);
        }
        /* Add the slot to the list of slots */
        utarray_push_back(node->slots, &pu_rank);
        utarray_push_back(node->ranks, &rank);

        /* Info about other ranks */
        for (int p = 1; p < num_ranks; p++) {
            /* Receive node name size, and slot index */
            char *nodename;
            int buffer[2];
            MPI_Recv (buffer, 2, MPI_INT, p, 0, MPI_COMM_WORLD, &status);
            int size = buffer[0];
            int slot = buffer[1];

            /* Receive node name */
            nodename = (char *)malloc(sizeof(char[size]));
            MPI_Recv(nodename, size, MPI_CHAR, p, 0, MPI_COMM_WORLD, &status);

            /* Find node */
            HASH_FIND_STR(nodes, nodename, node);
            /* If node does not exist yet, create it */
            if (!node) {
                node = (node_t *)malloc(sizeof(node_t));
                node->name = nodename;
                utarray_new(node->slots, &ut_int_icd);
                utarray_new(node->ranks, &ut_int_icd);
                HASH_ADD_KEYPTR(hh, nodes, node->name, strlen(node->name), node);
            }
            /* Add the slot to the list of slots */
            utarray_push_back(node->slots, &slot);
            utarray_push_back(node->ranks, &p);
        }

        /* Write the list of nodes and slots by node */

        /* Number of nodes */
        int num_nodes = HASH_COUNT(nodes);
        fprintf(output, "%d", num_nodes);

        /* Names of nodes */
        node_t *node_tmp;
        HASH_ITER(hh, nodes, node, node_tmp) {
            fprintf(output, " %s", node->name);
        }
Example #24
int main(void)
{
  hwloc_bitmap_t set;
  int i, cpu, expected_cpu = 0;

  /* empty set */
  set = hwloc_bitmap_alloc();
  assert(hwloc_bitmap_first(set) == -1);
  assert(hwloc_bitmap_last(set) == -1);
  assert(hwloc_bitmap_next(set, 0) == -1);
  assert(hwloc_bitmap_next(set, -1) == -1);
  assert(hwloc_bitmap_weight(set) == 0);

  /* full set */
  hwloc_bitmap_fill(set);
  assert(hwloc_bitmap_first(set) == 0);
  assert(hwloc_bitmap_last(set) == -1);
  assert(hwloc_bitmap_next(set, -1) == 0);
  assert(hwloc_bitmap_next(set, 0) == 1);
  assert(hwloc_bitmap_next(set, 1) == 2);
  assert(hwloc_bitmap_next(set, 2) == 3);
  assert(hwloc_bitmap_next(set, 30) == 31);
  assert(hwloc_bitmap_next(set, 31) == 32);
  assert(hwloc_bitmap_next(set, 32) == 33);
  assert(hwloc_bitmap_next(set, 62) == 63);
  assert(hwloc_bitmap_next(set, 63) == 64);
  assert(hwloc_bitmap_next(set, 64) == 65);
  assert(hwloc_bitmap_next(set, 12345) == 12346);
  assert(hwloc_bitmap_weight(set) == -1);

  /* custom sets */
  hwloc_bitmap_zero(set);
  hwloc_bitmap_set_range(set, 36, 59);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 59);
  assert(hwloc_bitmap_next(set, -1) == 36);
  assert(hwloc_bitmap_next(set, 0) == 36);
  assert(hwloc_bitmap_next(set, 36) == 37);
  assert(hwloc_bitmap_next(set, 59) == -1);
  assert(hwloc_bitmap_weight(set) == 24);
  hwloc_bitmap_set_range(set, 136, 259);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 259);
  assert(hwloc_bitmap_next(set, 59) == 136);
  assert(hwloc_bitmap_next(set, 259) == -1);
  assert(hwloc_bitmap_weight(set) == 148);
  hwloc_bitmap_clr(set, 199);
  assert(hwloc_bitmap_first(set) == 36);
  assert(hwloc_bitmap_last(set) == 259);
  assert(hwloc_bitmap_next(set, 198) == 200);
  assert(hwloc_bitmap_next(set, 199) == 200);
  assert(hwloc_bitmap_weight(set) == 147);

  i = 0;
  hwloc_bitmap_foreach_begin(cpu, set) {
    if (0 <= i && i < 24)
      expected_cpu = i + 36;
    else if (24 <= i && i < 87)
      expected_cpu = i + 112;
    else if (87 <= i && i < 147)
      expected_cpu = i + 113;

    assert(expected_cpu == cpu);

    i++;
  } hwloc_bitmap_foreach_end();

  hwloc_bitmap_free(set);

  return 0;
}
Example #25
static int
hwloc_solaris_set_sth_cpubind(hwloc_topology_t topology, idtype_t idtype, id_t id, hwloc_const_bitmap_t hwloc_set, int flags)
{
  unsigned target_cpu;

  /* The resulting binding is always strict */

  if (hwloc_bitmap_isequal(hwloc_set, hwloc_topology_get_complete_cpuset(topology))) {
    if (processor_bind(idtype, id, PBIND_NONE, NULL) != 0)
      return -1;
#ifdef HAVE_LIBLGRP
    if (!(flags & HWLOC_CPUBIND_NOMEMBIND)) {
      int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
      if (depth >= 0) {
	int n = hwloc_get_nbobjs_by_depth(topology, depth);
	int i;

	for (i = 0; i < n; i++) {
	  hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
	  lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE);
	}
      }
    }
#endif /* HAVE_LIBLGRP */
    return 0;
  }

#ifdef HAVE_LIBLGRP
  if (!(flags & HWLOC_CPUBIND_NOMEMBIND)) {
    int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NUMANODE);
    if (depth >= 0) {
      int n = hwloc_get_nbobjs_by_depth(topology, depth);
      int i;
      int ok;
      hwloc_bitmap_t target = hwloc_bitmap_alloc();

      for (i = 0; i < n; i++) {
	hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);
        if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set))
          hwloc_bitmap_or(target, target, obj->cpuset);
      }

      ok = hwloc_bitmap_isequal(target, hwloc_set);
      hwloc_bitmap_free(target);

      if (ok) {
        /* Ok, managed to achieve hwloc_set by just combining NUMA nodes */

        for (i = 0; i < n; i++) {
          hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, depth, i);

          if (hwloc_bitmap_isincluded(obj->cpuset, hwloc_set)) {
            lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_STRONG);
          } else {
            if (flags & HWLOC_CPUBIND_STRICT)
              lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_NONE);
            else
              lgrp_affinity_set(idtype, id, obj->os_index, LGRP_AFF_WEAK);
          }
        }

        return 0;
      }
    }
  }
#endif /* HAVE_LIBLGRP */

  if (hwloc_bitmap_weight(hwloc_set) != 1) {
    errno = EXDEV;
    return -1;
  }

  target_cpu = hwloc_bitmap_first(hwloc_set);

  if (processor_bind(idtype, id,
		     (processorid_t) (target_cpu), NULL) != 0)
    return -1;

  return 0;
}
Example #26
        void computeCPU(DataArgsType& args)
        {
            char * a;
            int cid;
            hwloc_cpuset_t set = nullptr;
            std::ostringstream oss;
            
            /* This initialization takes some time */
            /* When using hartsi, the object instanciation is done when creating tasks */
            /* and this is not a parallel section, thus we lose time in initialization */
            /* doing it the computation step allows to incorporate this init time in the parallel section */
            /*
            M_threadId( threadId ),
            M_gm( new gm_type( *_elt.gm() ) ),
            M_geopc( new gmpc_type( M_gm, _im.points() ) ),
            M_c( new gmc_type( M_gm, _elt, M_geopc ) ),
            M_expr( _expr, map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) ),
            M_im( _im ),
            M_ret( eval::matrix_type::Zero() ),
            M_cpuTime( 0.0 )
            */

#if 0
            /* get a cpuset object */
            set = hwloc_bitmap_alloc();

            /* Get the cpu thread affinity info of the current process/thread */
            hwloc_get_cpubind(Environment::getHwlocTopology(), set, 0);
            hwloc_bitmap_asprintf(&a, set);
            oss << a;
            free(a); 
            
            cid = hwloc_bitmap_first(set);
            oss << "(";
            while(cid != -1)
            {
                oss << cid << " ";
                cid = hwloc_bitmap_next(set, cid);
            }
            oss << ")|";
            std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl;

            /* Get the latest core location of the current process/thread */
            hwloc_get_last_cpu_location(Environment::getHwlocTopology(), set, 0);
            hwloc_bitmap_asprintf(&a, set);
            oss << a;
            free(a);

            cid = hwloc_bitmap_first(set);
            oss << "(";
            while(cid != -1)
            {
                oss << cid << " ";
                cid = hwloc_bitmap_next(set, cid);
            }
            oss << ");";
            std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl;
#endif

            perf_mng.init("1.1") ;
            perf_mng.init("1.1") ;
            perf_mng.init("2.1") ;
            perf_mng.init("2.2") ;
            perf_mng.init("3") ;

            /* free memory */
            if(set != nullptr)
            {
                hwloc_bitmap_free(set);
            }

            //perf_mng.init("data") ;
            //perf_mng.start("data") ;

            // DEFINE the range to be iterated on
            std::vector<std::pair<element_iterator, element_iterator> > * elts =
                args.get("elements")->get<std::vector<std::pair<element_iterator, element_iterator> > >();

            int * threadId = args.get("threadId")->get<int>();
            expression_type * expr = args.get("expr")->get<expression_type>();
            im_type * im = args.get("im")->get<im_type>();
            element_iterator * elt_it = args.get("elt")->get<element_iterator>();
            
            //M_gm((*elt_it)->gm());
            gm_ptrtype gm = (*elt_it)->gm();
            //M_geopc(new typename eval::gmpc_type( M_gm, im->points() ));
            typename eval::gmpc_ptrtype __geopc( new typename eval::gmpc_type(gm, im->points()) );
            //M_c(new gmc_type( M_gm, *(*elt_it), M_geopc ));
            gmc_ptrtype __c( new gmc_type( gm, *(*elt_it), __geopc ) );
            //M_expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) );
            eval_expr_type __expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( __c ) ) );

            //perf_mng.stop("data");

            perf_mng.init("cpu") ;
            perf_mng.start("cpu") ;

            for (int i = 0; i < elts->size(); i++)
            {
                //std::cout << Environment::worldComm().rank() <<  " nbItems: " << elts->size() << " nbElts " << std::distance(elts->at(i), elts->at(i+1)) << std::endl;
                for ( auto _elt = elts->at(i).first; _elt != elts->at(i).second; ++_elt )
                {
                    //perf_mng.start("1.1") ;
                    //M_c->update( *_elt );
                    __c->update( *_elt );
                    //perf_mng.stop("1.1") ;
                    //perf_mng.start("1.2") ;
                    map_gmc_type mapgmc( fusion::make_pair<vf::detail::gmc<0> >( __c ) );
                    //perf_mng.stop("1.2") ;

                    //perf_mng.start("2.1") ;
                    __expr.update( mapgmc );
                    //perf_mng.stop("2.1") ;
                    //perf_mng.start("2.2") ;
                    im->update( *__c );
                    //perf_mng.stop("2.2") ;

                    //perf_mng.start("3") ;
                    for ( uint16_type c1 = 0; c1 < eval::shape::M; ++c1 )
                    {
                        for ( uint16_type c2 = 0; c2 < eval::shape::N; ++c2 )
                        {
                            M_ret( c1,c2 ) += (*im)( __expr, c1, c2 );
                        }
                    }
                    //perf_mng.stop("3") ;
                }
            }

            perf_mng.stop("cpu") ;
            M_cpuTime = perf_mng.getValueInSeconds("cpu");
        }