コード例 #1
0
bool hwloc::bind_this_thread( const std::pair<unsigned,unsigned> coord )
{
  // Bind the calling thread to the core cached at 'coord' = ( first , second ).
  // Returns true only when the coordinate is inside 's_core_topology' and
  // hwloc_set_cpubind() returns zero.

  // Range-check first so that 's_core' is never indexed with a bad coordinate.
  if ( ! ( coord.first  < s_core_topology.first  ) ) return false ;
  if ( ! ( coord.second < s_core_topology.second ) ) return false ;

  // Row-major position of this coordinate inside the cached 's_core' table.
  const unsigned core_index = coord.second + coord.first * s_core_topology.second ;

#if 0

  // Disabled tracing: prints the current location and the requested cpuset.
  std::cout << "KokkosArray::hwloc::bind_this_thread() at " ;

  hwloc_get_last_cpu_location( s_hwloc_topology ,
                               s_hwloc_location , HWLOC_CPUBIND_THREAD );

  print_bitmap( std::cout , s_hwloc_location );

  std::cout << " to " ;

  print_bitmap( std::cout , s_core[ core_index ] );

  std::cout << std::endl ;

#endif

  // Strict, thread-scoped binding to the cached cpuset.
  const int bind_status =
    hwloc_set_cpubind( s_hwloc_topology ,
                       s_core[ core_index ] ,
                       HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT );

  return 0 == bind_status ;
}
コード例 #2
0
/*
 * Requests that the memory area starting at `ary` be bound (with migration)
 * to the location of the calling thread.
 *
 * ary    : start address of the area handed to hwloc_set_area_membind().
 * start  : one end of the range; only |end - start| is used, as the length.
 * end    : other end of the range.
 * mem_ac : not used by this function.
 *
 * All hwloc return codes are ignored (best effort); nothing is reported to
 * the caller.
 */
inline void __pact_reuse_add(void *ary, long long start, long long end, long long mem_ac) {
    (void)mem_ac; /* unused; kept for interface compatibility */

    hwloc_bitmap_t set = hwloc_bitmap_alloc();
    if (!set) {
        /* Allocation failed: nothing can be bound, and passing a null
         * bitmap to the hwloc calls below would be invalid. */
        return;
    }

    /* Start from the thread binding, then overwrite with the last CPU
     * location.  NOTE(review): the first call looks like a fallback in case
     * the second one fails - confirm before removing either call. */
    hwloc_get_cpubind(__pact_topo, set, HWLOC_CPUBIND_THREAD);
    hwloc_get_last_cpu_location(__pact_topo, set, HWLOC_CPUBIND_THREAD);

    /* Keep a single PU so the target is unambiguous. */
    hwloc_bitmap_singlify(set);

    /* Compute the length in 64-bit arithmetic; abs() may resolve to the
     * int overload and silently truncate large ranges. */
    const long long span = (end >= start) ? (end - start) : (start - end);

    hwloc_set_area_membind ( __pact_topo, (const void*)ary, span, (hwloc_const_cpuset_t)set, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_MIGRATE );
    hwloc_bitmap_free(set);
}
コード例 #3
0
ファイル: HwlocHelper.cpp プロジェクト: HanumathRao/hyrise
// Returns the logical index of the core object covering the CPU on which the
// calling thread last ran, or -1 when that cannot be determined (bitmap
// allocation failure, hwloc query failure, or no covering core object).
signed getCurrentCore() {
  hwloc_topology_t topology = getHWTopology();
  hwloc_cpuset_t cpu_set = hwloc_bitmap_alloc();
  if (cpu_set == NULL) {
    return -1;
  }
  if (hwloc_get_last_cpu_location(topology, cpu_set, HWLOC_CPUBIND_THREAD) < 0) {
    // Release the bitmap on the error path too; it was leaked previously.
    hwloc_bitmap_free(cpu_set);
    return -1;
  }
  hwloc_obj_t current_core = hwloc_get_next_obj_covering_cpuset_by_type(topology, cpu_set, HWLOC_OBJ_CORE, NULL);
  hwloc_bitmap_free(cpu_set);
  if (current_core == NULL) {
    // No core object covers the reported location; avoid dereferencing NULL.
    return -1;
  }
  return current_core->logical_index;
}
// Requests migration of the pages [PageStart, PageEnd) to the location of the
// calling thread.
//
// PageStart / PageEnd are page numbers: both the start address and the length
// passed to hwloc are obtained by shifting left by PAGE_EXP.
// A failed hwloc_set_area_membind() trips the assertion in debug builds and is
// ignored when assertions are compiled out.
void migrate(long PageStart, long PageEnd) {
  SPMR_DEBUG(std::cout << "Runtime: migrate pages: " << PageStart << " to "
                       << PageEnd << "\n");
  SPMR_DEBUG(std::cout << "Runtime: hwloc call: " << (PageStart << PAGE_EXP)
                       << ", " << ((PageEnd - PageStart) << PAGE_EXP) << "\n");

  hwloc_bitmap_t set = hwloc_bitmap_alloc();

  hwloc_get_cpubind(__spm_topo, set, HWLOC_CPUBIND_THREAD);
  hwloc_get_last_cpu_location(__spm_topo, set, HWLOC_CPUBIND_THREAD);

  // Reduce the set to a single PU before using it as the binding target.
  hwloc_bitmap_singlify(set);

  // The call must live outside assert(): with NDEBUG defined the assert
  // expression is not evaluated, which would silently skip the migration.
  const int membind_result =
      hwloc_set_area_membind(__spm_topo, (const void*)(PageStart << PAGE_EXP),
                             (PageEnd - PageStart) << PAGE_EXP,
                             (hwloc_const_cpuset_t)set, HWLOC_MEMBIND_BIND,
                             HWLOC_MEMBIND_MIGRATE);
  assert(membind_result != -1 && "Unable to migrate requested pages");
  (void)membind_result;  // silence unused-variable warnings when NDEBUG is set

  hwloc_bitmap_free(set);
}
コード例 #5
0
std::pair<unsigned,unsigned> hwloc::get_this_thread_coordinate()
{
  // Size of the cached core table: first * second entries, stored row-major.
  const unsigned cores_per_root = s_core_topology.second ;
  const unsigned total_cores    = s_core_topology.first * cores_per_root ;

  // The result is written into the pre-allocated 's_hwloc_location' so this
  // thread does not allocate memory.  This call is NOT thread-safe.
  hwloc_get_last_cpu_location( s_hwloc_topology ,
                               s_hwloc_location , HWLOC_CPUBIND_THREAD );

  // The first cached core whose cpuset intersects the location wins.
  for ( unsigned k = 0 ; k < total_cores ; ++k ) {
    if ( hwloc_bitmap_intersects( s_hwloc_location , s_core[ k ] ) ) {
      return std::pair<unsigned,unsigned>( k / cores_per_root ,
                                           k % cores_per_root );
    }
  }

  // No cached core matches: report why and throw.
  std::ostringstream msg ;
  msg << "KokkosArray::hwloc::get_this_thread_coordinate() FAILED :" ;

  if ( 0 == s_process_binding || 0 == s_hwloc_location ) {
    msg << " not initialized" ;
  }
  else {
    msg << " cpu_location" ;
    print_bitmap( msg , s_hwloc_location );
    msg << " is not a member of the process_cpu_set" ;
    print_bitmap( msg , s_process_binding );
  }

  throw std::runtime_error( msg.str() );
}
コード例 #6
0
ファイル: Kokkos_hwloc.cpp プロジェクト: UoB-HPC/TeaLeaf
std::pair<unsigned,unsigned> get_this_thread_coordinate()
{
  typedef std::pair<unsigned,unsigned> coordinate_type ;

  // When sentinel() reports false, answer (0,0) without querying hwloc.
  if ( ! sentinel() ) return coordinate_type( 0u , 0u );

  // Size of the cached core table: first * second entries, stored row-major.
  const unsigned cores_per_root = s_core_topology.second ;
  const unsigned total_cores    = s_core_topology.first * cores_per_root ;

  // The result is written into the pre-allocated 's_hwloc_location' so this
  // thread does not allocate memory.  This call is NOT thread-safe.
  hwloc_get_last_cpu_location( s_hwloc_topology ,
                               s_hwloc_location , HWLOC_CPUBIND_THREAD );

  // The first cached core whose cpuset intersects the location wins.
  for ( unsigned k = 0 ; k < total_cores ; ++k ) {
    if ( hwloc_bitmap_intersects( s_hwloc_location , s_core[ k ] ) ) {
      return coordinate_type( k / cores_per_root , k % cores_per_root );
    }
  }

  // No match: fall back to the origin coordinate.
  return coordinate_type( 0u , 0u );
}
コード例 #7
0
ファイル: hwloc-bind.c プロジェクト: anhzhang/hwloc
/*
 * hwloc-bind entry point.
 *
 * Parses options and location arguments from the command line, then either
 *  - prints the current CPU/memory binding or the last CPU location
 *    (--get / --get-last-cpu-location), or
 *  - applies the requested memory and/or CPU binding to this process, or to
 *    the process given with --pid,
 * and finally, when no --pid was given, execs the remaining command line.
 *
 * The topology is loaded lazily (ENSURE_LOADED) so that options changing the
 * topology flags can still take effect before the first use.
 *
 * Returns EXIT_SUCCESS / EXIT_FAILURE; on a successful execvp() it does not
 * return at all.
 */
int main(int argc, char *argv[])
{
  hwloc_topology_t topology;
  int loaded = 0;
  unsigned depth;
  hwloc_bitmap_t cpubind_set, membind_set;
  int got_cpubind = 0, got_membind = 0;
  int working_on_cpubind = 1; /* membind if 0 */
  int get_binding = 0;
  int get_last_cpu_location = 0;
  unsigned long flags = 0;
  int force = 0;
  int single = 0;
  int verbose = 0;
  int logical = 1;
  int taskset = 0;
  int cpubind_flags = 0;
  hwloc_membind_policy_t membind_policy = HWLOC_MEMBIND_BIND;
  int membind_flags = 0;
  int opt;
  int ret;
  int pid_number = -1;
  hwloc_pid_t pid = 0; /* only valid when pid_number > 0, but gcc-4.8 still reports uninitialized warnings */
  char *callname;

  cpubind_set = hwloc_bitmap_alloc();
  membind_set = hwloc_bitmap_alloc();

  /* don't load now, in case some options change the config before the topology is actually used */
#define LOADED() (loaded)
#define ENSURE_LOADED() do { \
  if (!loaded) { \
    hwloc_topology_init(&topology); \
    hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_ALL); \
    hwloc_topology_set_flags(topology, flags); \
    hwloc_topology_load(topology); \
    depth = hwloc_topology_get_depth(topology); \
    loaded = 1; \
  } \
} while (0)

  callname = argv[0];
  /* skip argv[0], handle options */
  argv++;
  argc--;

  while (argc >= 1) {
    if (!strcmp(argv[0], "--")) {
      argc--;
      argv++;
      break;
    }

    /* number of extra arguments consumed by the current option */
    opt = 0;

    if (*argv[0] == '-') {
      if (!strcmp(argv[0], "-v") || !strcmp(argv[0], "--verbose")) {
	verbose++;
	goto next;
      }
      if (!strcmp(argv[0], "-q") || !strcmp(argv[0], "--quiet")) {
	verbose--;
	goto next;
      }
      if (!strcmp(argv[0], "--help")) {
        usage("hwloc-bind", stdout);
	return EXIT_SUCCESS;
      }
      if (!strcmp(argv[0], "--single")) {
	single = 1;
	goto next;
      }
      if (!strcmp(argv[0], "-f") || !strcmp(argv[0], "--force")) {
	force = 1;
	goto next;
      }
      if (!strcmp(argv[0], "--strict")) {
	cpubind_flags |= HWLOC_CPUBIND_STRICT;
	membind_flags |= HWLOC_MEMBIND_STRICT;
	goto next;
      }
      if (!strcmp(argv[0], "--pid")) {
        if (argc < 2) {
          usage ("hwloc-bind", stderr);
          exit(EXIT_FAILURE);
        }
        pid_number = atoi(argv[1]);
        opt = 1;
        goto next;
      }
      if (!strcmp (argv[0], "--version")) {
	printf("%s %s\n", callname, HWLOC_VERSION);
	exit(EXIT_SUCCESS);
      }
      if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--logical")) {
        logical = 1;
        goto next;
      }
      if (!strcmp(argv[0], "-p") || !strcmp(argv[0], "--physical")) {
        logical = 0;
        goto next;
      }
      if (!strcmp(argv[0], "--taskset")) {
        taskset = 1;
        goto next;
      }
      /* only the first 10 characters ("--get-last") are compared, so abbreviations are accepted */
      if (!strcmp (argv[0], "-e") || !strncmp (argv[0], "--get-last-cpu-location", 10)) {
	get_last_cpu_location = 1;
	goto next;
      }
      if (!strcmp (argv[0], "--get")) {
	get_binding = 1;
	goto next;
      }
      if (!strcmp (argv[0], "--cpubind")) {
	working_on_cpubind = 1;
	goto next;
      }
      if (!strcmp (argv[0], "--membind")) {
	working_on_cpubind = 0;
	goto next;
      }
      if (!strcmp (argv[0], "--mempolicy")) {
	/* the policy name is mandatory; without this check argv[1] is NULL
	 * when --mempolicy is the last argument and strncmp() would crash */
	if (argc < 2) {
	  usage ("hwloc-bind", stderr);
	  exit(EXIT_FAILURE);
	}
	/* policies are recognized by their first two characters */
	if (!strncmp(argv[1], "default", 2))
	  membind_policy = HWLOC_MEMBIND_DEFAULT;
	else if (!strncmp(argv[1], "firsttouch", 2))
	  membind_policy = HWLOC_MEMBIND_FIRSTTOUCH;
	else if (!strncmp(argv[1], "bind", 2))
	  membind_policy = HWLOC_MEMBIND_BIND;
	else if (!strncmp(argv[1], "interleave", 2))
	  membind_policy = HWLOC_MEMBIND_INTERLEAVE;
	else if (!strncmp(argv[1], "nexttouch", 2))
	  membind_policy = HWLOC_MEMBIND_NEXTTOUCH;
	else {
	  fprintf(stderr, "Unrecognized memory binding policy %s\n", argv[1]);
          usage ("hwloc-bind", stderr);
          exit(EXIT_FAILURE);
	}
	opt = 1;
	goto next;
      }
      if (!strcmp (argv[0], "--whole-system")) {
	if (loaded) {
	  fprintf(stderr, "Input option %s disallowed after options using the topology\n", argv[0]);
	  exit(EXIT_FAILURE);
	}
	flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM;
	goto next;
      }
      if (!strcmp (argv[0], "--restrict")) {
	hwloc_bitmap_t restrictset;
	int err;
	if (argc < 2) {
	  usage (callname, stdout);
	  exit(EXIT_FAILURE);
	}
	restrictset = hwloc_bitmap_alloc();
	hwloc_bitmap_sscanf(restrictset, argv[1]);
	ENSURE_LOADED();
	err = hwloc_topology_restrict (topology, restrictset, 0);
	if (err) {
	  perror("Restricting the topology");
	  /* fallthrough */
	}
	hwloc_bitmap_free(restrictset);
	/* skip the option value here; 'next' then skips the option itself */
	argc--;
	argv++;
	goto next;
      }

      fprintf (stderr, "Unrecognized option: %s\n", argv[0]);
      usage("hwloc-bind", stderr);
      return EXIT_FAILURE;
    }

    /* not an option: try to parse it as a location; if that fails, the command to run starts here */
    ENSURE_LOADED();
    ret = hwloc_calc_process_arg(topology, depth, argv[0], logical,
				 working_on_cpubind ? cpubind_set : membind_set,
				 verbose);
    if (ret < 0) {
      if (verbose > 0)
	fprintf(stderr, "assuming the command starts at %s\n", argv[0]);
      break;
    }
    if (working_on_cpubind)
      got_cpubind = 1;
    else
      got_membind = 1;

  next:
    argc -= opt+1;
    argv += opt+1;
  }

  ENSURE_LOADED();

  if (pid_number > 0) {
    pid = hwloc_pid_from_number(pid_number, !(get_binding || get_last_cpu_location));
    /* no need to set_pid()
     * the doc just says we're operating on pid, not that we're retrieving the topo/cpuset as seen from inside pid
     */
  }

  if (get_last_cpu_location && !working_on_cpubind) {
    fprintf(stderr, "Options --membind and --get-last-cpu-location cannot be combined.\n");
    return EXIT_FAILURE;
  }
  if ((get_binding || get_last_cpu_location) && (got_cpubind || got_membind)) {
    /* doesn't work because get_binding/get_last_cpu_location overwrites cpubind_set */
    fprintf(stderr, "Cannot display and set binding at the same time.\n");
    return EXIT_FAILURE;
  }

  /* display mode: query the binding (or last location) and print it */
  if (get_binding || get_last_cpu_location) {
    char *s;
    const char *policystr = NULL;
    int err;
    if (working_on_cpubind) {
      if (get_last_cpu_location) {
	if (pid_number > 0)
	  err = hwloc_get_proc_last_cpu_location(topology, pid, cpubind_set, 0);
	else
	  err = hwloc_get_last_cpu_location(topology, cpubind_set, 0);
      } else {
	if (pid_number > 0)
	  err = hwloc_get_proc_cpubind(topology, pid, cpubind_set, 0);
	else
	  err = hwloc_get_cpubind(topology, cpubind_set, 0);
      }
      if (err) {
	const char *errmsg = strerror(errno);
	if (pid_number > 0)
	  fprintf(stderr, "hwloc_get_proc_%s %d failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", pid_number, errno, errmsg);
	else
	  fprintf(stderr, "hwloc_get_%s failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", errno, errmsg);
	return EXIT_FAILURE;
      }
      if (taskset)
	hwloc_bitmap_taskset_asprintf(&s, cpubind_set);
      else
	hwloc_bitmap_asprintf(&s, cpubind_set);
    } else {
      hwloc_membind_policy_t policy;
      if (pid_number > 0)
	err = hwloc_get_proc_membind(topology, pid, membind_set, &policy, 0);
      else
	err = hwloc_get_membind(topology, membind_set, &policy, 0);
      if (err) {
	const char *errmsg = strerror(errno);
        if (pid_number > 0)
          fprintf(stderr, "hwloc_get_proc_membind %d failed (errno %d %s)\n", pid_number, errno, errmsg);
        else
	  fprintf(stderr, "hwloc_get_membind failed (errno %d %s)\n", errno, errmsg);
	return EXIT_FAILURE;
      }
      if (taskset)
	hwloc_bitmap_taskset_asprintf(&s, membind_set);
      else
	hwloc_bitmap_asprintf(&s, membind_set);
      switch (policy) {
      case HWLOC_MEMBIND_DEFAULT: policystr = "default"; break;
      case HWLOC_MEMBIND_FIRSTTOUCH: policystr = "firsttouch"; break;
      case HWLOC_MEMBIND_BIND: policystr = "bind"; break;
      case HWLOC_MEMBIND_INTERLEAVE: policystr = "interleave"; break;
      case HWLOC_MEMBIND_NEXTTOUCH: policystr = "nexttouch"; break;
      default: fprintf(stderr, "unknown memory policy %d\n", policy); assert(0); break;
      }
    }
    if (policystr)
      printf("%s (%s)\n", s, policystr);
    else
      printf("%s\n", s);
    free(s);
  }

  /* memory binding is applied before CPU binding */
  if (got_membind) {
    if (hwloc_bitmap_iszero(membind_set)) {
      if (verbose >= 0)
	fprintf(stderr, "cannot membind to empty set\n");
      if (!force)
	goto failed_binding;
    }
    if (verbose > 0) {
      char *s;
      hwloc_bitmap_asprintf(&s, membind_set);
      fprintf(stderr, "binding on memory set %s\n", s);
      free(s);
    }
    if (single)
      hwloc_bitmap_singlify(membind_set);
    if (pid_number > 0)
      ret = hwloc_set_proc_membind(topology, pid, membind_set, membind_policy, membind_flags);
    else
      ret = hwloc_set_membind(topology, membind_set, membind_policy, membind_flags);
    if (ret && verbose >= 0) {
      /* save errno before any further library call can overwrite it */
      int bind_errno = errno;
      const char *errmsg = strerror(bind_errno);
      char *s;
      hwloc_bitmap_asprintf(&s, membind_set);
      if (pid_number > 0)
        fprintf(stderr, "hwloc_set_proc_membind %s %d failed (errno %d %s)\n", s, pid_number, bind_errno, errmsg);
      else
        fprintf(stderr, "hwloc_set_membind %s failed (errno %d %s)\n", s, bind_errno, errmsg);
      free(s);
    }
    if (ret && !force)
      goto failed_binding;
  }

  if (got_cpubind) {
    if (hwloc_bitmap_iszero(cpubind_set)) {
      if (verbose >= 0)
	fprintf(stderr, "cannot cpubind to empty set\n");
      if (!force)
	goto failed_binding;
    }
    if (verbose > 0) {
      char *s;
      hwloc_bitmap_asprintf(&s, cpubind_set);
      fprintf(stderr, "binding on cpu set %s\n", s);
      free(s);
    }
    if (single)
      hwloc_bitmap_singlify(cpubind_set);
    if (pid_number > 0)
      ret = hwloc_set_proc_cpubind(topology, pid, cpubind_set, cpubind_flags);
    else
      ret = hwloc_set_cpubind(topology, cpubind_set, cpubind_flags);
    if (ret && verbose >= 0) {
      /* save errno before any further library call can overwrite it */
      int bind_errno = errno;
      const char *errmsg = strerror(bind_errno);
      char *s;
      hwloc_bitmap_asprintf(&s, cpubind_set);
      if (pid_number > 0)
        fprintf(stderr, "hwloc_set_proc_cpubind %s %d failed (errno %d %s)\n", s, pid_number, bind_errno, errmsg);
      else
        fprintf(stderr, "hwloc_set_cpubind %s failed (errno %d %s)\n", s, bind_errno, errmsg);
      free(s);
    }
    if (ret && !force)
      goto failed_binding;
  }

  hwloc_bitmap_free(cpubind_set);
  hwloc_bitmap_free(membind_set);

  hwloc_topology_destroy(topology);

  /* when operating on another process there is no command to launch */
  if (pid_number > 0)
    return EXIT_SUCCESS;

  if (0 == argc) {
    if (get_binding || get_last_cpu_location)
      return EXIT_SUCCESS;
    fprintf(stderr, "%s: nothing to do!\n", callname);
    return EXIT_FAILURE;
  }

  /* FIXME: check whether Windows execvp() passes INHERIT_PARENT_AFFINITY to CreateProcess()
   * because we need to propagate processor group affinity. However process-wide affinity
   * isn't supported with processor groups so far.
   */
  ret = execvp(argv[0], argv);
  if (ret) {
      fprintf(stderr, "%s: Failed to launch executable \"%s\"\n",
              callname, argv[0]);
      perror("execvp");
  }
  return EXIT_FAILURE;


failed_binding:
  hwloc_bitmap_free(cpubind_set);
  hwloc_bitmap_free(membind_set);
  hwloc_topology_destroy(topology);
  return EXIT_FAILURE;
}
コード例 #8
0
ファイル: hwloc-bind.c プロジェクト: CoryXie/hwloc
/*
 * hwloc-bind entry point (variant that loads the topology up front).
 *
 * Parses options and location arguments from the command line, then either
 *  - prints the current CPU/memory binding or the last CPU location and
 *    exits (--get / --get-last-cpu-location), or
 *  - applies the requested memory and/or CPU binding to this process, or to
 *    the process given with --pid,
 * and finally, when no --pid was given, execs the remaining command line.
 *
 * Returns EXIT_SUCCESS / EXIT_FAILURE; on a successful execvp() it does not
 * return at all.
 */
int main(int argc, char *argv[])
{
  hwloc_topology_t topology;
  unsigned depth;
  hwloc_bitmap_t cpubind_set, membind_set;
  int got_cpubind = 0, got_membind = 0;
  int working_on_cpubind = 1; /* membind if 0 */
  int get_binding = 0;
  int get_last_cpu_location = 0;
  unsigned long flags = HWLOC_TOPOLOGY_FLAG_WHOLE_IO|HWLOC_TOPOLOGY_FLAG_ICACHES;
  int force = 0;
  int single = 0;
  int verbose = 0;
  int logical = 1;
  int taskset = 0;
  int cpubind_flags = 0;
  hwloc_membind_policy_t membind_policy = HWLOC_MEMBIND_BIND;
  int membind_flags = 0;
  int opt;
  int ret;
  int pid_number = 0;
  hwloc_pid_t pid;
  char *callname;

  cpubind_set = hwloc_bitmap_alloc();
  membind_set = hwloc_bitmap_alloc();

  hwloc_topology_init(&topology);
  hwloc_topology_set_flags(topology, flags);
  hwloc_topology_load(topology);
  depth = hwloc_topology_get_depth(topology);

  callname = argv[0];
  /* skip argv[0], handle options */
  argv++;
  argc--;

  while (argc >= 1) {
    if (!strcmp(argv[0], "--")) {
      argc--;
      argv++;
      break;
    }

    /* number of extra arguments consumed by the current option */
    opt = 0;

    if (*argv[0] == '-') {
      if (!strcmp(argv[0], "-v") || !strcmp(argv[0], "--verbose")) {
	verbose++;
	goto next;
      }
      else if (!strcmp(argv[0], "-q") || !strcmp(argv[0], "--quiet")) {
	verbose--;
	goto next;
      }
      else if (!strcmp(argv[0], "--help")) {
        usage("hwloc-bind", stdout);
	return EXIT_SUCCESS;
      }
      else if (!strcmp(argv[0], "--single")) {
	single = 1;
	goto next;
      }
      else if (!strcmp(argv[0], "-f") || !strcmp(argv[0], "--force")) {
	force = 1;
	goto next;
      }
      else if (!strcmp(argv[0], "--strict")) {
	cpubind_flags |= HWLOC_CPUBIND_STRICT;
	membind_flags |= HWLOC_MEMBIND_STRICT;
	goto next;
      }
      else if (!strcmp(argv[0], "--pid")) {
        if (argc < 2) {
          usage ("hwloc-bind", stderr);
          exit(EXIT_FAILURE);
        }
        pid_number = atoi(argv[1]);
        opt = 1;
        goto next;
      }
      else if (!strcmp (argv[0], "--version")) {
          printf("%s %s\n", callname, VERSION);
          exit(EXIT_SUCCESS);
      }
      if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--logical")) {
        logical = 1;
        goto next;
      }
      if (!strcmp(argv[0], "-p") || !strcmp(argv[0], "--physical")) {
        logical = 0;
        goto next;
      }
      if (!strcmp(argv[0], "--taskset")) {
        taskset = 1;
        goto next;
      }
      /* only the first 10 characters ("--get-last") are compared, so abbreviations are accepted */
      else if (!strcmp (argv[0], "-e") || !strncmp (argv[0], "--get-last-cpu-location", 10)) {
	get_last_cpu_location = 1;
	goto next;
      }
      else if (!strcmp (argv[0], "--get")) {
	get_binding = 1;
	goto next;
      }
      else if (!strcmp (argv[0], "--cpubind")) {
	  working_on_cpubind = 1;
	  goto next;
      }
      else if (!strcmp (argv[0], "--membind")) {
	  working_on_cpubind = 0;
	  goto next;
      }
      else if (!strcmp (argv[0], "--mempolicy")) {
	/* the policy name is mandatory; without this check argv[1] is NULL
	 * when --mempolicy is the last argument and strncmp() would crash */
	if (argc < 2) {
	  usage ("hwloc-bind", stderr);
	  exit(EXIT_FAILURE);
	}
	/* policies are recognized by their first two characters */
	if (!strncmp(argv[1], "default", 2))
	  membind_policy = HWLOC_MEMBIND_DEFAULT;
	else if (!strncmp(argv[1], "firsttouch", 2))
	  membind_policy = HWLOC_MEMBIND_FIRSTTOUCH;
	else if (!strncmp(argv[1], "bind", 2))
	  membind_policy = HWLOC_MEMBIND_BIND;
	else if (!strncmp(argv[1], "interleave", 2))
	  membind_policy = HWLOC_MEMBIND_INTERLEAVE;
	else if (!strncmp(argv[1], "replicate", 2))
	  membind_policy = HWLOC_MEMBIND_REPLICATE;
	else if (!strncmp(argv[1], "nexttouch", 2))
	  membind_policy = HWLOC_MEMBIND_NEXTTOUCH;
	else {
	  fprintf(stderr, "Unrecognized memory binding policy %s\n", argv[1]);
          usage ("hwloc-bind", stderr);
          exit(EXIT_FAILURE);
	}
	opt = 1;
	goto next;
      }
      else if (!strcmp (argv[0], "--whole-system")) {
	/* the topology is already loaded, so rebuild it with the new flag */
	flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM;
	hwloc_topology_destroy(topology);
	hwloc_topology_init(&topology);
	hwloc_topology_set_flags(topology, flags);
	hwloc_topology_load(topology);
	depth = hwloc_topology_get_depth(topology);
	goto next;
      }
      else if (!strcmp (argv[0], "--restrict")) {
	hwloc_bitmap_t restrictset;
	int err;
	if (argc < 2) {
	  usage (callname, stdout);
	  exit(EXIT_FAILURE);
	}
	restrictset = hwloc_bitmap_alloc();
	hwloc_bitmap_sscanf(restrictset, argv[1]);
	err = hwloc_topology_restrict (topology, restrictset, 0);
	if (err) {
	  perror("Restricting the topology");
	  /* fallthrough */
	}
	hwloc_bitmap_free(restrictset);
	/* skip the option value here; 'next' then skips the option itself */
	argc--;
	argv++;
	goto next;
      }

      fprintf (stderr, "Unrecognized option: %s\n", argv[0]);
      usage("hwloc-bind", stderr);
      return EXIT_FAILURE;
    }

    /* not an option: try to parse it as a location; if that fails, the command to run starts here */
    ret = hwloc_calc_process_arg(topology, depth, argv[0], logical,
				 working_on_cpubind ? cpubind_set : membind_set,
				 verbose);
    if (ret < 0) {
      if (verbose > 0)
	fprintf(stderr, "assuming the command starts at %s\n", argv[0]);
      break;
    }
    if (working_on_cpubind)
      got_cpubind = 1;
    else
      got_membind = 1;

  next:
    argc -= opt+1;
    argv += opt+1;
  }

  pid = hwloc_pid_from_number(pid_number, !(get_binding || get_last_cpu_location));

  /* display mode: query the binding (or last location), print it and exit */
  if (get_binding || get_last_cpu_location) {
    char *s;
    const char *policystr = NULL;
    int err;
    if (working_on_cpubind) {
      if (get_last_cpu_location) {
	if (pid_number)
	  err = hwloc_get_proc_last_cpu_location(topology, pid, cpubind_set, 0);
	else
	  err = hwloc_get_last_cpu_location(topology, cpubind_set, 0);
      } else {
	if (pid_number)
	  err = hwloc_get_proc_cpubind(topology, pid, cpubind_set, 0);
	else
	  err = hwloc_get_cpubind(topology, cpubind_set, 0);
      }
      if (err) {
	const char *errmsg = strerror(errno);
	if (pid_number)
	  fprintf(stderr, "hwloc_get_proc_%s %d failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", pid_number, errno, errmsg);
	else
	  fprintf(stderr, "hwloc_get_%s failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", errno, errmsg);
	return EXIT_FAILURE;
      }
      if (taskset)
	hwloc_bitmap_taskset_asprintf(&s, cpubind_set);
      else
	hwloc_bitmap_asprintf(&s, cpubind_set);
    } else {
      hwloc_membind_policy_t policy;
      if (pid_number)
	err = hwloc_get_proc_membind(topology, pid, membind_set, &policy, 0);
      else
	err = hwloc_get_membind(topology, membind_set, &policy, 0);
      if (err) {
	const char *errmsg = strerror(errno);
        if (pid_number)
          fprintf(stderr, "hwloc_get_proc_membind %d failed (errno %d %s)\n", pid_number, errno, errmsg);
        else
	  fprintf(stderr, "hwloc_get_membind failed (errno %d %s)\n", errno, errmsg);
	return EXIT_FAILURE;
      }
      if (taskset)
	hwloc_bitmap_taskset_asprintf(&s, membind_set);
      else
	hwloc_bitmap_asprintf(&s, membind_set);
      switch (policy) {
      case HWLOC_MEMBIND_DEFAULT: policystr = "default"; break;
      case HWLOC_MEMBIND_FIRSTTOUCH: policystr = "firsttouch"; break;
      case HWLOC_MEMBIND_BIND: policystr = "bind"; break;
      case HWLOC_MEMBIND_INTERLEAVE: policystr = "interleave"; break;
      case HWLOC_MEMBIND_REPLICATE: policystr = "replicate"; break;
      case HWLOC_MEMBIND_NEXTTOUCH: policystr = "nexttouch"; break;
      default: fprintf(stderr, "unknown memory policy %d\n", policy); assert(0); break;
      }
    }
    if (policystr)
      printf("%s (%s)\n", s, policystr);
    else
      printf("%s\n", s);
    free(s);
    return EXIT_SUCCESS;
  }

  /* memory binding is applied before CPU binding */
  if (got_membind) {
    if (hwloc_bitmap_iszero(membind_set)) {
      if (verbose >= 0)
	fprintf(stderr, "cannot membind to empty set\n");
      if (!force)
	goto failed_binding;
    }
    if (verbose > 0) {
      char *s;
      hwloc_bitmap_asprintf(&s, membind_set);
      fprintf(stderr, "binding on memory set %s\n", s);
      free(s);
    }
    if (single)
      hwloc_bitmap_singlify(membind_set);
    if (pid_number)
      ret = hwloc_set_proc_membind(topology, pid, membind_set, membind_policy, membind_flags);
    else
      ret = hwloc_set_membind(topology, membind_set, membind_policy, membind_flags);
    if (ret && verbose >= 0) {
      /* save errno before any further library call can overwrite it */
      int bind_errno = errno;
      const char *errmsg = strerror(bind_errno);
      char *s;
      hwloc_bitmap_asprintf(&s, membind_set);
      if (pid_number)
        fprintf(stderr, "hwloc_set_proc_membind %s %d failed (errno %d %s)\n", s, pid_number, bind_errno, errmsg);
      else
        fprintf(stderr, "hwloc_set_membind %s failed (errno %d %s)\n", s, bind_errno, errmsg);
      free(s);
    }
    if (ret && !force)
      goto failed_binding;
  }

  if (got_cpubind) {
    if (hwloc_bitmap_iszero(cpubind_set)) {
      if (verbose >= 0)
	fprintf(stderr, "cannot cpubind to empty set\n");
      if (!force)
	goto failed_binding;
    }
    if (verbose > 0) {
      char *s;
      hwloc_bitmap_asprintf(&s, cpubind_set);
      fprintf(stderr, "binding on cpu set %s\n", s);
      free(s);
    }
    if (single)
      hwloc_bitmap_singlify(cpubind_set);
    if (pid_number)
      ret = hwloc_set_proc_cpubind(topology, pid, cpubind_set, cpubind_flags);
    else
      ret = hwloc_set_cpubind(topology, cpubind_set, cpubind_flags);
    if (ret && verbose >= 0) {
      /* save errno before any further library call can overwrite it */
      int bind_errno = errno;
      const char *errmsg = strerror(bind_errno);
      char *s;
      hwloc_bitmap_asprintf(&s, cpubind_set);
      if (pid_number)
        fprintf(stderr, "hwloc_set_proc_cpubind %s %d failed (errno %d %s)\n", s, pid_number, bind_errno, errmsg);
      else
        fprintf(stderr, "hwloc_set_cpubind %s failed (errno %d %s)\n", s, bind_errno, errmsg);
      free(s);
    }
    if (ret && !force)
      goto failed_binding;
  }

  hwloc_bitmap_free(cpubind_set);
  hwloc_bitmap_free(membind_set);

  hwloc_topology_destroy(topology);

  /* when operating on another process there is no command to launch */
  if (pid_number)
    return EXIT_SUCCESS;

  if (0 == argc) {
    fprintf(stderr, "%s: nothing to do!\n", callname);
    return EXIT_FAILURE;
  }

  ret = execvp(argv[0], argv);
  if (ret) {
      fprintf(stderr, "%s: Failed to launch executable \"%s\"\n", 
              callname, argv[0]);
      perror("execvp");
  }
  return EXIT_FAILURE;


failed_binding:
  hwloc_bitmap_free(cpubind_set);
  hwloc_bitmap_free(membind_set);
  hwloc_topology_destroy(topology);
  return EXIT_FAILURE;
}
コード例 #9
0
// Constructor: loads the hwloc topology and builds the cached core table.
//
// Steps performed:
//   1. Reset all static state, then initialize and load 's_hwloc_topology'.
//   2. Record the process-wide cpu binding in 's_process_binding'.
//   3. Pick the 'root' object type: the first of NODE, SOCKET, MACHINE that
//      has at least one object in the topology.
//   4. Count the roots, cores per root and PUs per core that intersect the
//      process binding, taking minimums so the result is symmetric.
//   5. Fill 's_core' so that entry [ core + core_per_root * root ] is the
//      cpuset of that core, with the root containing the calling thread first.
//
// Prints a warning to std::cout when the accessible topology is not symmetric.
hwloc::hwloc()
{
  s_core_topology   = std::pair<unsigned,unsigned>(0,0);
  s_core_capacity   = 0 ;
  s_hwloc_topology  = 0 ;
  s_hwloc_location  = 0 ;
  s_process_binding = 0 ;

  for ( unsigned i = 0 ; i < MAX_CORE ; ++i ) s_core[i] = 0 ;

  hwloc_topology_init( & s_hwloc_topology );
  hwloc_topology_load( s_hwloc_topology );

  s_hwloc_location  = hwloc_bitmap_alloc();
  s_process_binding = hwloc_bitmap_alloc();

  hwloc_get_cpubind( s_hwloc_topology , s_process_binding ,  HWLOC_CPUBIND_PROCESS );

  // Choose a hwloc object type for the NUMA level, which may not exist.

  hwloc_obj_type_t root_type = HWLOC_OBJ_TYPE_MAX ;

  {
    // Object types to search, in order.
    static const hwloc_obj_type_t candidate_root_type[] =
      { HWLOC_OBJ_NODE     /* NUMA region     */
      , HWLOC_OBJ_SOCKET   /* hardware socket */
      , HWLOC_OBJ_MACHINE  /* local machine   */
      };

    enum { CANDIDATE_ROOT_TYPE_COUNT =
             sizeof(candidate_root_type) / sizeof(hwloc_obj_type_t) };

    // Stop at the first candidate type that has at least one object.
    for ( int k = 0 ; k < CANDIDATE_ROOT_TYPE_COUNT && HWLOC_OBJ_TYPE_MAX == root_type ; ++k ) {
      if ( 0 < hwloc_get_nbobjs_by_type( s_hwloc_topology , candidate_root_type[k] ) ) {
        root_type = candidate_root_type[k] ;
      }
    }
  }

  // Determine which of these 'root' types are available to this process.
  // The process may have been bound (e.g., by MPI) to a subset of these root types.
  // Determine current location of the master (calling) process.

  hwloc_bitmap_t proc_cpuset_location = hwloc_bitmap_alloc();

  hwloc_get_last_cpu_location( s_hwloc_topology , proc_cpuset_location , HWLOC_CPUBIND_THREAD );

  const unsigned max_root = hwloc_get_nbobjs_by_type( s_hwloc_topology , root_type );

  // 'root_base == max_root' means "calling thread's root not found"; the
  // modulo in the fill loop below then leaves the root order unchanged.
  unsigned root_base     = max_root ;
  unsigned root_count    = 0 ;
  unsigned core_per_root = 0 ;
  unsigned pu_per_core   = 0 ;
  bool     symmetric     = true ;

  for ( unsigned i = 0 ; i < max_root ; ++i ) {

    const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , i );

    if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {

      ++root_count ;

      // Remember which root (NUMA) object the master thread is running on.
      // This will be logical NUMA rank #0 for this process.

      if ( hwloc_bitmap_intersects( proc_cpuset_location, root->allowed_cpuset ) ) {
        root_base = i ;
      }

      // Count available cores:

      const unsigned max_core =
        hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                root->allowed_cpuset ,
                                                HWLOC_OBJ_CORE );

      unsigned core_count = 0 ;

      for ( unsigned j = 0 ; j < max_core ; ++j ) {

        const hwloc_obj_t core =
          hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
                                               root->allowed_cpuset ,
                                               HWLOC_OBJ_CORE , j );

        // If process' cpuset intersects core's cpuset then process can access this core.
        // Must use intersection instead of inclusion because the Intel-Phi
        // MPI may bind the process to only one of the core's hyperthreads.
        //
        // Assumption: if the process can access any hyperthread of the core
        // then it has ownership of the entire core.
        // This assumes that it would be performance-detrimental
        // to spawn more than one MPI process per core and use nested threading.

        if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

          ++core_count ;

          const unsigned pu_count =
            hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                    core->allowed_cpuset ,
                                                    HWLOC_OBJ_PU );

          if ( pu_per_core == 0 ) pu_per_core = pu_count ;

          // Enforce symmetry by taking the minimum:

          pu_per_core = std::min( pu_per_core , pu_count );

          if ( pu_count != pu_per_core ) symmetric = false ;
        }
      }

      if ( 0 == core_per_root ) core_per_root = core_count ;

      // Enforce symmetry by taking the minimum:

      core_per_root = std::min( core_per_root , core_count );

      if ( core_count != core_per_root ) symmetric = false ;
    }
  }

  s_core_topology.first  = root_count ;
  s_core_topology.second = core_per_root ;
  s_core_capacity        = pu_per_core ;

  // Fill the 's_core' array for fast mapping from a core coordinate to the
  // hwloc cpuset object required for thread location querying and binding.
  //
  // Rows are numbered by 'root_slot', the count of accessible roots stored so
  // far, not by the loop counter over all roots.  When the process is bound to
  // a subset of the roots, the loop counter would leave empty rows and write
  // beyond the 'root_count * core_per_root' entries that coordinate lookups use.
  // NOTE(review): nothing here checks 'root_count * core_per_root' against
  // MAX_CORE - confirm that MAX_CORE is large enough for the target machines.

  unsigned root_slot = 0 ;

  for ( unsigned i = 0 ; i < max_root ; ++i ) {

    // Rotate so that the root holding the calling thread is visited first.
    const unsigned root_rank = ( i + root_base ) % max_root ;

    const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , root_rank );

    if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {

      const unsigned max_core =
        hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                root->allowed_cpuset ,
                                                HWLOC_OBJ_CORE );

      unsigned core_count = 0 ;

      // Keep only the first 'core_per_root' accessible cores of each root.
      for ( unsigned j = 0 ; j < max_core && core_count < core_per_root ; ++j ) {

        const hwloc_obj_t core =
          hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
                                               root->allowed_cpuset ,
                                               HWLOC_OBJ_CORE , j );

        if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

          s_core[ core_count + core_per_root * root_slot ] = core->allowed_cpuset ;

          ++core_count ;
        }
      }

      ++root_slot ;
    }
  }

  hwloc_bitmap_free( proc_cpuset_location );

  if ( ! symmetric ) {
    std::cout << "KokkosArray::hwloc WARNING: Using a symmetric subset of a non-symmetric core topology."
              << std::endl ;
  }
}
コード例 #10
0
/*
 * Walk through hwloc's cpuset / bitmap / cpubind API on the current machine:
 *  - load the topology and report which PUs this process cannot use,
 *  - print the smallest object covering the process binding,
 *  - print the PU the calling thread last ran on, rebind the thread to the
 *    other PUs of the process binding, and print its location again,
 *  - compare the topology cpuset against the complete cpuset.
 *
 * Returns EXIT_SUCCESS when every step worked, and EXIT_FAILURE as soon as
 * an hwloc call or a bitmap allocation fails (after releasing whatever was
 * allocated so far).
 */
int main(void)
{
  hwloc_topology_t topology;
  hwloc_bitmap_t set, set2;
  hwloc_const_bitmap_t cset_available, cset_all;
  hwloc_obj_t obj;
  char *buffer;
  char type[64];
  unsigned i;
  int err;

  /* create a topology */
  err = hwloc_topology_init(&topology);
  if (err < 0) {
    fprintf(stderr, "failed to initialize the topology\n");
    return EXIT_FAILURE;
  }
  err = hwloc_topology_load(topology);
  if (err < 0) {
    fprintf(stderr, "failed to load the topology\n");
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }

  /* retrieve the entire set of available PUs */
  cset_available = hwloc_topology_get_topology_cpuset(topology);

  /* retrieve the CPU binding of the current entire process */
  set = hwloc_bitmap_alloc();
  if (!set) {
    fprintf(stderr, "failed to allocate a bitmap\n");
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  err = hwloc_get_cpubind(topology, set, HWLOC_CPUBIND_PROCESS);
  if (err < 0) {
    fprintf(stderr, "failed to get cpu binding\n");
    hwloc_bitmap_free(set);
    hwloc_topology_destroy(topology);
    /* 'set' and 'topology' are gone: stop here instead of using them below */
    return EXIT_FAILURE;
  }

  /* display the processing units that cannot be used by this process */
  if (hwloc_bitmap_isequal(set, cset_available)) {
    printf("this process can use all available processing units in the system\n");
  } else {
    /* compute the set where we currently cannot run.
     * we can't modify cset_available because it's a system read-only one,
     * so we do   set = available &~ set
     */
    hwloc_bitmap_andnot(set, cset_available, set);
    hwloc_bitmap_asprintf(&buffer, set);
    printf("process cannot use %d process units (%s) among %u in the system\n",
           hwloc_bitmap_weight(set), buffer, hwloc_bitmap_weight(cset_available));
    free(buffer);
    /* restore set where it was before the &~ operation above */
    hwloc_bitmap_andnot(set, cset_available, set);
  }
  /* print the smallest object covering the current process binding */
  obj = hwloc_get_obj_covering_cpuset(topology, set);
  hwloc_obj_type_snprintf(type, sizeof(type), obj, 0);
  printf("process is bound within object %s logical index %u\n", type, obj->logical_index);

  /* retrieve the single PU where the current thread actually runs within this process binding */
  set2 = hwloc_bitmap_alloc();
  if (!set2) {
    fprintf(stderr, "failed to allocate a bitmap\n");
    hwloc_bitmap_free(set);
    hwloc_topology_destroy(topology);
    return EXIT_FAILURE;
  }
  err = hwloc_get_last_cpu_location(topology, set2, HWLOC_CPUBIND_THREAD);
  if (err < 0) {
    fprintf(stderr, "failed to get last cpu location\n");
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(set2);
    hwloc_topology_destroy(topology);
    /* everything was released above: do not fall through to the asserts */
    return EXIT_FAILURE;
  }
  /* sanity checks that are not actually needed but help the reader */
  /* this thread runs within the process binding */
  assert(hwloc_bitmap_isincluded(set2, set));
  /* this thread runs on a single PU at a time */
  assert(hwloc_bitmap_weight(set2) == 1);

  /* print the logical number of the PU where that thread runs */
  /* extract the PU OS index from the bitmap */
  i = hwloc_bitmap_first(set2);
  obj = hwloc_get_pu_obj_by_os_index(topology, i);
  printf("thread is now running on PU logical index %u (OS/physical index %u)\n",
         obj->logical_index, i);

  /* migrate this single thread to the other PUs within the current binding:
   * set2 = set &~ set2
   */
  hwloc_bitmap_andnot(set2, set, set2);
  err = hwloc_set_cpubind(topology, set2, HWLOC_CPUBIND_THREAD);
  if (err < 0) {
    fprintf(stderr, "failed to set thread binding\n");
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(set2);
    hwloc_topology_destroy(topology);
    /* bitmaps and topology were released: leave before they are reused */
    return EXIT_FAILURE;
  }
  /* reprint the PU where that thread runs */
  err = hwloc_get_last_cpu_location(topology, set2, HWLOC_CPUBIND_THREAD);
  if (err < 0) {
    fprintf(stderr, "failed to get last cpu location\n");
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(set2);
    hwloc_topology_destroy(topology);
    /* returning also avoids the second free of set/set2 further down */
    return EXIT_FAILURE;
  }
  /* print the logical number of the PU where that thread runs */
  /* extract the PU OS index from the bitmap */
  i = hwloc_bitmap_first(set2);
  obj = hwloc_get_pu_obj_by_os_index(topology, i);
  printf("thread is running on PU logical index %u (OS/physical index %u)\n",
         obj->logical_index, i);

  hwloc_bitmap_free(set);
  hwloc_bitmap_free(set2);

  /* retrieve the entire set of all PUs */
  cset_all = hwloc_topology_get_complete_cpuset(topology);
  if (hwloc_bitmap_isequal(cset_all, cset_available)) {
    printf("all hardware PUs are available\n");
  } else {
    printf("only %d hardware PUs are available in the machine among %d\n",
           hwloc_bitmap_weight(cset_available), hwloc_bitmap_weight(cset_all));
  }

  hwloc_topology_destroy(topology);
  return EXIT_SUCCESS;
}
コード例 #11
0
ファイル: integrator.hpp プロジェクト: LANTZT/feelpp
        void computeCPUOMP(int threadId, expression_type * expr, im_type * im, element_iterator * elt_it, std::vector<std::pair<element_iterator, element_iterator> > * elts)
        {
            char * a;
            int cid;
            std::ostringstream oss;

#if 0
            hwloc_cpuset_t set = nullptr;

            /* get a cpuset object */
            set = hwloc_bitmap_alloc();

            /* Get the cpu thread affinity info of the current process/thread */
            hwloc_get_cpubind(Environment::getHwlocTopology(), set, 0);
            hwloc_bitmap_asprintf(&a, set);
            oss << a;
            free(a); 
            
            cid = hwloc_bitmap_first(set);
            oss << "(";
            while(cid != -1)
            {
                oss << cid << " ";
                cid = hwloc_bitmap_next(set, cid);
            }
            oss << ")|";
            std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl;

            /* Get the latest core location of the current process/thread */
            hwloc_get_last_cpu_location(Environment::getHwlocTopology(), set, 0);
            hwloc_bitmap_asprintf(&a, set);
            oss << a;
            free(a);

            cid = hwloc_bitmap_first(set);
            oss << "(";
            while(cid != -1)
            {
                oss << cid << " ";
                cid = hwloc_bitmap_next(set, cid);
            }
            oss << ");";
            std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl;
#endif

#if defined(FEELPP_HAS_HARTS)
            perf_mng.init("cpu") ;
            perf_mng.start("cpu") ;
            perf_mng.init("1.1") ;
            perf_mng.init("1.2") ;
            perf_mng.init("2.1") ;
            perf_mng.init("2.2") ;
            perf_mng.init("3") ;
#endif
            
            //M_gm((*elt_it)->gm());
            gm_ptrtype gm = (*elt_it)->gm();
            //M_geopc(new typename eval::gmpc_type( M_gm, im->points() ));
            typename eval::gmpc_ptrtype __geopc( new typename eval::gmpc_type(gm, im->points()) );
            //M_c(new gmc_type( M_gm, *(*elt_it), M_geopc ));
            gmc_ptrtype __c( new gmc_type( gm, *(*elt_it), __geopc ) );
            //M_expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) );
            eval_expr_type __expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( __c ) ) );


            for (int i = 0; i < elts->size(); i++)
            {
                /*
                std::cout << Environment::worldComm().rank() <<  " nbItems: " << elts->size() 
                          << " nbElts " << std::distance(elts->at(i), elts->at(i+1))
                          << " 1st id " << elts->at(i)->id() << std::endl;
                */

                //std::cout << Environment::worldComm().rank() << "|" << theadId << " fid=" elts.at(i).first.id() << std::endl;
                for ( auto _elt = elts->at(i).first; _elt != elts->at(i).second; ++_elt )
                {
                    //perf_mng.start("1.1") ;
                    __c->update( *_elt );
                    //perf_mng.stop("1.1") ;
                    //perf_mng.start("1.2") ;
                    map_gmc_type mapgmc( fusion::make_pair<vf::detail::gmc<0> >( __c ) );
                    //perf_mng.stop("1.2") ;

                    //perf_mng.start("2.1") ;
                    __expr.update( mapgmc );
                    //perf_mng.stop("2.1") ;
                    //perf_mng.start("2.2") ;
                    im->update( *__c );
                    //perf_mng.stop("2.2") ;

                    //perf_mng.start("3") ;
                    for ( uint16_type c1 = 0; c1 < eval::shape::M; ++c1 )
                    {
                        for ( uint16_type c2 = 0; c2 < eval::shape::N; ++c2 )
                        {
                            M_ret( c1,c2 ) += (*im)( __expr, c1, c2 );
                        }
                    }
                    //perf_mng.stop("3") ;
                }
            }

#if defined(FEELPP_HAS_HARTS)
            perf_mng.stop("cpu") ;
            M_cpuTime = perf_mng.getValueInSeconds("cpu");
#endif
        }
コード例 #12
0
ファイル: integrator.hpp プロジェクト: LANTZT/feelpp
        /**
         * Task entry point: accumulate the integrand over a list of element ranges into M_ret.
         *
         * The inputs are looked up by name in \p args:
         *   - "elements": std::vector of (first, second) element-iterator pairs to process
         *   - "threadId": int (fetched but not used below)
         *   - "expr":     expression_type to evaluate
         *   - "im":       im_type; supplies points() and is updated per element
         *   - "elt":      element_iterator used to build the initial context
         *
         * The geometric-mapping context and the expression evaluator are built
         * here rather than in the constructor, following the original note
         * that construction happens outside the parallel section when tasks
         * are created, so that time would otherwise be lost there.
         *
         * For every element of every range, the context, the evaluator and the
         * integration method are updated and (*im)(evaluator, c1, c2) is added
         * to M_ret(c1, c2). The "cpu" counter of perf_mng brackets the element
         * loop and its value in seconds is stored in M_cpuTime.
         */
        void computeCPU(DataArgsType& args)
        {
#if 0
            /* Disabled debug trace: prints the cpu binding and the last cpu location of the caller. */
            /* The locals only exist inside this guard so that regular builds carry no unused variables. */
            char * a;
            int cid;
            hwloc_cpuset_t set = nullptr;
            std::ostringstream oss;

            /* get a cpuset object */
            set = hwloc_bitmap_alloc();

            /* Get the cpu thread affinity info of the current process/thread */
            hwloc_get_cpubind(Environment::getHwlocTopology(), set, 0);
            hwloc_bitmap_asprintf(&a, set);
            oss << a;
            free(a); 
            
            cid = hwloc_bitmap_first(set);
            oss << "(";
            while(cid != -1)
            {
                oss << cid << " ";
                cid = hwloc_bitmap_next(set, cid);
            }
            oss << ")|";
            std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl;

            /* Get the latest core location of the current process/thread */
            hwloc_get_last_cpu_location(Environment::getHwlocTopology(), set, 0);
            hwloc_bitmap_asprintf(&a, set);
            oss << a;
            free(a);

            cid = hwloc_bitmap_first(set);
            oss << "(";
            while(cid != -1)
            {
                oss << cid << " ";
                cid = hwloc_bitmap_next(set, cid);
            }
            oss << ");";
            std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl;

            /* free memory */
            if(set != nullptr)
            {
                hwloc_bitmap_free(set);
            }
#endif

            // Per-stage counters. The second call used to repeat "1.1", leaving
            // "1.2" uninitialised; the list now matches computeCPUOMP.
            perf_mng.init("1.1") ;
            perf_mng.init("1.2") ;
            perf_mng.init("2.1") ;
            perf_mng.init("2.2") ;
            perf_mng.init("3") ;

            //perf_mng.init("data") ;
            //perf_mng.start("data") ;

            // DEFINE the range to be iterated on
            std::vector<std::pair<element_iterator, element_iterator> > * elts =
                args.get("elements")->get<std::vector<std::pair<element_iterator, element_iterator> > >();

            // Not used below; the lookup is kept so 'args' is queried exactly as before.
            int * threadId = args.get("threadId")->get<int>();
            expression_type * expr = args.get("expr")->get<expression_type>();
            im_type * im = args.get("im")->get<im_type>();
            element_iterator * elt_it = args.get("elt")->get<element_iterator>();
            
            // Local equivalents of the M_gm / M_geopc / M_c / M_expr members:
            //M_gm((*elt_it)->gm());
            gm_ptrtype gm = (*elt_it)->gm();
            //M_geopc(new typename eval::gmpc_type( M_gm, im->points() ));
            typename eval::gmpc_ptrtype __geopc( new typename eval::gmpc_type(gm, im->points()) );
            //M_c(new gmc_type( M_gm, *(*elt_it), M_geopc ));
            gmc_ptrtype __c( new gmc_type( gm, *(*elt_it), __geopc ) );
            //M_expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) );
            eval_expr_type __expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( __c ) ) );

            //perf_mng.stop("data");

            perf_mng.init("cpu") ;
            perf_mng.start("cpu") ;

            // Range-for instead of an 'int' index compared against the unsigned size().
            for ( auto const & range : *elts )
            {
                for ( auto _elt = range.first; _elt != range.second; ++_elt )
                {
                    //perf_mng.start("1.1") ;
                    //M_c->update( *_elt );
                    __c->update( *_elt );
                    //perf_mng.stop("1.1") ;
                    //perf_mng.start("1.2") ;
                    map_gmc_type mapgmc( fusion::make_pair<vf::detail::gmc<0> >( __c ) );
                    //perf_mng.stop("1.2") ;

                    //perf_mng.start("2.1") ;
                    __expr.update( mapgmc );
                    //perf_mng.stop("2.1") ;
                    //perf_mng.start("2.2") ;
                    im->update( *__c );
                    //perf_mng.stop("2.2") ;

                    //perf_mng.start("3") ;
                    // add this element's contribution, component by component
                    for ( uint16_type c1 = 0; c1 < eval::shape::M; ++c1 )
                    {
                        for ( uint16_type c2 = 0; c2 < eval::shape::N; ++c2 )
                        {
                            M_ret( c1,c2 ) += (*im)( __expr, c1, c2 );
                        }
                    }
                    //perf_mng.stop("3") ;
                }
            }

            perf_mng.stop("cpu") ;
            M_cpuTime = perf_mng.getValueInSeconds("cpu");
        }