bool hwloc::bind_this_thread( const std::pair<unsigned,unsigned> coord ) { #if 0 std::cout << "KokkosArray::hwloc::bind_this_thread() at " ; hwloc_get_last_cpu_location( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD ); print_bitmap( std::cout , s_hwloc_location ); std::cout << " to " ; print_bitmap( std::cout , s_core[ coord.second + coord.first * s_core_topology.second ] ); std::cout << std::endl ; #endif // As safe and fast as possible. // Fast-lookup by caching the coordinate -> hwloc cpuset mapping in 's_core'. return coord.first < s_core_topology.first && coord.second < s_core_topology.second && 0 == hwloc_set_cpubind( s_hwloc_topology , s_core[ coord.second + coord.first * s_core_topology.second ] , HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT ); }
inline void __pact_reuse_add(void *ary, long long start, long long end, long long mem_ac) { hwloc_bitmap_t set = hwloc_bitmap_alloc(); hwloc_get_cpubind(__pact_topo, set, HWLOC_CPUBIND_THREAD); hwloc_get_last_cpu_location(__pact_topo, set, HWLOC_CPUBIND_THREAD); hwloc_bitmap_singlify(set); hwloc_set_area_membind ( __pact_topo, (const void*)ary, abs(end-start), (hwloc_const_cpuset_t)set, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_MIGRATE ); hwloc_bitmap_free(set); }
/*
 * Return the logical index of the core the calling thread last ran on.
 *
 * Returns -1 when the bitmap cannot be allocated, when hwloc cannot report the
 * last cpu location, or when no HWLOC_OBJ_CORE object covers that location.
 */
signed getCurrentCore()
{
    hwloc_topology_t topology = getHWTopology();

    hwloc_cpuset_t cpu_set = hwloc_bitmap_alloc();
    if (cpu_set == NULL) {
        return -1;
    }

    if (hwloc_get_last_cpu_location(topology, cpu_set, HWLOC_CPUBIND_THREAD) < 0) {
        /* release the bitmap on the error path too */
        hwloc_bitmap_free(cpu_set);
        return -1;
    }

    hwloc_obj_t current_core =
        hwloc_get_next_obj_covering_cpuset_by_type(topology, cpu_set, HWLOC_OBJ_CORE, NULL);
    hwloc_bitmap_free(cpu_set);

    /* the lookup may find no covering core; do not dereference NULL */
    if (current_core == NULL) {
        return -1;
    }

    return current_core->logical_index;
}
void migrate(long PageStart, long PageEnd) { SPMR_DEBUG(std::cout << "Runtime: migrate pages: " << PageStart << " to " << PageEnd << "\n"); SPMR_DEBUG(std::cout << "Runtime: hwloc call: " << (PageStart << PAGE_EXP) << ", " << ((PageEnd - PageStart) << PAGE_EXP) << "\n"); hwloc_bitmap_t set = hwloc_bitmap_alloc(); hwloc_get_cpubind(__spm_topo, set, HWLOC_CPUBIND_THREAD); hwloc_get_last_cpu_location(__spm_topo, set, HWLOC_CPUBIND_THREAD); hwloc_bitmap_singlify(set); assert( hwloc_set_area_membind(__spm_topo, (const void*)(PageStart << PAGE_EXP), (PageEnd - PageStart) << PAGE_EXP, (hwloc_const_cpuset_t)set, HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_MIGRATE) != -1 && "Unable to migrate requested pages"); hwloc_bitmap_free(set); }
std::pair<unsigned,unsigned> hwloc::get_this_thread_coordinate() { const unsigned n = s_core_topology.first * s_core_topology.second ; std::pair<unsigned,unsigned> coord(0,0); // Using the pre-allocated 's_hwloc_location' to avoid memory // allocation by this thread. This call is NOT thread-safe. hwloc_get_last_cpu_location( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD ); unsigned i = 0 ; while ( i < n && ! hwloc_bitmap_intersects( s_hwloc_location , s_core[ i ] ) ) ++i ; if ( i < n ) { coord.first = i / s_core_topology.second ; coord.second = i % s_core_topology.second ; } else { std::ostringstream msg ; msg << "KokkosArray::hwloc::get_this_thread_coordinate() FAILED :" ; if ( 0 != s_process_binding && 0 != s_hwloc_location ) { msg << " cpu_location" ; print_bitmap( msg , s_hwloc_location ); msg << " is not a member of the process_cpu_set" ; print_bitmap( msg , s_process_binding ); } else { msg << " not initialized" ; } throw std::runtime_error( msg.str() ); } return coord ; }
std::pair<unsigned,unsigned> get_this_thread_coordinate() { std::pair<unsigned,unsigned> coord(0u,0u); if ( ! sentinel() ) return coord ; const unsigned n = s_core_topology.first * s_core_topology.second ; // Using the pre-allocated 's_hwloc_location' to avoid memory // allocation by this thread. This call is NOT thread-safe. hwloc_get_last_cpu_location( s_hwloc_topology , s_hwloc_location , HWLOC_CPUBIND_THREAD ); unsigned i = 0 ; while ( i < n && ! hwloc_bitmap_intersects( s_hwloc_location , s_core[ i ] ) ) ++i ; if ( i < n ) { coord.first = i / s_core_topology.second ; coord.second = i % s_core_topology.second ; } return coord ; }
int main(int argc, char *argv[]) { hwloc_topology_t topology; int loaded = 0; unsigned depth; hwloc_bitmap_t cpubind_set, membind_set; int got_cpubind = 0, got_membind = 0; int working_on_cpubind = 1; /* membind if 0 */ int get_binding = 0; int get_last_cpu_location = 0; unsigned long flags = 0; int force = 0; int single = 0; int verbose = 0; int logical = 1; int taskset = 0; int cpubind_flags = 0; hwloc_membind_policy_t membind_policy = HWLOC_MEMBIND_BIND; int membind_flags = 0; int opt; int ret; int pid_number = -1; hwloc_pid_t pid = 0; /* only valid when pid_number > 0, but gcc-4.8 still reports uninitialized warnings */ char *callname; cpubind_set = hwloc_bitmap_alloc(); membind_set = hwloc_bitmap_alloc(); /* don't load now, in case some options change the config before the topology is actually used */ #define LOADED() (loaded) #define ENSURE_LOADED() do { \ if (!loaded) { \ hwloc_topology_init(&topology); \ hwloc_topology_set_all_types_filter(topology, HWLOC_TYPE_FILTER_KEEP_ALL); \ hwloc_topology_set_flags(topology, flags); \ hwloc_topology_load(topology); \ depth = hwloc_topology_get_depth(topology); \ loaded = 1; \ } \ } while (0) callname = argv[0]; /* skip argv[0], handle options */ argv++; argc--; while (argc >= 1) { if (!strcmp(argv[0], "--")) { argc--; argv++; break; } opt = 0; if (*argv[0] == '-') { if (!strcmp(argv[0], "-v") || !strcmp(argv[0], "--verbose")) { verbose++; goto next; } if (!strcmp(argv[0], "-q") || !strcmp(argv[0], "--quiet")) { verbose--; goto next; } if (!strcmp(argv[0], "--help")) { usage("hwloc-bind", stdout); return EXIT_SUCCESS; } if (!strcmp(argv[0], "--single")) { single = 1; goto next; } if (!strcmp(argv[0], "-f") || !strcmp(argv[0], "--force")) { force = 1; goto next; } if (!strcmp(argv[0], "--strict")) { cpubind_flags |= HWLOC_CPUBIND_STRICT; membind_flags |= HWLOC_MEMBIND_STRICT; goto next; } if (!strcmp(argv[0], "--pid")) { if (argc < 2) { usage ("hwloc-bind", stderr); exit(EXIT_FAILURE); } pid_number = atoi(argv[1]); opt 
= 1; goto next; } if (!strcmp (argv[0], "--version")) { printf("%s %s\n", callname, HWLOC_VERSION); exit(EXIT_SUCCESS); } if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--logical")) { logical = 1; goto next; } if (!strcmp(argv[0], "-p") || !strcmp(argv[0], "--physical")) { logical = 0; goto next; } if (!strcmp(argv[0], "--taskset")) { taskset = 1; goto next; } if (!strcmp (argv[0], "-e") || !strncmp (argv[0], "--get-last-cpu-location", 10)) { get_last_cpu_location = 1; goto next; } if (!strcmp (argv[0], "--get")) { get_binding = 1; goto next; } if (!strcmp (argv[0], "--cpubind")) { working_on_cpubind = 1; goto next; } if (!strcmp (argv[0], "--membind")) { working_on_cpubind = 0; goto next; } if (!strcmp (argv[0], "--mempolicy")) { if (!strncmp(argv[1], "default", 2)) membind_policy = HWLOC_MEMBIND_DEFAULT; else if (!strncmp(argv[1], "firsttouch", 2)) membind_policy = HWLOC_MEMBIND_FIRSTTOUCH; else if (!strncmp(argv[1], "bind", 2)) membind_policy = HWLOC_MEMBIND_BIND; else if (!strncmp(argv[1], "interleave", 2)) membind_policy = HWLOC_MEMBIND_INTERLEAVE; else if (!strncmp(argv[1], "nexttouch", 2)) membind_policy = HWLOC_MEMBIND_NEXTTOUCH; else { fprintf(stderr, "Unrecognized memory binding policy %s\n", argv[1]); usage ("hwloc-bind", stderr); exit(EXIT_FAILURE); } opt = 1; goto next; } if (!strcmp (argv[0], "--whole-system")) { if (loaded) { fprintf(stderr, "Input option %s disallowed after options using the topology\n", argv[0]); exit(EXIT_FAILURE); } flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM; goto next; } if (!strcmp (argv[0], "--restrict")) { hwloc_bitmap_t restrictset; int err; if (argc < 2) { usage (callname, stdout); exit(EXIT_FAILURE); } restrictset = hwloc_bitmap_alloc(); hwloc_bitmap_sscanf(restrictset, argv[1]); ENSURE_LOADED(); err = hwloc_topology_restrict (topology, restrictset, 0); if (err) { perror("Restricting the topology"); /* fallthrough */ } hwloc_bitmap_free(restrictset); argc--; argv++; goto next; } fprintf (stderr, "Unrecognized option: 
%s\n", argv[0]); usage("hwloc-bind", stderr); return EXIT_FAILURE; } ENSURE_LOADED(); ret = hwloc_calc_process_arg(topology, depth, argv[0], logical, working_on_cpubind ? cpubind_set : membind_set, verbose); if (ret < 0) { if (verbose > 0) fprintf(stderr, "assuming the command starts at %s\n", argv[0]); break; } if (working_on_cpubind) got_cpubind = 1; else got_membind = 1; next: argc -= opt+1; argv += opt+1; } ENSURE_LOADED(); if (pid_number > 0) { pid = hwloc_pid_from_number(pid_number, !(get_binding || get_last_cpu_location)); /* no need to set_pid() * the doc just says we're operating on pid, not that we're retrieving the topo/cpuset as seen from inside pid */ } if (get_last_cpu_location && !working_on_cpubind) { fprintf(stderr, "Options --membind and --get-last-cpu-location cannot be combined.\n"); return EXIT_FAILURE; } if ((get_binding || get_last_cpu_location) && (got_cpubind || got_membind)) { /* doesn't work because get_binding/get_last_cpu_location overwrites cpubind_set */ fprintf(stderr, "Cannot display and set binding at the same time.\n"); return EXIT_FAILURE; } if (get_binding || get_last_cpu_location) { char *s; const char *policystr = NULL; int err; if (working_on_cpubind) { if (get_last_cpu_location) { if (pid_number > 0) err = hwloc_get_proc_last_cpu_location(topology, pid, cpubind_set, 0); else err = hwloc_get_last_cpu_location(topology, cpubind_set, 0); } else { if (pid_number > 0) err = hwloc_get_proc_cpubind(topology, pid, cpubind_set, 0); else err = hwloc_get_cpubind(topology, cpubind_set, 0); } if (err) { const char *errmsg = strerror(errno); if (pid_number > 0) fprintf(stderr, "hwloc_get_proc_%s %d failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", pid_number, errno, errmsg); else fprintf(stderr, "hwloc_get_%s failed (errno %d %s)\n", get_last_cpu_location ? 
"last_cpu_location" : "cpubind", errno, errmsg); return EXIT_FAILURE; } if (taskset) hwloc_bitmap_taskset_asprintf(&s, cpubind_set); else hwloc_bitmap_asprintf(&s, cpubind_set); } else { hwloc_membind_policy_t policy; if (pid_number > 0) err = hwloc_get_proc_membind(topology, pid, membind_set, &policy, 0); else err = hwloc_get_membind(topology, membind_set, &policy, 0); if (err) { const char *errmsg = strerror(errno); if (pid_number > 0) fprintf(stderr, "hwloc_get_proc_membind %d failed (errno %d %s)\n", pid_number, errno, errmsg); else fprintf(stderr, "hwloc_get_membind failed (errno %d %s)\n", errno, errmsg); return EXIT_FAILURE; } if (taskset) hwloc_bitmap_taskset_asprintf(&s, membind_set); else hwloc_bitmap_asprintf(&s, membind_set); switch (policy) { case HWLOC_MEMBIND_DEFAULT: policystr = "default"; break; case HWLOC_MEMBIND_FIRSTTOUCH: policystr = "firsttouch"; break; case HWLOC_MEMBIND_BIND: policystr = "bind"; break; case HWLOC_MEMBIND_INTERLEAVE: policystr = "interleave"; break; case HWLOC_MEMBIND_NEXTTOUCH: policystr = "nexttouch"; break; default: fprintf(stderr, "unknown memory policy %d\n", policy); assert(0); break; } } if (policystr) printf("%s (%s)\n", s, policystr); else printf("%s\n", s); free(s); } if (got_membind) { if (hwloc_bitmap_iszero(membind_set)) { if (verbose >= 0) fprintf(stderr, "cannot membind to empty set\n"); if (!force) goto failed_binding; } if (verbose > 0) { char *s; hwloc_bitmap_asprintf(&s, membind_set); fprintf(stderr, "binding on memory set %s\n", s); free(s); } if (single) hwloc_bitmap_singlify(membind_set); if (pid_number > 0) ret = hwloc_set_proc_membind(topology, pid, membind_set, membind_policy, membind_flags); else ret = hwloc_set_membind(topology, membind_set, membind_policy, membind_flags); if (ret && verbose >= 0) { int bind_errno = errno; const char *errmsg = strerror(bind_errno); char *s; hwloc_bitmap_asprintf(&s, membind_set); if (pid_number > 0) fprintf(stderr, "hwloc_set_proc_membind %s %d failed (errno %d 
%s)\n", s, pid_number, bind_errno, errmsg); else fprintf(stderr, "hwloc_set_membind %s failed (errno %d %s)\n", s, bind_errno, errmsg); free(s); } if (ret && !force) goto failed_binding; } if (got_cpubind) { if (hwloc_bitmap_iszero(cpubind_set)) { if (verbose >= 0) fprintf(stderr, "cannot cpubind to empty set\n"); if (!force) goto failed_binding; } if (verbose > 0) { char *s; hwloc_bitmap_asprintf(&s, cpubind_set); fprintf(stderr, "binding on cpu set %s\n", s); free(s); } if (single) hwloc_bitmap_singlify(cpubind_set); if (pid_number > 0) ret = hwloc_set_proc_cpubind(topology, pid, cpubind_set, cpubind_flags); else ret = hwloc_set_cpubind(topology, cpubind_set, cpubind_flags); if (ret && verbose >= 0) { int bind_errno = errno; const char *errmsg = strerror(bind_errno); char *s; hwloc_bitmap_asprintf(&s, cpubind_set); if (pid_number > 0) fprintf(stderr, "hwloc_set_proc_cpubind %s %d failed (errno %d %s)\n", s, pid_number, bind_errno, errmsg); else fprintf(stderr, "hwloc_set_cpubind %s failed (errno %d %s)\n", s, bind_errno, errmsg); free(s); } if (ret && !force) goto failed_binding; } hwloc_bitmap_free(cpubind_set); hwloc_bitmap_free(membind_set); hwloc_topology_destroy(topology); if (pid_number > 0) return EXIT_SUCCESS; if (0 == argc) { if (get_binding || get_last_cpu_location) return EXIT_SUCCESS; fprintf(stderr, "%s: nothing to do!\n", callname); return EXIT_FAILURE; } /* FIXME: check whether Windows execvp() passes INHERIT_PARENT_AFFINITY to CreateProcess() * because we need to propagate processor group affinity. However process-wide affinity * isn't supported with processor groups so far. */ ret = execvp(argv[0], argv); if (ret) { fprintf(stderr, "%s: Failed to launch executable \"%s\"\n", callname, argv[0]); perror("execvp"); } return EXIT_FAILURE; failed_binding: hwloc_bitmap_free(cpubind_set); hwloc_bitmap_free(membind_set); hwloc_topology_destroy(topology); return EXIT_FAILURE; }
int main(int argc, char *argv[]) { hwloc_topology_t topology; unsigned depth; hwloc_bitmap_t cpubind_set, membind_set; int got_cpubind = 0, got_membind = 0; int working_on_cpubind = 1; /* membind if 0 */ int get_binding = 0; int get_last_cpu_location = 0; unsigned long flags = HWLOC_TOPOLOGY_FLAG_WHOLE_IO|HWLOC_TOPOLOGY_FLAG_ICACHES; int force = 0; int single = 0; int verbose = 0; int logical = 1; int taskset = 0; int cpubind_flags = 0; hwloc_membind_policy_t membind_policy = HWLOC_MEMBIND_BIND; int membind_flags = 0; int opt; int ret; int pid_number = 0; hwloc_pid_t pid; char *callname; cpubind_set = hwloc_bitmap_alloc(); membind_set = hwloc_bitmap_alloc(); hwloc_topology_init(&topology); hwloc_topology_set_flags(topology, flags); hwloc_topology_load(topology); depth = hwloc_topology_get_depth(topology); callname = argv[0]; /* skip argv[0], handle options */ argv++; argc--; while (argc >= 1) { if (!strcmp(argv[0], "--")) { argc--; argv++; break; } opt = 0; if (*argv[0] == '-') { if (!strcmp(argv[0], "-v") || !strcmp(argv[0], "--verbose")) { verbose++; goto next; } else if (!strcmp(argv[0], "-q") || !strcmp(argv[0], "--quiet")) { verbose--; goto next; } else if (!strcmp(argv[0], "--help")) { usage("hwloc-bind", stdout); return EXIT_SUCCESS; } else if (!strcmp(argv[0], "--single")) { single = 1; goto next; } else if (!strcmp(argv[0], "-f") || !strcmp(argv[0], "--force")) { force = 1; goto next; } else if (!strcmp(argv[0], "--strict")) { cpubind_flags |= HWLOC_CPUBIND_STRICT; membind_flags |= HWLOC_MEMBIND_STRICT; goto next; } else if (!strcmp(argv[0], "--pid")) { if (argc < 2) { usage ("hwloc-bind", stderr); exit(EXIT_FAILURE); } pid_number = atoi(argv[1]); opt = 1; goto next; } else if (!strcmp (argv[0], "--version")) { printf("%s %s\n", callname, VERSION); exit(EXIT_SUCCESS); } if (!strcmp(argv[0], "-l") || !strcmp(argv[0], "--logical")) { logical = 1; goto next; } if (!strcmp(argv[0], "-p") || !strcmp(argv[0], "--physical")) { logical = 0; goto next; } if 
(!strcmp(argv[0], "--taskset")) { taskset = 1; goto next; } else if (!strcmp (argv[0], "-e") || !strncmp (argv[0], "--get-last-cpu-location", 10)) { get_last_cpu_location = 1; goto next; } else if (!strcmp (argv[0], "--get")) { get_binding = 1; goto next; } else if (!strcmp (argv[0], "--cpubind")) { working_on_cpubind = 1; goto next; } else if (!strcmp (argv[0], "--membind")) { working_on_cpubind = 0; goto next; } else if (!strcmp (argv[0], "--mempolicy")) { if (!strncmp(argv[1], "default", 2)) membind_policy = HWLOC_MEMBIND_DEFAULT; else if (!strncmp(argv[1], "firsttouch", 2)) membind_policy = HWLOC_MEMBIND_FIRSTTOUCH; else if (!strncmp(argv[1], "bind", 2)) membind_policy = HWLOC_MEMBIND_BIND; else if (!strncmp(argv[1], "interleave", 2)) membind_policy = HWLOC_MEMBIND_INTERLEAVE; else if (!strncmp(argv[1], "replicate", 2)) membind_policy = HWLOC_MEMBIND_REPLICATE; else if (!strncmp(argv[1], "nexttouch", 2)) membind_policy = HWLOC_MEMBIND_NEXTTOUCH; else { fprintf(stderr, "Unrecognized memory binding policy %s\n", argv[1]); usage ("hwloc-bind", stderr); exit(EXIT_FAILURE); } opt = 1; goto next; } else if (!strcmp (argv[0], "--whole-system")) { flags |= HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM; hwloc_topology_destroy(topology); hwloc_topology_init(&topology); hwloc_topology_set_flags(topology, flags); hwloc_topology_load(topology); depth = hwloc_topology_get_depth(topology); goto next; } else if (!strcmp (argv[0], "--restrict")) { hwloc_bitmap_t restrictset; int err; if (argc < 2) { usage (callname, stdout); exit(EXIT_FAILURE); } restrictset = hwloc_bitmap_alloc(); hwloc_bitmap_sscanf(restrictset, argv[1]); err = hwloc_topology_restrict (topology, restrictset, 0); if (err) { perror("Restricting the topology"); /* fallthrough */ } hwloc_bitmap_free(restrictset); argc--; argv++; goto next; } fprintf (stderr, "Unrecognized option: %s\n", argv[0]); usage("hwloc-bind", stderr); return EXIT_FAILURE; } ret = hwloc_calc_process_arg(topology, depth, argv[0], logical, 
working_on_cpubind ? cpubind_set : membind_set, verbose); if (ret < 0) { if (verbose > 0) fprintf(stderr, "assuming the command starts at %s\n", argv[0]); break; } if (working_on_cpubind) got_cpubind = 1; else got_membind = 1; next: argc -= opt+1; argv += opt+1; } pid = hwloc_pid_from_number(pid_number, !(get_binding || get_last_cpu_location)); if (get_binding || get_last_cpu_location) { char *s; const char *policystr = NULL; int err; if (working_on_cpubind) { if (get_last_cpu_location) { if (pid_number) err = hwloc_get_proc_last_cpu_location(topology, pid, cpubind_set, 0); else err = hwloc_get_last_cpu_location(topology, cpubind_set, 0); } else { if (pid_number) err = hwloc_get_proc_cpubind(topology, pid, cpubind_set, 0); else err = hwloc_get_cpubind(topology, cpubind_set, 0); } if (err) { const char *errmsg = strerror(errno); if (pid_number) fprintf(stderr, "hwloc_get_proc_%s %d failed (errno %d %s)\n", get_last_cpu_location ? "last_cpu_location" : "cpubind", pid_number, errno, errmsg); else fprintf(stderr, "hwloc_get_%s failed (errno %d %s)\n", get_last_cpu_location ? 
"last_cpu_location" : "cpubind", errno, errmsg); return EXIT_FAILURE; } if (taskset) hwloc_bitmap_taskset_asprintf(&s, cpubind_set); else hwloc_bitmap_asprintf(&s, cpubind_set); } else { hwloc_membind_policy_t policy; if (pid_number) err = hwloc_get_proc_membind(topology, pid, membind_set, &policy, 0); else err = hwloc_get_membind(topology, membind_set, &policy, 0); if (err) { const char *errmsg = strerror(errno); if (pid_number) fprintf(stderr, "hwloc_get_proc_membind %d failed (errno %d %s)\n", pid_number, errno, errmsg); else fprintf(stderr, "hwloc_get_membind failed (errno %d %s)\n", errno, errmsg); return EXIT_FAILURE; } if (taskset) hwloc_bitmap_taskset_asprintf(&s, membind_set); else hwloc_bitmap_asprintf(&s, membind_set); switch (policy) { case HWLOC_MEMBIND_DEFAULT: policystr = "default"; break; case HWLOC_MEMBIND_FIRSTTOUCH: policystr = "firsttouch"; break; case HWLOC_MEMBIND_BIND: policystr = "bind"; break; case HWLOC_MEMBIND_INTERLEAVE: policystr = "interleave"; break; case HWLOC_MEMBIND_REPLICATE: policystr = "replicate"; break; case HWLOC_MEMBIND_NEXTTOUCH: policystr = "nexttouch"; break; default: fprintf(stderr, "unknown memory policy %d\n", policy); assert(0); break; } } if (policystr) printf("%s (%s)\n", s, policystr); else printf("%s\n", s); free(s); return EXIT_SUCCESS; } if (got_membind) { if (hwloc_bitmap_iszero(membind_set)) { if (verbose >= 0) fprintf(stderr, "cannot membind to empty set\n"); if (!force) goto failed_binding; } if (verbose > 0) { char *s; hwloc_bitmap_asprintf(&s, membind_set); fprintf(stderr, "binding on memory set %s\n", s); free(s); } if (single) hwloc_bitmap_singlify(membind_set); if (pid_number) ret = hwloc_set_proc_membind(topology, pid, membind_set, membind_policy, membind_flags); else ret = hwloc_set_membind(topology, membind_set, membind_policy, membind_flags); if (ret && verbose >= 0) { int bind_errno = errno; const char *errmsg = strerror(bind_errno); char *s; hwloc_bitmap_asprintf(&s, membind_set); if (pid_number) 
fprintf(stderr, "hwloc_set_proc_membind %s %d failed (errno %d %s)\n", s, pid_number, bind_errno, errmsg); else fprintf(stderr, "hwloc_set_membind %s failed (errno %d %s)\n", s, bind_errno, errmsg); free(s); } if (ret && !force) goto failed_binding; } if (got_cpubind) { if (hwloc_bitmap_iszero(cpubind_set)) { if (verbose >= 0) fprintf(stderr, "cannot cpubind to empty set\n"); if (!force) goto failed_binding; } if (verbose > 0) { char *s; hwloc_bitmap_asprintf(&s, cpubind_set); fprintf(stderr, "binding on cpu set %s\n", s); free(s); } if (single) hwloc_bitmap_singlify(cpubind_set); if (pid_number) ret = hwloc_set_proc_cpubind(topology, pid, cpubind_set, cpubind_flags); else ret = hwloc_set_cpubind(topology, cpubind_set, cpubind_flags); if (ret && verbose >= 0) { int bind_errno = errno; const char *errmsg = strerror(bind_errno); char *s; hwloc_bitmap_asprintf(&s, cpubind_set); if (pid_number) fprintf(stderr, "hwloc_set_proc_cpubind %s %d failed (errno %d %s)\n", s, pid_number, bind_errno, errmsg); else fprintf(stderr, "hwloc_set_cpubind %s failed (errno %d %s)\n", s, bind_errno, errmsg); free(s); } if (ret && !force) goto failed_binding; } hwloc_bitmap_free(cpubind_set); hwloc_bitmap_free(membind_set); hwloc_topology_destroy(topology); if (pid_number) return EXIT_SUCCESS; if (0 == argc) { fprintf(stderr, "%s: nothing to do!\n", callname); return EXIT_FAILURE; } ret = execvp(argv[0], argv); if (ret) { fprintf(stderr, "%s: Failed to launch executable \"%s\"\n", callname, argv[0]); perror("execvp"); } return EXIT_FAILURE; failed_binding: hwloc_bitmap_free(cpubind_set); hwloc_bitmap_free(membind_set); hwloc_topology_destroy(topology); return EXIT_FAILURE; }
// Construct the hwloc sentinel: load the hardware topology, record the
// process binding, and populate the cached core topology:
//   s_core_topology.first  = number of 'root' objects accessible to this process
//   s_core_topology.second = cores per root (minimum over the accessible roots)
//   s_core_capacity        = processing units per core (minimum over the accessible cores)
//   s_core[]               = row-major (root, core) -> cpuset lookup table
// Prints a warning to std::cout when the accessible topology is not symmetric.
hwloc::hwloc()
{
  s_core_topology   = std::pair<unsigned,unsigned>(0,0);
  s_core_capacity   = 0 ;
  s_hwloc_topology  = 0 ;
  s_hwloc_location  = 0 ;
  s_process_binding = 0 ;

  for ( unsigned i = 0 ; i < MAX_CORE ; ++i ) s_core[i] = 0 ;

  hwloc_topology_init( & s_hwloc_topology );
  hwloc_topology_load( s_hwloc_topology );

  s_hwloc_location  = hwloc_bitmap_alloc();
  s_process_binding = hwloc_bitmap_alloc();

  hwloc_get_cpubind( s_hwloc_topology , s_process_binding , HWLOC_CPUBIND_PROCESS );

  // Choose a hwloc object type for the NUMA level, which may not exist.

  hwloc_obj_type_t root_type = HWLOC_OBJ_TYPE_MAX ;

  {
    // Object types to search, in order.
    static const hwloc_obj_type_t candidate_root_type[] =
      { HWLOC_OBJ_NODE     /* NUMA region     */
      , HWLOC_OBJ_SOCKET   /* hardware socket */
      , HWLOC_OBJ_MACHINE  /* local machine   */
      };

    enum { CANDIDATE_ROOT_TYPE_COUNT =
             sizeof(candidate_root_type) / sizeof(hwloc_obj_type_t) };

    // Stop at the first candidate type that has at least one object.
    for ( int k = 0 ; k < CANDIDATE_ROOT_TYPE_COUNT && HWLOC_OBJ_TYPE_MAX == root_type ; ++k ) {
      if ( 0 < hwloc_get_nbobjs_by_type( s_hwloc_topology , candidate_root_type[k] ) ) {
        root_type = candidate_root_type[k] ;
      }
    }
  }

  // Determine which of these 'root' types are available to this process.
  // The process may have been bound (e.g., by MPI) to a subset of these root types.
  // Determine current location of the master (calling) process.

  hwloc_bitmap_t proc_cpuset_location = hwloc_bitmap_alloc();

  hwloc_get_last_cpu_location( s_hwloc_topology , proc_cpuset_location , HWLOC_CPUBIND_THREAD );

  const unsigned max_root = hwloc_get_nbobjs_by_type( s_hwloc_topology , root_type );

  unsigned root_base     = max_root ;
  unsigned root_count    = 0 ;
  unsigned core_per_root = 0 ;
  unsigned pu_per_core   = 0 ;
  bool     symmetric     = true ;

  for ( unsigned i = 0 ; i < max_root ; ++i ) {

    const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , i );

    if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {

      ++root_count ;

      // Remember which root (NUMA) object the master thread is running on.
      // This will be logical NUMA rank #0 for this process.

      if ( hwloc_bitmap_intersects( proc_cpuset_location, root->allowed_cpuset ) ) {
        root_base = i ;
      }

      // Count available cores:

      const unsigned max_core =
        hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                root->allowed_cpuset ,
                                                HWLOC_OBJ_CORE );

      unsigned core_count = 0 ;

      for ( unsigned j = 0 ; j < max_core ; ++j ) {

        const hwloc_obj_t core =
          hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
                                               root->allowed_cpuset ,
                                               HWLOC_OBJ_CORE , j );

        // If process' cpuset intersects core's cpuset then process can access this core.
        // Must use intersection instead of inclusion because the Intel-Phi
        // MPI may bind the process to only one of the core's hyperthreads.
        //
        // Assumption: if the process can access any hyperthread of the core
        // then it has ownership of the entire core.
        // This assumes that it would be performance-detrimental
        // to spawn more than one MPI process per core and use nested threading.

        if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

          ++core_count ;

          const unsigned pu_count =
            hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                    core->allowed_cpuset ,
                                                    HWLOC_OBJ_PU );

          if ( pu_per_core == 0 ) pu_per_core = pu_count ;

          // Detect asymmetry BEFORE taking the minimum: once the minimum is
          // taken a smaller 'pu_count' always equals 'pu_per_core' and the
          // mismatch would go unnoticed.
          if ( pu_count != pu_per_core ) symmetric = false ;

          // Enforce symmetry by taking the minimum:
          pu_per_core = std::min( pu_per_core , pu_count );
        }
      }

      if ( 0 == core_per_root ) core_per_root = core_count ;

      // Same ordering as above: compare first, then reduce.
      if ( core_count != core_per_root ) symmetric = false ;

      // Enforce symmetry by taking the minimum:
      core_per_root = std::min( core_per_root , core_count );
    }
  }

  s_core_topology.first  = root_count ;
  s_core_topology.second = core_per_root ;
  s_core_capacity        = pu_per_core ;

  // Fill the 's_core' array for fast mapping from a core coordinate to the
  // hwloc cpuset object required for thread location querying and binding.
  //
  // NOTE(review): nothing here checks that root_count * core_per_root fits
  // within MAX_CORE entries -- confirm that MAX_CORE is large enough.

  // Row of 's_core' being filled.  Only roots accessible to this process
  // consume a row, so rows stay contiguous in [0, root_count) even when some
  // roots in between are not accessible.
  unsigned root_row = 0 ;

  for ( unsigned i = 0 ; i < max_root ; ++i ) {

    // Rotate so that the root the master thread runs on is visited first.
    const unsigned root_rank = ( i + root_base ) % max_root ;

    const hwloc_obj_t root = hwloc_get_obj_by_type( s_hwloc_topology , root_type , root_rank );

    if ( hwloc_bitmap_intersects( s_process_binding , root->allowed_cpuset ) ) {

      const unsigned max_core =
        hwloc_get_nbobjs_inside_cpuset_by_type( s_hwloc_topology ,
                                                root->allowed_cpuset ,
                                                HWLOC_OBJ_CORE );

      unsigned core_count = 0 ;

      for ( unsigned j = 0 ; j < max_core && core_count < core_per_root ; ++j ) {

        const hwloc_obj_t core =
          hwloc_get_obj_inside_cpuset_by_type( s_hwloc_topology ,
                                               root->allowed_cpuset ,
                                               HWLOC_OBJ_CORE , j );

        if ( hwloc_bitmap_intersects( s_process_binding , core->allowed_cpuset ) ) {

          s_core[ core_count + core_per_root * root_row ] = core->allowed_cpuset ;

          ++core_count ;
        }
      }

      ++root_row ;
    }
  }

  hwloc_bitmap_free( proc_cpuset_location );

  if ( ! symmetric ) {
    std::cout << "KokkosArray::hwloc WARNING: Using a symmetric subset of a non-symmetric core topology."
              << std::endl ;
  }
}
int main(void) { hwloc_topology_t topology; hwloc_bitmap_t set, set2; hwloc_const_bitmap_t cset_available, cset_all; hwloc_obj_t obj; char *buffer; char type[64]; unsigned i; int err; /* create a topology */ err = hwloc_topology_init(&topology); if (err < 0) { fprintf(stderr, "failed to initialize the topology\n"); return EXIT_FAILURE; } err = hwloc_topology_load(topology); if (err < 0) { fprintf(stderr, "failed to load the topology\n"); hwloc_topology_destroy(topology); return EXIT_FAILURE; } /* retrieve the entire set of available PUs */ cset_available = hwloc_topology_get_topology_cpuset(topology); /* retrieve the CPU binding of the current entire process */ set = hwloc_bitmap_alloc(); if (!set) { fprintf(stderr, "failed to allocate a bitmap\n"); hwloc_topology_destroy(topology); return EXIT_FAILURE; } err = hwloc_get_cpubind(topology, set, HWLOC_CPUBIND_PROCESS); if (err < 0) { fprintf(stderr, "failed to get cpu binding\n"); hwloc_bitmap_free(set); hwloc_topology_destroy(topology); } /* display the processing units that cannot be used by this process */ if (hwloc_bitmap_isequal(set, cset_available)) { printf("this process can use all available processing units in the system\n"); } else { /* compute the set where we currently cannot run. 
* we can't modify cset_available because it's a system read-only one, * so we do set = available &~ set */ hwloc_bitmap_andnot(set, cset_available, set); hwloc_bitmap_asprintf(&buffer, set); printf("process cannot use %d process units (%s) among %u in the system\n", hwloc_bitmap_weight(set), buffer, hwloc_bitmap_weight(cset_available)); free(buffer); /* restore set where it was before the &~ operation above */ hwloc_bitmap_andnot(set, cset_available, set); } /* print the smallest object covering the current process binding */ obj = hwloc_get_obj_covering_cpuset(topology, set); hwloc_obj_type_snprintf(type, sizeof(type), obj, 0); printf("process is bound within object %s logical index %u\n", type, obj->logical_index); /* retrieve the single PU where the current thread actually runs within this process binding */ set2 = hwloc_bitmap_alloc(); if (!set2) { fprintf(stderr, "failed to allocate a bitmap\n"); hwloc_bitmap_free(set); hwloc_topology_destroy(topology); return EXIT_FAILURE; } err = hwloc_get_last_cpu_location(topology, set2, HWLOC_CPUBIND_THREAD); if (err < 0) { fprintf(stderr, "failed to get last cpu location\n"); hwloc_bitmap_free(set); hwloc_bitmap_free(set2); hwloc_topology_destroy(topology); } /* sanity checks that are not actually needed but help the reader */ /* this thread runs within the process binding */ assert(hwloc_bitmap_isincluded(set2, set)); /* this thread runs on a single PU at a time */ assert(hwloc_bitmap_weight(set2) == 1); /* print the logical number of the PU where that thread runs */ /* extract the PU OS index from the bitmap */ i = hwloc_bitmap_first(set2); obj = hwloc_get_pu_obj_by_os_index(topology, i); printf("thread is now running on PU logical index %u (OS/physical index %u)\n", obj->logical_index, i); /* migrate this single thread to where other PUs within the current binding */ hwloc_bitmap_andnot(set2, set, set2); err = hwloc_set_cpubind(topology, set2, HWLOC_CPUBIND_THREAD); if (err < 0) { fprintf(stderr, "failed to set thread 
binding\n"); hwloc_bitmap_free(set); hwloc_bitmap_free(set2); hwloc_topology_destroy(topology); } /* reprint the PU where that thread runs */ err = hwloc_get_last_cpu_location(topology, set2, HWLOC_CPUBIND_THREAD); if (err < 0) { fprintf(stderr, "failed to get last cpu location\n"); hwloc_bitmap_free(set); hwloc_bitmap_free(set2); hwloc_topology_destroy(topology); } /* print the logical number of the PU where that thread runs */ /* extract the PU OS index from the bitmap */ i = hwloc_bitmap_first(set2); obj = hwloc_get_pu_obj_by_os_index(topology, i); printf("thread is running on PU logical index %u (OS/physical index %u)\n", obj->logical_index, i); hwloc_bitmap_free(set); hwloc_bitmap_free(set2); /* retrieve the entire set of all PUs */ cset_all = hwloc_topology_get_complete_cpuset(topology); if (hwloc_bitmap_isequal(cset_all, cset_available)) { printf("all hardware PUs are available\n"); } else { printf("only %d hardware PUs are available in the machine among %d\n", hwloc_bitmap_weight(cset_available), hwloc_bitmap_weight(cset_all)); } hwloc_topology_destroy(topology); return EXIT_SUCCESS; }
void computeCPUOMP(int threadId, expression_type * expr, im_type * im, element_iterator * elt_it, std::vector<std::pair<element_iterator, element_iterator> > * elts) { char * a; int cid; std::ostringstream oss; #if 0 hwloc_cpuset_t set = nullptr; /* get a cpuset object */ set = hwloc_bitmap_alloc(); /* Get the cpu thread affinity info of the current process/thread */ hwloc_get_cpubind(Environment::getHwlocTopology(), set, 0); hwloc_bitmap_asprintf(&a, set); oss << a; free(a); cid = hwloc_bitmap_first(set); oss << "("; while(cid != -1) { oss << cid << " "; cid = hwloc_bitmap_next(set, cid); } oss << ")|"; std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl; /* Get the latest core location of the current process/thread */ hwloc_get_last_cpu_location(Environment::getHwlocTopology(), set, 0); hwloc_bitmap_asprintf(&a, set); oss << a; free(a); cid = hwloc_bitmap_first(set); oss << "("; while(cid != -1) { oss << cid << " "; cid = hwloc_bitmap_next(set, cid); } oss << ");"; std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl; #endif #if defined(FEELPP_HAS_HARTS) perf_mng.init("cpu") ; perf_mng.start("cpu") ; perf_mng.init("1.1") ; perf_mng.init("1.2") ; perf_mng.init("2.1") ; perf_mng.init("2.2") ; perf_mng.init("3") ; #endif //M_gm((*elt_it)->gm()); gm_ptrtype gm = (*elt_it)->gm(); //M_geopc(new typename eval::gmpc_type( M_gm, im->points() )); typename eval::gmpc_ptrtype __geopc( new typename eval::gmpc_type(gm, im->points()) ); //M_c(new gmc_type( M_gm, *(*elt_it), M_geopc )); gmc_ptrtype __c( new gmc_type( gm, *(*elt_it), __geopc ) ); //M_expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) ); eval_expr_type __expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( __c ) ) ); for (int i = 0; i < elts->size(); i++) { /* std::cout << Environment::worldComm().rank() << " nbItems: " << elts->size() << " nbElts " << std::distance(elts->at(i), 
elts->at(i+1)) << " 1st id " << elts->at(i)->id() << std::endl; */ //std::cout << Environment::worldComm().rank() << "|" << theadId << " fid=" elts.at(i).first.id() << std::endl; for ( auto _elt = elts->at(i).first; _elt != elts->at(i).second; ++_elt ) { //perf_mng.start("1.1") ; __c->update( *_elt ); //perf_mng.stop("1.1") ; //perf_mng.start("1.2") ; map_gmc_type mapgmc( fusion::make_pair<vf::detail::gmc<0> >( __c ) ); //perf_mng.stop("1.2") ; //perf_mng.start("2.1") ; __expr.update( mapgmc ); //perf_mng.stop("2.1") ; //perf_mng.start("2.2") ; im->update( *__c ); //perf_mng.stop("2.2") ; //perf_mng.start("3") ; for ( uint16_type c1 = 0; c1 < eval::shape::M; ++c1 ) { for ( uint16_type c2 = 0; c2 < eval::shape::N; ++c2 ) { M_ret( c1,c2 ) += (*im)( __expr, c1, c2 ); } } //perf_mng.stop("3") ; } } #if defined(FEELPP_HAS_HARTS) perf_mng.stop("cpu") ; M_cpuTime = perf_mng.getValueInSeconds("cpu"); #endif }
void computeCPU(DataArgsType& args) { char * a; int cid; hwloc_cpuset_t set = nullptr; std::ostringstream oss; /* This initialization takes some time */ /* When using hartsi, the object instanciation is done when creating tasks */ /* and this is not a parallel section, thus we lose time in initialization */ /* doing it the computation step allows to incorporate this init time in the parallel section */ /* M_threadId( threadId ), M_gm( new gm_type( *_elt.gm() ) ), M_geopc( new gmpc_type( M_gm, _im.points() ) ), M_c( new gmc_type( M_gm, _elt, M_geopc ) ), M_expr( _expr, map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) ), M_im( _im ), M_ret( eval::matrix_type::Zero() ), M_cpuTime( 0.0 ) */ #if 0 /* get a cpuset object */ set = hwloc_bitmap_alloc(); /* Get the cpu thread affinity info of the current process/thread */ hwloc_get_cpubind(Environment::getHwlocTopology(), set, 0); hwloc_bitmap_asprintf(&a, set); oss << a; free(a); cid = hwloc_bitmap_first(set); oss << "("; while(cid != -1) { oss << cid << " "; cid = hwloc_bitmap_next(set, cid); } oss << ")|"; std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl; /* Get the latest core location of the current process/thread */ hwloc_get_last_cpu_location(Environment::getHwlocTopology(), set, 0); hwloc_bitmap_asprintf(&a, set); oss << a; free(a); cid = hwloc_bitmap_first(set); oss << "("; while(cid != -1) { oss << cid << " "; cid = hwloc_bitmap_next(set, cid); } oss << ");"; std::cout << Environment::worldComm().rank() << "|" << M_threadId << " " << oss.str() << std::endl; #endif perf_mng.init("1.1") ; perf_mng.init("1.1") ; perf_mng.init("2.1") ; perf_mng.init("2.2") ; perf_mng.init("3") ; /* free memory */ if(set != nullptr) { hwloc_bitmap_free(set); } //perf_mng.init("data") ; //perf_mng.start("data") ; // DEFINE the range to be iterated on std::vector<std::pair<element_iterator, element_iterator> > * elts = 
args.get("elements")->get<std::vector<std::pair<element_iterator, element_iterator> > >(); int * threadId = args.get("threadId")->get<int>(); expression_type * expr = args.get("expr")->get<expression_type>(); im_type * im = args.get("im")->get<im_type>(); element_iterator * elt_it = args.get("elt")->get<element_iterator>(); //M_gm((*elt_it)->gm()); gm_ptrtype gm = (*elt_it)->gm(); //M_geopc(new typename eval::gmpc_type( M_gm, im->points() )); typename eval::gmpc_ptrtype __geopc( new typename eval::gmpc_type(gm, im->points()) ); //M_c(new gmc_type( M_gm, *(*elt_it), M_geopc )); gmc_ptrtype __c( new gmc_type( gm, *(*elt_it), __geopc ) ); //M_expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( M_c ) ) ); eval_expr_type __expr( (*expr), map_gmc_type( fusion::make_pair<vf::detail::gmc<0> >( __c ) ) ); //perf_mng.stop("data"); perf_mng.init("cpu") ; perf_mng.start("cpu") ; for (int i = 0; i < elts->size(); i++) { //std::cout << Environment::worldComm().rank() << " nbItems: " << elts->size() << " nbElts " << std::distance(elts->at(i), elts->at(i+1)) << std::endl; for ( auto _elt = elts->at(i).first; _elt != elts->at(i).second; ++_elt ) { //perf_mng.start("1.1") ; //M_c->update( *_elt ); __c->update( *_elt ); //perf_mng.stop("1.1") ; //perf_mng.start("1.2") ; map_gmc_type mapgmc( fusion::make_pair<vf::detail::gmc<0> >( __c ) ); //perf_mng.stop("1.2") ; //perf_mng.start("2.1") ; __expr.update( mapgmc ); //perf_mng.stop("2.1") ; //perf_mng.start("2.2") ; im->update( *__c ); //perf_mng.stop("2.2") ; //perf_mng.start("3") ; for ( uint16_type c1 = 0; c1 < eval::shape::M; ++c1 ) { for ( uint16_type c2 = 0; c2 < eval::shape::N; ++c2 ) { M_ret( c1,c2 ) += (*im)( __expr, c1, c2 ); } } //perf_mng.stop("3") ; } } perf_mng.stop("cpu") ; M_cpuTime = perf_mng.getValueInSeconds("cpu"); }