/*
 * Check that hwloc API version 0x20000 or later is available, both in the
 * headers seen at compile time (HWLOC_API_VERSION) and in the library
 * queried at run time (hwloc_get_api_version()).
 *
 * Returns 0 when both checks pass; otherwise prints a diagnostic on stderr
 * and returns -1.
 */
int hwloc_check_version_mismatch()
{
#if HWLOC_API_VERSION < 0x0020000
    /* The headers themselves are too old: no point asking the library. */
    fprintf(stderr, "hwloc version too old, required version 0x20000 or later\n");
    return -1;
#else
    /* Headers are recent enough; the loaded library must be as well. */
    if (hwloc_get_api_version() >= 0x20000)
        return 0;

    fprintf(stderr, "hwloc version mismatch, required version 0x20000 or later, found %#08x\n", hwloc_get_api_version());
    return -1;
#endif
}
int pocl_topology_detect_device_info(cl_device_id device) { hwloc_topology_t pocl_topology; int ret = 0; #ifdef HWLOC_API_2 if (hwloc_get_api_version () < 0x20000) POCL_MSG_ERR ("pocl was compiled against libhwloc 2.x but is" "actually running against libhwloc 1.x \n"); #else if (hwloc_get_api_version () >= 0x20000) POCL_MSG_ERR ("pocl was compiled against libhwloc 1.x but is" "actually running against libhwloc 2.x \n"); #endif /* * hwloc's OpenCL backend causes problems at the initialization stage * because it reloads libpocl.so via the ICD loader. * * See: https://github.com/pocl/pocl/issues/261 * * The only trick to stop hwloc from initializing the OpenCL plugin * I could find is to point the plugin search path to a place where there * are no plugins to be found. */ setenv ("HWLOC_PLUGINS_PATH", "/dev/null", 1); ret = hwloc_topology_init (&pocl_topology); if (ret == -1) { POCL_MSG_ERR ("Cannot initialize the topology.\n"); return ret; } #ifdef HWLOC_API_2 hwloc_topology_set_io_types_filter(pocl_topology, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_SYSTEM, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_GROUP, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_BRIDGE, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_MISC, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_PCI_DEVICE, HWLOC_TYPE_FILTER_KEEP_NONE); hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_OS_DEVICE, HWLOC_TYPE_FILTER_KEEP_NONE); #else hwloc_topology_ignore_type (pocl_topology, HWLOC_TOPOLOGY_FLAG_WHOLE_IO); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_SYSTEM); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_GROUP); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_BRIDGE); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_MISC); hwloc_topology_ignore_type (pocl_topology, 
HWLOC_OBJ_PCI_DEVICE); hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_OS_DEVICE); #endif ret = hwloc_topology_load (pocl_topology); if (ret == -1) { POCL_MSG_ERR ("Cannot load the topology.\n"); goto exit_destroy; } #ifdef HWLOC_API_2 device->global_mem_size = hwloc_get_root_obj(pocl_topology)->total_memory; #else device->global_mem_size = hwloc_get_root_obj(pocl_topology)->memory.total_memory; #endif // Try to get the number of CPU cores from topology int depth = hwloc_get_type_depth(pocl_topology, HWLOC_OBJ_PU); if(depth != HWLOC_TYPE_DEPTH_UNKNOWN) device->max_compute_units = hwloc_get_nbobjs_by_depth(pocl_topology, depth); /* Find information about global memory cache by looking at the first * cache covering the first PU */ do { size_t cache_size = 0, cacheline_size = 0; hwloc_obj_t core = hwloc_get_next_obj_by_type (pocl_topology, HWLOC_OBJ_CORE, NULL); if (core) { hwloc_obj_t cache = hwloc_get_shared_cache_covering_obj (pocl_topology, core); if ((cache) && (cache->attr)) { cacheline_size = cache->attr->cache.linesize; cache_size = cache->attr->cache.size; } else core = NULL; /* fallback to L1 cache size */ } hwloc_obj_t pu = hwloc_get_next_obj_by_type (pocl_topology, HWLOC_OBJ_PU, NULL); if (!core && pu) { hwloc_obj_t cache = hwloc_get_shared_cache_covering_obj (pocl_topology, pu); if ((cache) && (cache->attr)) { cacheline_size = cache->attr->cache.linesize; cache_size = cache->attr->cache.size; } } if (!cache_size || !cacheline_size) break; device->global_mem_cache_type = 0x2; // CL_READ_WRITE_CACHE, without including all of CL/cl.h device->global_mem_cacheline_size = cacheline_size; device->global_mem_cache_size = cache_size; } while (0); // Destroy topology object and return exit_destroy: hwloc_topology_destroy (pocl_topology); return ret; }
void chpl_topo_init(void) { // // We only load hwloc topology information in configurations where // the locale model is other than "flat" or the tasking is based on // Qthreads (which will use the topology we load). We don't use // it otherwise (so far) because loading it is somewhat expensive. // if (strcmp(CHPL_LOCALE_MODEL, "flat") != 0 || strcmp(CHPL_TASKS, "qthreads") == 0) { haveTopology = true; } else { haveTopology = false; return; } // Check hwloc API version. // Require at least hwloc version 1.11 (we need 1.11.5 to not crash // in some NUMA configurations). // Check both at build time and run time. #define REQUIRE_HWLOC_VERSION 0x00010b00 #if HWLOC_API_VERSION < REQUIRE_HWLOC_VERSION #error hwloc version 1.11.5 or newer is required #endif CHK_ERR(hwloc_get_api_version() >= REQUIRE_HWLOC_VERSION); // // Allocate and initialize topology object. // CHK_ERR_ERRNO(hwloc_topology_init(&topology) == 0); // // Perform the topology detection. // CHK_ERR_ERRNO(hwloc_topology_load(topology) == 0); // // What is supported? // topoSupport = hwloc_topology_get_support(topology); // // TODO: update comment // For now, don't support setting memory locality when comm=ugni or // comm=gasnet, seg!=everything. Those are the two configurations in // which we use hugepages and/or memory registered with the comm // interface, both of which may be a problem for the set-membind call. // We will have other ways to achieve locality for these configs in // the future. // do_set_area_membind = true; if ((strcmp(CHPL_COMM, "gasnet") == 0 && strcmp(CHPL_GASNET_SEGMENT, "everything") != 0)) { do_set_area_membind = false; } // // We need depth information. // topoDepth = hwloc_topology_get_depth(topology); // // How many NUMA domains do we have? // { int level; // // Note: If there are multiple levels with NUMA nodes, this finds // only the uppermost. 
// for (level = 0, numaLevel = -1; level < topoDepth && numaLevel == -1; level++) { if (hwloc_get_depth_type(topology, level) == HWLOC_OBJ_NUMANODE) { numaLevel = level; } } } // // Find the NUMA nodes, that is, the objects at numaLevel that also // have CPUs. This is as opposed to things like Xeon Phi HBM, which // is memory-only, no CPUs. // { const hwloc_cpuset_t cpusetAll = hwloc_get_root_obj(topology)->cpuset; numNumaDomains = hwloc_get_nbobjs_inside_cpuset_by_depth(topology, cpusetAll, numaLevel); } }
void chpl_topo_init(void) { // // For now we don't load topology information for locModel=flat, since // we won't use it in that case and loading it is somewhat expensive. // Eventually we will probably load it even for locModel=flat and use // it as the information source for what's currently in chplsys, and // also pass it to Qthreads when we use that (so it doesn't load it // again), but that's work for the future. // haveTopology = (strcmp(CHPL_LOCALE_MODEL, "flat") != 0) ? true : false; if (!haveTopology) { return; } // Check hwloc API version. // Require at least hwloc version 1.11 (we need 1.11.5 to not crash // in some NUMA configurations). // Check both at build time and run time. #define REQUIRE_HWLOC_VERSION 0x00010b00 #if HWLOC_API_VERSION < REQUIRE_HWLOC_VERSION #error hwloc version 1.11.5 or newer is required #else { unsigned version = hwloc_get_api_version(); // check that the version is at least REQUIRE_HWLOC_VERSION if (version < REQUIRE_HWLOC_VERSION) chpl_internal_error("hwloc version 1.11.5 or newer is required"); } #endif // // Allocate and initialize topology object. // if (hwloc_topology_init(&topology)) { report_error("hwloc_topology_init()", errno); } // // Perform the topology detection. // if (hwloc_topology_load(topology)) { report_error("hwloc_topology_load()", errno); } // // What is supported? // topoSupport = hwloc_topology_get_support(topology); // // TODO: update comment // For now, don't support setting memory locality when comm=ugni or // comm=gasnet, seg!=everything. Those are the two configurations in // which we use hugepages and/or memory registered with the comm // interface, both of which may be a problem for the set-membind call. // We will have other ways to achieve locality for these configs in // the future. // do_set_area_membind = true; if ((strcmp(CHPL_COMM, "gasnet") == 0 && strcmp(CHPL_GASNET_SEGMENT, "everything") != 0)) { do_set_area_membind = false; } // // We need depth information. 
// topoDepth = hwloc_topology_get_depth(topology); // // How many NUMA domains do we have? // { int level; // // Note: If there are multiple levels with NUMA nodes, this finds // only the uppermost. // for (level = 0, numaLevel = -1; level < topoDepth && numaLevel == -1; level++) { if (hwloc_get_depth_type(topology, level) == HWLOC_OBJ_NUMANODE) { numaLevel = level; } } } // // Find the NUMA nodes, that is, the objects at numaLevel that also // have CPUs. This is as opposed to things like Xeon Phi HBM, which // is memory-only, no CPUs. // { const hwloc_cpuset_t cpusetAll = hwloc_get_root_obj(topology)->cpuset; numNumaDomains = hwloc_get_nbobjs_inside_cpuset_by_depth(topology, cpusetAll, numaLevel); } }