Example #1
0
int main(void)
{
  hwloc_topology_t topology;
  struct ibv_device **dev_list, *dev;
  int count, i;
  int err;

  dev_list = ibv_get_device_list(&count);
  if (!dev_list) {
    fprintf(stderr, "ibv_get_device_list failed\n");
    return 0;
  }
  printf("ibv_get_device_list found %d devices\n", count);

  hwloc_topology_init(&topology);
  hwloc_topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
  hwloc_topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
  hwloc_topology_load(topology);

  for(i=0; i<count; i++) {
    hwloc_bitmap_t set;
    dev = dev_list[i];

    set = hwloc_bitmap_alloc();
    err = hwloc_ibv_get_device_cpuset(topology, dev, set);
    if (err < 0) {
      printf("failed to get cpuset for device %d (%s)\n",
	     i, ibv_get_device_name(dev));
    } else {
      char *cpuset_string = NULL;
      hwloc_obj_t os;

      hwloc_bitmap_asprintf(&cpuset_string, set);
      printf("got cpuset %s for device %d (%s)\n",
	     cpuset_string, i, ibv_get_device_name(dev));
      free(cpuset_string);

      os = hwloc_ibv_get_device_osdev(topology, dev);
      if (os) {
	assert(os->type == HWLOC_OBJ_OS_DEVICE);
	printf("found OS object subtype %u lindex %u name %s\n",
	       (unsigned) os->attr->osdev.type, os->logical_index, os->name);
	assert(os->attr->osdev.type == HWLOC_OBJ_OSDEV_OPENFABRICS);
	if (strcmp(ibv_get_device_name(dev), os->name))
	  assert(0);
      }
    }
    hwloc_bitmap_free(set);
  }

  hwloc_topology_destroy(topology);

  ibv_free_device_list(dev_list);

  return 0;
}
Example #2
0
int
pocl_topology_detect_device_info(cl_device_id device)
{
  hwloc_topology_t pocl_topology;
  int ret = 0;

#ifdef HWLOC_API_2
  if (hwloc_get_api_version () < 0x20000)
    POCL_MSG_ERR ("pocl was compiled against libhwloc 2.x but is"
                  "actually running against libhwloc 1.x \n");
#else
  if (hwloc_get_api_version () >= 0x20000)
    POCL_MSG_ERR ("pocl was compiled against libhwloc 1.x but is"
                  "actually running against libhwloc 2.x \n");
#endif

  /*

   * hwloc's OpenCL backend causes problems at the initialization stage
   * because it reloads libpocl.so via the ICD loader.
   *
   * See: https://github.com/pocl/pocl/issues/261
   *
   * The only trick to stop hwloc from initializing the OpenCL plugin
   * I could find is to point the plugin search path to a place where there
   * are no plugins to be found.
   */
  setenv ("HWLOC_PLUGINS_PATH", "/dev/null", 1);

  ret = hwloc_topology_init (&pocl_topology);
  if (ret == -1)
  {
    POCL_MSG_ERR ("Cannot initialize the topology.\n");
    return ret;
  }

#ifdef HWLOC_API_2
  hwloc_topology_set_io_types_filter(pocl_topology, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_SYSTEM, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_GROUP, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_BRIDGE, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_MISC, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_PCI_DEVICE, HWLOC_TYPE_FILTER_KEEP_NONE);
  hwloc_topology_set_type_filter (pocl_topology, HWLOC_OBJ_OS_DEVICE, HWLOC_TYPE_FILTER_KEEP_NONE);
#else
  hwloc_topology_ignore_type (pocl_topology, HWLOC_TOPOLOGY_FLAG_WHOLE_IO);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_SYSTEM);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_GROUP);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_BRIDGE);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_MISC);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_PCI_DEVICE);
  hwloc_topology_ignore_type (pocl_topology, HWLOC_OBJ_OS_DEVICE);
#endif

  ret = hwloc_topology_load (pocl_topology);
  if (ret == -1)
  {
    POCL_MSG_ERR ("Cannot load the topology.\n");
    goto exit_destroy;
  }

#ifdef HWLOC_API_2
  device->global_mem_size =
      hwloc_get_root_obj(pocl_topology)->total_memory;
#else
  device->global_mem_size =
      hwloc_get_root_obj(pocl_topology)->memory.total_memory;
#endif

  // Try to get the number of CPU cores from topology
  int depth = hwloc_get_type_depth(pocl_topology, HWLOC_OBJ_PU);
  if(depth != HWLOC_TYPE_DEPTH_UNKNOWN)
    device->max_compute_units = hwloc_get_nbobjs_by_depth(pocl_topology, depth);

  /* Find information about global memory cache by looking at the first
   * cache covering the first PU */
  do {
      size_t cache_size = 0, cacheline_size = 0;

      hwloc_obj_t core
          = hwloc_get_next_obj_by_type (pocl_topology, HWLOC_OBJ_CORE, NULL);
      if (core)
        {
          hwloc_obj_t cache
              = hwloc_get_shared_cache_covering_obj (pocl_topology, core);
          if ((cache) && (cache->attr))
            {
              cacheline_size = cache->attr->cache.linesize;
              cache_size = cache->attr->cache.size;
            }
          else
            core = NULL; /* fallback to L1 cache size */
        }

      hwloc_obj_t pu
          = hwloc_get_next_obj_by_type (pocl_topology, HWLOC_OBJ_PU, NULL);
      if (!core && pu)
        {
          hwloc_obj_t cache
              = hwloc_get_shared_cache_covering_obj (pocl_topology, pu);
          if ((cache) && (cache->attr))
            {
              cacheline_size = cache->attr->cache.linesize;
              cache_size = cache->attr->cache.size;
            }
        }

      if (!cache_size || !cacheline_size)
        break;

      device->global_mem_cache_type
          = 0x2; // CL_READ_WRITE_CACHE, without including all of CL/cl.h
      device->global_mem_cacheline_size = cacheline_size;
      device->global_mem_cache_size = cache_size;
  } while (0);

  // Destroy topology object and return
exit_destroy:
  hwloc_topology_destroy (pocl_topology);
  return ret;

}
Example #3
0
int main(void)
{
  hwloc_topology_t topology;
  unsigned depth;
  char buffer[1024];
  int err;

  /* check a synthetic topology */
  hwloc_topology_init(&topology);
  err = hwloc_topology_set_synthetic(topology, "pack:2 numa:3 l2:4 core:5 pu:6");
  assert(!err);
  hwloc_topology_load(topology);

  assert(hwloc_get_memory_parents_depth(topology) == 2);

  /* internal checks */

  hwloc_topology_check(topology);

  /* local checks */
  depth = hwloc_topology_get_depth(topology);
  assert(depth == 6);

  check_level(topology, 0, 1, 2);
  check_level(topology, 1, 2, 3);
  check_level(topology, 2, 6, 4);
  check_level(topology, 3, 24, 5);
  check_level(topology, 4, 120, 6);
  check_level(topology, 5, 720, 0);
  check_level(topology, HWLOC_TYPE_DEPTH_NUMANODE, 6, 0);

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0);
  assert(err == 83);
  err = strcmp("Package:2 Group:3 [NUMANode(memory=1073741824)] L2Cache:4(size=4194304) Core:5 PU:6", buffer);
  assert(!err);

  assert(hwloc_get_memory_parents_depth(topology) == 2);

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES|HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS|HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1);
  assert(err == 47);
  err = strcmp("Socket:2 Group:3 NUMANode:1 Cache:4 Core:5 PU:6", buffer);
  assert(!err);

  hwloc_topology_destroy(topology);



  hwloc_topology_init(&topology);
  err = hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L1ICACHE, HWLOC_TYPE_FILTER_KEEP_ALL);
  err = hwloc_topology_set_synthetic(topology, "pack:2(indexes=3,5) numa:2(memory=256GB indexes=pack) l3u:1(size=20mb) l2:2 l1i:1(size=16kB) l1dcache:2 core:1 pu:2(indexes=l2)");
  assert(!err);
  hwloc_topology_load(topology);

  assert(hwloc_get_memory_parents_depth(topology) == 2);

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0);
  assert(err == 181);
  err = strcmp("Package:2 L3Cache:2(size=20971520) [NUMANode(memory=274877906944 indexes=2*2:1*2)] L2Cache:2(size=4194304) L1iCache:1(size=16384) L1dCache:2(size=32768) Core:1 PU:2(indexes=4*8:1*4)", buffer);
  assert(!err);

  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PACKAGE, 1)->os_index == 5);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 1)->os_index == 2);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 12)->os_index == 3);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 13)->os_index == 11);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 14)->os_index == 19);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 15)->os_index == 27);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 16)->os_index == 4);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 17)->os_index == 12);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 18)->os_index == 20);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 19)->os_index == 28);

  hwloc_topology_destroy(topology);




  hwloc_topology_init(&topology);
  err = hwloc_topology_set_synthetic(topology, "pack:2 core:2 pu:2(indexes=0,4,2,6,1,5,3,7)");
  assert(!err);
  hwloc_topology_load(topology);

  assert(hwloc_get_memory_parents_depth(topology) == 0);

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0);
  assert(err == 72);
  err = strcmp("[NUMANode(memory=1073741824)] Package:2 Core:2 PU:2(indexes=4*2:2*2:1*2)", buffer);
  assert(!err);

  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0)->os_index == 0);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 1)->os_index == 4);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 2)->os_index == 2);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 3)->os_index == 6);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 4)->os_index == 1);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 5)->os_index == 5);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 6)->os_index == 3);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 7)->os_index == 7);

  hwloc_topology_destroy(topology);




  hwloc_topology_init(&topology);
  err = hwloc_topology_set_synthetic(topology, "pack:2 numa:2 core:1 pu:2(indexes=0,4,2,6,1,3,5,7)");
  assert(!err);
  hwloc_topology_load(topology);

  assert(hwloc_get_memory_parents_depth(topology) == 2);

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0);
  assert(err == 76);
  err = strcmp("Package:2 Core:2 [NUMANode(memory=1073741824)] PU:2(indexes=0,4,2,6,1,3,5,7)", buffer);
  assert(!err);

  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0)->os_index == 0);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 1)->os_index == 4);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 2)->os_index == 2);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 3)->os_index == 6);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 4)->os_index == 1);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 5)->os_index == 3);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 6)->os_index == 5);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 7)->os_index == 7);

  hwloc_topology_destroy(topology);




  hwloc_topology_init(&topology);
  err = hwloc_topology_set_synthetic(topology, "pack:2 [numa(memory=1GB)] [numa(memory=1MB)] core:2 [numa(indexes=8,7,5,6,4,3,1,2)] pu:4");
  assert(!err);
  hwloc_topology_load(topology);

  assert(hwloc_get_memory_parents_depth(topology) == HWLOC_TYPE_DEPTH_MULTIPLE);

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0);
  assert(err == 114);
  err = strcmp("Package:2 [NUMANode(memory=1073741824)] [NUMANode(memory=1048576)] Core:2 [NUMANode(indexes=8,7,5,6,4,3,1,2)] PU:4", buffer);
  assert(!err);

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1);
  assert(err == -1);
  assert(errno == EINVAL);

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_IGNORE_MEMORY);
  assert(err == 21);
  err = strcmp("Package:2 Core:2 PU:4", buffer);
  assert(!err);

  hwloc_topology_destroy(topology);

  return 0;
}
Example #4
0
int main(void)
{
  hwloc_topology_t topology;
  cl_int clret;
  cl_platform_id *platform_ids;
  unsigned nrp, nrd, count, i, j;
  int err;

  hwloc_topology_init(&topology);
  hwloc_topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
  hwloc_topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
  hwloc_topology_load(topology);

  clret = clGetPlatformIDs(0, NULL, &nrp);
  if (CL_SUCCESS != clret || !nrp)
    return 0;
  platform_ids = malloc(nrp * sizeof(*platform_ids));
  if (!platform_ids)
    return 0;
  clret = clGetPlatformIDs(nrp, platform_ids, &nrp);
  if (CL_SUCCESS != clret || !nrp)
    return 0;

  count = 0;
  for(i=0; i<nrp; i++) {
    cl_device_id *device_ids;

    clret = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_ALL, 0, NULL, &nrd);
    if (CL_SUCCESS != clret || !nrd)
      continue;
    device_ids = malloc(nrd * sizeof(*device_ids));
    if (!device_ids)
      continue;
    clret = clGetDeviceIDs(platform_ids[i], CL_DEVICE_TYPE_ALL, nrd, device_ids, &nrd);
    if (CL_SUCCESS != clret || !nrd)
      continue;

    for(j=0; j<nrd; j++) {
      hwloc_bitmap_t set;
      hwloc_obj_t osdev, osdev2, ancestor;
      const char *value;
      unsigned p, d;

      osdev = hwloc_opencl_get_device_osdev_by_index(topology, i, j);
      /* we currently insert all OpenCL devices, except CPU devices */
      if (!osdev) {
	cl_device_type type;
	clGetDeviceInfo(device_ids[j], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
	assert(type == CL_DEVICE_TYPE_CPU);
	continue;
      }

      /* try to get it from PCI locality (only works with AMD extensions) */
      osdev2 = hwloc_opencl_get_device_osdev(topology, device_ids[j]);
      if (osdev2) {
        assert(osdev == osdev2);
      }

      ancestor = hwloc_get_non_io_ancestor_obj(topology, osdev);

      set = hwloc_bitmap_alloc();
      err = hwloc_opencl_get_device_cpuset(topology, device_ids[j], set);
      if (err < 0) {
	printf("no cpuset for platform %u device %u\n", i, j);
      } else {
	char *cpuset_string = NULL;
	hwloc_bitmap_asprintf(&cpuset_string, set);
	printf("got cpuset %s for platform %u device %u\n", cpuset_string, i, j);
	free(cpuset_string);
	if (hwloc_bitmap_isequal(hwloc_topology_get_complete_cpuset(topology), hwloc_topology_get_topology_cpuset(topology)))
	  /* only compare if the topology is complete, otherwise things can be significantly different */
	  assert(hwloc_bitmap_isequal(set, ancestor->cpuset));
      }
      hwloc_bitmap_free(set);

      printf("found OSDev %s\n", osdev->name);
      err = sscanf(osdev->name, "opencl%ud%u", &p, &d);
      assert(err == 2);
      assert(p == i);
      assert(d == j);

      value = hwloc_obj_get_info_by_name(osdev, "Backend");
      err = strcmp(value, "OpenCL");
      assert(!err);

      assert(osdev->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC);

      value = osdev->subtype;
      assert(value);
      err = strcmp(value, "OpenCL");
      assert(!err);

      value = hwloc_obj_get_info_by_name(osdev, "GPUModel");
      printf("found OSDev model %s\n", value);

      count++;
    }
  }

  hwloc_topology_destroy(topology);

  return 0;
}
Example #5
0
int main(void)
{
  hwloc_topology_t topology;
  int i;
  int err;

  hwloc_topology_init(&topology);
  hwloc_topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
  hwloc_topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
  hwloc_topology_load(topology);

  for(i=0; ; i++) {
    hwloc_bitmap_t set;
    hwloc_obj_t osdev, ancestor;
    const char *value;

    osdev = hwloc_intel_mic_get_device_osdev_by_index(topology, i);
    if (!osdev)
      break;
    assert(osdev);

    ancestor = hwloc_get_non_io_ancestor_obj(topology, osdev);

    printf("found OSDev %s\n", osdev->name);
    err = strncmp(osdev->name, "mic", 3);
    assert(!err);
    assert(atoi(osdev->name+3) == (int) i);

    assert(osdev->attr->osdev.type == HWLOC_OBJ_OSDEV_COPROC);

    value = hwloc_obj_get_info_by_name(osdev, "CoProcType");
    err = strcmp(value, "MIC");
    assert(!err);

    value = hwloc_obj_get_info_by_name(osdev, "MICFamily");
    printf("found MICFamily %s\n", value);
    value = hwloc_obj_get_info_by_name(osdev, "MICSKU");
    printf("found MICSKU %s\n", value);
    value = hwloc_obj_get_info_by_name(osdev, "MICActiveCores");
    printf("found MICActiveCores %s\n", value);
    value = hwloc_obj_get_info_by_name(osdev, "MICMemorySize");
    printf("found MICMemorySize %s\n", value);

    set = hwloc_bitmap_alloc();
    err = hwloc_intel_mic_get_device_cpuset(topology, i, set);
    if (err < 0) {
      printf("failed to get cpuset for device %d\n", i);
    } else {
      char *cpuset_string = NULL;
      hwloc_bitmap_asprintf(&cpuset_string, set);
      printf("got cpuset %s for device %d\n", cpuset_string, i);
      if (hwloc_bitmap_isequal(hwloc_topology_get_complete_cpuset(topology), hwloc_topology_get_topology_cpuset(topology)))
	/* only compare if the topology is complete, otherwise things can be significantly different */
	assert(hwloc_bitmap_isequal(set, ancestor->cpuset));
      free(cpuset_string);
    }
    hwloc_bitmap_free(set);
  }

  hwloc_topology_destroy(topology);

  return 0;
}
Example #6
0
int main(void)
{
  hwloc_topology_t topology;
  unsigned depth;
  unsigned i,j, width;
  char buffer[1024];
  int err;

  /* check a synthetic topology */
  hwloc_topology_init(&topology);
  err = hwloc_topology_set_synthetic(topology, "2 3 4 5 6");
  assert(!err);
  hwloc_topology_load(topology);

  /* internal checks */

  hwloc_topology_check(topology);

  /* local checks */
  depth = hwloc_topology_get_depth(topology);
  assert(depth == 6);

  width = 1;
  for(i=0; i<6; i++) {
    /* check arities */
    assert(hwloc_get_nbobjs_by_depth(topology, i) == width);
    for(j=0; j<width; j++) {
      hwloc_obj_t obj = hwloc_get_obj_by_depth(topology, i, j);
      assert(obj);
      assert(obj->arity == (i<5 ? i+2 : 0));
    }
    width *= i+2;
  }

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0);
  assert(err == 75);
  err = strcmp("Package:2 NUMANode:3(memory=1073741824) L2Cache:4(size=4194304) Core:5 PU:6", buffer);
  assert(!err);

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_EXTENDED_TYPES|HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS);
  assert(err == 42);
  err = strcmp("Package:2 NUMANode:3 L2Cache:4 Core:5 PU:6", buffer);
  assert(!err);

  hwloc_topology_destroy(topology);



  hwloc_topology_init(&topology);
  err = hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L1ICACHE, HWLOC_TYPE_FILTER_KEEP_ALL);
  err = hwloc_topology_set_synthetic(topology, "pack:2(indexes=3,5) numa:2(memory=256GB indexes=pack) l3u:1(size=20mb) l2:2 l1i:1(size=16kB) l1dcache:2 core:1 pu:2(indexes=l2)");
  assert(!err);
  hwloc_topology_load(topology);

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0);
  assert(err == 181);
  err = strcmp("Package:2 NUMANode:2(memory=274877906944 indexes=2*2:1*2) L3Cache:1(size=20971520) L2Cache:2(size=4194304) L1iCache:1(size=16384) L1dCache:2(size=32768) Core:1 PU:2(indexes=4*8:1*4)", buffer);
  assert(!err);

  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PACKAGE, 1)->os_index == 5);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 1)->os_index == 2);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 12)->os_index == 3);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 13)->os_index == 11);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 14)->os_index == 19);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 15)->os_index == 27);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 16)->os_index == 4);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 17)->os_index == 12);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 18)->os_index == 20);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 19)->os_index == 28);

  hwloc_topology_destroy(topology);




  hwloc_topology_init(&topology);
  err = hwloc_topology_set_synthetic(topology, "pack:2 core:2 pu:2(indexes=0,4,2,6,1,5,3,7)");
  assert(!err);
  hwloc_topology_load(topology);

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0);
  assert(err == 72);
  err = strcmp("NUMANode:1(memory=1073741824) Package:2 Core:2 PU:2(indexes=4*2:2*2:1*2)", buffer);
  assert(!err);

  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0)->os_index == 0);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 1)->os_index == 4);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 2)->os_index == 2);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 3)->os_index == 6);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 4)->os_index == 1);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 5)->os_index == 5);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 6)->os_index == 3);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 7)->os_index == 7);

  hwloc_topology_destroy(topology);




  hwloc_topology_init(&topology);
  err = hwloc_topology_set_synthetic(topology, "pack:2 numa:2 core:1 pu:2(indexes=0,4,2,6,1,3,5,7)");
  assert(!err);
  hwloc_topology_load(topology);

  err = hwloc_topology_export_synthetic(topology, buffer, sizeof(buffer), 0);
  assert(err == 76);
  err = strcmp("Package:2 NUMANode:2(memory=1073741824) Core:1 PU:2(indexes=0,4,2,6,1,3,5,7)", buffer);
  assert(!err);

  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0)->os_index == 0);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 1)->os_index == 4);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 2)->os_index == 2);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 3)->os_index == 6);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 4)->os_index == 1);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 5)->os_index == 3);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 6)->os_index == 5);
  assert(hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 7)->os_index == 7);

  hwloc_topology_destroy(topology);

  return 0;
}
Example #7
0
extern int
get_cpuinfo(uint16_t *p_cpus, uint16_t *p_boards,
	    uint16_t *p_sockets, uint16_t *p_cores, uint16_t *p_threads,
	    uint16_t *p_block_map_size,
	    uint16_t **p_block_map, uint16_t **p_block_map_inv)
{
	enum { SOCKET=0, CORE=1, PU=2, LAST_OBJ=3 };
	hwloc_topology_t topology;
	hwloc_obj_t obj;
	hwloc_obj_type_t objtype[LAST_OBJ];
	unsigned idx[LAST_OBJ];
	int nobj[LAST_OBJ];
	bitstr_t *used_socket = NULL;
	int *cores_per_socket;
	int actual_cpus;
	int macid;
	int absid;
	int actual_boards = 1, depth, sock_cnt, tot_socks = 0;
	int i, used_core_idx, used_sock_idx;

	debug2("hwloc_topology_init");
	if (hwloc_topology_init(&topology)) {
		/* error in initialize hwloc library */
		debug("hwloc_topology_init() failed.");
		return 1;
	}

	/* parse all system */
	hwloc_topology_set_flags(topology, HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM);

	/* ignores cache, misc */
#if HWLOC_API_VERSION < 0x00020000
	hwloc_topology_ignore_type(topology, HWLOC_OBJ_CACHE);
	hwloc_topology_ignore_type(topology, HWLOC_OBJ_MISC);
#else
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L1CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L2CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L3CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L4CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_L5CACHE,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
	hwloc_topology_set_type_filter(topology, HWLOC_OBJ_MISC,
				       HWLOC_TYPE_FILTER_KEEP_NONE);
#endif

	/* load topology */
	debug2("hwloc_topology_load");
	if (hwloc_topology_load(topology)) {
		/* error in load hardware topology */
		debug("hwloc_topology_load() failed.");
		hwloc_topology_destroy(topology);
		return 2;
	}
#if _DEBUG
	_hwloc_children(topology, hwloc_get_root_obj(topology), 0);
#endif
	/*
	 * Some processors (e.g. AMD Opteron 6000 series) contain multiple
	 * NUMA nodes per socket. This is a configuration which does not map
	 * into the hardware entities that Slurm optimizes resource allocation
	 * for (PU/thread, core, socket, baseboard, node and network switch).
	 * In order to optimize resource allocations on such hardware, Slurm
	 * will consider each NUMA node within the socket as a separate socket.
	 * You can disable this configuring "SchedulerParameters=Ignore_NUMA",
	 * in which case Slurm will report the correct socket count on the node,
	 * but not be able to optimize resource allocations on the NUMA nodes.
	 */
	objtype[SOCKET] = HWLOC_OBJ_SOCKET;
	objtype[CORE]   = HWLOC_OBJ_CORE;
	objtype[PU]     = HWLOC_OBJ_PU;
	if (hwloc_get_type_depth(topology, HWLOC_OBJ_NODE) >
	    hwloc_get_type_depth(topology, HWLOC_OBJ_SOCKET)) {
		char *sched_params = slurm_get_sched_params();
		if (sched_params &&
		    strcasestr(sched_params, "Ignore_NUMA")) {
			info("Ignoring NUMA nodes within a socket");
		} else {
			info("Considering each NUMA node as a socket");
			objtype[SOCKET] = HWLOC_OBJ_NODE;
		}
		xfree(sched_params);
	}

	/* number of objects */
	depth = hwloc_get_type_depth(topology, HWLOC_OBJ_GROUP);
	if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) {
		actual_boards = MAX(hwloc_get_nbobjs_by_depth(topology, depth),
				    1);
	}

	/*
	 * Count sockets/NUMA containing any cores.
	 * KNL NUMA with no cores are NOT counted.
	 */
	nobj[SOCKET] = 0;
	depth = hwloc_get_type_depth(topology, objtype[SOCKET]);
	used_socket = bit_alloc(_MAX_SOCKET_INX);
	cores_per_socket = xmalloc(sizeof(int) * _MAX_SOCKET_INX);
	sock_cnt = hwloc_get_nbobjs_by_depth(topology, depth);
	for (i = 0; i < sock_cnt; i++) {
		obj = hwloc_get_obj_by_depth(topology, depth, i);
		if (obj->type == objtype[SOCKET]) {
			cores_per_socket[i] = _core_child_count(topology, obj);
			if (cores_per_socket[i] > 0) {
				nobj[SOCKET]++;
				bit_set(used_socket, tot_socks);
			}
			if (++tot_socks >= _MAX_SOCKET_INX) {	/* Bitmap size */
				fatal("Socket count exceeds %d, expand data structure size",
				      _MAX_SOCKET_INX);
				break;
			}
		}
	}

	nobj[CORE] = hwloc_get_nbobjs_by_type(topology, objtype[CORE]);

	/*
	 * Workaround for hwloc bug, in some cases the topology "children" array
	 * does not get populated, so _core_child_count() always returns 0
	 */
	if (nobj[SOCKET] == 0) {
		nobj[SOCKET] = hwloc_get_nbobjs_by_type(topology,
							objtype[SOCKET]);
		if (nobj[SOCKET] == 0) {
			debug("get_cpuinfo() fudging nobj[SOCKET] from 0 to 1");
			nobj[SOCKET] = 1;
		}
		if (nobj[SOCKET] >= _MAX_SOCKET_INX) {	/* Bitmap size */
			fatal("Socket count exceeds %d, expand data structure size",
			      _MAX_SOCKET_INX);
		}
		bit_nset(used_socket, 0, nobj[SOCKET] - 1);
	}

	/*
	 * Workaround for hwloc
	 * hwloc_get_nbobjs_by_type() returns 0 on some architectures.
	 */
	if ( nobj[CORE] == 0 ) {
		debug("get_cpuinfo() fudging nobj[CORE] from 0 to 1");
		nobj[CORE] = 1;
	}
	if ( nobj[SOCKET] == -1 )
		fatal("get_cpuinfo() can not handle nobj[SOCKET] = -1");
	if ( nobj[CORE] == -1 )
		fatal("get_cpuinfo() can not handle nobj[CORE] = -1");
	actual_cpus  = hwloc_get_nbobjs_by_type(topology, objtype[PU]);
#if 0
	/* Used to find workaround above */
	info("CORE = %d SOCKET = %d actual_cpus = %d nobj[CORE] = %d",
	     CORE, SOCKET, actual_cpus, nobj[CORE]);
#endif
	if ((actual_cpus % nobj[CORE]) != 0) {
		error("Thread count (%d) not multiple of core count (%d)",
		      actual_cpus, nobj[CORE]);
	}
	nobj[PU] = actual_cpus / nobj[CORE];	/* threads per core */

	if ((nobj[CORE] % nobj[SOCKET]) != 0) {
		error("Core count (%d) not multiple of socket count (%d)",
		      nobj[CORE], nobj[SOCKET]);
	}
	nobj[CORE] /= nobj[SOCKET];		/* cores per socket */

	debug("CPUs:%d Boards:%d Sockets:%d CoresPerSocket:%d ThreadsPerCore:%d",
	      actual_cpus, actual_boards, nobj[SOCKET], nobj[CORE], nobj[PU]);

	/* allocate block_map */
	if (p_block_map_size)
		*p_block_map_size = (uint16_t)actual_cpus;
	if (p_block_map && p_block_map_inv) {
		*p_block_map     = xmalloc(actual_cpus * sizeof(uint16_t));
		*p_block_map_inv = xmalloc(actual_cpus * sizeof(uint16_t));

		/* initialize default as linear mapping */
		for (i = 0; i < actual_cpus; i++) {
			(*p_block_map)[i]     = i;
			(*p_block_map_inv)[i] = i;
		}
		/* create map with hwloc */
		used_sock_idx = -1;
		used_core_idx = -1;
		for (idx[SOCKET] = 0; (used_sock_idx + 1) < nobj[SOCKET];
		     idx[SOCKET]++) {
			if (!bit_test(used_socket, idx[SOCKET]))
				continue;
			used_sock_idx++;
			for (idx[CORE] = 0;
			     idx[CORE] < cores_per_socket[idx[SOCKET]];
			     idx[CORE]++) {
				used_core_idx++;
				for (idx[PU]=0; idx[PU]<nobj[PU]; ++idx[PU]) {
					/* get hwloc_obj by indexes */
					obj=hwloc_get_obj_below_array_by_type(
					            topology, 3, objtype, idx);
					if (!obj)
						continue;
					macid = obj->os_index;
					absid = used_core_idx * nobj[PU] + idx[PU];

					if ((macid >= actual_cpus) ||
					    (absid >= actual_cpus)) {
						/* physical or logical ID are
						 * out of range */
						continue;
					}
					debug4("CPU map[%d]=>%d S:C:T %d:%d:%d", absid, macid,
					       used_sock_idx, idx[CORE], idx[PU]);
					(*p_block_map)[absid]     = macid;
					(*p_block_map_inv)[macid] = absid;
				}
			}
		}
	}
	FREE_NULL_BITMAP(used_socket);
	xfree(cores_per_socket);
	hwloc_topology_destroy(topology);

	/* update output parameters */
	*p_cpus    = actual_cpus;
	*p_boards  = actual_boards;
	*p_sockets = nobj[SOCKET];
	*p_cores   = nobj[CORE];
	*p_threads = nobj[PU];

#if _DEBUG
	/*** Display raw data ***/
	debug("CPUs:%u Boards:%u Sockets:%u CoresPerSocket:%u ThreadsPerCore:%u",
	      *p_cpus, *p_boards, *p_sockets, *p_cores, *p_threads);

	/* Display the mapping tables */
	if (p_block_map && p_block_map_inv) {
		debug("------");
		debug("Abstract -> Machine logical CPU ID block mapping:");
		debug("AbstractId PhysicalId Inverse");
		for (i = 0; i < *p_cpus; i++) {
			debug3("   %4d      %4u       %4u",
				i, (*p_block_map)[i], (*p_block_map_inv)[i]);
		}
		debug("------");
	}
#endif
	return SLURM_SUCCESS;

}