예제 #1
0
static int
hwloc_gl_backend_notify_new_object(struct hwloc_backend *backend,
				   struct hwloc_obj *pcidev)
{
  struct hwloc_topology *topology = backend->topology;
  struct hwloc_gl_backend_data_s *data = backend->private_data;
  unsigned i, res;

  if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO)))
    return 0;

  if (!hwloc_topology_is_thissystem(topology)) {
    hwloc_debug("%s", "\nno GL detection (not thissystem)\n");
    return 0;
  }

  if (HWLOC_OBJ_PCI_DEVICE != pcidev->type)
    return 0;

  if (data->nr_display == (unsigned) -1) {
    /* first call, lookup all display */
    hwloc_gl_query_devices(data);
    /* if it fails, data->nr_display = 0 so we won't do anything below and in next callbacks */
  }

  if (!data->nr_display)
    /* found no display */
    return 0;

  /* now the display array is ready to use */
  res = 0;
  for(i=0; i<data->nr_display; i++) {
    struct hwloc_gl_display_info_s *info = &data->display[i];
    hwloc_obj_t osdev;

    if (info->pcidomain != pcidev->attr->pcidev.domain)
      continue;
    if (info->pcibus != pcidev->attr->pcidev.bus)
      continue;
    if (info->pcidevice != pcidev->attr->pcidev.dev)
      continue;
    if (info->pcifunc != pcidev->attr->pcidev.func)
      continue;

    osdev = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1);
    osdev->name = strdup(info->name);
    osdev->logical_index = -1;
    osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU;
    hwloc_obj_add_info(osdev, "Backend", "GL");
    hwloc_obj_add_info(osdev, "GPUVendor", "NVIDIA Corporation");
    if (info->productname)
      hwloc_obj_add_info(osdev, "GPUModel", info->productname);
    hwloc_insert_object_by_parent(topology, pcidev, osdev);

    res++;
    /* there may be others */
  }

  return res;
}
예제 #2
0
static int
hwloc_look_synthetic(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  struct hwloc_synthetic_backend_data_s *data = backend->private_data;
  hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
  unsigned i;

  assert(!topology->levels[0][0]->cpuset);

  hwloc_alloc_obj_cpusets(topology->levels[0][0]);

  topology->support.discovery->pu = 1;

  /* start with os_index 0 for each level */
  for (i = 0; data->level[i].arity > 0; i++)
    data->level[i].next_os_index = 0;
  /* ... including the last one */
  data->level[i].next_os_index = 0;

  /* update first level type according to the synthetic type array */
  topology->levels[0][0]->type = data->level[0].type;
  hwloc_synthetic__post_look_hooks(&data->level[0], topology->levels[0][0]);

  for (i = 0; i < data->level[0].arity; i++)
    hwloc__look_synthetic(topology, data, 1, cpuset);

  hwloc_bitmap_free(cpuset);

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Synthetic");
  hwloc_obj_add_info(topology->levels[0][0], "SyntheticDescription", data->string);
  return 1;
}
예제 #3
0
void
hwloc_look_synthetic(struct hwloc_topology *topology)
{
  hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
  unsigned first_cpu = 0, i;

  topology->support.discovery->pu = 1;

  /* start with id=0 for each level */
  for (i = 0; topology->backend_params.synthetic.arity[i] > 0; i++)
    topology->backend_params.synthetic.id[i] = 0;
  /* ... including the last one */
  topology->backend_params.synthetic.id[i] = 0;

  /* update first level type according to the synthetic type array */
  topology->levels[0][0]->type = topology->backend_params.synthetic.type[0];

  for (i = 0; i < topology->backend_params.synthetic.arity[0]; i++)
    first_cpu = hwloc__look_synthetic(topology, 1, first_cpu, cpuset);

  hwloc_bitmap_free(cpuset);

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Synthetic");
  hwloc_obj_add_info(topology->levels[0][0], "SyntheticDescription", topology->backend_params.synthetic.string);
}
예제 #4
0
int hwloc_look_hardwired_fujitsu_fx10(struct hwloc_topology *topology)
{
  /* FIXME: what if a broken core is disabled? */
  unsigned i;
  hwloc_obj_t obj;
  hwloc_bitmap_t set;

  for(i=0; i<16; i++) {
    set = hwloc_bitmap_alloc();
    hwloc_bitmap_set(set, i);

    obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1);
    obj->cpuset = hwloc_bitmap_dup(set);
    obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
    obj->attr->cache.depth = 1;
    obj->attr->cache.size = 32*1024;
    obj->attr->cache.linesize = 128;
    obj->attr->cache.associativity = 2;
    hwloc_insert_object_by_cpuset(topology, obj);

    obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1);
    obj->cpuset = hwloc_bitmap_dup(set);
    obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
    obj->attr->cache.depth = 1;
    obj->attr->cache.size = 32*1024;
    obj->attr->cache.linesize = 128;
    obj->attr->cache.associativity = 2;
    hwloc_insert_object_by_cpuset(topology, obj);

    obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i);
    obj->cpuset = set;
    hwloc_insert_object_by_cpuset(topology, obj);
  }

  set = hwloc_bitmap_alloc();
  hwloc_bitmap_set_range(set, 0, 15);

  obj = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, -1);
  obj->cpuset = hwloc_bitmap_dup(set);
  obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
  obj->attr->cache.depth = 2;
  obj->attr->cache.size = 12*1024*1024;
  obj->attr->cache.linesize = 128;
  obj->attr->cache.associativity = 24;
  hwloc_insert_object_by_cpuset(topology, obj);

  obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, 0);
  obj->cpuset = set;
  hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu");
  hwloc_obj_add_info(obj, "CPUModel", "SPARC64 IXfx");
  hwloc_insert_object_by_cpuset(topology, obj);

  hwloc_setup_pu_level(topology, 16);

  return 0;
}
예제 #5
0
static hwloc_obj_t insert_task(hwloc_topology_t topology, hwloc_cpuset_t cpuset, const char * name)
{
  hwloc_obj_t obj;

  hwloc_bitmap_and(cpuset, cpuset, hwloc_topology_get_topology_cpuset(topology));
  if (hwloc_bitmap_iszero(cpuset))
    return NULL;

  /* try to insert at exact position */
  obj = hwloc_topology_insert_misc_object_by_cpuset(topology, cpuset, name);
  if (!obj) {
    /* try to insert in a larger parent */
    char *s;
    hwloc_bitmap_asprintf(&s, cpuset);
    obj = hwloc_get_obj_covering_cpuset(topology, cpuset);
    if (obj) {
      obj = hwloc_topology_insert_misc_object_by_parent(topology, obj, name);
      fprintf(stderr, "Inserted process `%s' below parent larger than cpuset %s\n", name, s);
    } else {
      fprintf(stderr, "Failed to insert process `%s' with cpuset %s\n", name, s);
    }
    free(s);
  } else {
    hwloc_obj_add_info(obj, "Type", "Process");
  }

  return obj;
}
예제 #6
0
static int
hwloc_look_solaris(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  unsigned nbprocs = hwloc_fallback_nbprocessors (topology);
  int alreadypus = 0;

  if (topology->levels[0][0]->cpuset)
    /* somebody discovered things */
    return 0;

  hwloc_alloc_obj_cpusets(topology->levels[0][0]);

#ifdef HAVE_LIBLGRP
  hwloc_look_lgrp(topology);
#endif /* HAVE_LIBLGRP */
#ifdef HAVE_LIBKSTAT
  if (hwloc_look_kstat(topology) > 0)
    alreadypus = 1;
#endif /* HAVE_LIBKSTAT */
  if (!alreadypus)
    hwloc_setup_pu_level(topology, nbprocs);

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Solaris");
  if (topology->is_thissystem)
    hwloc_add_uname_info(topology, NULL);
  return 1;
}
예제 #7
0
static void
lstopo_add_collapse_attributes(hwloc_topology_t topology)
{
  hwloc_obj_t obj, collapser = NULL;
  unsigned collapsed = 0;
  /* collapse identical PCI devs */
  for(obj = hwloc_get_next_pcidev(topology, NULL); obj; obj = hwloc_get_next_pcidev(topology, obj)) {
    if (collapser) {
      if (!obj->arity
	  && obj->parent == collapser->parent
	  && obj->attr->pcidev.vendor_id == collapser->attr->pcidev.vendor_id
	  && obj->attr->pcidev.device_id == collapser->attr->pcidev.device_id
	  && obj->attr->pcidev.subvendor_id == collapser->attr->pcidev.subvendor_id
	  && obj->attr->pcidev.subdevice_id == collapser->attr->pcidev.subdevice_id) {
	/* collapse another one */
	hwloc_obj_add_info(obj, "lstopoCollapse", "0");
	collapsed++;
	continue;
      } else if (collapsed > 1) {
	/* end this collapsing */
	char text[10];
	snprintf(text, sizeof(text), "%u", collapsed);
	hwloc_obj_add_info(collapser, "lstopoCollapse", text);
	collapser = NULL;
	collapsed = 0;
      }
    }
    if (!obj->arity) {
      /* start a new collapsing */
      collapser = obj;
      collapsed = 1;
    }
  }
  if (collapsed > 1) {
    /* end this collapsing */
    char text[10];
    snprintf(text, sizeof(text), "%u", collapsed);
    hwloc_obj_add_info(collapser, "lstopoCollapse", text);
  }
}
void
hwloc_look_solaris(struct hwloc_topology *topology)
{
  unsigned nbprocs = hwloc_fallback_nbprocessors (topology);
  int alreadypus = 0;

#ifdef HAVE_LIBLGRP
  hwloc_look_lgrp(topology);
#endif /* HAVE_LIBLGRP */
#ifdef HAVE_LIBKSTAT
  nbprocs = 0;
  if (hwloc_look_kstat(topology))
    alreadypus = 1;
#endif /* HAVE_LIBKSTAT */
  if (!alreadypus)
    hwloc_setup_pu_level(topology, nbprocs);

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Solaris");
}
예제 #9
0
static int
hwloc_look_netbsd(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  unsigned nbprocs = hwloc_fallback_nbprocessors(topology);

  if (!topology->levels[0][0]->cpuset) {
    /* Nobody (even the x86 backend) created objects yet, setup basic objects */
    hwloc_alloc_obj_cpusets(topology->levels[0][0]);
    hwloc_setup_pu_level(topology, nbprocs);
  }

  /* Add NetBSD specific information */
#if (defined HAVE_SYSCTL) && (defined HAVE_SYS_SYSCTL_H)
  hwloc_netbsd_node_meminfo_info(topology);
#endif
  hwloc_obj_add_info(topology->levels[0][0], "Backend", "NetBSD");
  hwloc_add_uname_info(topology, NULL);
  return 0;
}
예제 #10
0
static int
hwloc_opencl_discover(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  enum hwloc_type_filter_e filter;
  cl_platform_id *platform_ids = NULL;
  cl_uint nr_platforms;
  cl_int clret;
  unsigned j;

  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter);
  if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
    return 0;

  clret = clGetPlatformIDs(0, NULL, &nr_platforms);
  if (CL_SUCCESS != clret || !nr_platforms)
    return -1;
  hwloc_debug("%u OpenCL platforms\n", nr_platforms);
  platform_ids = malloc(nr_platforms * sizeof(*platform_ids));
  if (!platform_ids)
    return -1;
  clret = clGetPlatformIDs(nr_platforms, platform_ids, &nr_platforms);
  if (CL_SUCCESS != clret || !nr_platforms) {
    free(platform_ids);
    return -1;
  }

  for(j=0; j<nr_platforms; j++) {
    cl_device_id *device_ids = NULL;
    cl_uint nr_devices;
    unsigned i;

    clret = clGetDeviceIDs(platform_ids[j], CL_DEVICE_TYPE_ALL, 0, NULL, &nr_devices);
    if (CL_SUCCESS != clret)
      continue;
    device_ids = malloc(nr_devices * sizeof(*device_ids));
    clret = clGetDeviceIDs(platform_ids[j], CL_DEVICE_TYPE_ALL, nr_devices, device_ids, &nr_devices);
    if (CL_SUCCESS != clret) {
      free(device_ids);
      continue;
    }

    for(i=0; i<nr_devices; i++) {
      cl_platform_id platform_id = 0;
      cl_device_type type;
#ifdef CL_DEVICE_TOPOLOGY_AMD
      cl_device_topology_amd amdtopo;
#endif
      cl_ulong globalmemsize;
      cl_uint computeunits;
      hwloc_obj_t osdev, parent;
      char buffer[64];

      hwloc_debug("This is opencl%ud%u\n", j, i);

#ifdef CL_DEVICE_TOPOLOGY_AMD
      clret = clGetDeviceInfo(device_ids[i], CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
      if (CL_SUCCESS != clret) {
        hwloc_debug("no AMD-specific device information: %d\n", clret);
        continue;
      } else if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) {
        hwloc_debug("AMD-specific device topology reports non-PCIe device type: %u\n", amdtopo.raw.type);
        continue;
      }
#else
      continue;
#endif

      osdev = hwloc_alloc_setup_object(topology, HWLOC_OBJ_OS_DEVICE, -1);
      snprintf(buffer, sizeof(buffer), "opencl%ud%u", j, i);
      osdev->name = strdup(buffer);
      osdev->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN;
      osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC;

      osdev->subtype = strdup("OpenCL");
      hwloc_obj_add_info(osdev, "Backend", "OpenCL");

      clGetDeviceInfo(device_ids[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
      if (type == CL_DEVICE_TYPE_GPU)
        hwloc_obj_add_info(osdev, "OpenCLDeviceType", "GPU");
      else if (type == CL_DEVICE_TYPE_ACCELERATOR)
        hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Accelerator");
      else if (type == CL_DEVICE_TYPE_CPU)
        hwloc_obj_add_info(osdev, "OpenCLDeviceType", "CPU");
      else if (type == CL_DEVICE_TYPE_CUSTOM)
        hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Custom");
      else
        hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Unknown");

      buffer[0] = '\0';
      clGetDeviceInfo(device_ids[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL);
      if (buffer[0] != '\0')
        hwloc_obj_add_info(osdev, "GPUVendor", buffer);

      buffer[0] = '\0';
#ifdef CL_DEVICE_BOARD_NAME_AMD
      clGetDeviceInfo(device_ids[i], CL_DEVICE_BOARD_NAME_AMD, sizeof(buffer), buffer, NULL);
#else
      clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL);
#endif
      if (buffer[0] != '\0')
        hwloc_obj_add_info(osdev, "GPUModel", buffer);

      snprintf(buffer, sizeof(buffer), "%u", j);
      hwloc_obj_add_info(osdev, "OpenCLPlatformIndex", buffer);

      buffer[0] = '\0';
      clret = clGetDeviceInfo(device_ids[i], CL_DEVICE_PLATFORM, sizeof(platform_id), &platform_id, NULL);
      if (CL_SUCCESS == clret) {
        clGetPlatformInfo(platform_id, CL_PLATFORM_NAME, sizeof(buffer), buffer, NULL);
        if (buffer[0] != '\0')
          hwloc_obj_add_info(osdev, "OpenCLPlatformName", buffer);
      }

      snprintf(buffer, sizeof(buffer), "%u", i);
      hwloc_obj_add_info(osdev, "OpenCLPlatformDeviceIndex", buffer);

      clGetDeviceInfo(device_ids[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(computeunits), &computeunits, NULL);
      snprintf(buffer, sizeof(buffer), "%u", computeunits);
      hwloc_obj_add_info(osdev, "OpenCLComputeUnits", buffer);

      clGetDeviceInfo(device_ids[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(globalmemsize), &globalmemsize, NULL);
      snprintf(buffer, sizeof(buffer), "%llu", (unsigned long long) globalmemsize / 1024);
      hwloc_obj_add_info(osdev, "OpenCLGlobalMemorySize", buffer);

      parent = NULL;
#ifdef CL_DEVICE_TOPOLOGY_AMD
      parent = hwloc_pcidisc_find_by_busid(topology, 0, amdtopo.pcie.bus, amdtopo.pcie.device, amdtopo.pcie.function);
      if (!parent)
        parent = hwloc_pcidisc_find_busid_parent(topology, 0, amdtopo.pcie.bus, amdtopo.pcie.device, amdtopo.pcie.function);
#endif
      if (!parent)
        parent = hwloc_get_root_obj(topology);

      hwloc_insert_object_by_parent(topology, parent, osdev);
    }
    free(device_ids);
  }
  free(platform_ids);
  return 0;
}
예제 #11
0
static int
hwloc_nvml_discover(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  nvmlReturn_t ret;
  unsigned nb, i;

  if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO)))
    return 0;

  if (!hwloc_topology_is_thissystem(topology)) {
    hwloc_debug("%s", "\nno NVML detection (not thissystem)\n");
    return 0;
  }

  ret = nvmlInit();
  if (NVML_SUCCESS != ret)
    return 0;
  ret = nvmlDeviceGetCount(&nb);
  if (NVML_SUCCESS != ret || !nb) {
    nvmlShutdown();
    return 0;
  }

  for(i=0; i<nb; i++) {
    nvmlPciInfo_t pci;
    nvmlDevice_t device;
    hwloc_obj_t osdev, parent;
    char buffer[64];

    ret = nvmlDeviceGetHandleByIndex(i, &device);
    assert(ret == NVML_SUCCESS);

    osdev = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1);
    snprintf(buffer, sizeof(buffer), "nvml%d", i);
    osdev->name = strdup(buffer);
    osdev->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN;
    osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU;

    hwloc_obj_add_info(osdev, "Backend", "NVML");
    hwloc_obj_add_info(osdev, "GPUVendor", "NVIDIA Corporation");

    buffer[0] = '\0';
    ret = nvmlDeviceGetName(device, buffer, sizeof(buffer));
    hwloc_obj_add_info(osdev, "GPUModel", buffer);

    /* these may fail with NVML_ERROR_NOT_SUPPORTED on old devices */
    buffer[0] = '\0';
    ret = nvmlDeviceGetSerial(device, buffer, sizeof(buffer));
    if (buffer[0] != '\0')
      hwloc_obj_add_info(osdev, "NVIDIASerial", buffer);

    buffer[0] = '\0';
    ret = nvmlDeviceGetUUID(device, buffer, sizeof(buffer));
    if (buffer[0] != '\0')
      hwloc_obj_add_info(osdev, "NVIDIAUUID", buffer);

    parent = NULL;
    if (NVML_SUCCESS == nvmlDeviceGetPciInfo(device, &pci)) {
      parent = hwloc_pci_belowroot_find_by_busid(topology, pci.domain, pci.bus, pci.device, 0);
      if (!parent)
	parent = hwloc_pci_find_busid_parent(topology, pci.domain, pci.bus, pci.device, 0);
#if HAVE_DECL_NVMLDEVICEGETMAXPCIELINKGENERATION
      if (parent && parent->type == HWLOC_OBJ_PCI_DEVICE) {
	unsigned maxwidth = 0, maxgen = 0;
	float lanespeed;
	nvmlDeviceGetMaxPcieLinkWidth(device, &maxwidth);
	nvmlDeviceGetMaxPcieLinkGeneration(device, &maxgen);
	/* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding    = 0.25GB/s data-rate per lane
	 * PCIe Gen2 = 5  GT/s signal-rate per lane with 8/10 encoding    = 0.5 GB/s data-rate per lane
	 * PCIe Gen3 = 8  GT/s signal-rate per lane with 128/130 encoding = 1   GB/s data-rate per lane
	 */
	lanespeed = maxgen <= 2 ? 2.5 * maxgen * 0.8 : 8.0 * 128/130; /* Gbit/s per lane */
	if (lanespeed * maxwidth)
	  /* we found the max link speed, replace the current link speed found by pci (or none) */
	  parent->attr->pcidev.linkspeed = lanespeed * maxwidth / 8; /* GB/s */
      }
#endif
    }
    if (!parent)
      parent = hwloc_get_root_obj(topology);

    hwloc_insert_object_by_parent(topology, parent, osdev);
  }

  nvmlShutdown();
  return nb;
}
예제 #12
0
static int
hwloc_look_pci(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  struct hwloc_obj *first_obj = NULL, *last_obj = NULL;
#ifdef HWLOC_HAVE_LIBPCIACCESS
  int ret;
  struct pci_device_iterator *iter;
  struct pci_device *pcidev;
#else /* HWLOC_HAVE_PCIUTILS */
  struct pci_access *pciaccess;
  struct pci_dev *pcidev;
#endif

  if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO)))
    return 0;

  if (hwloc_get_next_pcidev(topology, NULL)) {
    hwloc_debug("%s", "PCI objects already added, ignoring pci backend.\n");
    return 0;
  }

  if (!hwloc_topology_is_thissystem(topology)) {
    hwloc_debug("%s", "\nno PCI detection (not thissystem)\n");
    return 0;
  }

  hwloc_debug("%s", "\nScanning PCI buses...\n");

  /* initialize PCI scanning */
#ifdef HWLOC_HAVE_LIBPCIACCESS
  ret = pci_system_init();
  if (ret) {
    hwloc_debug("%s", "Can not initialize libpciaccess\n");
    return -1;
  }

  iter = pci_slot_match_iterator_create(NULL);
#else /* HWLOC_HAVE_PCIUTILS */
  pciaccess = pci_alloc();
  pciaccess->error = hwloc_pci_error;
  pciaccess->warning = hwloc_pci_warning;

  if (setjmp(err_buf)) {
    pci_cleanup(pciaccess);
    return -1;
  }

  pci_init(pciaccess);
  pci_scan_bus(pciaccess);
#endif

  /* iterate over devices */
#ifdef HWLOC_HAVE_LIBPCIACCESS
  for (pcidev = pci_device_next(iter);
       pcidev;
       pcidev = pci_device_next(iter))
#else /* HWLOC_HAVE_PCIUTILS */
  for (pcidev = pciaccess->devices;
       pcidev;
       pcidev = pcidev->next)
#endif
  {
    const char *vendorname, *devicename, *fullname;
    unsigned char config_space_cache[CONFIG_SPACE_CACHESIZE];
    struct hwloc_obj *obj;
    unsigned os_index;
    unsigned domain;
    unsigned device_class;
    unsigned short tmp16;
    char name[128];
    unsigned offset;
#ifdef HWLOC_HAVE_PCI_FIND_CAP
    struct pci_cap *cap;
#endif

    /* initialize the config space in case we fail to read it (missing permissions, etc). */
    memset(config_space_cache, 0xff, CONFIG_SPACE_CACHESIZE);
#ifdef HWLOC_HAVE_LIBPCIACCESS
    pci_device_probe(pcidev);
    pci_device_cfg_read(pcidev, config_space_cache, 0, CONFIG_SPACE_CACHESIZE, NULL);
#else /* HWLOC_HAVE_PCIUTILS */
    pci_read_block(pcidev, 0, config_space_cache, CONFIG_SPACE_CACHESIZE); /* doesn't even tell how much it actually reads */
#endif

    /* try to read the domain */
#if (defined HWLOC_HAVE_LIBPCIACCESS) || (defined HWLOC_HAVE_PCIDEV_DOMAIN)
    domain = pcidev->domain;
#else
    domain = 0; /* default domain number */
#endif

    /* try to read the device_class */
#ifdef HWLOC_HAVE_LIBPCIACCESS
    device_class = pcidev->device_class >> 8;
#else /* HWLOC_HAVE_PCIUTILS */
#ifdef HWLOC_HAVE_PCIDEV_DEVICE_CLASS
    device_class = pcidev->device_class;
#else
    device_class = config_space_cache[PCI_CLASS_DEVICE] | (config_space_cache[PCI_CLASS_DEVICE+1] << 8);
#endif
#endif

    /* might be useful for debugging (note that domain might be truncated) */
    os_index = (domain << 20) + (pcidev->bus << 12) + (pcidev->dev << 4) + pcidev->func;

    obj = hwloc_alloc_setup_object(HWLOC_OBJ_PCI_DEVICE, os_index);
    obj->attr->pcidev.domain = domain;
    obj->attr->pcidev.bus = pcidev->bus;
    obj->attr->pcidev.dev = pcidev->dev;
    obj->attr->pcidev.func = pcidev->func;
    obj->attr->pcidev.vendor_id = pcidev->vendor_id;
    obj->attr->pcidev.device_id = pcidev->device_id;
    obj->attr->pcidev.class_id = device_class;
    obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID];

    obj->attr->pcidev.linkspeed = 0; /* unknown */
#ifdef HWLOC_HAVE_PCI_FIND_CAP
    cap = pci_find_cap(pcidev, PCI_CAP_ID_EXP, PCI_CAP_NORMAL);
    offset = cap ? cap->addr : 0;
#else
    offset = hwloc_pci_find_cap(config_space_cache, PCI_CAP_ID_EXP);
#endif /* HWLOC_HAVE_PCI_FIND_CAP */

    if (0xffff == pcidev->vendor_id && 0xffff == pcidev->device_id) {
      /* SR-IOV puts ffff:ffff in Virtual Function config space.
       * The actual VF device ID is stored at a special (dynamic) location in the Physical Function config space.
       * VF and PF have the same vendor ID.
       *
       * libpciaccess just returns ffff:ffff, needs to be fixed.
       * linuxpci is OK because sysfs files are already fixed the kernel.
       * pciutils is OK when it uses those Linux sysfs files.
       *
       * Reading these files is an easy way to work around the libpciaccess issue on Linux,
       * but we have no way to know if this is caused by SR-IOV or not.
       *
       * TODO:
       *  If PF has CAP_ID_PCIX or CAP_ID_EXP (offset>0),
       *  look for extended capability PCI_EXT_CAP_ID_SRIOV (need extended config space (more than 256 bytes)),
       *  then read the VF device ID after it (PCI_IOV_DID bytes later).
       *  Needs access to extended config space (needs root on Linux).
       * TODO:
       *  Add string info attributes in VF and PF objects?
       */
#ifdef HWLOC_LINUX_SYS
      /* Workaround for Linux (the kernel returns the VF device/vendor IDs). */
      char path[64];
      char value[16];
      FILE *file;
      snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/vendor",
	       domain, pcidev->bus, pcidev->dev, pcidev->func);
      file = fopen(path, "r");
      if (file) {
	fread(value, sizeof(value), 1, file);
	fclose(file);
	obj->attr->pcidev.vendor_id = strtoul(value, NULL, 16);
      }
      snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/device",
	       domain, pcidev->bus, pcidev->dev, pcidev->func);
      file = fopen(path, "r");
      if (file) {
	fread(value, sizeof(value), 1, file);
	fclose(file);
	obj->attr->pcidev.device_id = strtoul(value, NULL, 16);
      }
#endif
    }

    if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE)
      hwloc_pci_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed);

    hwloc_pci_prepare_bridge(obj, config_space_cache);

    if (obj->type == HWLOC_OBJ_PCI_DEVICE) {
      memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_VENDOR_ID], sizeof(tmp16));
      obj->attr->pcidev.subvendor_id = tmp16;
      memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_ID], sizeof(tmp16));
      obj->attr->pcidev.subdevice_id = tmp16;
    } else {
      /* TODO:
       * bridge must lookup PCI_CAP_ID_SSVID and then look at offset+PCI_SSVID_VENDOR/DEVICE_ID
       * cardbus must look at PCI_CB_SUBSYSTEM_VENDOR_ID and PCI_CB_SUBSYSTEM_ID
       */
    }

    /* starting from pciutils 2.2, pci_lookup_name() takes a variable number
     * of arguments, and supports the PCI_LOOKUP_NO_NUMBERS flag.
     */

    /* get the vendor name */
#ifdef HWLOC_HAVE_LIBPCIACCESS
    vendorname = pci_device_get_vendor_name(pcidev);
#else /* HWLOC_HAVE_PCIUTILS */
    vendorname = pci_lookup_name(pciaccess, name, sizeof(name),
#if HAVE_DECL_PCI_LOOKUP_NO_NUMBERS
			      PCI_LOOKUP_VENDOR|PCI_LOOKUP_NO_NUMBERS,
			      pcidev->vendor_id
#else
			      PCI_LOOKUP_VENDOR,
			      pcidev->vendor_id, 0, 0, 0
#endif
			      );
#endif /* HWLOC_HAVE_PCIUTILS */
    if (vendorname && *vendorname)
      hwloc_obj_add_info(obj, "PCIVendor", vendorname);

    /* get the device name */
#ifdef HWLOC_HAVE_LIBPCIACCESS
    devicename = pci_device_get_device_name(pcidev);
#else /* HWLOC_HAVE_PCIUTILS */
    devicename = pci_lookup_name(pciaccess, name, sizeof(name),
#if HAVE_DECL_PCI_LOOKUP_NO_NUMBERS
			      PCI_LOOKUP_DEVICE|PCI_LOOKUP_NO_NUMBERS,
			      pcidev->vendor_id, pcidev->device_id
#else
			      PCI_LOOKUP_DEVICE,
			      pcidev->vendor_id, pcidev->device_id, 0, 0
#endif
			      );
#endif /* HWLOC_HAVE_PCIUTILS */
    if (devicename && *devicename)
      hwloc_obj_add_info(obj, "PCIDevice", devicename);

    /* generate or get the fullname */
#ifdef HWLOC_HAVE_LIBPCIACCESS
    snprintf(name, sizeof(name), "%s%s%s",
	     vendorname ? vendorname : "",
	     vendorname && devicename ? " " : "",
	     devicename ? devicename : "");
    fullname = name;
    if (*name)
      obj->name = strdup(name);
#else /* HWLOC_HAVE_PCIUTILS */
    fullname = pci_lookup_name(pciaccess, name, sizeof(name),
#if HAVE_DECL_PCI_LOOKUP_NO_NUMBERS
			      PCI_LOOKUP_VENDOR|PCI_LOOKUP_DEVICE|PCI_LOOKUP_NO_NUMBERS,
			      pcidev->vendor_id, pcidev->device_id
#else
			      PCI_LOOKUP_VENDOR|PCI_LOOKUP_DEVICE,
			      pcidev->vendor_id, pcidev->device_id, 0, 0
#endif
			      );
    if (fullname && *fullname)
      obj->name = strdup(fullname);
#endif /* HWLOC_HAVE_PCIUTILS */
    hwloc_debug("  %04x:%02x:%02x.%01x %04x %04x:%04x %s\n",
		domain, pcidev->bus, pcidev->dev, pcidev->func,
		device_class, pcidev->vendor_id, pcidev->device_id,
		fullname && *fullname ? fullname : "??");

    /* queue the object for now */
    if (first_obj)
      last_obj->next_sibling = obj;
    else
      first_obj = obj;
    last_obj = obj;
  }

  /* finalize device scanning */
#ifdef HWLOC_HAVE_LIBPCIACCESS
  pci_iterator_destroy(iter);
  pci_system_cleanup();
#else /* HWLOC_HAVE_PCIUTILS */
  pci_cleanup(pciaccess);
#endif

  return hwloc_insert_pci_device_list(backend, first_obj);
}
예제 #13
0
void
hwloc_look_hpux(struct hwloc_topology *topology)
{
  int has_numa = sysconf(_SC_CCNUMA_SUPPORT) == 1;
  hwloc_obj_t *nodes = NULL, obj;
  spu_t currentcpu;
  ldom_t currentnode;
  int i, nbnodes = 0;

#ifdef HAVE__SC_LARGE_PAGESIZE
  topology->levels[0][0]->attr->machine.huge_page_size_kB = sysconf(_SC_LARGE_PAGESIZE);
#endif

  if (has_numa) {
    nbnodes = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ?
      MPC_GETNUMLDOMS_SYS : MPC_GETNUMLDOMS, 0, 0);

    hwloc_debug("%d nodes\n", nbnodes);

    nodes = malloc(nbnodes * sizeof(*nodes));

    i = 0;
    currentnode = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ?
      MPC_GETFIRSTLDOM_SYS : MPC_GETFIRSTLDOM, 0, 0);
    while (currentnode != -1 && i < nbnodes) {
      hwloc_debug("node %d is %d\n", i, currentnode);
      nodes[i] = obj = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, currentnode);
      obj->cpuset = hwloc_bitmap_alloc();
      obj->nodeset = hwloc_bitmap_alloc();
      hwloc_bitmap_set(obj->nodeset, currentnode);
      /* TODO: obj->attr->node.memory_kB */
      /* TODO: obj->attr->node.huge_page_free */

      currentnode = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ?
        MPC_GETNEXTLDOM_SYS : MPC_GETNEXTLDOM, currentnode, 0);
      i++;
    }
  }

  i = 0;
  currentcpu = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ?
      MPC_GETFIRSTSPU_SYS : MPC_GETFIRSTSPU, 0,0);
  while (currentcpu != -1) {
    obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, currentcpu);
    obj->cpuset = hwloc_bitmap_alloc();
    hwloc_bitmap_set(obj->cpuset, currentcpu);

    hwloc_debug("cpu %d\n", currentcpu);

    if (nodes) {
      /* Add this cpu to its node */
      currentnode = mpctl(MPC_SPUTOLDOM, currentcpu, 0);
      if ((ldom_t) nodes[i]->os_index != currentnode)
        for (i = 0; i < nbnodes; i++)
          if ((ldom_t) nodes[i]->os_index == currentnode)
            break;
      if (i < nbnodes) {
        hwloc_bitmap_set(nodes[i]->cpuset, currentcpu);
        hwloc_debug("is in node %d\n", i);
      } else {
        hwloc_debug("%s", "is in no node?!\n");
      }
    }

    /* Add cpu */
    hwloc_insert_object_by_cpuset(topology, obj);

    currentcpu = mpctl(topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM ?
      MPC_GETNEXTSPU_SYS : MPC_GETNEXTSPU, currentcpu, 0);
  }

  if (nodes) {
    /* Add nodes */
    for (i = 0 ; i < nbnodes ; i++)
      hwloc_insert_object_by_cpuset(topology, nodes[i]);
    free(nodes);
  }

  topology->support.discovery->pu = 1;

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "HP-UX");
}
예제 #14
0
int hwloc_look_hardwired_fujitsu_fx100(struct hwloc_topology *topology)
{
    /* FIXME: what if a broken core is disabled? */
    unsigned i;
    hwloc_obj_t obj;
    hwloc_bitmap_t set;

    for(i=0; i<34; i++) {
        set = hwloc_bitmap_alloc();
        hwloc_bitmap_set(set, i);

        if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
            obj = hwloc_alloc_setup_object(HWLOC_OBJ_L1ICACHE, -1);
            obj->cpuset = hwloc_bitmap_dup(set);
            obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
            obj->attr->cache.depth = 1;
            obj->attr->cache.size = 64*1024;
            obj->attr->cache.linesize = 256;
            obj->attr->cache.associativity = 4;
            hwloc_insert_object_by_cpuset(topology, obj);
        }
        if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1CACHE)) {
            obj = hwloc_alloc_setup_object(HWLOC_OBJ_L1CACHE, -1);
            obj->cpuset = hwloc_bitmap_dup(set);
            obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
            obj->attr->cache.depth = 1;
            obj->attr->cache.size = 64*1024;
            obj->attr->cache.linesize = 256;
            obj->attr->cache.associativity = 4;
            hwloc_insert_object_by_cpuset(topology, obj);
        }
        if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
            obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i);
            obj->cpuset = set;
            hwloc_insert_object_by_cpuset(topology, obj);
        } else
            hwloc_bitmap_free(set);
    }

    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L2CACHE)) {
        obj = hwloc_alloc_setup_object(HWLOC_OBJ_L2CACHE, -1);
        obj->cpuset = hwloc_bitmap_alloc();
        hwloc_bitmap_set_range(obj->cpuset, 0, 15);
        hwloc_bitmap_set(obj->cpuset, 32);
        obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
        obj->attr->cache.depth = 2;
        obj->attr->cache.size = 12*1024*1024;
        obj->attr->cache.linesize = 256;
        obj->attr->cache.associativity = 24;
        hwloc_insert_object_by_cpuset(topology, obj);

        obj = hwloc_alloc_setup_object(HWLOC_OBJ_L2CACHE, -1);
        obj->cpuset = hwloc_bitmap_alloc();
        hwloc_bitmap_set_range(obj->cpuset, 16, 31);
        hwloc_bitmap_set(obj->cpuset, 33);
        obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
        obj->attr->cache.depth = 2;
        obj->attr->cache.size = 12*1024*1024;
        obj->attr->cache.linesize = 256;
        obj->attr->cache.associativity = 24;
        hwloc_insert_object_by_cpuset(topology, obj);
    }
    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) {
        obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, 0);
        obj->cpuset = hwloc_bitmap_alloc();
        hwloc_bitmap_set_range(obj->cpuset, 0, 33);
        hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu");
        hwloc_obj_add_info(obj, "CPUModel", "SPARC64 XIfx");
        hwloc_insert_object_by_cpuset(topology, obj);
    }

    hwloc_setup_pu_level(topology, 34);

    return 0;
}
예제 #15
0
파일: topology-osf.c 프로젝트: IanYXXL/A1
static int
hwloc_look_osf(struct hwloc_backend *backend)
{
    struct hwloc_topology *topology = backend->topology;
    cpu_cursor_t cursor;
    unsigned nbnodes;
    radid_t radid, radid2;
    radset_t radset, radset2;
    cpuid_t cpuid;
    cpuset_t cpuset;
    struct hwloc_obj *obj;
    unsigned distance;

    if (topology->levels[0][0]->cpuset)
        /* somebody discovered things */
        return 0;

    hwloc_alloc_obj_cpusets(topology->levels[0][0]);

    nbnodes = rad_get_num();

    cpusetcreate(&cpuset);
    radsetcreate(&radset);
    radsetcreate(&radset2);
    {
        hwloc_obj_t *nodes = calloc(nbnodes, sizeof(hwloc_obj_t));
        unsigned *indexes = calloc(nbnodes, sizeof(unsigned));
        float *distances = calloc(nbnodes*nbnodes, sizeof(float));
        unsigned nfound;
        numa_attr_t attr;

        attr.nattr_type = R_RAD;
        attr.nattr_descr.rd_radset = radset;
        attr.nattr_flags = 0;

        for (radid = 0; radid < (radid_t) nbnodes; radid++) {
            rademptyset(radset);
            radaddset(radset, radid);
            cpuemptyset(cpuset);
            if (rad_get_cpus(radid, cpuset)==-1) {
                fprintf(stderr,"rad_get_cpus(%d) failed: %s\n",radid,strerror(errno));
                continue;
            }

            indexes[radid] = radid;
            nodes[radid] = obj = hwloc_alloc_setup_object(HWLOC_OBJ_NODE, radid);
            obj->cpuset = hwloc_bitmap_alloc();
            obj->memory.local_memory = rad_get_physmem(radid) * hwloc_getpagesize();
            obj->memory.page_types_len = 2;
            obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
            memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
            obj->memory.page_types[0].size = hwloc_getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
            obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif

            cursor = SET_CURSOR_INIT;
            while((cpuid = cpu_foreach(cpuset, 0, &cursor)) != CPU_NONE)
                hwloc_bitmap_set(obj->cpuset, cpuid);

            hwloc_debug_1arg_bitmap("node %d has cpuset %s\n",
                                    radid, obj->cpuset);

            hwloc_insert_object_by_cpuset(topology, obj);

            nfound = 0;
            for (radid2 = 0; radid2 < (radid_t) nbnodes; radid2++)
                distances[radid*nbnodes+radid2] = RAD_DIST_REMOTE;
            for (distance = RAD_DIST_LOCAL; distance < RAD_DIST_REMOTE; distance++) {
                attr.nattr_distance = distance;
                /* get set of NUMA nodes at distance <= DISTANCE */
                if (nloc(&attr, radset2)) {
                    fprintf(stderr,"nloc failed: %s\n", strerror(errno));
                    continue;
                }
                cursor = SET_CURSOR_INIT;
                while ((radid2 = rad_foreach(radset2, 0, &cursor)) != RAD_NONE) {
                    if (distances[radid*nbnodes+radid2] == RAD_DIST_REMOTE) {
                        distances[radid*nbnodes+radid2] = (float) distance;
                        nfound++;
                    }
                }
                if (nfound == nbnodes)
                    /* Finished finding distances, no need to go up to RAD_DIST_REMOTE */
                    break;
            }
        }

        hwloc_distances_set(topology, HWLOC_OBJ_NODE, nbnodes, indexes, nodes, distances, 0 /* OS cannot force */);
    }
    radsetdestroy(&radset2);
    radsetdestroy(&radset);
    cpusetdestroy(&cpuset);

    /* add PU objects */
    hwloc_setup_pu_level(topology, hwloc_fallback_nbprocessors(topology));

    hwloc_obj_add_info(topology->levels[0][0], "Backend", "OSF");
    if (topology->is_thissystem)
        hwloc_add_uname_info(topology);
    return 1;
}
예제 #16
0
int main(void)
{
  hwloc_topology_t topology1, topology2;
  char *xmlbuf;
  int xmlbuflen;
  char xmlfile[] = "hwloc_backends.tmpxml.XXXXXX";
  char env[64];
  int xmlbufok = 0, xmlfileok = 0, xmlfilefd;
  const char *orig_backend_name;

  putenv("HWLOC_LIBXML_CLEANUP=1");

  printf("trying to export topology to XML buffer and file for later...\n");
  hwloc_topology_init(&topology1);
  hwloc_topology_load(topology1);
  orig_backend_name = get_backend_name(topology1);
  hwloc_obj_add_info(hwloc_get_root_obj(topology1), "Foo", "Bar");
  assert(hwloc_topology_is_thissystem(topology1));
  if (hwloc_topology_export_xmlbuffer(topology1, &xmlbuf, &xmlbuflen) < 0)
    printf("XML buffer export failed (%s), ignoring\n", strerror(errno));
  else
    xmlbufok = 1;
  xmlfilefd = mkstemp(xmlfile);
  if (xmlfilefd < 0 || hwloc_topology_export_xml(topology1, xmlfile) < 0)
    printf("XML file export failed (%s), ignoring\n", strerror(errno));
  else
    xmlfileok = 1;


  /* init+config+destroy without loading */
  printf("init...\n");
  hwloc_topology_init(&topology2);
  if (xmlfileok) {
    printf("switching to xml...\n");
    assert(!hwloc_topology_set_xml(topology2, xmlfile));
  }
  if (xmlbufok) {
    printf("switching to xmlbuffer...\n");
    assert(!hwloc_topology_set_xmlbuffer(topology2, xmlbuf, xmlbuflen));
  }
  printf("switching to synthetic...\n");
  hwloc_topology_set_synthetic(topology2, "machine:2 node:3 l1:2 pu:4");
  hwloc_topology_destroy(topology2);

  /* init+xml+load+destroy */
  if (xmlfileok) {
    printf("switching to xml and loading...\n");
    hwloc_topology_init(&topology2);
    assert(!hwloc_topology_set_xml(topology2, xmlfile));
    hwloc_topology_load(topology2);
    assert_backend_name(topology2, orig_backend_name);
    assert_foo_bar(topology2, 1);
    hwloc_topology_check(topology2);
    assert(!hwloc_topology_is_thissystem(topology2));
    hwloc_topology_destroy(topology2);
  }

  /* init+xmlbuf+load+destroy */
  if (xmlbufok) {
    printf("switching to xmlbuffer and loading...\n");
    hwloc_topology_init(&topology2);
    assert(!hwloc_topology_set_xmlbuffer(topology2, xmlbuf, xmlbuflen));
    hwloc_topology_load(topology2);
    assert_backend_name(topology2, orig_backend_name);
    assert_foo_bar(topology2, 1);
    hwloc_topology_check(topology2);
    assert(!hwloc_topology_is_thissystem(topology2));
    hwloc_topology_destroy(topology2);
  }

  /* init+synthetic+load+destroy */
  printf("switching to synthetic and loading...\n");
  hwloc_topology_init(&topology2);
  hwloc_topology_set_synthetic(topology2, "machine:2 node:3 l3i:2 pu:4");
  hwloc_topology_load(topology2);
  assert_backend_name(topology2, "Synthetic");
  assert_foo_bar(topology2, 0);
  assert(hwloc_get_nbobjs_by_type(topology2, HWLOC_OBJ_PU) == 2*3*2*4);
  hwloc_topology_check(topology2);
  assert(!hwloc_topology_is_thissystem(topology2));
  hwloc_topology_destroy(topology2);

  /* xmlenv+init+load+destroy */
  if (xmlfileok) {
    printf("switching to xml by env and loading...\n");
    snprintf(env, sizeof(env), "HWLOC_XMLFILE=%s", xmlfile);
    putenv(env);
    hwloc_topology_init(&topology2);
    hwloc_topology_load(topology2);
    assert_backend_name(topology2, orig_backend_name);
    assert_foo_bar(topology2, 1);
    hwloc_topology_check(topology2);
    assert(!hwloc_topology_is_thissystem(topology2));
    hwloc_topology_destroy(topology2);
  }

  /* syntheticenv+init+load+destroy, synthetic env overrides xml */
  printf("switching to synthetic by env and loading...\n");
  putenv("HWLOC_SYNTHETIC=node:3 pu:3");
  hwloc_topology_init(&topology2);
  hwloc_topology_load(topology2);
  assert_backend_name(topology2, "Synthetic");
  assert_foo_bar(topology2, 0);
  assert(hwloc_get_nbobjs_by_type(topology2, HWLOC_OBJ_PU) == 3*3);
  hwloc_topology_check(topology2);
  assert(!hwloc_topology_is_thissystem(topology2));
  hwloc_topology_destroy(topology2);

  /* componentsenv+init+load+destroy for testing defaults, overrides synthetic/xml/fsroot envs */
  printf("switching to default components by env and loading...\n");
  putenv("HWLOC_COMPONENTS=,"); /* don't set to empty since it means 'unset' on windows */
  hwloc_topology_init(&topology2);
  hwloc_topology_load(topology2);
  assert_backend_name(topology2, orig_backend_name);
  assert_foo_bar(topology2, 0);
  hwloc_topology_check(topology2);
  assert(hwloc_topology_is_thissystem(topology2));
  hwloc_topology_destroy(topology2);

  if (xmlbufok)
    hwloc_free_xmlbuffer(topology1, xmlbuf);
  if (xmlfilefd >= 0) {
    unlink(xmlfile);
    close(xmlfilefd);
  }
  hwloc_topology_destroy(topology1);

  return 0;
}
예제 #17
0
static int
hwloc_cuda_discover(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  enum hwloc_type_filter_e filter;
  cudaError_t cures;
  int nb, i;

  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter);
  if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
    return 0;

  cures = cudaGetDeviceCount(&nb);
  if (cures)
    return -1;

  for (i = 0; i < nb; i++) {
    int domain, bus, dev;
    char cuda_name[32];
    char number[32];
    struct cudaDeviceProp prop;
    hwloc_obj_t cuda_device, parent;
    unsigned cores;

    cuda_device = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1);
    snprintf(cuda_name, sizeof(cuda_name), "cuda%d", i);
    cuda_device->name = strdup(cuda_name);
    cuda_device->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN;
    cuda_device->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC;

    cuda_device->subtype = strdup("CUDA");
    hwloc_obj_add_info(cuda_device, "Backend", "CUDA");
    hwloc_obj_add_info(cuda_device, "GPUVendor", "NVIDIA Corporation");

    cures = cudaGetDeviceProperties(&prop, i);
    if (!cures)
      hwloc_obj_add_info(cuda_device, "GPUModel", prop.name);

    snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.totalGlobalMem) >> 10);
    hwloc_obj_add_info(cuda_device, "CUDAGlobalMemorySize", number);

    snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.l2CacheSize) >> 10);
    hwloc_obj_add_info(cuda_device, "CUDAL2CacheSize", number);

    snprintf(number, sizeof(number), "%d", prop.multiProcessorCount);
    hwloc_obj_add_info(cuda_device, "CUDAMultiProcessors", number);

    cores = hwloc_cuda_cores_per_MP(prop.major, prop.minor);
    if (cores) {
      snprintf(number, sizeof(number), "%u", cores);
      hwloc_obj_add_info(cuda_device, "CUDACoresPerMP", number);
    }

    snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.sharedMemPerBlock) >> 10);
    hwloc_obj_add_info(cuda_device, "CUDASharedMemorySizePerMP", number);

    parent = NULL;
    if (hwloc_cudart_get_device_pci_ids(NULL /* topology unused */, i, &domain, &bus, &dev) == 0) {
      parent = hwloc_pci_belowroot_find_by_busid(topology, domain, bus, dev, 0);
      if (!parent)
	parent = hwloc_pci_find_busid_parent(topology, domain, bus, dev, 0);
    }
    if (!parent)
      parent = hwloc_get_root_obj(topology);

    hwloc_insert_object_by_parent(topology, parent, cuda_device);
  }

  return 0;
}
예제 #18
0
int hwloc_look_hardwired_fujitsu_fx10(struct hwloc_topology *topology)
{
  /* If a broken core gets disabled, its bit disappears and other core bits are NOT shifted towards 0.
   * Node is not given to user job, not need to handle that case properly.
   */
  unsigned i;
  hwloc_obj_t obj;
  hwloc_bitmap_t set;

  for(i=0; i<16; i++) {
    set = hwloc_bitmap_alloc();
    hwloc_bitmap_set(set, i);

    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1ICACHE, -1);
      obj->cpuset = hwloc_bitmap_dup(set);
      obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
      obj->attr->cache.depth = 1;
      obj->attr->cache.size = 32*1024;
      obj->attr->cache.linesize = 128;
      obj->attr->cache.associativity = 2;
      hwloc_insert_object_by_cpuset(topology, obj);
    }
    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1CACHE)) {
      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1CACHE, -1);
      obj->cpuset = hwloc_bitmap_dup(set);
      obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
      obj->attr->cache.depth = 1;
      obj->attr->cache.size = 32*1024;
      obj->attr->cache.linesize = 128;
      obj->attr->cache.associativity = 2;
      hwloc_insert_object_by_cpuset(topology, obj);
    }
    if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
      obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, i);
      obj->cpuset = set;
      hwloc_insert_object_by_cpuset(topology, obj);
    } else
      hwloc_bitmap_free(set);
  }

  set = hwloc_bitmap_alloc();
  hwloc_bitmap_set_range(set, 0, 15);

  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L2CACHE)) {
    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L2CACHE, -1);
    obj->cpuset = hwloc_bitmap_dup(set);
    obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
    obj->attr->cache.depth = 2;
    obj->attr->cache.size = 12*1024*1024;
    obj->attr->cache.linesize = 128;
    obj->attr->cache.associativity = 24;
    hwloc_insert_object_by_cpuset(topology, obj);
  }
  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) {
    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, 0);
    obj->cpuset = set;
    hwloc_obj_add_info(obj, "CPUVendor", "Fujitsu");
    hwloc_obj_add_info(obj, "CPUModel", "SPARC64 IXfx");
    hwloc_insert_object_by_cpuset(topology, obj);
  } else
    hwloc_bitmap_free(set);

  hwloc_setup_pu_level(topology, 16);

  return 0;
}
예제 #19
0
static int
hwloc_gl_discover(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  unsigned i, res = 0;
  int err;

  if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO)))
    return 0;

  if (!hwloc_topology_is_thissystem(topology)) {
    hwloc_debug("%s", "\nno GL detection (not thissystem)\n");
    return 0;
  }

  for (i = 0; i < HWLOC_GL_SERVER_MAX; ++i) {
    Display* display;
    char displayName[10];
    int opcode, event, error;
    unsigned j;

    /* open X server */
    snprintf(displayName, sizeof(displayName), ":%u", i);
    display = XOpenDisplay(displayName);
    if (!display)
      continue;

    /* Check for NV-CONTROL extension (it's per server) */
    if(!XQueryExtension(display, "NV-CONTROL", &opcode, &event, &error)) {
      XCloseDisplay(display);
      continue;
    }

    for (j = 0; j < (unsigned) ScreenCount(display) && j < HWLOC_GL_SCREEN_MAX; j++) {
      hwloc_obj_t osdev, parent;
      const int screen = j;
      unsigned int *ptr_binary_data;
      int data_length;
      int gpu_number;
      int nv_ctrl_pci_bus;
      int nv_ctrl_pci_device;
      int nv_ctrl_pci_domain;
      int nv_ctrl_pci_func;
      char *productname;
      char name[64];

      /* the server supports NV-CONTROL but it may contain non-NVIDIA screen that don't support it */
      if (!XNVCTRLIsNvScreen(display, screen))
        continue;

      /* Gets the GPU number attached to the default screen. */
      /* For further details, see the <NVCtrl/NVCtrlLib.h> */
      err = XNVCTRLQueryTargetBinaryData (display, NV_CTRL_TARGET_TYPE_X_SCREEN, screen, 0,
                                          NV_CTRL_BINARY_DATA_GPUS_USED_BY_XSCREEN,
                                          (unsigned char **) &ptr_binary_data, &data_length);
      if (!err)
        continue;

      gpu_number = ptr_binary_data[1];
      free(ptr_binary_data);

#ifdef NV_CTRL_PCI_DOMAIN
      /* Gets the ID's of the GPU defined by gpu_number
       * For further details, see the <NVCtrl/NVCtrlLib.h> */
      err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0,
                                        NV_CTRL_PCI_DOMAIN, &nv_ctrl_pci_domain);
      if (!err)
        continue;
#else
      nv_ctrl_pci_domain = 0;
#endif

      err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0,
                                        NV_CTRL_PCI_BUS, &nv_ctrl_pci_bus);
      if (!err)
        continue;

      err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0,
                                        NV_CTRL_PCI_DEVICE, &nv_ctrl_pci_device);
      if (!err)
        continue;

      err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0,
                                        NV_CTRL_PCI_FUNCTION, &nv_ctrl_pci_func);
      if (!err)
        continue;

      productname = NULL;
      err = XNVCTRLQueryTargetStringAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0,
                                              NV_CTRL_STRING_PRODUCT_NAME, &productname);

      snprintf(name, sizeof(name), ":%u.%u", i, j);

      osdev = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1);
      osdev->name = strdup(name);
      osdev->logical_index = -1;
      osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU;
      hwloc_obj_add_info(osdev, "Backend", "GL");
      hwloc_obj_add_info(osdev, "GPUVendor", "NVIDIA Corporation");
      if (productname)
	hwloc_obj_add_info(osdev, "GPUModel", productname);

      parent = hwloc_pci_belowroot_find_by_busid(topology, nv_ctrl_pci_domain, nv_ctrl_pci_bus, nv_ctrl_pci_device, nv_ctrl_pci_func);
      if (!parent)
	parent = hwloc_pci_find_busid_parent(topology, nv_ctrl_pci_domain, nv_ctrl_pci_bus, nv_ctrl_pci_device, nv_ctrl_pci_func);
      if (!parent)
	parent = hwloc_get_root_obj(topology);

      hwloc_insert_object_by_parent(topology, parent, osdev);

      hwloc_debug("GL device %s (product %s) on PCI 0000:%02x:%02x.%u\n", name, productname,
		  nv_ctrl_pci_domain, nv_ctrl_pci_bus, nv_ctrl_pci_device, nv_ctrl_pci_func);
      res++;
    }
    XCloseDisplay(display);
  }

  return res;
}
예제 #20
0
void
hwloc_look_darwin(struct hwloc_topology *topology)
{
  int64_t _nprocs;
  unsigned nprocs;
  int64_t _npackages;
  unsigned i, j, cpu;
  struct hwloc_obj *obj;
  size_t size;
  int64_t l1cachesize;
  int64_t cacheways[2];
  int64_t l2cachesize;
  int64_t cachelinesize;
  int64_t memsize;

  if (hwloc_get_sysctlbyname("hw.ncpu", &_nprocs) || _nprocs <= 0)
    return;
  nprocs = _nprocs;
  topology->support.discovery->pu = 1;

  hwloc_debug("%u procs\n", nprocs);

  if (!hwloc_get_sysctlbyname("hw.packages", &_npackages) && _npackages > 0) {
    unsigned npackages = _npackages;
    int64_t _cores_per_package;
    int64_t _logical_per_package;
    unsigned logical_per_package;

    hwloc_debug("%u packages\n", npackages);

    if (!hwloc_get_sysctlbyname("machdep.cpu.logical_per_package", &_logical_per_package) && _logical_per_package > 0)
      logical_per_package = _logical_per_package;
    else
      /* Assume the trivia.  */
      logical_per_package = nprocs / npackages;

    hwloc_debug("%u threads per package\n", logical_per_package);


    if (nprocs == npackages * logical_per_package)
      for (i = 0; i < npackages; i++) {
        obj = hwloc_alloc_setup_object(HWLOC_OBJ_SOCKET, i);
        obj->cpuset = hwloc_bitmap_alloc();
        for (cpu = i*logical_per_package; cpu < (i+1)*logical_per_package; cpu++)
          hwloc_bitmap_set(obj->cpuset, cpu);

        hwloc_debug_1arg_bitmap("package %u has cpuset %s\n",
                   i, obj->cpuset);
        hwloc_insert_object_by_cpuset(topology, obj);
      }

    if (!hwloc_get_sysctlbyname("machdep.cpu.cores_per_package", &_cores_per_package) && _cores_per_package > 0) {
      unsigned cores_per_package = _cores_per_package;
      hwloc_debug("%u cores per package\n", cores_per_package);

      if (!(logical_per_package % cores_per_package))
        for (i = 0; i < npackages * cores_per_package; i++) {
          obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i);
          obj->cpuset = hwloc_bitmap_alloc();
          for (cpu = i*(logical_per_package/cores_per_package);
               cpu < (i+1)*(logical_per_package/cores_per_package);
               cpu++)
            hwloc_bitmap_set(obj->cpuset, cpu);

          hwloc_debug_1arg_bitmap("core %u has cpuset %s\n",
                     i, obj->cpuset);
          hwloc_insert_object_by_cpuset(topology, obj);
        }
    }
  }

  if (hwloc_get_sysctlbyname("hw.l1dcachesize", &l1cachesize))
    l1cachesize = 0;

  if (hwloc_get_sysctlbyname("hw.l2cachesize", &l2cachesize))
    l2cachesize = 0;

  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L1_associativity", &cacheways[0]))
    cacheways[0] = 0;
  else if (cacheways[0] == 0xff)
    cacheways[0] = -1;

  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L2_associativity", &cacheways[1]))
    cacheways[1] = 0;
  else if (cacheways[1] == 0xff)
    cacheways[1] = -1;

  if (hwloc_get_sysctlbyname("hw.cachelinesize", &cachelinesize))
    cachelinesize = 0;

  if (hwloc_get_sysctlbyname("hw.memsize", &memsize))
    memsize = 0;

  if (!sysctlbyname("hw.cacheconfig", NULL, &size, NULL, 0)) {
    unsigned n = size / sizeof(uint32_t);
    uint64_t *cacheconfig = NULL;
    uint64_t *cachesize = NULL;
    uint32_t *cacheconfig32 = NULL;

    cacheconfig = malloc(sizeof(uint64_t) * n);
    if (NULL == cacheconfig) {
        goto out;
    }
    cachesize = malloc(sizeof(uint64_t) * n);
    if (NULL == cachesize) {
        goto out;
    }
    cacheconfig32 = malloc(sizeof(uint32_t) * n);
    if (NULL == cacheconfig32) {
        goto out;
    }

    if ((!sysctlbyname("hw.cacheconfig", cacheconfig, &size, NULL, 0))) {
      /* Yeech. Darwin seemingly has changed from 32bit to 64bit integers for
       * cacheconfig, with apparently no way for detection. Assume the machine
       * won't have more than 4 billion cpus */
      if (cacheconfig[0] > 0xFFFFFFFFUL) {
        memcpy(cacheconfig32, cacheconfig, size);
        for (i = 0 ; i < size / sizeof(uint32_t); i++)
          cacheconfig[i] = cacheconfig32[i];
      }

      memset(cachesize, 0, sizeof(uint64_t) * n);
      size = sizeof(uint64_t) * n;
      if (sysctlbyname("hw.cachesize", cachesize, &size, NULL, 0)) {
        if (n > 0)
          cachesize[0] = memsize;
        if (n > 1)
          cachesize[1] = l1cachesize;
        if (n > 2)
          cachesize[2] = l2cachesize;
      }

      hwloc_debug("%s", "caches");
      for (i = 0; i < n && cacheconfig[i]; i++)
        hwloc_debug(" %"PRIu64"(%"PRIu64"kB)", cacheconfig[i], cachesize[i] / 1024);

      cacheconfig[i] = cacheconfig32[i];
      /* Now we know how many caches there are */
      n = i;
      hwloc_debug("\n%u cache levels\n", n - 1);

      /* For each cache level (0 is memory) */
      for (i = 0; i < n; i++) {
        /* cacheconfig tells us how many cpus share it, let's iterate on each cache */
        for (j = 0; j < (nprocs / cacheconfig[i]); j++) {
          obj = hwloc_alloc_setup_object(i?HWLOC_OBJ_CACHE:HWLOC_OBJ_NODE, j);
          if (!i) {
            obj->nodeset = hwloc_bitmap_alloc();
            hwloc_bitmap_set(obj->nodeset, j);
          }
          obj->cpuset = hwloc_bitmap_alloc();
          for (cpu = j*cacheconfig[i];
               cpu < ((j+1)*cacheconfig[i]);
               cpu++)
            hwloc_bitmap_set(obj->cpuset, cpu);

          if (i) {
            hwloc_debug_2args_bitmap("L%ucache %u has cpuset %s\n",
                i, j, obj->cpuset);
            obj->attr->cache.depth = i;
            obj->attr->cache.size = cachesize[i];
            obj->attr->cache.linesize = cachelinesize;
            if (i <= sizeof(cacheways) / sizeof(cacheways[0]))
              obj->attr->cache.associativity = cacheways[i-1];
            else
              obj->attr->cache.associativity = 0;
          } else {
            hwloc_debug_1arg_bitmap("node %u has cpuset %s\n",
                j, obj->cpuset);
	    obj->memory.local_memory = cachesize[i];
	    obj->memory.page_types_len = 2;
	    obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
	    memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
	    obj->memory.page_types[0].size = getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
	    obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
          }

          hwloc_insert_object_by_cpuset(topology, obj);
        }
      }
    }
  out:
    if (NULL != cacheconfig) {
        free(cacheconfig);
    }
    if (NULL != cachesize) {
        free(cachesize);
    }
    if (NULL != cacheconfig32) {
        free(cacheconfig32);
    }
  }


  /* add PU objects */
  hwloc_setup_pu_level(topology, nprocs);

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Darwin");
}
예제 #21
0
static int
hwloc_look_kstat(struct hwloc_topology *topology)
{
  /* FIXME this assumes that all packages are identical */
  char *CPUType = hwloc_solaris_get_chip_type();
  char *CPUModel = hwloc_solaris_get_chip_model();

  kstat_ctl_t *kc = kstat_open();
  kstat_t *ksp;
  kstat_named_t *stat;
  unsigned look_cores = 1, look_chips = 1;

  unsigned Pproc_max = 0;
  unsigned Pproc_alloc = 256;
  struct hwloc_solaris_Pproc {
    unsigned Lpkg, Ppkg, Lcore, Lproc;
  } * Pproc = malloc(Pproc_alloc * sizeof(*Pproc));

  unsigned Lproc_num = 0;
  unsigned Lproc_alloc = 256;
  struct hwloc_solaris_Lproc {
    unsigned Pproc;
  } * Lproc = malloc(Lproc_alloc * sizeof(*Lproc));

  unsigned Lcore_num = 0;
  unsigned Lcore_alloc = 256;
  struct hwloc_solaris_Lcore {
    unsigned Pcore, Ppkg;
  } * Lcore = malloc(Lcore_alloc * sizeof(*Lcore));

  unsigned Lpkg_num = 0;
  unsigned Lpkg_alloc = 256;
  struct hwloc_solaris_Lpkg {
    unsigned Ppkg;
  } * Lpkg = malloc(Lpkg_alloc * sizeof(*Lpkg));

  unsigned pkgid, coreid, cpuid;
  unsigned i;

  for (i = 0; i < Pproc_alloc; i++) {
    Pproc[i].Lproc = -1;
    Pproc[i].Lpkg = -1;
    Pproc[i].Ppkg = -1;
    Pproc[i].Lcore = -1;
  }

  if (!kc) {
    hwloc_debug("kstat_open failed: %s\n", strerror(errno));
    free(Pproc);
    free(Lproc);
    free(Lcore);
    free(Lpkg);
    return 0;
  }

  for (ksp = kc->kc_chain; ksp; ksp = ksp->ks_next)
    {
      if (strncmp("cpu_info", ksp->ks_module, 8))
	continue;

      cpuid = ksp->ks_instance;

      if (kstat_read(kc, ksp, NULL) == -1)
	{
	  fprintf(stderr, "kstat_read failed for CPU%u: %s\n", cpuid, strerror(errno));
	  continue;
	}

      hwloc_debug("cpu%u\n", cpuid);

      if (cpuid >= Pproc_alloc) {
	struct hwloc_solaris_Pproc *tmp = realloc(Pproc, 2*Pproc_alloc * sizeof(*Pproc));
	if (!tmp)
	  goto err;
	Pproc = tmp;
	Pproc_alloc *= 2;
	for(i = Pproc_alloc/2; i < Pproc_alloc; i++) {
	  Pproc[i].Lproc = -1;
	  Pproc[i].Lpkg = -1;
	  Pproc[i].Ppkg = -1;
	  Pproc[i].Lcore = -1;
	}
      }
      Pproc[cpuid].Lproc = Lproc_num;

      if (Lproc_num >= Lproc_alloc) {
	struct hwloc_solaris_Lproc *tmp = realloc(Lproc, 2*Lproc_alloc * sizeof(*Lproc));
	if (!tmp)
	  goto err;
	Lproc = tmp;
	Lproc_alloc *= 2;
      }
      Lproc[Lproc_num].Pproc = cpuid;
      Lproc_num++;

      if (cpuid >= Pproc_max)
        Pproc_max = cpuid + 1;

      stat = (kstat_named_t *) kstat_data_lookup(ksp, "state");
      if (!stat)
          hwloc_debug("could not read state for CPU%u: %s\n", cpuid, strerror(errno));
      else if (stat->data_type != KSTAT_DATA_CHAR)
          hwloc_debug("unknown kstat type %d for cpu state\n", stat->data_type);
      else
        {
          hwloc_debug("cpu%u's state is %s\n", cpuid, stat->value.c);
          if (strcmp(stat->value.c, "on-line"))
            /* not online */
            hwloc_bitmap_clr(topology->levels[0][0]->online_cpuset, cpuid);
        }

      if (look_chips) do {
	/* Get Chip ID */
	stat = (kstat_named_t *) kstat_data_lookup(ksp, "chip_id");
	if (!stat)
	  {
	    if (Lpkg_num)
	      fprintf(stderr, "could not read package id for CPU%u: %s\n", cpuid, strerror(errno));
	    else
	      hwloc_debug("could not read package id for CPU%u: %s\n", cpuid, strerror(errno));
	    look_chips = 0;
	    continue;
	  }
	switch (stat->data_type) {
	  case KSTAT_DATA_INT32:
	    pkgid = stat->value.i32;
	    break;
	  case KSTAT_DATA_UINT32:
	    pkgid = stat->value.ui32;
	    break;
#ifdef _INT64_TYPE
	  case KSTAT_DATA_UINT64:
	    pkgid = stat->value.ui64;
	    break;
	  case KSTAT_DATA_INT64:
	    pkgid = stat->value.i64;
	    break;
#endif
	  default:
	    fprintf(stderr, "chip_id type %d unknown\n", stat->data_type);
	    look_chips = 0;
	    continue;
	}
	Pproc[cpuid].Ppkg = pkgid;
	for (i = 0; i < Lpkg_num; i++)
	  if (pkgid == Lpkg[i].Ppkg)
	    break;
	Pproc[cpuid].Lpkg = i;
	hwloc_debug("%u on package %u (%u)\n", cpuid, i, pkgid);
	if (i == Lpkg_num) {
	  if (Lpkg_num == Lpkg_alloc) {
	    struct hwloc_solaris_Lpkg *tmp = realloc(Lpkg, 2*Lpkg_alloc * sizeof(*Lpkg));
	    if (!tmp)
	      goto err;
	    Lpkg = tmp;
	    Lpkg_alloc *= 2;
	  }
	  Lpkg[Lpkg_num++].Ppkg = pkgid;
	}
      } while(0);

      if (look_cores) do {
	/* Get Core ID */
	stat = (kstat_named_t *) kstat_data_lookup(ksp, "core_id");
	if (!stat)
	  {
	    if (Lcore_num)
	      fprintf(stderr, "could not read core id for CPU%u: %s\n", cpuid, strerror(errno));
	    else
	      hwloc_debug("could not read core id for CPU%u: %s\n", cpuid, strerror(errno));
	    look_cores = 0;
	    continue;
	  }
	switch (stat->data_type) {
	  case KSTAT_DATA_INT32:
	    coreid = stat->value.i32;
	    break;
	  case KSTAT_DATA_UINT32:
	    coreid = stat->value.ui32;
	    break;
#ifdef _INT64_TYPE
	  case KSTAT_DATA_UINT64:
	    coreid = stat->value.ui64;
	    break;
	  case KSTAT_DATA_INT64:
	    coreid = stat->value.i64;
	    break;
#endif
	  default:
	    fprintf(stderr, "core_id type %d unknown\n", stat->data_type);
	    look_cores = 0;
	    continue;
	}
	for (i = 0; i < Lcore_num; i++)
	  if (coreid == Lcore[i].Pcore && Pproc[cpuid].Ppkg == Lcore[i].Ppkg)
	    break;
	Pproc[cpuid].Lcore = i;
	hwloc_debug("%u on core %u (%u)\n", cpuid, i, coreid);
	if (i == Lcore_num) {
	  if (Lcore_num == Lcore_alloc) {
	    struct hwloc_solaris_Lcore *tmp = realloc(Lcore, 2*Lcore_alloc * sizeof(*Lcore));
	    if (!tmp)
	      goto err;
	    Lcore = tmp;
	    Lcore_alloc *= 2;
	  }
	  Lcore[Lcore_num].Ppkg = Pproc[cpuid].Ppkg;
	  Lcore[Lcore_num++].Pcore = coreid;
	}
      } while(0);

      /* Note: there is also clog_id for the Thread ID (not unique) and
       * pkg_core_id for the core ID (not unique).  They are not useful to us
       * however. */
    }

  if (look_chips) {
    struct hwloc_obj *obj;
    unsigned j,k;
    hwloc_debug("%d Packages\n", Lpkg_num);
    for (j = 0; j < Lpkg_num; j++) {
      obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, Lpkg[j].Ppkg);
      if (CPUType)
	hwloc_obj_add_info(obj, "CPUType", CPUType);
      if (CPUModel)
	hwloc_obj_add_info(obj, "CPUModel", CPUModel);
      obj->cpuset = hwloc_bitmap_alloc();
      for(k=0; k<Pproc_max; k++)
	if (Pproc[k].Lpkg == j)
	  hwloc_bitmap_set(obj->cpuset, k);
      hwloc_debug_1arg_bitmap("Package %d has cpuset %s\n", j, obj->cpuset);
      hwloc_insert_object_by_cpuset(topology, obj);
    }
    hwloc_debug("%s", "\n");
  }

  if (look_cores) {
    struct hwloc_obj *obj;
    unsigned j,k;
    hwloc_debug("%d Cores\n", Lcore_num);
    for (j = 0; j < Lcore_num; j++) {
      obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, Lcore[j].Pcore);
      obj->cpuset = hwloc_bitmap_alloc();
      for(k=0; k<Pproc_max; k++)
	if (Pproc[k].Lcore == j)
	  hwloc_bitmap_set(obj->cpuset, k);
      hwloc_debug_1arg_bitmap("Core %d has cpuset %s\n", j, obj->cpuset);
      hwloc_insert_object_by_cpuset(topology, obj);
    }
    hwloc_debug("%s", "\n");
  }
  if (Lproc_num) {
    struct hwloc_obj *obj;
    unsigned j,k;
    hwloc_debug("%d PUs\n", Lproc_num);
    for (j = 0; j < Lproc_num; j++) {
      obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, Lproc[j].Pproc);
      obj->cpuset = hwloc_bitmap_alloc();
      for(k=0; k<Pproc_max; k++)
	if (Pproc[k].Lproc == j)
	  hwloc_bitmap_set(obj->cpuset, k);
      hwloc_debug_1arg_bitmap("PU %d has cpuset %s\n", j, obj->cpuset);
      hwloc_insert_object_by_cpuset(topology, obj);
    }
    hwloc_debug("%s", "\n");
  }

  kstat_close(kc);

  free(Pproc);
  free(Lproc);
  free(Lcore);
  free(Lpkg);
  return Lproc_num > 0;

 err:
  kstat_close(kc);

  free(Pproc);
  free(Lproc);
  free(Lcore);
  free(Lpkg);
  return 0;
}
예제 #22
0
static int
hwloc_cuda_backend_notify_new_object(struct hwloc_backend *backend,
				     struct hwloc_obj *pcidev)
{
  struct hwloc_topology *topology = backend->topology;
  struct hwloc_cuda_backend_data_s *data = backend->private_data;
  unsigned i;

  if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO)))
    return 0;

  if (!hwloc_topology_is_thissystem(topology)) {
    hwloc_debug("%s", "\nno CUDA detection (not thissystem)\n");
    return 0;
  }

  if (HWLOC_OBJ_PCI_DEVICE != pcidev->type)
    return 0;

  if (data->nr_devices == (unsigned) -1) {
    /* first call, lookup all devices */
    hwloc_cuda_query_devices(data);
    /* if it fails, data->nr_devices = 0 so we won't do anything below and in next callbacks */
  }

  if (!data->nr_devices)
    /* found no devices */
    return 0;

  for(i=0; i<data->nr_devices; i++) {
    struct hwloc_cuda_device_info_s *info = &data->devices[i];
    char cuda_name[32];
    char number[32];
    struct cudaDeviceProp prop;
    hwloc_obj_t cuda_device;
    cudaError_t cures;
    unsigned cores;

    if (info->pcidomain != pcidev->attr->pcidev.domain)
      continue;
    if (info->pcibus != pcidev->attr->pcidev.bus)
      continue;
    if (info->pcidev != pcidev->attr->pcidev.dev)
      continue;
    if (info->pcifunc != pcidev->attr->pcidev.func)
      continue;

    cuda_device = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1);
    snprintf(cuda_name, sizeof(cuda_name), "cuda%d", info->idx);
    cuda_device->name = strdup(cuda_name);
    cuda_device->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN;
    cuda_device->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC;

    hwloc_obj_add_info(cuda_device, "CoProcType", "CUDA");
    hwloc_obj_add_info(cuda_device, "Backend", "CUDA");
    hwloc_obj_add_info(cuda_device, "GPUVendor", "NVIDIA Corporation");

    cures = cudaGetDeviceProperties(&prop, info->idx);
    if (!cures)
      hwloc_obj_add_info(cuda_device, "GPUModel", prop.name);

    snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.totalGlobalMem) >> 10);
    hwloc_obj_add_info(cuda_device, "CUDAGlobalMemorySize", number);

    snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.l2CacheSize) >> 10);
    hwloc_obj_add_info(cuda_device, "CUDAL2CacheSize", number);

    snprintf(number, sizeof(number), "%d", prop.multiProcessorCount);
    hwloc_obj_add_info(cuda_device, "CUDAMultiProcessors", number);

    cores = hwloc_cuda_cores_per_MP(prop.major, prop.minor);
    if (cores) {
      snprintf(number, sizeof(number), "%u", cores);
      hwloc_obj_add_info(cuda_device, "CUDACoresPerMP", number);
    }

    snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.sharedMemPerBlock) >> 10);
    hwloc_obj_add_info(cuda_device, "CUDASharedMemorySizePerMP", number);

    hwloc_insert_object_by_parent(topology, pcidev, cuda_device);
    return 1;
  }

  return 0;
}
예제 #23
0
static int
hwloc_look_darwin(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  int64_t _nprocs;
  unsigned nprocs;
  int64_t _npackages;
  unsigned i, j, cpu;
  struct hwloc_obj *obj;
  size_t size;
  int64_t l1dcachesize, l1icachesize;
  int64_t cacheways[2];
  int64_t l2cachesize;
  int64_t cachelinesize;
  int64_t memsize;
  char cpumodel[64];

  if (topology->levels[0][0]->cpuset)
    /* somebody discovered things */
    return -1;

  hwloc_alloc_obj_cpusets(topology->levels[0][0]);

  if (hwloc_get_sysctlbyname("hw.ncpu", &_nprocs) || _nprocs <= 0)
    return -1;
  nprocs = _nprocs;
  topology->support.discovery->pu = 1;

  hwloc_debug("%u procs\n", nprocs);

  size = sizeof(cpumodel);
  if (sysctlbyname("machdep.cpu.brand_string", cpumodel, &size, NULL, 0))
    cpumodel[0] = '\0';

  if (!hwloc_get_sysctlbyname("hw.packages", &_npackages) && _npackages > 0) {
    unsigned npackages = _npackages;
    int64_t _cores_per_package;
    int64_t _logical_per_package;
    unsigned logical_per_package;

    hwloc_debug("%u packages\n", npackages);

    if (!hwloc_get_sysctlbyname("machdep.cpu.logical_per_package", &_logical_per_package) && _logical_per_package > 0)
      logical_per_package = _logical_per_package;
    else
      /* Assume the trivia.  */
      logical_per_package = nprocs / npackages;

    hwloc_debug("%u threads per package\n", logical_per_package);

    if (nprocs == npackages * logical_per_package
	&& hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE))
      for (i = 0; i < npackages; i++) {
        obj = hwloc_alloc_setup_object(HWLOC_OBJ_PACKAGE, i);
        obj->cpuset = hwloc_bitmap_alloc();
        for (cpu = i*logical_per_package; cpu < (i+1)*logical_per_package; cpu++)
          hwloc_bitmap_set(obj->cpuset, cpu);

        hwloc_debug_1arg_bitmap("package %u has cpuset %s\n",
                   i, obj->cpuset);

        if (cpumodel[0] != '\0')
          hwloc_obj_add_info(obj, "CPUModel", cpumodel);
        hwloc_insert_object_by_cpuset(topology, obj);
      }
    else
      if (cpumodel[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel);

    if (!hwloc_get_sysctlbyname("machdep.cpu.cores_per_package", &_cores_per_package) && _cores_per_package > 0
	&& hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
      unsigned cores_per_package = _cores_per_package;
      hwloc_debug("%u cores per package\n", cores_per_package);

      if (!(logical_per_package % cores_per_package))
        for (i = 0; i < npackages * cores_per_package; i++) {
          obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, i);
          obj->cpuset = hwloc_bitmap_alloc();
          for (cpu = i*(logical_per_package/cores_per_package);
               cpu < (i+1)*(logical_per_package/cores_per_package);
               cpu++)
            hwloc_bitmap_set(obj->cpuset, cpu);

          hwloc_debug_1arg_bitmap("core %u has cpuset %s\n",
                     i, obj->cpuset);
          hwloc_insert_object_by_cpuset(topology, obj);
        }
    }
  } else
    if (cpumodel[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel);

  if (hwloc_get_sysctlbyname("hw.l1dcachesize", &l1dcachesize))
    l1dcachesize = 0;

  if (hwloc_get_sysctlbyname("hw.l1icachesize", &l1icachesize))
    l1icachesize = 0;

  if (hwloc_get_sysctlbyname("hw.l2cachesize", &l2cachesize))
    l2cachesize = 0;

  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L1_associativity", &cacheways[0]))
    cacheways[0] = 0;
  else if (cacheways[0] == 0xff)
    cacheways[0] = -1;

  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L2_associativity", &cacheways[1]))
    cacheways[1] = 0;
  else if (cacheways[1] == 0xff)
    cacheways[1] = -1;

  if (hwloc_get_sysctlbyname("hw.cachelinesize", &cachelinesize))
    cachelinesize = 0;

  if (hwloc_get_sysctlbyname("hw.memsize", &memsize))
    memsize = 0;

  if (!sysctlbyname("hw.cacheconfig", NULL, &size, NULL, 0)) {
    unsigned n = size / sizeof(uint32_t);
    uint64_t *cacheconfig = NULL;
    uint64_t *cachesize = NULL;
    uint32_t *cacheconfig32 = NULL;

    cacheconfig = malloc(sizeof(uint64_t) * n);
    if (NULL == cacheconfig) {
        goto out;
    }
    cachesize = malloc(sizeof(uint64_t) * n);
    if (NULL == cachesize) {
        goto out;
    }
    cacheconfig32 = malloc(sizeof(uint32_t) * n);
    if (NULL == cacheconfig32) {
        goto out;
    }

    if ((!sysctlbyname("hw.cacheconfig", cacheconfig, &size, NULL, 0))) {
      /* Yeech. Darwin seemingly has changed from 32bit to 64bit integers for
       * cacheconfig, with apparently no way for detection. Assume the machine
       * won't have more than 4 billion cpus */
      if (cacheconfig[0] > 0xFFFFFFFFUL) {
        memcpy(cacheconfig32, cacheconfig, size);
        for (i = 0 ; i < size / sizeof(uint32_t); i++)
          cacheconfig[i] = cacheconfig32[i];
      }

      memset(cachesize, 0, sizeof(uint64_t) * n);
      size = sizeof(uint64_t) * n;
      if (sysctlbyname("hw.cachesize", cachesize, &size, NULL, 0)) {
        if (n > 0)
          cachesize[0] = memsize;
        if (n > 1)
          cachesize[1] = l1dcachesize;
        if (n > 2)
          cachesize[2] = l2cachesize;
      }

      hwloc_debug("%s", "caches");
      for (i = 0; i < n && cacheconfig[i]; i++)
        hwloc_debug(" %"PRIu64"(%"PRIu64"kB)", cacheconfig[i], cachesize[i] / 1024);

      /* Now we know how many caches there are */
      n = i;
      hwloc_debug("\n%u cache levels\n", n - 1);

      /* For each cache level (0 is memory) */
      for (i = 0; i < n; i++) {
        /* cacheconfig tells us how many cpus share it, let's iterate on each cache */
        for (j = 0; j < (nprocs / cacheconfig[i]); j++) {
	  if (!i) {
	    obj = hwloc_alloc_setup_object(HWLOC_OBJ_NUMANODE, j);
            obj->nodeset = hwloc_bitmap_alloc();
            hwloc_bitmap_set(obj->nodeset, j);
          } else {
	    obj = hwloc_alloc_setup_object(HWLOC_OBJ_L1CACHE+i-1, -1);
	  }
          obj->cpuset = hwloc_bitmap_alloc();
          for (cpu = j*cacheconfig[i];
               cpu < ((j+1)*cacheconfig[i]);
               cpu++)
            hwloc_bitmap_set(obj->cpuset, cpu);

          if (i == 1 && l1icachesize
	      && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
            /* FIXME assuming that L1i and L1d are shared the same way. Darwin
             * does not yet provide a way to know.  */
            hwloc_obj_t l1i = hwloc_alloc_setup_object(HWLOC_OBJ_L1ICACHE, -1);
            l1i->cpuset = hwloc_bitmap_dup(obj->cpuset);
            hwloc_debug_1arg_bitmap("L1icache %u has cpuset %s\n",
                j, l1i->cpuset);
            l1i->attr->cache.depth = i;
            l1i->attr->cache.size = l1icachesize;
            l1i->attr->cache.linesize = cachelinesize;
            l1i->attr->cache.associativity = 0;
            l1i->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;

            hwloc_insert_object_by_cpuset(topology, l1i);
          }
          if (i) {
            hwloc_debug_2args_bitmap("L%ucache %u has cpuset %s\n",
                i, j, obj->cpuset);
            obj->attr->cache.depth = i;
            obj->attr->cache.size = cachesize[i];
            obj->attr->cache.linesize = cachelinesize;
            if (i <= sizeof(cacheways) / sizeof(cacheways[0]))
              obj->attr->cache.associativity = cacheways[i-1];
            else
              obj->attr->cache.associativity = 0;
            if (i == 1 && l1icachesize)
              obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
            else
              obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
          } else {
            hwloc_debug_1arg_bitmap("node %u has cpuset %s\n",
                j, obj->cpuset);
	    obj->memory.local_memory = cachesize[i];
	    obj->memory.page_types_len = 2;
	    obj->memory.page_types = malloc(2*sizeof(*obj->memory.page_types));
	    memset(obj->memory.page_types, 0, 2*sizeof(*obj->memory.page_types));
	    obj->memory.page_types[0].size = hwloc_getpagesize();
#ifdef HAVE__SC_LARGE_PAGESIZE
	    obj->memory.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
          }

	  if (hwloc_filter_check_keep_object_type(topology, obj->type))
	    hwloc_insert_object_by_cpuset(topology, obj);
	  else
	    hwloc_free_unlinked_object(obj); /* FIXME: don't built at all, just build the cpuset in case l1i needs it */
        }
      }
    }
  out:
    free(cacheconfig);
    free(cachesize);
    free(cacheconfig32);
  }


  /* add PU objects */
  hwloc_setup_pu_level(topology, nprocs);

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Darwin");
  hwloc_add_uname_info(topology, NULL);
  return 0;
}
예제 #24
0
static int
hwloc_look_pci(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  enum hwloc_type_filter_e pfilter, bfilter;
  struct hwloc_obj *tree = NULL, *tmp;
  int ret;
  struct pci_device_iterator *iter;
  struct pci_device *pcidev;

  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, &pfilter);
  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_BRIDGE, &bfilter);
  if (bfilter == HWLOC_TYPE_FILTER_KEEP_NONE
      && pfilter == HWLOC_TYPE_FILTER_KEEP_NONE)
    return 0;

  /* don't do anything if another backend attached PCI already
   * (they are attached to root until later in the core discovery)
   */
  tmp = hwloc_get_root_obj(topology)->io_first_child;
  while (tmp) {
    if (tmp->type == HWLOC_OBJ_PCI_DEVICE
	|| (tmp->type == HWLOC_OBJ_BRIDGE && tmp->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI)) {
      hwloc_debug("%s", "PCI objects already added, ignoring linuxpci backend.\n");
      return 0;
    }
    tmp = tmp->next_sibling;
  }

  hwloc_debug("%s", "\nScanning PCI buses...\n");

  /* initialize PCI scanning */
  ret = pci_system_init();
  if (ret) {
    hwloc_debug("%s", "Can not initialize libpciaccess\n");
    return -1;
  }

  iter = pci_slot_match_iterator_create(NULL);

  /* iterate over devices */
  for (pcidev = pci_device_next(iter);
       pcidev;
       pcidev = pci_device_next(iter))
  {
    const char *vendorname, *devicename;
    unsigned char config_space_cache[CONFIG_SPACE_CACHESIZE];
    hwloc_obj_type_t type;
    struct hwloc_obj *obj;
    unsigned domain;
    unsigned device_class;
    unsigned short tmp16;
    unsigned offset;

    /* initialize the config space in case we fail to read it (missing permissions, etc). */
    memset(config_space_cache, 0xff, CONFIG_SPACE_CACHESIZE);
    pci_device_probe(pcidev);
    pci_device_cfg_read(pcidev, config_space_cache, 0, CONFIG_SPACE_CACHESIZE, NULL);

    /* try to read the domain */
    domain = pcidev->domain;

    /* try to read the device_class */
    device_class = pcidev->device_class >> 8;

    /* bridge or pci dev? */
    type = hwloc_pcidisc_check_bridge_type(device_class, config_space_cache);

    /* filtered? */
    if (type == HWLOC_OBJ_PCI_DEVICE) {
      enum hwloc_type_filter_e filter;
      hwloc_topology_get_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, &filter);
      if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
	continue;
      if (filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT
	  && !hwloc_filter_check_pcidev_subtype_important(device_class))
	continue;
    } else if (type == HWLOC_OBJ_BRIDGE) {
      enum hwloc_type_filter_e filter;
      hwloc_topology_get_type_filter(topology, HWLOC_OBJ_BRIDGE, &filter);
      if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
	continue;
      /* HWLOC_TYPE_FILTER_KEEP_IMPORTANT filtered later in the core */
    }

    /* fixup SR-IOV buggy VF device/vendor IDs */
    if (0xffff == pcidev->vendor_id && 0xffff == pcidev->device_id) {
      /* SR-IOV puts ffff:ffff in Virtual Function config space.
       * The actual VF device ID is stored at a special (dynamic) location in the Physical Function config space.
       * VF and PF have the same vendor ID.
       *
       * libpciaccess just returns ffff:ffff, needs to be fixed.
       * linuxpci is OK because sysfs files are already fixed in the kernel.
       * (pciutils is OK when it uses those Linux sysfs files.)
       *
       * Reading these files is an easy way to work around the libpciaccess issue on Linux,
       * but we have no way to know if this is caused by SR-IOV or not.
       *
       * TODO:
       *  If PF has CAP_ID_PCIX or CAP_ID_EXP (offset>0),
       *  look for extended capability PCI_EXT_CAP_ID_SRIOV (need extended config space (more than 256 bytes)),
       *  then read the VF device ID after it (PCI_IOV_DID bytes later).
       *  Needs access to extended config space (needs root on Linux).
       * TODO:
       *  Add string info attributes in VF and PF objects?
       */
#ifdef HWLOC_LINUX_SYS
      /* Workaround for Linux (the kernel returns the VF device/vendor IDs). */
      char path[64];
      char value[16];
      FILE *file;
      size_t read;

      snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/vendor",
	       domain, pcidev->bus, pcidev->dev, pcidev->func);
      file = fopen(path, "r");
      if (file) {
	read = fread(value, 1, sizeof(value), file);
	fclose(file);
	if (read)
	  /* fixup the pciaccess struct so that pci_device_get_vendor_name() is correct later. */
          pcidev->vendor_id = strtoul(value, NULL, 16);
      }

      snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/device",
	       domain, pcidev->bus, pcidev->dev, pcidev->func);
      file = fopen(path, "r");
      if (file) {
	read = fread(value, 1, sizeof(value), file);
	fclose(file);
	if (read)
	  /* fixup the pciaccess struct so that pci_device_get_device_name() is correct later. */
          pcidev->device_id = strtoul(value, NULL, 16);
      }
#endif
    }

    obj = hwloc_alloc_setup_object(topology, type, HWLOC_UNKNOWN_INDEX);
    obj->attr->pcidev.domain = domain;
    obj->attr->pcidev.bus = pcidev->bus;
    obj->attr->pcidev.dev = pcidev->dev;
    obj->attr->pcidev.func = pcidev->func;
    obj->attr->pcidev.vendor_id = pcidev->vendor_id;
    obj->attr->pcidev.device_id = pcidev->device_id;
    obj->attr->pcidev.class_id = device_class;
    obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID];

    obj->attr->pcidev.linkspeed = 0; /* unknown */
    offset = hwloc_pcidisc_find_cap(config_space_cache, PCI_CAP_ID_EXP);

    if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE)
      hwloc_pcidisc_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed);

    if (type == HWLOC_OBJ_BRIDGE) {
      if (hwloc_pcidisc_setup_bridge_attr(obj, config_space_cache) < 0)
	continue;
    }

    if (obj->type == HWLOC_OBJ_PCI_DEVICE) {
      memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_VENDOR_ID], sizeof(tmp16));
      obj->attr->pcidev.subvendor_id = tmp16;
      memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_ID], sizeof(tmp16));
      obj->attr->pcidev.subdevice_id = tmp16;
    } else {
      /* TODO:
       * bridge must lookup PCI_CAP_ID_SSVID and then look at offset+PCI_SSVID_VENDOR/DEVICE_ID
       * cardbus must look at PCI_CB_SUBSYSTEM_VENDOR_ID and PCI_CB_SUBSYSTEM_ID
       */
    }

    /* get the vendor name */
    vendorname = pci_device_get_vendor_name(pcidev);
    if (vendorname && *vendorname)
      hwloc_obj_add_info(obj, "PCIVendor", vendorname);

    /* get the device name */
    devicename = pci_device_get_device_name(pcidev);
    if (devicename && *devicename)
      hwloc_obj_add_info(obj, "PCIDevice", devicename);

    hwloc_debug("  %04x:%02x:%02x.%01x %04x %04x:%04x %s %s\n",
		domain, pcidev->bus, pcidev->dev, pcidev->func,
		device_class, pcidev->vendor_id, pcidev->device_id,
		vendorname && *vendorname ? vendorname : "??",
		devicename && *devicename ? devicename : "??");

    hwloc_pcidisc_tree_insert_by_busid(&tree, obj);
  }

  /* finalize device scanning */
  pci_iterator_destroy(iter);
  pci_system_cleanup();

  hwloc_pcidisc_tree_attach(topology, tree);
  return 0;
}
예제 #25
0
static int
hwloc_look_pci(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  struct hwloc_obj *first_obj = NULL, *last_obj = NULL;
  int ret;
  struct pci_device_iterator *iter;
  struct pci_device *pcidev;
#ifdef HWLOC_LINUX_SYS
  DIR *dir;
#endif

  if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO)))
    return 0;

  if (hwloc_get_next_pcidev(topology, NULL)) {
    hwloc_debug("%s", "PCI objects already added, ignoring pci backend.\n");
    return 0;
  }

  if (!hwloc_topology_is_thissystem(topology)) {
    hwloc_debug("%s", "\nno PCI detection (not thissystem)\n");
    return 0;
  }

  hwloc_debug("%s", "\nScanning PCI buses...\n");

  /* initialize PCI scanning */
  ret = pci_system_init();
  if (ret) {
    hwloc_debug("%s", "Can not initialize libpciaccess\n");
    return -1;
  }

  iter = pci_slot_match_iterator_create(NULL);

  /* iterate over devices */
  for (pcidev = pci_device_next(iter);
       pcidev;
       pcidev = pci_device_next(iter))
  {
    const char *vendorname, *devicename, *fullname;
    unsigned char config_space_cache[CONFIG_SPACE_CACHESIZE];
    struct hwloc_obj *obj;
    unsigned os_index;
    unsigned domain;
    unsigned device_class;
    unsigned short tmp16;
    char name[128];
    unsigned offset;

    /* initialize the config space in case we fail to read it (missing permissions, etc). */
    memset(config_space_cache, 0xff, CONFIG_SPACE_CACHESIZE);
    pci_device_probe(pcidev);
    pci_device_cfg_read(pcidev, config_space_cache, 0, CONFIG_SPACE_CACHESIZE, NULL);

    /* try to read the domain */
    domain = pcidev->domain;

    /* try to read the device_class */
    device_class = pcidev->device_class >> 8;

    /* fixup SR-IOV buggy VF device/vendor IDs */
    if (0xffff == pcidev->vendor_id && 0xffff == pcidev->device_id) {
      /* SR-IOV puts ffff:ffff in Virtual Function config space.
       * The actual VF device ID is stored at a special (dynamic) location in the Physical Function config space.
       * VF and PF have the same vendor ID.
       *
       * libpciaccess just returns ffff:ffff, needs to be fixed.
       * linuxpci is OK because sysfs files are already fixed the kernel.
       * (pciutils is OK when it uses those Linux sysfs files.)
       *
       * Reading these files is an easy way to work around the libpciaccess issue on Linux,
       * but we have no way to know if this is caused by SR-IOV or not.
       *
       * TODO:
       *  If PF has CAP_ID_PCIX or CAP_ID_EXP (offset>0),
       *  look for extended capability PCI_EXT_CAP_ID_SRIOV (need extended config space (more than 256 bytes)),
       *  then read the VF device ID after it (PCI_IOV_DID bytes later).
       *  Needs access to extended config space (needs root on Linux).
       * TODO:
       *  Add string info attributes in VF and PF objects?
       */
#ifdef HWLOC_LINUX_SYS
      /* Workaround for Linux (the kernel returns the VF device/vendor IDs). */
      char path[64];
      char value[16];
      FILE *file;
      size_t read;

      snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/vendor",
	       domain, pcidev->bus, pcidev->dev, pcidev->func);
      file = fopen(path, "r");
      if (file) {
	read = fread(value, 1, sizeof(value), file);
	fclose(file);
	if (read)
	  /* fixup the pciaccess struct so that pci_device_get_vendor_name() is correct later. */
          pcidev->vendor_id = strtoul(value, NULL, 16);
      }

      snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/device",
	       domain, pcidev->bus, pcidev->dev, pcidev->func);
      file = fopen(path, "r");
      if (file) {
	read = fread(value, 1, sizeof(value), file);
	fclose(file);
	if (read)
	  /* fixup the pciaccess struct so that pci_device_get_device_name() is correct later. */
          pcidev->device_id = strtoul(value, NULL, 16);
      }
#endif
    }

    /* might be useful for debugging (note that domain might be truncated) */
    os_index = (domain << 20) + (pcidev->bus << 12) + (pcidev->dev << 4) + pcidev->func;

    obj = hwloc_alloc_setup_object(HWLOC_OBJ_PCI_DEVICE, os_index);
    obj->attr->pcidev.domain = domain;
    obj->attr->pcidev.bus = pcidev->bus;
    obj->attr->pcidev.dev = pcidev->dev;
    obj->attr->pcidev.func = pcidev->func;
    obj->attr->pcidev.vendor_id = pcidev->vendor_id;
    obj->attr->pcidev.device_id = pcidev->device_id;
    obj->attr->pcidev.class_id = device_class;
    obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID];

    obj->attr->pcidev.linkspeed = 0; /* unknown */
    offset = hwloc_pci_find_cap(config_space_cache, PCI_CAP_ID_EXP);

    if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE)
      hwloc_pci_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed);

    hwloc_pci_prepare_bridge(obj, config_space_cache);

    if (obj->type == HWLOC_OBJ_PCI_DEVICE) {
      memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_VENDOR_ID], sizeof(tmp16));
      obj->attr->pcidev.subvendor_id = tmp16;
      memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_ID], sizeof(tmp16));
      obj->attr->pcidev.subdevice_id = tmp16;
    } else {
      /* TODO:
       * bridge must lookup PCI_CAP_ID_SSVID and then look at offset+PCI_SSVID_VENDOR/DEVICE_ID
       * cardbus must look at PCI_CB_SUBSYSTEM_VENDOR_ID and PCI_CB_SUBSYSTEM_ID
       */
    }

    /* get the vendor name */
    vendorname = pci_device_get_vendor_name(pcidev);
    if (vendorname && *vendorname)
      hwloc_obj_add_info(obj, "PCIVendor", vendorname);

    /* get the device name */
    devicename = pci_device_get_device_name(pcidev);
    if (devicename && *devicename)
      hwloc_obj_add_info(obj, "PCIDevice", devicename);

    /* generate or get the fullname */
    snprintf(name, sizeof(name), "%s%s%s",
	     vendorname ? vendorname : "",
	     vendorname && devicename ? " " : "",
	     devicename ? devicename : "");
    fullname = name;
    if (*name)
      obj->name = strdup(name);
    hwloc_debug("  %04x:%02x:%02x.%01x %04x %04x:%04x %s\n",
		domain, pcidev->bus, pcidev->dev, pcidev->func,
		device_class, pcidev->vendor_id, pcidev->device_id,
		fullname && *fullname ? fullname : "??");

    /* queue the object for now */
    if (first_obj)
      last_obj->next_sibling = obj;
    else
      first_obj = obj;
    last_obj = obj;
  }

  /* finalize device scanning */
  pci_iterator_destroy(iter);
  pci_system_cleanup();

#ifdef HWLOC_LINUX_SYS
  dir = opendir("/sys/bus/pci/slots/");
  if (dir) {
    struct dirent *dirent;
    while ((dirent = readdir(dir)) != NULL) {
      char path[64];
      FILE *file;
      if (dirent->d_name[0] == '.')
	continue;
      snprintf(path, sizeof(path), "/sys/bus/pci/slots/%s/address", dirent->d_name);
      file = fopen(path, "r");
      if (file) {
	unsigned domain, bus, dev;
	if (fscanf(file, "%x:%x:%x", &domain, &bus, &dev) == 3) {
	  hwloc_obj_t obj = first_obj;
	  while (obj) {
	    if (obj->attr->pcidev.domain == domain
		&& obj->attr->pcidev.bus == bus
		&& obj->attr->pcidev.dev == dev
		&& obj->attr->pcidev.func == 0) {
	      hwloc_obj_add_info(obj, "PCISlot", dirent->d_name);
	      break;
	    }
	    obj = obj->next_sibling;
	  }
	}
	fclose(file);
      }
    }
    closedir(dir);
  }
#endif

  return hwloc_insert_pci_device_list(backend, first_obj);
}
예제 #26
0
static int
hwloc_look_hpux(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  int has_numa = sysconf(_SC_CCNUMA_SUPPORT) == 1;
  hwloc_obj_t *nodes = NULL, obj;
  spu_t currentcpu;
  ldom_t currentnode;
  int i, nbnodes = 0;

  if (topology->levels[0][0]->cpuset)
    /* somebody discovered things */
    return -1;

  hwloc_alloc_obj_cpusets(topology->levels[0][0]);

  if (has_numa) {
    nbnodes = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ?
      MPC_GETNUMLDOMS_SYS : MPC_GETNUMLDOMS, 0, 0);

    hwloc_debug("%d nodes\n", nbnodes);

    nodes = malloc(nbnodes * sizeof(*nodes));

    i = 0;
    currentnode = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ?
      MPC_GETFIRSTLDOM_SYS : MPC_GETFIRSTLDOM, 0, 0);
    while (currentnode != -1 && i < nbnodes) {
      hwloc_debug("node %d is %d\n", i, currentnode);
      nodes[i] = obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, currentnode);
      obj->cpuset = hwloc_bitmap_alloc();
      obj->nodeset = hwloc_bitmap_alloc();
      hwloc_bitmap_set(obj->nodeset, currentnode);
      /* TODO: obj->attr->node.memory_kB */
      /* TODO: obj->attr->node.huge_page_free */

      currentnode = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ?
        MPC_GETNEXTLDOM_SYS : MPC_GETNEXTLDOM, currentnode, 0);
      i++;
    }
  }

  i = 0;
  currentcpu = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ?
      MPC_GETFIRSTSPU_SYS : MPC_GETFIRSTSPU, 0,0);
  while (currentcpu != -1) {
    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, currentcpu);
    obj->cpuset = hwloc_bitmap_alloc();
    hwloc_bitmap_set(obj->cpuset, currentcpu);

    hwloc_debug("cpu %d\n", currentcpu);

    if (nodes) {
      /* Add this cpu to its node */
      currentnode = mpctl(MPC_SPUTOLDOM, currentcpu, 0);
      /* Hopefully it's just the same as previous cpu */
      if (i >= nbnodes || (ldom_t) nodes[i]->os_index != currentnode)
        for (i = 0; i < nbnodes; i++)
          if ((ldom_t) nodes[i]->os_index == currentnode)
            break;
      if (i < nbnodes) {
        hwloc_bitmap_set(nodes[i]->cpuset, currentcpu);
        hwloc_debug("is in node %d\n", i);
      } else {
        hwloc_debug("%s", "is in no node?!\n");
      }
    }

    /* Add cpu */
    hwloc_insert_object_by_cpuset(topology, obj);

    currentcpu = mpctl((topology->flags & HWLOC_TOPOLOGY_FLAG_WHOLE_SYSTEM) ?
      MPC_GETNEXTSPU_SYS : MPC_GETNEXTSPU, currentcpu, 0);
  }

  if (nodes) {
    /* Add nodes */
    for (i = 0 ; i < nbnodes ; i++)
      hwloc_insert_object_by_cpuset(topology, nodes[i]);
    free(nodes);
  }

  topology->support.discovery->pu = 1;

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "HP-UX");
  hwloc_add_uname_info(topology, NULL);
  return 0;
}
예제 #27
0
static int
hwloc_look_aix(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  int i;

  if (topology->levels[0][0]->cpuset)
    /* somebody discovered things */
    return 0;

  hwloc_alloc_obj_cpusets(topology->levels[0][0]);

  /* TODO: R_LGPGDEF/R_LGPGFREE for large pages */

  hwloc_debug("Note: SMPSDL is at %d\n", rs_getinfo(NULL, R_SMPSDL, 0));
#ifdef R_REF1SDL
  hwloc_debug("Note: REF1SDL is at %d\n", rs_getinfo(NULL, R_REF1SDL, 0));
#endif

  for (i=0; i<=rs_getinfo(NULL, R_MAXSDL, 0); i++)
    {
      int known = 0;
#if 0
      if (i == rs_getinfo(NULL, R_SMPSDL, 0))
	/* Not enabled for now because I'm not sure what it corresponds to. On
	 * decrypthon it contains all the cpus. Is it a "machine" or a "system"
	 * level ?
	 */
	{
	  hwloc_debug("looking AIX \"SMP\" sdl %d\n", i);
	  look_rset(i, HWLOC_OBJ_MACHINE, topology, i);
	  known = 1;
	}
#endif
      if (i == rs_getinfo(NULL, R_MCMSDL, 0))
	{
	  hwloc_debug("looking AIX node sdl %d\n", i);
	  look_rset(i, HWLOC_OBJ_NODE, topology, i);
	  known = 1;
	}
#      ifdef R_L2CSDL
      if (i == rs_getinfo(NULL, R_L2CSDL, 0))
	{
	  hwloc_debug("looking AIX L2 sdl %d\n", i);
	  look_rset(i, HWLOC_OBJ_CACHE, topology, i);
	  known = 1;
	}
#      endif
#      ifdef R_PCORESDL
      if (i == rs_getinfo(NULL, R_PCORESDL, 0))
	{
	  hwloc_debug("looking AIX core sdl %d\n", i);
	  look_rset(i, HWLOC_OBJ_CORE, topology, i);
	  known = 1;
	}
#      endif
      if (i == rs_getinfo(NULL, R_MAXSDL, 0))
	{
	  hwloc_debug("looking AIX max sdl %d\n", i);
	  look_rset(i, HWLOC_OBJ_PU, topology, i);
	  known = 1;
          topology->support.discovery->pu = 1;
	}

      /* Don't know how it should be rendered, make a misc object for it.  */
      if (!known)
	{
	  hwloc_debug("looking AIX unknown sdl %d\n", i);
	  look_rset(i, HWLOC_OBJ_GROUP, topology, i);
	}
    }

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "AIX");
  if (topology->is_thissystem)
    hwloc_add_uname_info(topology);
  return 1;
}
예제 #28
0
static int
hwloc_look_darwin(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  int64_t _nprocs;
  unsigned nprocs;
  int64_t _npackages;
  unsigned i, j, cpu;
  struct hwloc_obj *obj;
  size_t size;
  int64_t l1dcachesize, l1icachesize;
  int64_t cacheways[2];
  int64_t l2cachesize;
  int64_t l3cachesize;
  int64_t cachelinesize;
  int64_t memsize;
  int64_t _tmp;
  char cpumodel[64];
  char cpuvendor[64];
  char cpufamilynumber[20], cpumodelnumber[20], cpustepping[20];
  int gotnuma = 0;
  int gotnumamemory = 0;

  if (topology->levels[0][0]->cpuset)
    /* somebody discovered things */
    return -1;

  hwloc_alloc_root_sets(topology->levels[0][0]);

  /* Don't use hwloc_fallback_nbprocessors() because it would return online cpus only,
   * while we need all cpus when computing logical_per_package, etc below.
   * We don't know which CPUs are offline, but Darwin doesn't support binding anyway.
   *
   * TODO: try hw.logicalcpu_max
   */

  if (hwloc_get_sysctlbyname("hw.logicalcpu", &_nprocs) || _nprocs <= 0)
    /* fallback to deprecated way */
    if (hwloc_get_sysctlbyname("hw.ncpu", &_nprocs) || _nprocs <= 0)
      return -1;

  nprocs = _nprocs;
  topology->support.discovery->pu = 1;

  hwloc_debug("%u procs\n", nprocs);

  size = sizeof(cpuvendor);
  if (sysctlbyname("machdep.cpu.vendor", cpuvendor, &size, NULL, 0))
    cpuvendor[0] = '\0';

  size = sizeof(cpumodel);
  if (sysctlbyname("machdep.cpu.brand_string", cpumodel, &size, NULL, 0))
    cpumodel[0] = '\0';

  if (hwloc_get_sysctlbyname("machdep.cpu.family", &_tmp))
    cpufamilynumber[0] = '\0';
  else
    snprintf(cpufamilynumber, sizeof(cpufamilynumber), "%lld", (long long) _tmp);
  if (hwloc_get_sysctlbyname("machdep.cpu.model", &_tmp))
    cpumodelnumber[0] = '\0';
  else
    snprintf(cpumodelnumber, sizeof(cpumodelnumber), "%lld", (long long) _tmp);
  /* .extfamily and .extmodel are already added to .family and .model */
  if (hwloc_get_sysctlbyname("machdep.cpu.stepping", &_tmp))
    cpustepping[0] = '\0';
  else
    snprintf(cpustepping, sizeof(cpustepping), "%lld", (long long) _tmp);

  if (!hwloc_get_sysctlbyname("hw.packages", &_npackages) && _npackages > 0) {
    unsigned npackages = _npackages;
    int64_t _cores_per_package;
    unsigned cores_per_package;
    int64_t _logical_per_package;
    unsigned logical_per_package;

    hwloc_debug("%u packages\n", npackages);

    if (!hwloc_get_sysctlbyname("machdep.cpu.thread_count", &_logical_per_package) && _logical_per_package > 0)
      /* official/modern way */
      logical_per_package = _logical_per_package;
    else if (!hwloc_get_sysctlbyname("machdep.cpu.logical_per_package", &_logical_per_package) && _logical_per_package > 0)
      /* old way, gives the max supported by this "kind" of processor,
       * can be larger than the actual number for this model.
       */
      logical_per_package = _logical_per_package;
    else
      /* Assume the trivia.  */
      logical_per_package = nprocs / npackages;

    hwloc_debug("%u threads per package\n", logical_per_package);

    if (nprocs == npackages * logical_per_package
	&& hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE))
      for (i = 0; i < npackages; i++) {
        obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, i);
        obj->cpuset = hwloc_bitmap_alloc();
        for (cpu = i*logical_per_package; cpu < (i+1)*logical_per_package; cpu++)
          hwloc_bitmap_set(obj->cpuset, cpu);

        hwloc_debug_1arg_bitmap("package %u has cpuset %s\n",
                   i, obj->cpuset);

        if (cpuvendor[0] != '\0')
          hwloc_obj_add_info(obj, "CPUVendor", cpuvendor);
        if (cpumodel[0] != '\0')
          hwloc_obj_add_info(obj, "CPUModel", cpumodel);
        if (cpufamilynumber[0] != '\0')
          hwloc_obj_add_info(obj, "CPUFamilyNumber", cpufamilynumber);
        if (cpumodelnumber[0] != '\0')
          hwloc_obj_add_info(obj, "CPUModelNumber", cpumodelnumber);
        if (cpustepping[0] != '\0')
          hwloc_obj_add_info(obj, "CPUStepping", cpustepping);

        hwloc_insert_object_by_cpuset(topology, obj);
      }
    else {
      if (cpuvendor[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUVendor", cpuvendor);
      if (cpumodel[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel);
      if (cpufamilynumber[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUFamilyNumber", cpufamilynumber);
      if (cpumodelnumber[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUModelNumber", cpumodelnumber);
      if (cpustepping[0] != '\0')
        hwloc_obj_add_info(topology->levels[0][0], "CPUStepping", cpustepping);
    }

    if (!hwloc_get_sysctlbyname("machdep.cpu.core_count", &_cores_per_package) && _cores_per_package > 0)
      /* official/modern way */
      cores_per_package = _cores_per_package;
    else if (!hwloc_get_sysctlbyname("machdep.cpu.cores_per_package", &_cores_per_package) && _cores_per_package > 0)
      /* old way, gives the max supported by this "kind" of processor,
       * can be larger than the actual number for this model.
       */
      cores_per_package = _cores_per_package;
    else
      /* no idea */
      cores_per_package = 0;

    if (cores_per_package > 0
	&& hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
      hwloc_debug("%u cores per package\n", cores_per_package);

      if (!(logical_per_package % cores_per_package))
        for (i = 0; i < npackages * cores_per_package; i++) {
          obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, i);
          obj->cpuset = hwloc_bitmap_alloc();
          for (cpu = i*(logical_per_package/cores_per_package);
               cpu < (i+1)*(logical_per_package/cores_per_package);
               cpu++)
            hwloc_bitmap_set(obj->cpuset, cpu);

          hwloc_debug_1arg_bitmap("core %u has cpuset %s\n",
                     i, obj->cpuset);
          hwloc_insert_object_by_cpuset(topology, obj);
        }
    }
  } else {
    if (cpuvendor[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUVendor", cpuvendor);
    if (cpumodel[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUModel", cpumodel);
    if (cpufamilynumber[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUFamilyNumber", cpufamilynumber);
    if (cpumodelnumber[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUModelNumber", cpumodelnumber);
    if (cpustepping[0] != '\0')
      hwloc_obj_add_info(topology->levels[0][0], "CPUStepping", cpustepping);
  }

  if (hwloc_get_sysctlbyname("hw.l1dcachesize", &l1dcachesize))
    l1dcachesize = 0;

  if (hwloc_get_sysctlbyname("hw.l1icachesize", &l1icachesize))
    l1icachesize = 0;

  if (hwloc_get_sysctlbyname("hw.l2cachesize", &l2cachesize))
    l2cachesize = 0;

  if (hwloc_get_sysctlbyname("hw.l3cachesize", &l3cachesize))
    l3cachesize = 0;

  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L1_associativity", &cacheways[0]))
    cacheways[0] = 0;
  else if (cacheways[0] == 0xff)
    cacheways[0] = -1;

  if (hwloc_get_sysctlbyname("machdep.cpu.cache.L2_associativity", &cacheways[1]))
    cacheways[1] = 0;
  else if (cacheways[1] == 0xff)
    cacheways[1] = -1;

  if (hwloc_get_sysctlbyname("hw.cachelinesize", &cachelinesize))
    cachelinesize = 0;

  if (hwloc_get_sysctlbyname("hw.memsize", &memsize))
    memsize = 0;

  if (!sysctlbyname("hw.cacheconfig", NULL, &size, NULL, 0)) {
    unsigned n = size / sizeof(uint32_t);
    uint64_t cacheconfig[n];
    uint64_t cachesize[n];
    uint32_t cacheconfig32[n];

    if ((!sysctlbyname("hw.cacheconfig", cacheconfig, &size, NULL, 0))) {
      /* Yeech. Darwin seemingly has changed from 32bit to 64bit integers for
       * cacheconfig, with apparently no way for detection. Assume the machine
       * won't have more than 4 billion cpus */
      if (cacheconfig[0] > 0xFFFFFFFFUL) {
        memcpy(cacheconfig32, cacheconfig, size);
        for (i = 0 ; i < size / sizeof(uint32_t); i++)
          cacheconfig[i] = cacheconfig32[i];
      }

      memset(cachesize, 0, sizeof(uint64_t) * n);
      size = sizeof(uint64_t) * n;
      if (sysctlbyname("hw.cachesize", cachesize, &size, NULL, 0)) {
        if (n > 0)
          cachesize[0] = memsize;
        if (n > 1)
          cachesize[1] = l1dcachesize;
        if (n > 2)
          cachesize[2] = l2cachesize;
        if (n > 3)
          cachesize[3] = l3cachesize;
      }

      hwloc_debug("%s", "caches");
      for (i = 0; i < n && cacheconfig[i]; i++)
        hwloc_debug(" %"PRIu64"(%"PRIu64"kB)", cacheconfig[i], cachesize[i] / 1024);

      /* Now we know how many caches there are */
      n = i;
      hwloc_debug("\n%u cache levels\n", n - 1);

      /* For each cache level (0 is memory) */
      for (i = 0; i < n; i++) {
        /* cacheconfig tells us how many cpus share it, let's iterate on each cache */
        for (j = 0; j < (nprocs / cacheconfig[i]); j++) {
	  if (!i) {
	    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, j);
            obj->nodeset = hwloc_bitmap_alloc();
            hwloc_bitmap_set(obj->nodeset, j);
	    gotnuma++;
          } else {
	    obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1CACHE+i-1, HWLOC_UNKNOWN_INDEX);
	  }
          obj->cpuset = hwloc_bitmap_alloc();
          for (cpu = j*cacheconfig[i];
               cpu < ((j+1)*cacheconfig[i]);
               cpu++)
            hwloc_bitmap_set(obj->cpuset, cpu);

          if (i == 1 && l1icachesize
	      && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_L1ICACHE)) {
            /* FIXME assuming that L1i and L1d are shared the same way. Darwin
             * does not yet provide a way to know.  */
            hwloc_obj_t l1i = hwloc_alloc_setup_object(topology, HWLOC_OBJ_L1ICACHE, HWLOC_UNKNOWN_INDEX);
            l1i->cpuset = hwloc_bitmap_dup(obj->cpuset);
            hwloc_debug_1arg_bitmap("L1icache %u has cpuset %s\n",
                j, l1i->cpuset);
            l1i->attr->cache.depth = i;
            l1i->attr->cache.size = l1icachesize;
            l1i->attr->cache.linesize = cachelinesize;
            l1i->attr->cache.associativity = 0;
            l1i->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;

            hwloc_insert_object_by_cpuset(topology, l1i);
          }
          if (i) {
            hwloc_debug_2args_bitmap("L%ucache %u has cpuset %s\n",
                i, j, obj->cpuset);
            obj->attr->cache.depth = i;
            obj->attr->cache.size = cachesize[i];
            obj->attr->cache.linesize = cachelinesize;
            if (i <= sizeof(cacheways) / sizeof(cacheways[0]))
              obj->attr->cache.associativity = cacheways[i-1];
            else
              obj->attr->cache.associativity = 0;
            if (i == 1 && l1icachesize)
              obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
            else
              obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
          } else {
            hwloc_debug_1arg_bitmap("node %u has cpuset %s\n",
                j, obj->cpuset);
	    if (cachesize[i]) {
	      obj->attr->numanode.local_memory = cachesize[i];
	      gotnumamemory++;
	    }
	    obj->attr->numanode.page_types_len = 2;
	    obj->attr->numanode.page_types = malloc(2*sizeof(*obj->attr->numanode.page_types));
	    memset(obj->attr->numanode.page_types, 0, 2*sizeof(*obj->attr->numanode.page_types));
	    obj->attr->numanode.page_types[0].size = hwloc_getpagesize();
#if HAVE_DECL__SC_LARGE_PAGESIZE
	    obj->attr->numanode.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
          }

	  if (hwloc_filter_check_keep_object_type(topology, obj->type))
	    hwloc_insert_object_by_cpuset(topology, obj);
	  else
	    hwloc_free_unlinked_object(obj); /* FIXME: don't built at all, just build the cpuset in case l1i needs it */
        }
      }
    }
  }

  if (gotnuma)
    topology->support.discovery->numa = 1;
  if (gotnumamemory)
    topology->support.discovery->numa = 1;

  /* add PU objects */
  hwloc_setup_pu_level(topology, nprocs);

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "Darwin");
  hwloc_add_uname_info(topology, NULL);
  return 0;
}