示例#1
0
static int
hwloc_cuda_discover(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  enum hwloc_type_filter_e filter;
  cudaError_t cures;
  int nb, i;

  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter);
  if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
    return 0;

  cures = cudaGetDeviceCount(&nb);
  if (cures)
    return -1;

  for (i = 0; i < nb; i++) {
    int domain, bus, dev;
    char cuda_name[32];
    char number[32];
    struct cudaDeviceProp prop;
    hwloc_obj_t cuda_device, parent;
    unsigned cores;

    cuda_device = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1);
    snprintf(cuda_name, sizeof(cuda_name), "cuda%d", i);
    cuda_device->name = strdup(cuda_name);
    cuda_device->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN;
    cuda_device->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC;

    cuda_device->subtype = strdup("CUDA");
    hwloc_obj_add_info(cuda_device, "Backend", "CUDA");
    hwloc_obj_add_info(cuda_device, "GPUVendor", "NVIDIA Corporation");

    cures = cudaGetDeviceProperties(&prop, i);
    if (!cures)
      hwloc_obj_add_info(cuda_device, "GPUModel", prop.name);

    snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.totalGlobalMem) >> 10);
    hwloc_obj_add_info(cuda_device, "CUDAGlobalMemorySize", number);

    snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.l2CacheSize) >> 10);
    hwloc_obj_add_info(cuda_device, "CUDAL2CacheSize", number);

    snprintf(number, sizeof(number), "%d", prop.multiProcessorCount);
    hwloc_obj_add_info(cuda_device, "CUDAMultiProcessors", number);

    cores = hwloc_cuda_cores_per_MP(prop.major, prop.minor);
    if (cores) {
      snprintf(number, sizeof(number), "%u", cores);
      hwloc_obj_add_info(cuda_device, "CUDACoresPerMP", number);
    }

    snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.sharedMemPerBlock) >> 10);
    hwloc_obj_add_info(cuda_device, "CUDASharedMemorySizePerMP", number);

    parent = NULL;
    if (hwloc_cudart_get_device_pci_ids(NULL /* topology unused */, i, &domain, &bus, &dev) == 0) {
      parent = hwloc_pci_belowroot_find_by_busid(topology, domain, bus, dev, 0);
      if (!parent)
	parent = hwloc_pci_find_busid_parent(topology, domain, bus, dev, 0);
    }
    if (!parent)
      parent = hwloc_get_root_obj(topology);

    hwloc_insert_object_by_parent(topology, parent, cuda_device);
  }

  return 0;
}
示例#2
0
static int
hwloc_look_pci(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  enum hwloc_type_filter_e pfilter, bfilter;
  struct hwloc_obj *tree = NULL, *tmp;
  int ret;
  struct pci_device_iterator *iter;
  struct pci_device *pcidev;

  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, &pfilter);
  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_BRIDGE, &bfilter);
  if (bfilter == HWLOC_TYPE_FILTER_KEEP_NONE
      && pfilter == HWLOC_TYPE_FILTER_KEEP_NONE)
    return 0;

  /* don't do anything if another backend attached PCI already
   * (they are attached to root until later in the core discovery)
   */
  tmp = hwloc_get_root_obj(topology)->io_first_child;
  while (tmp) {
    if (tmp->type == HWLOC_OBJ_PCI_DEVICE
	|| (tmp->type == HWLOC_OBJ_BRIDGE && tmp->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI)) {
      hwloc_debug("%s", "PCI objects already added, ignoring linuxpci backend.\n");
      return 0;
    }
    tmp = tmp->next_sibling;
  }

  hwloc_debug("%s", "\nScanning PCI buses...\n");

  /* initialize PCI scanning */
  ret = pci_system_init();
  if (ret) {
    hwloc_debug("%s", "Can not initialize libpciaccess\n");
    return -1;
  }

  iter = pci_slot_match_iterator_create(NULL);

  /* iterate over devices */
  for (pcidev = pci_device_next(iter);
       pcidev;
       pcidev = pci_device_next(iter))
  {
    const char *vendorname, *devicename;
    unsigned char config_space_cache[CONFIG_SPACE_CACHESIZE];
    hwloc_obj_type_t type;
    struct hwloc_obj *obj;
    unsigned domain;
    unsigned device_class;
    unsigned short tmp16;
    unsigned offset;

    /* initialize the config space in case we fail to read it (missing permissions, etc). */
    memset(config_space_cache, 0xff, CONFIG_SPACE_CACHESIZE);
    pci_device_probe(pcidev);
    pci_device_cfg_read(pcidev, config_space_cache, 0, CONFIG_SPACE_CACHESIZE, NULL);

    /* try to read the domain */
    domain = pcidev->domain;

    /* try to read the device_class */
    device_class = pcidev->device_class >> 8;

    /* bridge or pci dev? */
    type = hwloc_pcidisc_check_bridge_type(device_class, config_space_cache);

    /* filtered? */
    if (type == HWLOC_OBJ_PCI_DEVICE) {
      enum hwloc_type_filter_e filter;
      hwloc_topology_get_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, &filter);
      if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
	continue;
      if (filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT
	  && !hwloc_filter_check_pcidev_subtype_important(device_class))
	continue;
    } else if (type == HWLOC_OBJ_BRIDGE) {
      enum hwloc_type_filter_e filter;
      hwloc_topology_get_type_filter(topology, HWLOC_OBJ_BRIDGE, &filter);
      if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
	continue;
      /* HWLOC_TYPE_FILTER_KEEP_IMPORTANT filtered later in the core */
    }

    /* fixup SR-IOV buggy VF device/vendor IDs */
    if (0xffff == pcidev->vendor_id && 0xffff == pcidev->device_id) {
      /* SR-IOV puts ffff:ffff in Virtual Function config space.
       * The actual VF device ID is stored at a special (dynamic) location in the Physical Function config space.
       * VF and PF have the same vendor ID.
       *
       * libpciaccess just returns ffff:ffff, needs to be fixed.
       * linuxpci is OK because sysfs files are already fixed in the kernel.
       * (pciutils is OK when it uses those Linux sysfs files.)
       *
       * Reading these files is an easy way to work around the libpciaccess issue on Linux,
       * but we have no way to know if this is caused by SR-IOV or not.
       *
       * TODO:
       *  If PF has CAP_ID_PCIX or CAP_ID_EXP (offset>0),
       *  look for extended capability PCI_EXT_CAP_ID_SRIOV (need extended config space (more than 256 bytes)),
       *  then read the VF device ID after it (PCI_IOV_DID bytes later).
       *  Needs access to extended config space (needs root on Linux).
       * TODO:
       *  Add string info attributes in VF and PF objects?
       */
#ifdef HWLOC_LINUX_SYS
      /* Workaround for Linux (the kernel returns the VF device/vendor IDs). */
      char path[64];
      char value[16];
      FILE *file;
      size_t read;

      snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/vendor",
	       domain, pcidev->bus, pcidev->dev, pcidev->func);
      file = fopen(path, "r");
      if (file) {
	read = fread(value, 1, sizeof(value), file);
	fclose(file);
	if (read)
	  /* fixup the pciaccess struct so that pci_device_get_vendor_name() is correct later. */
          pcidev->vendor_id = strtoul(value, NULL, 16);
      }

      snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/device",
	       domain, pcidev->bus, pcidev->dev, pcidev->func);
      file = fopen(path, "r");
      if (file) {
	read = fread(value, 1, sizeof(value), file);
	fclose(file);
	if (read)
	  /* fixup the pciaccess struct so that pci_device_get_device_name() is correct later. */
          pcidev->device_id = strtoul(value, NULL, 16);
      }
#endif
    }

    obj = hwloc_alloc_setup_object(topology, type, HWLOC_UNKNOWN_INDEX);
    obj->attr->pcidev.domain = domain;
    obj->attr->pcidev.bus = pcidev->bus;
    obj->attr->pcidev.dev = pcidev->dev;
    obj->attr->pcidev.func = pcidev->func;
    obj->attr->pcidev.vendor_id = pcidev->vendor_id;
    obj->attr->pcidev.device_id = pcidev->device_id;
    obj->attr->pcidev.class_id = device_class;
    obj->attr->pcidev.revision = config_space_cache[PCI_REVISION_ID];

    obj->attr->pcidev.linkspeed = 0; /* unknown */
    offset = hwloc_pcidisc_find_cap(config_space_cache, PCI_CAP_ID_EXP);

    if (offset > 0 && offset + 20 /* size of PCI express block up to link status */ <= CONFIG_SPACE_CACHESIZE)
      hwloc_pcidisc_find_linkspeed(config_space_cache, offset, &obj->attr->pcidev.linkspeed);

    if (type == HWLOC_OBJ_BRIDGE) {
      if (hwloc_pcidisc_setup_bridge_attr(obj, config_space_cache) < 0)
	continue;
    }

    if (obj->type == HWLOC_OBJ_PCI_DEVICE) {
      memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_VENDOR_ID], sizeof(tmp16));
      obj->attr->pcidev.subvendor_id = tmp16;
      memcpy(&tmp16, &config_space_cache[PCI_SUBSYSTEM_ID], sizeof(tmp16));
      obj->attr->pcidev.subdevice_id = tmp16;
    } else {
      /* TODO:
       * bridge must lookup PCI_CAP_ID_SSVID and then look at offset+PCI_SSVID_VENDOR/DEVICE_ID
       * cardbus must look at PCI_CB_SUBSYSTEM_VENDOR_ID and PCI_CB_SUBSYSTEM_ID
       */
    }

    /* get the vendor name */
    vendorname = pci_device_get_vendor_name(pcidev);
    if (vendorname && *vendorname)
      hwloc_obj_add_info(obj, "PCIVendor", vendorname);

    /* get the device name */
    devicename = pci_device_get_device_name(pcidev);
    if (devicename && *devicename)
      hwloc_obj_add_info(obj, "PCIDevice", devicename);

    hwloc_debug("  %04x:%02x:%02x.%01x %04x %04x:%04x %s %s\n",
		domain, pcidev->bus, pcidev->dev, pcidev->func,
		device_class, pcidev->vendor_id, pcidev->device_id,
		vendorname && *vendorname ? vendorname : "??",
		devicename && *devicename ? devicename : "??");

    hwloc_pcidisc_tree_insert_by_busid(&tree, obj);
  }

  /* finalize device scanning */
  pci_iterator_destroy(iter);
  pci_system_cleanup();

  hwloc_pcidisc_tree_attach(topology, tree);
  return 0;
}
示例#3
0
static int
hwloc_opencl_discover(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  enum hwloc_type_filter_e filter;
  cl_platform_id *platform_ids = NULL;
  cl_uint nr_platforms;
  cl_int clret;
  unsigned j;

  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter);
  if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
    return 0;

  clret = clGetPlatformIDs(0, NULL, &nr_platforms);
  if (CL_SUCCESS != clret || !nr_platforms)
    return -1;
  hwloc_debug("%u OpenCL platforms\n", nr_platforms);
  platform_ids = malloc(nr_platforms * sizeof(*platform_ids));
  if (!platform_ids)
    return -1;
  clret = clGetPlatformIDs(nr_platforms, platform_ids, &nr_platforms);
  if (CL_SUCCESS != clret || !nr_platforms) {
    free(platform_ids);
    return -1;
  }

  for(j=0; j<nr_platforms; j++) {
    cl_device_id *device_ids = NULL;
    cl_uint nr_devices;
    unsigned i;

    clret = clGetDeviceIDs(platform_ids[j], CL_DEVICE_TYPE_ALL, 0, NULL, &nr_devices);
    if (CL_SUCCESS != clret)
      continue;
    device_ids = malloc(nr_devices * sizeof(*device_ids));
    clret = clGetDeviceIDs(platform_ids[j], CL_DEVICE_TYPE_ALL, nr_devices, device_ids, &nr_devices);
    if (CL_SUCCESS != clret) {
      free(device_ids);
      continue;
    }

    for(i=0; i<nr_devices; i++) {
      cl_platform_id platform_id = 0;
      cl_device_type type;
#ifdef CL_DEVICE_TOPOLOGY_AMD
      cl_device_topology_amd amdtopo;
#endif
      cl_ulong globalmemsize;
      cl_uint computeunits;
      hwloc_obj_t osdev, parent;
      char buffer[64];

      hwloc_debug("This is opencl%ud%u\n", j, i);

#ifdef CL_DEVICE_TOPOLOGY_AMD
      clret = clGetDeviceInfo(device_ids[i], CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
      if (CL_SUCCESS != clret) {
        hwloc_debug("no AMD-specific device information: %d\n", clret);
        continue;
      } else if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) {
        hwloc_debug("AMD-specific device topology reports non-PCIe device type: %u\n", amdtopo.raw.type);
        continue;
      }
#else
      continue;
#endif

      osdev = hwloc_alloc_setup_object(topology, HWLOC_OBJ_OS_DEVICE, -1);
      snprintf(buffer, sizeof(buffer), "opencl%ud%u", j, i);
      osdev->name = strdup(buffer);
      osdev->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN;
      osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC;

      osdev->subtype = strdup("OpenCL");
      hwloc_obj_add_info(osdev, "Backend", "OpenCL");

      clGetDeviceInfo(device_ids[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
      if (type == CL_DEVICE_TYPE_GPU)
        hwloc_obj_add_info(osdev, "OpenCLDeviceType", "GPU");
      else if (type == CL_DEVICE_TYPE_ACCELERATOR)
        hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Accelerator");
      else if (type == CL_DEVICE_TYPE_CPU)
        hwloc_obj_add_info(osdev, "OpenCLDeviceType", "CPU");
      else if (type == CL_DEVICE_TYPE_CUSTOM)
        hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Custom");
      else
        hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Unknown");

      buffer[0] = '\0';
      clGetDeviceInfo(device_ids[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL);
      if (buffer[0] != '\0')
        hwloc_obj_add_info(osdev, "GPUVendor", buffer);

      buffer[0] = '\0';
#ifdef CL_DEVICE_BOARD_NAME_AMD
      clGetDeviceInfo(device_ids[i], CL_DEVICE_BOARD_NAME_AMD, sizeof(buffer), buffer, NULL);
#else
      clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL);
#endif
      if (buffer[0] != '\0')
        hwloc_obj_add_info(osdev, "GPUModel", buffer);

      snprintf(buffer, sizeof(buffer), "%u", j);
      hwloc_obj_add_info(osdev, "OpenCLPlatformIndex", buffer);

      buffer[0] = '\0';
      clret = clGetDeviceInfo(device_ids[i], CL_DEVICE_PLATFORM, sizeof(platform_id), &platform_id, NULL);
      if (CL_SUCCESS == clret) {
        clGetPlatformInfo(platform_id, CL_PLATFORM_NAME, sizeof(buffer), buffer, NULL);
        if (buffer[0] != '\0')
          hwloc_obj_add_info(osdev, "OpenCLPlatformName", buffer);
      }

      snprintf(buffer, sizeof(buffer), "%u", i);
      hwloc_obj_add_info(osdev, "OpenCLPlatformDeviceIndex", buffer);

      clGetDeviceInfo(device_ids[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(computeunits), &computeunits, NULL);
      snprintf(buffer, sizeof(buffer), "%u", computeunits);
      hwloc_obj_add_info(osdev, "OpenCLComputeUnits", buffer);

      clGetDeviceInfo(device_ids[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(globalmemsize), &globalmemsize, NULL);
      snprintf(buffer, sizeof(buffer), "%llu", (unsigned long long) globalmemsize / 1024);
      hwloc_obj_add_info(osdev, "OpenCLGlobalMemorySize", buffer);

      parent = NULL;
#ifdef CL_DEVICE_TOPOLOGY_AMD
      parent = hwloc_pcidisc_find_by_busid(topology, 0, amdtopo.pcie.bus, amdtopo.pcie.device, amdtopo.pcie.function);
      if (!parent)
        parent = hwloc_pcidisc_find_busid_parent(topology, 0, amdtopo.pcie.bus, amdtopo.pcie.device, amdtopo.pcie.function);
#endif
      if (!parent)
        parent = hwloc_get_root_obj(topology);

      hwloc_insert_object_by_parent(topology, parent, osdev);
    }
    free(device_ids);
  }
  free(platform_ids);
  return 0;
}
示例#4
0
static int
hwloc_gl_discover(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  enum hwloc_type_filter_e filter;
  unsigned i;
  int err;

  hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter);
  if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
    return 0;

  for (i = 0; i < HWLOC_GL_SERVER_MAX; ++i) {
    Display* display;
    char displayName[10];
    int opcode, event, error;
    unsigned j;

    /* open X server */
    snprintf(displayName, sizeof(displayName), ":%u", i);
    display = XOpenDisplay(displayName);
    if (!display)
      continue;

    /* Check for NV-CONTROL extension (it's per server) */
    if(!XQueryExtension(display, "NV-CONTROL", &opcode, &event, &error)) {
      XCloseDisplay(display);
      continue;
    }

    for (j = 0; j < (unsigned) ScreenCount(display) && j < HWLOC_GL_SCREEN_MAX; j++) {
      hwloc_obj_t osdev, parent;
      const int screen = j;
      unsigned int *ptr_binary_data;
      int data_length;
      int gpu_number;
      int nv_ctrl_pci_bus;
      int nv_ctrl_pci_device;
      int nv_ctrl_pci_domain;
      int nv_ctrl_pci_func;
      char *productname;
      char name[64];

      /* the server supports NV-CONTROL but it may contain non-NVIDIA screen that don't support it */
      if (!XNVCTRLIsNvScreen(display, screen))
        continue;

      /* Gets the GPU number attached to the default screen. */
      /* For further details, see the <NVCtrl/NVCtrlLib.h> */
      err = XNVCTRLQueryTargetBinaryData (display, NV_CTRL_TARGET_TYPE_X_SCREEN, screen, 0,
                                          NV_CTRL_BINARY_DATA_GPUS_USED_BY_XSCREEN,
                                          (unsigned char **) &ptr_binary_data, &data_length);
      if (!err)
        continue;

      gpu_number = ptr_binary_data[1];
      free(ptr_binary_data);

#ifdef NV_CTRL_PCI_DOMAIN
      /* Gets the ID's of the GPU defined by gpu_number
       * For further details, see the <NVCtrl/NVCtrlLib.h> */
      err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0,
                                        NV_CTRL_PCI_DOMAIN, &nv_ctrl_pci_domain);
      if (!err)
        continue;
#else
      nv_ctrl_pci_domain = 0;
#endif

      err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0,
                                        NV_CTRL_PCI_BUS, &nv_ctrl_pci_bus);
      if (!err)
        continue;

      err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0,
                                        NV_CTRL_PCI_DEVICE, &nv_ctrl_pci_device);
      if (!err)
        continue;

      err = XNVCTRLQueryTargetAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0,
                                        NV_CTRL_PCI_FUNCTION, &nv_ctrl_pci_func);
      if (!err)
        continue;

      productname = NULL;
      err = XNVCTRLQueryTargetStringAttribute(display, NV_CTRL_TARGET_TYPE_GPU, gpu_number, 0,
                                              NV_CTRL_STRING_PRODUCT_NAME, &productname);

      snprintf(name, sizeof(name), ":%u.%u", i, j);

      osdev = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1);
      osdev->name = strdup(name);
      osdev->logical_index = -1;
      osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU;
      hwloc_obj_add_info(osdev, "Backend", "GL");
      hwloc_obj_add_info(osdev, "GPUVendor", "NVIDIA Corporation");
      if (productname)
	hwloc_obj_add_info(osdev, "GPUModel", productname);

      parent = hwloc_pci_belowroot_find_by_busid(topology, nv_ctrl_pci_domain, nv_ctrl_pci_bus, nv_ctrl_pci_device, nv_ctrl_pci_func);
      if (!parent)
	parent = hwloc_pci_find_busid_parent(topology, nv_ctrl_pci_domain, nv_ctrl_pci_bus, nv_ctrl_pci_device, nv_ctrl_pci_func);
      if (!parent)
	parent = hwloc_get_root_obj(topology);

      hwloc_insert_object_by_parent(topology, parent, osdev);

      hwloc_debug("GL device %s (product %s) on PCI 0000:%02x:%02x.%u\n", name, productname,
		  nv_ctrl_pci_domain, nv_ctrl_pci_bus, nv_ctrl_pci_device, nv_ctrl_pci_func);
    }
    XCloseDisplay(display);
  }

  return 0;
}