Exemplo n.º 1
0
/**
 * Looks up the NUMA node that a CUDA device is attached to.
 *
 * @param gpu Device index handed to hwloc_cudart_get_device_pcidev().
 * @return The os_index of the HWLOC_OBJ_NODE ancestor of the device's PCI
 *         object. Returns 0 when the PCI object or the ancestor cannot be
 *         found, or when the build does not define both GPU_DEV and HWLOC.
 */
unsigned int Hwloc::getNumaNodeOfGpu( unsigned int gpu ) {
#if defined(GPU_DEV) && defined(HWLOC)
   // Resolve the device to its PCI object in the topology.
   hwloc_obj_t pciDev = hwloc_cudart_get_device_pcidev ( _hwlocTopology, gpu );
   if ( pciDev == NULL ) {
      return 0;
   }

   // Climb the tree until a NUMA node object is reached (if any).
   hwloc_obj_t numaObj = hwloc_get_ancestor_obj_by_type( _hwlocTopology, HWLOC_OBJ_NODE, pciDev );
   return ( numaObj == NULL ) ? 0 : numaObj->os_index;
#else
   return 0;
#endif
}
/*
 * initializeMICDevices()
 *
 * Enumerates the Intel MIC OS devices hwloc reports for the topology and
 * stores the ones that belong to this chip in this->devices.
 *
 * Devices are probed by index, starting at 0, until
 * hwloc_intel_mic_get_device_osdev_by_index() returns NULL.
 *
 * @param chip_obj - hwloc object for this chip, or NULL when there is no NUMA
 *                   chip object; with NULL every MIC found is stored.
 * @param topology - hwloc topology to query.
 * @return PBSE_NONE in all cases.
 */
int Chip::initializeMICDevices(hwloc_obj_t chip_obj, hwloc_topology_t topology)
  {
  for (int idx = 0; ; idx++)
    {
    hwloc_obj_t mic_obj = hwloc_intel_mic_get_device_osdev_by_index(topology, idx);

    if (mic_obj == NULL)
      break;

    if (chip_obj == NULL)
      {
      /* this came from the Non NUMA */
      /* NOTE(review): unlike the NUMA branch below, total_mics and
       * available_mics are not incremented here - confirm this is intended. */
      PCI_Device new_device;

      new_device.initializePCIDevice(mic_obj, idx, topology);
      this->devices.push_back(new_device);
      }
    else
      {
      /* Only keep the MIC if its NUMA node ancestor is this chip. */
      hwloc_obj_t ancestor_obj = hwloc_get_ancestor_obj_by_type(topology, HWLOC_OBJ_NODE, mic_obj);

      if ((ancestor_obj != NULL) &&
          (ancestor_obj->logical_index == chip_obj->logical_index))
        {
        PCI_Device new_device;

        new_device.initializePCIDevice(mic_obj, idx, topology);
        this->devices.push_back(new_device);
        this->total_mics++;
        this->available_mics++;
        }
      }
    }

  return(PBSE_NONE);
  }
Exemplo n.º 3
0
/**
 * Looks up the NUMA node that contains a processing unit.
 *
 * @param cpu OS index of the processing unit (PU).
 * @return The os_index of the PU's HWLOC_OBJ_NODE ancestor. Returns 0 when
 *         no PU has that OS index, when the PU has no NUMA node ancestor,
 *         or when the build does not define HWLOC.
 */
unsigned int Hwloc::getNumaNodeOfCpu ( unsigned int cpu )
{
   unsigned int numaNodeId = 0;
#ifdef HWLOC
   hwloc_obj_t pu = hwloc_get_pu_obj_by_os_index( _hwlocTopology, cpu );

   // The lookup yields NULL for an unknown OS index; the ancestor walk
   // must not be handed a NULL object, so bail out with the default.
   if ( pu != NULL )
   {
      // Now we have the PU object, go find its parent numa node
      hwloc_obj_t numaNode =
         hwloc_get_ancestor_obj_by_type( _hwlocTopology, HWLOC_OBJ_NODE, pu );

      // No NUMA node ancestor was found: keep the default of 0
      if ( numaNode != NULL )
      {
         numaNodeId = numaNode->os_index;
      }
   }
#endif
   return numaNodeId;
}
/*
 * initializeNVIDIADevices()
 *
 * Enumerates the GPUs reported by NVML, locates each one in the hwloc
 * topology and hands it to store_device_on_appropriate_chip().
 *
 * NVML failures are logged through log_nvml_error() and are never returned
 * to the caller. A device whose NVML handle cannot be obtained, or which
 * cannot be found in the topology, is skipped.
 *
 * @param machine_obj - not referenced by this function.
 * @param topology    - hwloc topology used to locate each GPU.
 * @return PBSE_NONE in all cases.
 */
int Machine::initializeNVIDIADevices(hwloc_obj_t machine_obj, hwloc_topology_t topology)
  {
  nvmlReturn_t rc;

  /* Initialize the NVML handle. 
   *
   * nvmlInit should be called once before invoking any other methods in the NVML library. 
   * A reference count of the number of initializations is maintained. Shutdown only occurs 
   * when the reference count reaches zero.
   * */
  rc = nvmlInit();
  if (rc != NVML_SUCCESS && rc != NVML_ERROR_ALREADY_INITIALIZED)
    {
    log_nvml_error(rc, NULL, __func__);
    return(PBSE_NONE);
    }

  unsigned int device_count = 0;

  /* Get the device count. */
  rc = nvmlDeviceGetCount(&device_count);
  if (rc == NVML_SUCCESS)
    {
    /* Get the nvml device handle at each index */
    for (unsigned int idx = 0; idx < device_count; idx++)
      {
      nvmlDevice_t gpu;

      rc = nvmlDeviceGetHandleByIndex(idx, &gpu);

      if (rc != NVML_SUCCESS)
        {
        /* TODO: get gpuid from nvmlDevice_t struct */
        log_nvml_error(rc, NULL, __func__);

        /* gpu was not filled in; do not pass a garbage handle to hwloc */
        continue;
        }

      /* Use the hwloc library to determine device locality */
      hwloc_obj_t gpu_obj = hwloc_nvml_get_device_osdev(topology, gpu);

      if (gpu_obj == NULL)
        {
        /* This was not an nvml device. We will look for a "card" device (GeForce or Quadra) */
        gpu_obj = this->get_non_nvml_device(topology, gpu);
        if (gpu_obj == NULL)
          continue;
        }

      /* Only register devices that sit underneath a machine object */
      hwloc_obj_t ancestor_obj = hwloc_get_ancestor_obj_by_type(topology, HWLOC_OBJ_MACHINE, gpu_obj);

      if (ancestor_obj != NULL)
        {
        PCI_Device new_device;

        new_device.initializePCIDevice(gpu_obj, idx, topology);

        store_device_on_appropriate_chip(new_device);
        }
      }
    }
  else
    {
    log_nvml_error(rc, NULL, __func__);
    }

  /* Shutdown the NVML handle. 
   *
   * nvmlShutdown should be called after NVML work is done, once for each call to nvmlInit() 
   * A reference count of the number of initializations is maintained. Shutdown only occurs when 
   * the reference count reaches zero. For backwards compatibility, no error is reported if 
   * nvmlShutdown() is called more times than nvmlInit().
   * */
  rc = nvmlShutdown();
  if (rc != NVML_SUCCESS)
    {
    log_nvml_error(rc, NULL, __func__);
    }

  return(PBSE_NONE);
  }
Exemplo n.º 5
0
//
// Computes four CPU counts from the hwloc topology and stores them in
// numCPUsPhysAcc (accessible cores), numCPUsPhysAll (all cores),
// numCPUsLogAcc (accessible PUs) and numCPUsLogAll (all PUs).  Those
// variables and `topology` are declared outside this function.
//
// Takes no arguments and returns nothing.  Failures are routed through
// the CHK_ERR / CHK_ERR_ERRNO / REPORT_ERR_ERRNO macros, which are defined
// elsewhere -- NOTE(review): presumably the CHK_* forms are fatal; confirm
// against their definitions.
//
static
void getNumCPUs(void) {
  //
  // accessible cores
  //

  //
  // Hwloc can't tell us the number of accessible cores directly, so
  // get that by counting the parent cores of the accessible PUs.
  //

  //
  // We could seemingly use hwloc_topology_get_allowed_cpuset() to get
  // the set of accessible PUs here.  But that seems not to reflect the
  // schedaffinity settings, so use hwloc_get_proc_cpubind() instead.
  //
  hwloc_cpuset_t logAccSet;
  CHK_ERR_ERRNO((logAccSet = hwloc_bitmap_alloc()) != NULL);
  if (hwloc_get_proc_cpubind(topology, getpid(), logAccSet, 0) != 0) {
    //
    // Querying the binding failed.  Only on macOS, and only for ENOSYS,
    // is that treated as recoverable; the set is then filled so that
    // every PU counts as accessible.
    //
#ifdef __APPLE__
    const int errRecoverable = (errno == ENOSYS); // no cpubind on macOS
#else
    const int errRecoverable = 0;
#endif
    if (errRecoverable) {
      hwloc_bitmap_fill(logAccSet);
    } else {
      //
      // The query is issued a second time inside the reporting macro.
      // NOTE(review): presumably so the macro can report the failing
      // expression together with errno; confirm against its definition.
      //
      REPORT_ERR_ERRNO(hwloc_get_proc_cpubind(topology, getpid(), logAccSet, 0)
                       == 0);
    }
  }

  // Restrict the accessible set to PUs in the topology's online cpuset.
  hwloc_bitmap_and(logAccSet, logAccSet,
                   hwloc_topology_get_online_cpuset(topology));

  hwloc_cpuset_t physAccSet;
  CHK_ERR_ERRNO((physAccSet = hwloc_bitmap_alloc()) != NULL);

  // Local shorthand: step to the next PU object inside logAccSet.
#define NEXT_PU(pu)                                                     \
  hwloc_get_next_obj_inside_cpuset_by_type(topology, logAccSet,         \
                                           HWLOC_OBJ_PU, pu)

  //
  // For each accessible PU, set the bit for its parent core's logical
  // index.  PUs with the same parent core set the same bit, so the
  // weight of physAccSet is the number of distinct accessible cores.
  //
  for (hwloc_obj_t pu = NEXT_PU(NULL); pu != NULL; pu = NEXT_PU(pu)) {
    hwloc_obj_t core;
    CHK_ERR_ERRNO((core = hwloc_get_ancestor_obj_by_type(topology,
                                                         HWLOC_OBJ_CORE,
                                                         pu))
                  != NULL);
    hwloc_bitmap_set(physAccSet, core->logical_index);
  }

#undef NEXT_PU

  numCPUsPhysAcc = hwloc_bitmap_weight(physAccSet);
  hwloc_bitmap_free(physAccSet);

  CHK_ERR(numCPUsPhysAcc > 0);

  //
  // all cores
  //
  numCPUsPhysAll = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_CORE);
  CHK_ERR(numCPUsPhysAll > 0);

  //
  // accessible PUs
  //
  numCPUsLogAcc = hwloc_bitmap_weight(logAccSet);
  CHK_ERR(numCPUsLogAcc > 0);

  // logAccSet is not used past this point.
  hwloc_bitmap_free(logAccSet);

  //
  // all PUs
  //
  numCPUsLogAll = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_PU);
  CHK_ERR(numCPUsLogAll > 0);
}