Example #1
#include <tcl.h>
#include <cstdio>
/* cuda_get_n_gpus(), cuda_check_gpu(), cuda_get_gpu_name() and ES_OK are
   assumed to be declared in the project's CUDA headers. */

/** Appends a list of the available GPUs to the Tcl interpreter's result.
    Only devices with a compute capability of 1.1 or higher are listed,
    since atomic operations are required for CUDA-LB. */
static int list_gpus(Tcl_Interp *interp)
{
  int deviceCount = cuda_get_n_gpus();
  if (deviceCount < 0) {
    Tcl_AppendResult(interp, "cannot initialize CUDA", NULL);
    return TCL_ERROR;
  }

  int found = 0; 
  for (int dev = 0; dev < deviceCount; ++dev) {
    // list only devices with compute capability >= 1.1 (required for atomic operations)
    if (cuda_check_gpu(dev) == ES_OK) {
      /* " {" + device id + " " + name + "}" + terminating NUL */
      char id[5 + 64 + TCL_INTEGER_SPACE];
      char name[64];
      cuda_get_gpu_name(dev, name);
      snprintf(id, sizeof(id), " {%d %.64s}", dev, name);
      Tcl_AppendResult(interp, id, NULL);
      found = 1;
    }
  }
  if (found == 0) {
    Tcl_AppendResult(interp, "no GPUs present", NULL);
  }

  return TCL_OK;
}
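
For context, a minimal sketch of how a function like this is typically exposed to scripts: a wrapper matching the Tcl_CmdProc signature is registered with Tcl_CreateCommand. The command name cuda_list_gpus and both helpers below are illustrative assumptions, not part of the original source.

/* Hypothetical wrapper; argument checking is omitted for brevity. */
static int tclcommand_cuda_list_gpus(ClientData data, Tcl_Interp *interp,
                                     int argc, const char **argv)
{
  (void)data; (void)argc; (void)argv; /* unused here */
  return list_gpus(interp);
}

/* Registration, e.g. in the application's Tcl initialization routine: */
static void register_gpu_commands(Tcl_Interp *interp)
{
  Tcl_CreateCommand(interp, "cuda_list_gpus", tclcommand_cuda_list_gpus,
                    NULL, NULL);
}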
Example #2
#include <mpi.h>
#include <algorithm>
#include <cstring>
#include <iterator>
#include <set>
#include <vector>
/* EspressoGpuDevice, CompareDevices, ES_OK, this_node, n_nodes and the
   cuda_* helpers are assumed to come from the project's CUDA and
   communication headers. */

std::vector<EspressoGpuDevice> cuda_gather_gpus(void) {
  int n_gpus = cuda_get_n_gpus();
  char proc_name[MPI_MAX_PROCESSOR_NAME];
  int proc_name_len;
  /* List of local devices */
  std::vector<EspressoGpuDevice> devices;
  /* Global unique device list (only relevant on master) */
  std::vector<EspressoGpuDevice> g_devices;
  int *n_gpu_array = NULL; /* gather buffer, allocated on the master only */

  MPI_Get_processor_name(proc_name, &proc_name_len);

  /* Truncate to 63 chars so the name fits the struct field */
  if (strlen(proc_name) > 63)
    proc_name[63] = '\0';

  for (int i = 0; i < n_gpus; ++i) {
    /* Check whether the device has at least the minimum compute capability */
    if (cuda_check_gpu(i) == ES_OK) {
      EspressoGpuDevice device;
      if (cuda_get_device_props(i, device) == ES_OK) {
        strncpy(device.proc_name, proc_name, 64);
        devices.push_back(device);
      }
    }
  }
  
  /* Update n_gpus to the number of usable devices */
  n_gpus = devices.size();

  if (this_node == 0) {
    std::set<EspressoGpuDevice, CompareDevices> device_set;
    n_gpu_array = new int[n_nodes];
    MPI_Gather(&n_gpus, 1, MPI_INT, n_gpu_array, 1, MPI_INT, 0, MPI_COMM_WORLD);

    /* Insert the local devices */
    std::copy(devices.begin(), devices.end(),
              std::inserter(device_set, device_set.begin()));

    EspressoGpuDevice device;
    MPI_Status s;
    /* Get the devices from the other nodes */
    for (int i = 1; i < n_nodes; ++i) {
      for (int j = 0; j < n_gpu_array[i]; ++j) {
        MPI_Recv(&device, sizeof(EspressoGpuDevice), MPI_BYTE, i, 0,
                 MPI_COMM_WORLD, &s);
        device_set.insert(device);
      }
    }
    /* Copy the unique devices to the result, if any */
    std::copy(device_set.begin(), device_set.end(),
              std::inserter(g_devices, g_devices.begin()));
    /* n_gpu_array was allocated with new[], so release it with delete[] */
    delete[] n_gpu_array;
  } else {
    /* Send the number of devices to the master */
    MPI_Gather(&n_gpus, 1, MPI_INT, n_gpu_array, 1, MPI_INT, 0, MPI_COMM_WORLD);
    /* Send the devices to the master */
    for (std::vector<EspressoGpuDevice>::iterator device = devices.begin();
         device != devices.end(); ++device) {
      MPI_Send(&(*device), sizeof(EspressoGpuDevice), MPI_BYTE, 0, 0,
               MPI_COMM_WORLD);
    }
  }
  return g_devices;
}
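
To show how the result is meant to be consumed, here is a small usage sketch. Note that cuda_gather_gpus() contains collective MPI calls, so every rank must call it; only the master (this_node == 0) receives a non-empty list. The helper below is hypothetical, and it assumes EspressoGpuDevice carries a name field next to proc_name, which this example does not show.

#include <cstdio>

/* Hypothetical helper, illustration only. */
void report_gpus() {
  /* Collective: call on every rank. */
  std::vector<EspressoGpuDevice> devices = cuda_gather_gpus();

  if (this_node == 0) {
    for (std::vector<EspressoGpuDevice>::const_iterator it = devices.begin();
         it != devices.end(); ++it)
      printf("GPU '%s' on host '%s'\n", it->name, it->proc_name);
  }
}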