Example #1
/** Prints a list of the available GPUs to the result of the Tcl
    interpreter. Only devices with a compute capability of 1.1 or
    higher are listed, since atomic operations are required for
    CUDA-LB. */
static int list_gpus(Tcl_Interp *interp)
{
  int deviceCount = cuda_get_n_gpus();
  if (deviceCount < 0) {
    Tcl_AppendResult(interp, "cannot initialize CUDA", NULL);
    return TCL_ERROR;
  }

  int found = 0;
  for (int dev = 0; dev < deviceCount; ++dev) {
    // list only devices with compute capability >= 1.1 (atomic operations are required)
    if (cuda_check_gpu(dev) == ES_OK) {
      /* " {" + device number + " " + name + "}" + terminating NUL */
      char id[4 + 64 + TCL_INTEGER_SPACE + 1];
      char name[64];
      cuda_get_gpu_name(dev, name);
      snprintf(id, sizeof(id), " {%d %.64s}", dev, name);
      Tcl_AppendResult(interp, id, NULL);
      found = 1;
    }
  }
  if (found == 0) {
    Tcl_AppendResult(interp, "no GPUs present", NULL);
  }

  return TCL_OK;
}
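
/* The helpers used above (cuda_get_n_gpus, cuda_check_gpu and
   cuda_get_gpu_name) are not part of this excerpt. Below is a minimal,
   self-contained sketch of how they could be implemented against the
   CUDA runtime API; ESPResSo's actual implementations may differ, and
   the ES_OK / ES_ERROR values are assumed here for illustration. */

#include <cuda_runtime.h>
#include <cstring>

#define ES_OK    0
#define ES_ERROR 1

/* Number of CUDA devices, or -1 if the runtime cannot be initialized. */
static int cuda_get_n_gpus()
{
  int count;
  if (cudaGetDeviceCount(&count) != cudaSuccess)
    return -1;
  return count;
}

/* Accept only devices with compute capability >= 1.1, since CUDA-LB
   needs atomic operations. */
static int cuda_check_gpu(int dev)
{
  cudaDeviceProp prop;
  if (cudaGetDeviceProperties(&prop, dev) != cudaSuccess)
    return ES_ERROR;
  return (prop.major > 1 || (prop.major == 1 && prop.minor >= 1))
    ? ES_OK : ES_ERROR;
}

/* Copy the device name into a caller-provided 64-byte buffer. */
static void cuda_get_gpu_name(int dev, char name[64])
{
  cudaDeviceProp prop;
  if (cudaGetDeviceProperties(&prop, dev) != cudaSuccess)
    strncpy(name, "no GPU", 63);
  else
    strncpy(name, prop.name, 63);
  name[63] = 0;
}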
Example #2
std::vector<EspressoGpuDevice> cuda_gather_gpus(void) {
  int n_gpus = cuda_get_n_gpus();
  char proc_name[MPI_MAX_PROCESSOR_NAME];
  int proc_name_len;
  /* List of local devices */
  std::vector<EspressoGpuDevice> devices;
  /* Global unique device list (only relevant on master) */
  std::vector<EspressoGpuDevice> g_devices;
  int *n_gpu_array = 0;

  MPI_Get_processor_name(proc_name, &proc_name_len);

  /* Truncate to 63 chars to fit struct. */
  if(strlen(proc_name) > 63)
    proc_name[63] = 0;

  for(int i = 0; i < n_gpus; ++i) {
    /* Check if the device has at least the minimum compute capability */
    if(cuda_check_gpu(i) == ES_OK) {
      EspressoGpuDevice device;
      if(cuda_get_device_props(i, device) == ES_OK) {
        strncpy(device.proc_name, proc_name, 64);
        devices.push_back(device);
      }
    }
  }

  /* Update n_gpus to the number of usable devices */
  n_gpus = devices.size();

  if(this_node == 0) {
    std::set<EspressoGpuDevice, CompareDevices> device_set;
    n_gpu_array = new int[n_nodes];
    MPI_Gather(&n_gpus, 1, MPI_INT, n_gpu_array, 1, MPI_INT, 0, MPI_COMM_WORLD);

    /* insert local devices */
    std::copy(devices.begin(), devices.end(), std::inserter(device_set, device_set.begin()));

    EspressoGpuDevice device;
    MPI_Status s;
    /* Get the devices from the other nodes */
    for(int i = 1; i < n_nodes; ++i) {
      for(int j = 0; j < n_gpu_array[i]; ++j) {
        MPI_Recv(&device, sizeof(EspressoGpuDevice), MPI_BYTE, i, 0, MPI_COMM_WORLD, &s);
        device_set.insert(device);
      }
    }
    /* Copy the unique devices to the result, if any */
    std::copy(device_set.begin(), device_set.end(), std::inserter(g_devices, g_devices.begin()));
    /* allocated with new[], so release with delete[] */
    delete[] n_gpu_array;
  } else {
    /* Send number of devices to master */
    MPI_Gather(&n_gpus, 1, MPI_INT, n_gpu_array, 1, MPI_INT, 0, MPI_COMM_WORLD);
    /* Send the local devices to the master */
    for(std::vector<EspressoGpuDevice>::iterator device = devices.begin();
        device != devices.end(); ++device) {
      MPI_Send(&(*device), sizeof(EspressoGpuDevice), MPI_BYTE, 0, 0, MPI_COMM_WORLD);
    }
  }
  return g_devices;
}
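
/* cuda_gather_gpus() above relies on EspressoGpuDevice being a
   fixed-size POD, so that sizeof(EspressoGpuDevice) bytes can be sent
   as MPI_BYTE without serialization, and on a CompareDevices ordering
   for the std::set. Neither is shown in this excerpt; the sketch below
   uses assumed field names chosen to match the code. */

#include <cstring>

struct EspressoGpuDevice {
  int id;              /* CUDA device number on its host */
  char name[64];       /* device name, NUL-terminated */
  char proc_name[64];  /* MPI processor (host) name; at most 63 chars
                          plus NUL, hence the truncation above */
};

/* Strict weak ordering by host name, then device id, so that the
   std::set deduplicates devices reported by several MPI ranks running
   on the same host. */
struct CompareDevices {
  bool operator()(const EspressoGpuDevice &a,
                  const EspressoGpuDevice &b) const {
    const int cmp = strncmp(a.proc_name, b.proc_name, 64);
    if (cmp != 0)
      return cmp < 0;
    return a.id < b.id;
  }
};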
Example #3
/** Implements the Tcl "cuda" command with the subcommands "list",
    "setdevice <devnr>" and "getdevice". Only devices with a compute
    capability of 1.1 or higher are accepted, since atomic operations
    are required for CUDA-LB. */
int tclcommand_cuda(ClientData data, Tcl_Interp *interp,
                    int argc, char **argv)
{
#ifndef CUDA
  Tcl_AppendResult(interp, "Feature CUDA required!", (char *)NULL);
  return TCL_ERROR;
#else
  if (argc <= 1) {
    Tcl_AppendResult(interp, "too few arguments to the cuda command", (char *)NULL);
    return TCL_ERROR;
  }
  argc--; argv++;

  if (ARG0_IS_S("list")) {
    if (argc != 1) {
      Tcl_AppendResult(interp, "cuda list takes no arguments", (char *)NULL);
      return TCL_ERROR;
    }
    return list_gpus(interp);
  }
  else if (ARG0_IS_S("setdevice")) {
    int dev;
    if (argc <= 1 || !ARG1_IS_I(dev)) {
      Tcl_AppendResult(interp, "expected: cuda setdevice <devnr>", (char *)NULL);
      return TCL_ERROR;
    }
    if (cuda_check_gpu(dev) == ES_ERROR) {
      Tcl_AppendResult(interp, "GPU not present or compute model not sufficient", (char *)NULL);
      return TCL_ERROR;
    }
    if (cuda_set_device(dev) == ES_OK) {
      return TCL_OK;
    }
    else {
      Tcl_AppendResult(interp, cuda_error, (char *)NULL);
      return TCL_ERROR;
    }
  }
  else if (ARG0_IS_S("getdevice")) {
    if (argc != 1) {
      Tcl_AppendResult(interp, "cuda getdevice takes no arguments", (char *)NULL);
      return TCL_ERROR;
    }
    int dev = cuda_get_device();
    if (dev >= 0) {
      char buffer[TCL_INTEGER_SPACE];
      sprintf(buffer, "%d", dev);
      Tcl_AppendResult(interp, buffer, (char *)NULL);
      return TCL_OK;
    }
    else {
      Tcl_AppendResult(interp, cuda_error, (char *)NULL);
      return TCL_ERROR;
    }
  }
  else {
    Tcl_AppendResult(interp, "unknown subcommand \"", argv[0], "\"", (char *)NULL);
    return TCL_ERROR;
  }
#endif /* defined(CUDA) */
}
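
/* The command above still has to be registered with the interpreter
   before "cuda list", "cuda setdevice <devnr>" or "cuda getdevice" can
   be called from a Tcl script. A minimal sketch using the Tcl C API
   follows; where exactly ESPResSo performs this registration is not
   shown in this excerpt. */

#include <tcl.h>

static void register_cuda_command(Tcl_Interp *interp)
{
  Tcl_CreateCommand(interp, "cuda", (Tcl_CmdProc *)tclcommand_cuda,
                    (ClientData)NULL, (Tcl_CmdDeleteProc *)NULL);
}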