/** Prints a list of the available GPUs to the result of the Tcl
 *  interpreter. Only devices that pass cuda_check_gpu() (a minimum
 *  compute capability, since atomic operations are required for
 *  CUDA-LB) are listed.
 *  @param interp Tcl interpreter whose result the list is appended to.
 *  @return TCL_ERROR if CUDA cannot be initialized, TCL_OK otherwise
 *          (also TCL_OK when no usable GPU is present; a message is
 *          appended to the result in that case).
 */
static int list_gpus(Tcl_Interp *interp) {
  int deviceCount = cuda_get_n_gpus();

  if (deviceCount < 0) {
    Tcl_AppendResult(interp, "cannot initialize CUDA", NULL);
    return TCL_ERROR;
  }

  int found = 0;
  for (int dev = 0; dev < deviceCount; ++dev) {
    /* List only devices with sufficient compute capability
       (atomic operations are needed for CUDA-LB). */
    if (cuda_check_gpu(dev) == ES_OK) {
      /* 5 = four literal chars in the format (" {", ' ', '}') plus the
         terminating NUL; the original "4 + ..." was one byte short. */
      char id[5 + 64 + TCL_INTEGER_SPACE];
      char name[64];
      cuda_get_gpu_name(dev, name);
      /* snprintf instead of sprintf: never overruns the buffer even if
         the format string or name width is changed later. */
      snprintf(id, sizeof(id), " {%d %.64s}", dev, name);
      Tcl_AppendResult(interp, id, NULL);
      found = 1;
    }
  }

  if (found == 0) {
    Tcl_AppendResult(interp, "no GPUs present", NULL);
  }

  return TCL_OK;
}
std::vector<EspressoGpuDevice> cuda_gather_gpus(void) { int n_gpus = cuda_get_n_gpus(); char proc_name[MPI_MAX_PROCESSOR_NAME]; int proc_name_len; /* List of local devices */ std::vector<EspressoGpuDevice> devices; /* Global unique device list (only relevant on master) */ std::vector<EspressoGpuDevice> g_devices; int *n_gpu_array = 0; MPI_Get_processor_name(proc_name, &proc_name_len); /* Truncate to 63 chars to fit struct. */ if(strlen(proc_name) > 63) proc_name[63] = 0; for(int i = 0; i < n_gpus; ++i) { /* Check if device has at least mininum compute capability */ if(cuda_check_gpu(i) == ES_OK) { EspressoGpuDevice device; if(cuda_get_device_props(i, device) == ES_OK){ strncpy(device.proc_name, proc_name, 64); devices.push_back(device); } } } /** Update n_gpus to number of usable devices */ n_gpus = devices.size(); if(this_node == 0) { std::set<EspressoGpuDevice, CompareDevices> device_set; n_gpu_array = new int[n_nodes]; MPI_Gather(&n_gpus, 1, MPI_INT, n_gpu_array, 1, MPI_INT, 0, MPI_COMM_WORLD); /* insert local devices */ std::copy(devices.begin(), devices.end(), std::inserter(device_set, device_set.begin())); EspressoGpuDevice device; MPI_Status s; /* Get devices from other nodes */ for(int i = 1; i < n_nodes; ++i) { for(int j = 0; j < n_gpu_array[i]; ++j) { MPI_Recv(&device, sizeof(EspressoGpuDevice), MPI_BYTE, i, 0, MPI_COMM_WORLD, &s); device_set.insert(device); } } /* Copy unique devices to result, if any */ std::copy(device_set.begin(), device_set.end(), std::inserter(g_devices, g_devices.begin())); delete n_gpu_array; } else { /* Send number of devices to master */ MPI_Gather(&n_gpus, 1, MPI_INT, n_gpu_array, 1, MPI_INT, 0, MPI_COMM_WORLD); /* Send devices to maser */ for(std::vector<EspressoGpuDevice>::iterator device = devices.begin(); device != devices.end(); ++device) { MPI_Send(&(*device), sizeof(EspressoGpuDevice), MPI_BYTE, 0, 0, MPI_COMM_WORLD); } } return g_devices; }