/**
 * Find the hwloc OS device whose name starts with "card" and whose parent PCI
 * device has the same domain/bus/device as the given NVML device (function 0).
 *
 * @param topology  hwloc topology to search; it must describe the current system.
 * @param device    NVML device handle whose PCI location is looked up.
 * @return the matching OS device object, or NULL when the topology is not the
 *         current system (errno is set to EINVAL), when nvmlDeviceGetPciInfo()
 *         fails, or when no OS device matches.
 */
hwloc_obj_t Machine::get_non_nvml_device(hwloc_topology_t topology, nvmlDevice_t device)
{
    nvmlPciInfo_t pci;

    if (!hwloc_topology_is_thissystem(topology)) {
        errno = EINVAL;
        return NULL;
    }

    if (NVML_SUCCESS != nvmlDeviceGetPciInfo(device, &pci))
        return NULL;

    hwloc_obj_t osdev = NULL;
    while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
        hwloc_obj_t pcidev = osdev->parent;

        /* An unnamed OS device cannot match, and strncmp(NULL, ...) is undefined. */
        if (osdev->name == NULL || strncmp(osdev->name, "card", 4) != 0)
            continue;

        if (pcidev
            && pcidev->type == HWLOC_OBJ_PCI_DEVICE
            && pcidev->attr->pcidev.domain == pci.domain
            && pcidev->attr->pcidev.bus == pci.bus
            && pcidev->attr->pcidev.dev == pci.device
            && pcidev->attr->pcidev.func == 0)
            return osdev;
    }

    return NULL;
}
static void init_device_info(struct monitor* mon) { gethostname(mon->hostname, 64); NVML_TRY(nvmlSystemGetDriverVersion(mon->driver_version, sizeof(mon->driver_version))); NVML_TRY(nvmlSystemGetNVMLVersion(mon->nvml_version, sizeof(mon->nvml_version))); NVML_TRY(nvmlDeviceGetCount(&mon->dev_count)); mon->devices = calloc(mon->dev_count, sizeof(struct device)); for(unsigned i = 0; i < mon->dev_count; ++i) { struct device dev; memset(&dev, 0, sizeof(struct device)); dev.index = i; NVML_TRY(nvmlDeviceGetHandleByIndex(i, &dev.handle)); NVML_TRY(nvmlDeviceGetName(dev.handle, dev.name, sizeof(dev.name))); NVML_TRY(nvmlDeviceGetSerial(dev.handle, dev.serial, sizeof(dev.serial))); NVML_TRY(nvmlDeviceGetUUID(dev.handle, dev.uuid, sizeof(dev.uuid))); NVML_TRY(nvmlDeviceGetPciInfo(dev.handle, &dev.pci)); NVML_TRY(nvmlDeviceGetMemoryInfo(dev.handle, &dev.memory)); unsigned long long event_types; NVML_TRY(nvmlEventSetCreate(&dev.event_set)); if(0 == NVML_TRY(nvmlDeviceGetSupportedEventTypes(dev.handle, &event_types))) { NVML_TRY(nvmlDeviceRegisterEvents(dev.handle, event_types, dev.event_set)); } else { dev.event_set = NULL; } for(nvmlClockType_t type = NVML_CLOCK_GRAPHICS; type < NVML_CLOCK_COUNT; ++type) { if(NVML_TRY(nvmlDeviceGetMaxClockInfo(dev.handle, type, &dev.max_clock[type]))) break; } get_device_features(&dev); mon->devices[i] = dev; } mon->last_update = time(NULL); }
static int detectDevices( ) { nvmlReturn_t ret; nvmlEnableState_t mode = NVML_FEATURE_DISABLED; nvmlDevice_t handle; nvmlPciInfo_t info; cudaError_t cuerr; char busId[16]; char name[64]; char inforomECC[16]; char inforomPower[16]; char names[device_count][64]; char nvml_busIds[device_count][16]; float ecc_version = 0.0, power_version = 0.0; int i = 0, j = 0; int isTesla = 0; int isFermi = 0; int isUnique = 1; unsigned int temp = 0; /* list of nvml pci_busids */ for (i=0; i < device_count; i++) { ret = nvmlDeviceGetHandleByIndex( i, &handle ); if ( NVML_SUCCESS != ret ) { SUBDBG("nvmlDeviceGetHandleByIndex(%d) failed\n", i); return PAPI_ESYS; } ret = nvmlDeviceGetPciInfo( handle, &info ); if ( NVML_SUCCESS != ret ) { SUBDBG("nvmlDeviceGetPciInfo() failed %s\n", nvmlErrorString(ret) ); return PAPI_ESYS; } strncpy(nvml_busIds[i], info.busId, 16); } /* We want to key our list of nvmlDevice_ts by each device's cuda index */ for (i=0; i < device_count; i++) { cuerr = cudaDeviceGetPCIBusId( busId, 16, i ); if ( CUDA_SUCCESS != cuerr ) { SUBDBG("cudaDeviceGetPCIBusId failed.\n"); return PAPI_ESYS; } for (j=0; j < device_count; j++ ) { if ( !strncmp( busId, nvml_busIds[j], 16) ) { ret = nvmlDeviceGetHandleByIndex(j, &devices[i] ); if ( NVML_SUCCESS != ret ) SUBDBG("nvmlDeviceGetHandleByIndex(%d, &devices[%d]) failed.\n", j, i); return PAPI_ESYS; break; } } } memset(names, 0x0, device_count*64); /* So for each card, check whats querable */ for (i=0; i < device_count; i++ ) { isTesla=0; isFermi=1; isUnique = 1; features[i] = 0; ret = nvmlDeviceGetName( devices[i], name, 64 ); if ( NVML_SUCCESS != ret) { SUBDBG("nvmlDeviceGetName failed \n"); return PAPI_ESYS; } for (j=0; j < i; j++ ) if ( 0 == strncmp( name, names[j], 64 ) ) { /* if we have a match, and IF everything is sane, * devices with the same name eg Tesla C2075 share features */ isUnique = 0; features[i] = features[j]; } if ( isUnique ) { ret = nvmlDeviceGetInforomVersion( devices[i], NVML_INFOROM_ECC, inforomECC, 
16); if ( NVML_SUCCESS != ret ) { SUBDBG("nvmlGetInforomVersion carps %s\n", nvmlErrorString(ret ) ); isFermi = 0; } ret = nvmlDeviceGetInforomVersion( devices[i], NVML_INFOROM_POWER, inforomPower, 16); if ( NVML_SUCCESS != ret ) { /* This implies the card is older then Fermi */ SUBDBG("nvmlGetInforomVersion carps %s\n", nvmlErrorString(ret ) ); SUBDBG("Based upon the return to nvmlGetInforomVersion, we conclude this card is older then Fermi.\n"); isFermi = 0; } ecc_version = strtof(inforomECC, NULL ); power_version = strtof( inforomPower, NULL); ret = nvmlDeviceGetName( devices[i], name, 64 ); isTesla = ( NULL == strstr(name, "Tesla") ) ? 0:1; /* For Tesla and Quadro products from Fermi and Kepler families. */ if ( isFermi ) { features[i] |= FEATURE_CLOCK_INFO; num_events += 3; } /* For Tesla and Quadro products from Fermi and Kepler families. requires NVML_INFOROM_ECC 2.0 or higher for location-based counts requires NVML_INFOROM_ECC 1.0 or higher for all other ECC counts requires ECC mode to be enabled. */ if ( isFermi ) { ret = nvmlDeviceGetEccMode( devices[i], &mode, NULL ); if ( NVML_FEATURE_ENABLED == mode) { if ( ecc_version >= 2.0 ) { features[i] |= FEATURE_ECC_LOCAL_ERRORS; num_events += 8; /* {single bit, two bit errors} x { reg, l1, l2, memory } */ } if ( ecc_version >= 1.0 ) { features[i] |= FEATURE_ECC_TOTAL_ERRORS; num_events += 2; /* single bit errors, double bit errors */ } } } /* For all discrete products with dedicated fans */ features[i] |= FEATURE_FAN_SPEED; num_events++; /* For Tesla and Quadro products from Fermi and Kepler families. */ if ( isFermi ) { features[i] |= FEATURE_MAX_CLOCK; num_events += 3; } /* For all products */ features[i] |= FEATURE_MEMORY_INFO; num_events += 3; /* total, free, used */ /* For Tesla and Quadro products from the Fermi and Kepler families. 
*/ if ( isFermi ) { features[i] |= FEATURE_PERF_STATES; num_events++; } /* For "GF11x" Tesla and Quadro products from the Fermi family requires NVML_INFOROM_POWER 3.0 or higher For Tesla and Quadro products from the Kepler family does not require NVML_INFOROM_POWER */ if ( isFermi ) { ret = nvmlDeviceGetPowerUsage( devices[i], &temp); if ( NVML_SUCCESS == ret ) { features[i] |= FEATURE_POWER; num_events++; } } /* For all discrete and S-class products. */ features[i] |= FEATURE_TEMP; num_events++; /* For Tesla and Quadro products from the Fermi and Kepler families */ if (isFermi) { features[i] |= FEATURE_UTILIZATION; num_events += 2; } strncpy( names[i], name, 64); } } return PAPI_OK; }
static int hwloc_nvml_discover(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; nvmlReturn_t ret; unsigned nb, i; if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno NVML detection (not thissystem)\n"); return 0; } ret = nvmlInit(); if (NVML_SUCCESS != ret) return 0; ret = nvmlDeviceGetCount(&nb); if (NVML_SUCCESS != ret || !nb) { nvmlShutdown(); return 0; } for(i=0; i<nb; i++) { nvmlPciInfo_t pci; nvmlDevice_t device; hwloc_obj_t osdev, parent; char buffer[64]; ret = nvmlDeviceGetHandleByIndex(i, &device); assert(ret == NVML_SUCCESS); osdev = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1); snprintf(buffer, sizeof(buffer), "nvml%d", i); osdev->name = strdup(buffer); osdev->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN; osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU; hwloc_obj_add_info(osdev, "Backend", "NVML"); hwloc_obj_add_info(osdev, "GPUVendor", "NVIDIA Corporation"); buffer[0] = '\0'; ret = nvmlDeviceGetName(device, buffer, sizeof(buffer)); hwloc_obj_add_info(osdev, "GPUModel", buffer); /* these may fail with NVML_ERROR_NOT_SUPPORTED on old devices */ buffer[0] = '\0'; ret = nvmlDeviceGetSerial(device, buffer, sizeof(buffer)); if (buffer[0] != '\0') hwloc_obj_add_info(osdev, "NVIDIASerial", buffer); buffer[0] = '\0'; ret = nvmlDeviceGetUUID(device, buffer, sizeof(buffer)); if (buffer[0] != '\0') hwloc_obj_add_info(osdev, "NVIDIAUUID", buffer); parent = NULL; if (NVML_SUCCESS == nvmlDeviceGetPciInfo(device, &pci)) { parent = hwloc_pci_belowroot_find_by_busid(topology, pci.domain, pci.bus, pci.device, 0); if (!parent) parent = hwloc_pci_find_busid_parent(topology, pci.domain, pci.bus, pci.device, 0); #if HAVE_DECL_NVMLDEVICEGETMAXPCIELINKGENERATION if (parent && parent->type == HWLOC_OBJ_PCI_DEVICE) { unsigned maxwidth = 0, maxgen = 0; float lanespeed; nvmlDeviceGetMaxPcieLinkWidth(device, 
&maxwidth); nvmlDeviceGetMaxPcieLinkGeneration(device, &maxgen); /* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding = 0.25GB/s data-rate per lane * PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane * PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane */ lanespeed = maxgen <= 2 ? 2.5 * maxgen * 0.8 : 8.0 * 128/130; /* Gbit/s per lane */ if (lanespeed * maxwidth) /* we found the max link speed, replace the current link speed found by pci (or none) */ parent->attr->pcidev.linkspeed = lanespeed * maxwidth / 8; /* GB/s */ } #endif } if (!parent) parent = hwloc_get_root_obj(topology); hwloc_insert_object_by_parent(topology, parent, osdev); } nvmlShutdown(); return nb; }
void CMeasureNVML<TSkipMs, TVariant>::init(void) { if(TVariant == VARIANT_FULL) { mrLog() << ">>> 'nvml' (full version)" << std::endl; } else { mrLog() << ">>> 'nvml' (light version)" << std::endl; } nvmlReturn_t result; int32_t rv; char const* args_set_pm[] = {"gpu_management", "-p 1", NULL}; uint32_t device_count; char name[NVML_DEVICE_NAME_BUFFER_SIZE]; nvmlPciInfo_t pci; nvmlEnableState_t mode; std::string modes[2] = {"disabled", "enabled"}; std::stringstream clk_gpu_str; std::stringstream clk_mem_str; nvmlPstates_t power_state; nvmlMemory_t memory; const uint32_t count = 32; uint32_t clk_mem_cnt = count; uint32_t clk_mem[count]; uint32_t clk_mem_max = 0; uint32_t clk_mem_min = 0xffffffff; uint32_t clk_mem_set = 0; uint32_t clk_gpu_min_arr_cnt = count; uint32_t clk_gpu_min_arr[clk_gpu_min_arr_cnt]; uint32_t clk_gpu_min = 0xffffffff; uint32_t clk_gpu_max_arr_cnt = count; uint32_t clk_gpu_max_arr[clk_gpu_max_arr_cnt]; uint32_t clk_gpu_max = 0; uint32_t clk_gpu_set = 0; uint32_t memory_total = 0; result = nvmlInit(); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot initialize nvml library. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } result = nvmlDeviceGetCount(&device_count); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot query device count. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } if (device_count > 1) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: this software has be rewritten if you want to support more than 1 device. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } mrLog() << ">>> 'nvml' (thread main): get gpu device handler..."; mrLog.flush(); result = nvmlDeviceGetHandleByIndex(0, &mDevice); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot get device handler. 
(file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } mrLog() << " done!" << std::endl; result = nvmlDeviceGetName(mDevice, name, NVML_DEVICE_NAME_BUFFER_SIZE); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot get device name. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } result = nvmlDeviceGetPciInfo(mDevice, &pci); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot get pci information. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } result = nvmlDeviceGetPowerManagementMode(mDevice, &mode); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: no power managment supported. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } result = nvmlDeviceGetPerformanceState(mDevice, &power_state); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: no performance state reading possible. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } result = nvmlDeviceGetSupportedMemoryClocks(mDevice, &clk_mem_cnt, clk_mem); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot obtain memory clock. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } for (int i=0; i<(int32_t)clk_mem_cnt; ++i) { clk_mem_min = (clk_mem[i]<clk_mem_min) ? clk_mem[i] : clk_mem_min; clk_mem_max = (clk_mem[i]>clk_mem_max) ? clk_mem[i] : clk_mem_max; } result = nvmlDeviceGetSupportedGraphicsClocks(mDevice, clk_mem_min, &clk_gpu_min_arr_cnt, clk_gpu_min_arr); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot obtain graphics clock. 
(file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } for (int32_t i=0; i<(int32_t)clk_gpu_min_arr_cnt; ++i) { clk_gpu_min = (clk_gpu_min_arr[i]<clk_gpu_min) ? clk_gpu_min_arr[i] : clk_gpu_min; } result = nvmlDeviceGetSupportedGraphicsClocks(mDevice, clk_mem_max, &clk_gpu_max_arr_cnt, clk_gpu_max_arr); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot obtain graphics clock. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } for (int32_t i=0; i<(int32_t)clk_gpu_max_arr_cnt; ++i) { clk_gpu_max = (clk_gpu_max_arr[i]>clk_gpu_max) ? clk_gpu_max_arr[i] : clk_gpu_max; } result = nvmlDeviceGetMemoryInfo(mDevice, &memory); if (NVML_SUCCESS != result) { mrLog.lock(); mrLog(CLogger::scErr) << "!!! 'nvml thread' (thread main): Error: cannot obtain memory informations. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; mrLog.unlock(); exit(EXIT_FAILURE); } memory_total = (uint32_t)(memory.total >> 20); rv = exec_gpu_mgmt((char**)args_set_pm); if (rv) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: in gpu_management tool. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } mrLog() << ">>> 'nvml' (thread main): persistence mode enabled." 
<< std::endl; mrLog() << ">>> 'nvml' (thread main):" << std::endl << " device : " << name << std::endl << " pcie : " << pci.busId << std::endl << " power mgmt mode: " << modes[mode] << std::endl << " power state cur: " << power_state << std::endl << " power state min: " << NVML_PSTATE_15 << std::endl << " power state max: " << NVML_PSTATE_0 << std::endl << " memory total : " << memory_total << " MiB" << std::endl << " avail mem clks : "; for (int i=0; i<(int32_t)clk_mem_cnt; ++i) { if (i<(int32_t)clk_mem_cnt-1) { mrLog() << clk_mem[i] << " MHz, "; } else { mrLog() << clk_mem[i] << " MHz" << std::endl; } } mrLog() << " memory clk min : " << clk_mem_min << " MHz" << std::endl << " avail core clks: "; for (int32_t i=0; i<(int32_t)clk_gpu_min_arr_cnt; ++i) { if (i<(int32_t)clk_gpu_min_arr_cnt-1) { mrLog() << clk_gpu_min_arr[i] << " MHz, "; } else { mrLog() << clk_gpu_min_arr[i] << " MHz" << std::endl; } } mrLog() << " core clk min : " << clk_gpu_min << " MHz" << std::endl; mrLog() << " memory clk max : " << clk_mem_max << " MHz" << std::endl << " avail core clks: "; for (int32_t i=0; i<(int32_t)clk_gpu_max_arr_cnt; ++i) { if (i<(int32_t)clk_gpu_max_arr_cnt-1) { mrLog() << clk_gpu_max_arr[i] << " MHz, "; } else { mrLog() << clk_gpu_max_arr[i] << " MHz" << std::endl; } } mrLog() << " core clk max : " << clk_gpu_max << " MHz" << std::endl; switch (mGpuFrequency) { case GPU_FREQUENCY_MIN: clk_mem_set = clk_mem_min; clk_gpu_set = clk_gpu_min; break; case GPU_FREQUENCY_MAX: clk_mem_set = clk_mem_max; clk_gpu_set = clk_gpu_max; break; case GPU_FREQUENCY_CUR: default: clk_mem_set = 0; clk_gpu_set = 0; result = nvmlDeviceGetClockInfo(mDevice, NVML_CLOCK_MEM, &clk_mem_set); if (NVML_SUCCESS != result) { mrLog.lock(); mrLog(CLogger::scErr) << "!!! 'nvml thread' (thread main): Error: cannot read frequency. 
(file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; mrLog.unlock(); exit(EXIT_FAILURE); } result = nvmlDeviceGetClockInfo(mDevice, NVML_CLOCK_GRAPHICS, &clk_gpu_set); if (NVML_SUCCESS != result) { mrLog.lock(); mrLog(CLogger::scErr) << "!!! 'nvml thread' (thread main): Error: cannot read frequency. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; mrLog.unlock(); exit(EXIT_FAILURE); } break; } if (mGpuFrequency == GPU_FREQUENCY_MIN || mGpuFrequency == GPU_FREQUENCY_MAX) { // In these cases we actually set the GPU frequencies either to the maximum or minimum value. clk_gpu_str << "-c " << clk_gpu_set; clk_mem_str << "-m " << clk_mem_set; char const* args_set_clk[] = {"gpu_management", clk_gpu_str.str().c_str() , clk_mem_str.str().c_str(), NULL}; rv = exec_gpu_mgmt((char**)args_set_clk); if (rv) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: in gpu_management tool. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } mrLog() << ">>> 'nvml' (thread main): set core clk to " << clk_gpu_set << " MHz and mem clk to " << clk_mem_set << " MHz." << std::endl; } else { // We name the values *_set, but we don't set the frequency. We just print the current GPU frequency. mrLog() << ">>> 'nvml' (thread main): current core clk is " << clk_gpu_set << " MHz and mem clk is " << clk_mem_set << " MHz." << std::endl; } mrLog() << ">>> 'nvml' (thread main): wait for 15s to throttle gpu clocks." << std::endl; sleep(15); mrLog() << ">>> 'nvml' (thread main): initialization done." << std::endl << std::endl; }
int main() { nvmlReturn_t result; unsigned int device_count, i; // First initialize NVML library result = nvmlInit(); if (NVML_SUCCESS != result) { printf("Failed to initialize NVML: %s\n", nvmlErrorString(result)); printf("Press ENTER to continue...\n"); getchar(); return 1; } result = nvmlDeviceGetCount(&device_count); if (NVML_SUCCESS != result) { printf("Failed to query device count: %s\n", nvmlErrorString(result)); goto Error; } printf("Found %d device%s\n\n", device_count, device_count != 1 ? "s" : ""); printf("Listing devices:\n"); for (i = 0; i < device_count; i++) { nvmlDevice_t device; char name[NVML_DEVICE_NAME_BUFFER_SIZE]; nvmlPciInfo_t pci; nvmlComputeMode_t compute_mode; // Query for device handle to perform operations on a device // You can also query device handle by other features like: // nvmlDeviceGetHandleBySerial // nvmlDeviceGetHandleByPciBusId result = nvmlDeviceGetHandleByIndex(i, &device); if (NVML_SUCCESS != result) { printf("Failed to get handle for device %i: %s\n", i, nvmlErrorString(result)); goto Error; } result = nvmlDeviceGetName(device, name, NVML_DEVICE_NAME_BUFFER_SIZE); if (NVML_SUCCESS != result) { printf("Failed to get name of device %i: %s\n", i, nvmlErrorString(result)); goto Error; } // pci.busId is very useful to know which device physically you're talking to // Using PCI identifier you can also match nvmlDevice handle to CUDA device. result = nvmlDeviceGetPciInfo(device, &pci); if (NVML_SUCCESS != result) { printf("Failed to get pci info for device %i: %s\n", i, nvmlErrorString(result)); goto Error; } printf("%d. 
%s [%s]\n", i, name, pci.busId); // This is a simple example on how you can modify GPU's state result = nvmlDeviceGetComputeMode(device, &compute_mode); if (NVML_ERROR_NOT_SUPPORTED == result) printf("\t This is not CUDA capable device\n"); else if (NVML_SUCCESS != result) { printf("Failed to get compute mode for device %i: %s\n", i, nvmlErrorString(result)); goto Error; } else { // try to change compute mode printf("\t Changing device's compute mode from '%s' to '%s'\n", convertToComputeModeString(compute_mode), convertToComputeModeString(NVML_COMPUTEMODE_PROHIBITED)); result = nvmlDeviceSetComputeMode(device, NVML_COMPUTEMODE_PROHIBITED); if (NVML_ERROR_NO_PERMISSION == result) printf("\t\t Need root privileges to do that: %s\n", nvmlErrorString(result)); else if (NVML_ERROR_NOT_SUPPORTED == result) printf("\t\t Compute mode prohibited not supported. You might be running on\n" "\t\t windows in WDDM driver model or on non-CUDA capable GPU.\n"); else if (NVML_SUCCESS != result) { printf("\t\t Failed to set compute mode for device %i: %s\n", i, nvmlErrorString(result)); goto Error; } else { printf("\t Restoring device's compute mode back to '%s'\n", convertToComputeModeString(compute_mode)); result = nvmlDeviceSetComputeMode(device, compute_mode); if (NVML_SUCCESS != result) { printf("\t\t Failed to restore compute mode for device %i: %s\n", i, nvmlErrorString(result)); goto Error; } } } } result = nvmlShutdown(); if (NVML_SUCCESS != result) printf("Failed to shutdown NVML: %s\n", nvmlErrorString(result)); printf("All done.\n"); printf("Press ENTER to continue...\n"); getchar(); return 0; Error: result = nvmlShutdown(); if (NVML_SUCCESS != result) printf("Failed to shutdown NVML: %s\n", nvmlErrorString(result)); printf("Press ENTER to continue...\n"); getchar(); return 1; }