// Build the set of device features
static void get_device_features(struct device* dev)
{
	if(nvmlDeviceGetTemperature(dev->handle, NVML_TEMPERATURE_GPU,
	                            &dev->temperature) == NVML_SUCCESS) {
		dev->feature_support |= TEMPERATURE;
	}

	if(nvmlDeviceGetMemoryInfo(dev->handle, &dev->memory) == NVML_SUCCESS) {
		dev->feature_support |= MEMORY_INFO;
	}

	if(nvmlDeviceGetPowerUsage(dev->handle, &dev->power_usage) == NVML_SUCCESS) {
		dev->feature_support |= POWER_USAGE;
	}

	/* NVML_CLOCK_COUNT is the enum sentinel, not a queryable clock type; probe
	 * the memory clock instead so this check matches the clocks read later. */
	if(nvmlDeviceGetClockInfo(dev->handle, NVML_CLOCK_GRAPHICS,
	                          &dev->clock[NVML_CLOCK_GRAPHICS]) == NVML_SUCCESS
	   && nvmlDeviceGetClockInfo(dev->handle, NVML_CLOCK_SM,
	                             &dev->clock[NVML_CLOCK_SM]) == NVML_SUCCESS
	   && nvmlDeviceGetClockInfo(dev->handle, NVML_CLOCK_MEM,
	                             &dev->clock[NVML_CLOCK_MEM]) == NVML_SUCCESS) {
		dev->feature_support |= CLOCK_INFO;
	}

	if(nvmlDeviceGetFanSpeed(dev->handle, &dev->fan) == NVML_SUCCESS) {
		dev->feature_support |= FAN_INFO;
	}

	if(nvmlDeviceGetUtilizationRates(dev->handle, &dev->util) == NVML_SUCCESS) {
		dev->feature_support |= UTILIZATION_INFO;
	}
}
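/* The snippets that use `struct device` (above and in update_device_info below)
 * assume a feature bitmask and a per-device cache roughly like the following.
 * This is an illustrative sketch, not the project's actual header: the flag
 * names match the code above, but the exact field types and layout are
 * assumptions. */
enum {
	TEMPERATURE      = 1 << 0,
	MEMORY_INFO      = 1 << 1,
	POWER_USAGE      = 1 << 2,
	CLOCK_INFO       = 1 << 3,
	FAN_INFO         = 1 << 4,
	UTILIZATION_INFO = 1 << 5,
};

struct device {
	nvmlDevice_t      handle;
	unsigned int      feature_support;         /* OR of the flags above */
	unsigned int      temperature;             /* degrees C */
	nvmlMemory_t      memory;                  /* total/free/used, bytes */
	unsigned int      power_usage;             /* milliwatts */
	unsigned int      clock[NVML_CLOCK_COUNT]; /* MHz, indexed by nvmlClockType_t */
	unsigned int      fan;                     /* percent of max fan speed */
	nvmlUtilization_t util;                    /* gpu/memory utilization, percent */
	nvmlEventSet_t    event_set;               /* may be NULL */
};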
// NOTE: the template parameter list is not part of this excerpt; it is assumed
// here so the definition is self-contained (TSkipMs and TVariant are integral
// non-type template parameters of CMeasureNVML).
template <int TSkipMs, int TVariant>
void CMeasureNVML<TSkipMs, TVariant>::measure(void *pMsMeasurement, int32_t& rThreadNum) {
	nvmlReturn_t result;
	MS_MEASUREMENT_GPU *pMsMeasurementGpu = (MS_MEASUREMENT_GPU *) pMsMeasurement;

	// Power is sampled on every call.
	result = nvmlDeviceGetPowerUsage(mDevice, &(pMsMeasurementGpu->nvml_power_cur));
	if (NVML_SUCCESS != result) {
		mrLog.lock();
		mrLog(CLogger::scErr) << "!!! 'nvml thread' (thread #" << rThreadNum
			<< "): Error: no power usage reading possible. (file: " << __FILE__
			<< ", line: " << __LINE__ << ")" << std::endl;
		mrLog.unlock();
		exit(EXIT_FAILURE);
	}

	if(TVariant == VARIANT_FULL) {
		nvmlMemory_t memory;

		// The remaining counters are sampled only on every TSkipMs-th call.
		if(!(mMeasureCounter++ % TSkipMs)) {
			result = nvmlDeviceGetMemoryInfo(mDevice, &memory);
			if (NVML_SUCCESS != result) {
				mrLog.lock();
				mrLog(CLogger::scErr) << "!!! 'nvml thread' (thread #" << rThreadNum
					<< "): Error: cannot obtain memory information. (file: " << __FILE__
					<< ", line: " << __LINE__ << ")" << std::endl;
				mrLog.unlock();
				exit(EXIT_FAILURE);
			}
			pMsMeasurementGpu->nvml_memory_free_cur = (uint32_t)(memory.free >> 10);
			pMsMeasurementGpu->nvml_memory_used_cur = (uint32_t)(memory.used >> 10);

			result = nvmlDeviceGetPerformanceState(mDevice,
					(nvmlPstates_t*)&(pMsMeasurementGpu->internal.nvml_power_state));
			if (NVML_SUCCESS != result) {
				mrLog.lock();
				mrLog(CLogger::scErr) << "!!! 'nvml thread' (thread #" << rThreadNum
					<< "): Error: no performance state reading possible. (file: " << __FILE__
					<< ", line: " << __LINE__ << ")" << std::endl;
				mrLog.unlock();
				exit(EXIT_FAILURE);
			}

			nvmlTemperatureSensors_t sensorType = NVML_TEMPERATURE_GPU;
			result = nvmlDeviceGetTemperature(mDevice, sensorType,
					&(pMsMeasurementGpu->nvml_temperature_cur));
			if (NVML_SUCCESS != result) {
				mrLog.lock();
				mrLog(CLogger::scErr) << "!!! 'nvml thread' (thread #" << rThreadNum
					<< "): Error: cannot read temperature. (file: " << __FILE__
					<< ", line: " << __LINE__ << ")" << std::endl;
				mrLog.unlock();
				exit(EXIT_FAILURE);
			}

			result = nvmlDeviceGetClockInfo(mDevice, NVML_CLOCK_SM,
					&(pMsMeasurementGpu->nvml_clock_sm_cur));
			if (NVML_SUCCESS != result) {
				mrLog.lock();
				mrLog(CLogger::scErr) << "!!! 'nvml thread' (thread #" << rThreadNum
					<< "): Error: cannot read SM clock frequency. (file: " << __FILE__
					<< ", line: " << __LINE__ << ")" << std::endl;
				mrLog.unlock();
				exit(EXIT_FAILURE);
			}

			result = nvmlDeviceGetClockInfo(mDevice, NVML_CLOCK_MEM,
					&(pMsMeasurementGpu->nvml_clock_mem_cur));
			if (NVML_SUCCESS != result) {
				mrLog.lock();
				mrLog(CLogger::scErr) << "!!! 'nvml thread' (thread #" << rThreadNum
					<< "): Error: cannot read memory clock frequency. (file: " << __FILE__
					<< ", line: " << __LINE__ << ")" << std::endl;
				mrLog.unlock();
				exit(EXIT_FAILURE);
			}
		}
	}
}
unsigned long long getPowerUsage( nvmlDevice_t dev )
{
	unsigned int power = 0;
	nvmlReturn_t bad;

	bad = nvmlDeviceGetPowerUsage( dev, &power );
	if ( NVML_SUCCESS != bad ) {
		/* On failure, power stays 0; the caller only gets a debug message. */
		SUBDBG( "something went wrong %s\n", nvmlErrorString(bad) );
	}

	return (unsigned long long) power;
}
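/* Minimal standalone sketch showing how a power-usage helper like the one
 * above is typically driven: initialize NVML once, look up a device handle,
 * read the power draw (NVML reports milliwatts), and shut NVML down again.
 * This is an illustrative example, not code from any of the projects
 * excerpted here. */
#include <stdio.h>
#include <nvml.h>

int main(void)
{
	nvmlDevice_t dev;
	unsigned int power_mW = 0;

	if (nvmlInit() != NVML_SUCCESS)
		return 1;

	/* Query the first GPU; real code would iterate over nvmlDeviceGetCount(). */
	if (nvmlDeviceGetHandleByIndex(0, &dev) == NVML_SUCCESS &&
	    nvmlDeviceGetPowerUsage(dev, &power_mW) == NVML_SUCCESS) {
		printf("GPU 0 power draw: %.3f W\n", power_mW / 1000.0);
	}

	nvmlShutdown();
	return 0;
}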
static void update_device_info(struct monitor* mon)
{
	// TODO: NVML is thread safe, and the order we grab GPU information
	// here doesn't particularly matter, so might as well take advantage
	// of parallelism here.
	unsigned i;

	for(i = 0; i < mon->dev_count; ++i) {
		struct device* dev = &mon->devices[i];

		if(dev->feature_support & MEMORY_INFO) {
			NVML_TRY(nvmlDeviceGetMemoryInfo(dev->handle, &dev->memory));
		}

		if(dev->feature_support & TEMPERATURE) {
			NVML_TRY(nvmlDeviceGetTemperature(dev->handle, NVML_TEMPERATURE_GPU,
			                                  &dev->temperature));
		}

		if(dev->feature_support & POWER_USAGE) {
			NVML_TRY(nvmlDeviceGetPowerUsage(dev->handle, &dev->power_usage));
		}

		if(dev->feature_support & CLOCK_INFO) {
			for(nvmlClockType_t type = NVML_CLOCK_GRAPHICS; type < NVML_CLOCK_COUNT; ++type) {
				NVML_TRY(nvmlDeviceGetClockInfo(dev->handle, type, &dev->clock[type]));
			}
		}

		if(dev->feature_support & FAN_INFO) {
			NVML_TRY(nvmlDeviceGetFanSpeed(dev->handle, &dev->fan));
		}

		if(dev->event_set != NULL) {
			nvmlEventData_t data;

			NVML_TRY(nvmlEventSetWait(dev->event_set, &data, 1));
			// TODO: Do something with the returned information.
		}
	}

	mon->last_update = time(NULL);
}
/*_________________---------------------------__________________
  _________________        nvml_tick          __________________
  -----------------___________________________------------------
  Called every second
*/

void nvml_tick(HSP *sp) {
	if (sp->nvml.gpu_count > 0) {
		unsigned int i;
		for (i = 0; i < sp->nvml.gpu_count; ++i) {
			nvmlDevice_t gpu;
			unsigned int power_mW;
			nvmlUtilization_t util;

			if (NVML_SUCCESS != nvmlDeviceGetHandleByIndex(i, &gpu)) {
				continue;
			}
			if (NVML_SUCCESS == nvmlDeviceGetUtilizationRates(gpu, &util)) {
				sp->nvml.nvml_gpu_time += util.gpu * 10;    // accumulate as mS
				sp->nvml.nvml_mem_time += util.memory * 10; // accumulate as mS
			}
			if (NVML_SUCCESS == nvmlDeviceGetPowerUsage(gpu, &power_mW)) {
				sp->nvml.nvml_energy += power_mW;           // accumulate as mJ
			}
		}
	}
}
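/* Unit bookkeeping behind the "accumulate as mS/mJ" comments above, assuming
 * nvml_tick() really is called once per second as the header says:
 *   - util.gpu and util.memory are percentages of the last sample period, so
 *     percent-busy x 10 = milliseconds of busy time per 1000 ms tick;
 *   - nvmlDeviceGetPowerUsage() reports milliwatts, and 1 mW x 1 s = 1 mJ, so
 *     adding one reading per second accumulates energy in millijoules. */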
static int detectDevices( )
{
	nvmlReturn_t ret;
	nvmlEnableState_t mode = NVML_FEATURE_DISABLED;
	nvmlDevice_t handle;
	nvmlPciInfo_t info;

	cudaError_t cuerr;

	char busId[16];
	char name[64];
	char inforomECC[16];
	char inforomPower[16];
	char names[device_count][64];
	char nvml_busIds[device_count][16];

	float ecc_version = 0.0, power_version = 0.0;

	int i = 0, j = 0;
	int isTesla = 0;
	int isFermi = 0;
	int isUnique = 1;

	unsigned int temp = 0;

	/* list of nvml pci_busids */
	for (i = 0; i < device_count; i++) {
		ret = nvmlDeviceGetHandleByIndex( i, &handle );
		if ( NVML_SUCCESS != ret ) {
			SUBDBG("nvmlDeviceGetHandleByIndex(%d) failed\n", i);
			return PAPI_ESYS;
		}

		ret = nvmlDeviceGetPciInfo( handle, &info );
		if ( NVML_SUCCESS != ret ) {
			SUBDBG("nvmlDeviceGetPciInfo() failed %s\n", nvmlErrorString(ret) );
			return PAPI_ESYS;
		}
		strncpy(nvml_busIds[i], info.busId, 16);
	}

	/* We want to key our list of nvmlDevice_ts by each device's cuda index */
	for (i = 0; i < device_count; i++) {
		cuerr = cudaDeviceGetPCIBusId( busId, 16, i );
		if ( cudaSuccess != cuerr ) {
			SUBDBG("cudaDeviceGetPCIBusId failed.\n");
			return PAPI_ESYS;
		}
		for (j = 0; j < device_count; j++ ) {
			if ( !strncmp( busId, nvml_busIds[j], 16) ) {
				/* Only bail out if the handle lookup itself fails; otherwise
				   record the handle and move on to the next CUDA device. */
				ret = nvmlDeviceGetHandleByIndex( j, &devices[i] );
				if ( NVML_SUCCESS != ret ) {
					SUBDBG("nvmlDeviceGetHandleByIndex(%d, &devices[%d]) failed.\n", j, i);
					return PAPI_ESYS;
				}
				break;
			}
		}
	}

	memset(names, 0x0, device_count*64);

	/* So for each card, check what's queryable */
	for (i = 0; i < device_count; i++ ) {
		isTesla = 0;
		isFermi = 1;
		isUnique = 1;
		features[i] = 0;

		ret = nvmlDeviceGetName( devices[i], name, 64 );
		if ( NVML_SUCCESS != ret ) {
			SUBDBG("nvmlDeviceGetName failed \n");
			return PAPI_ESYS;
		}

		for (j = 0; j < i; j++ )
			if ( 0 == strncmp( name, names[j], 64 ) ) {
				/* if we have a match, and IF everything is sane,
				 * devices with the same name, e.g. Tesla C2075, share features */
				isUnique = 0;
				features[i] = features[j];
			}

		if ( isUnique ) {
			ret = nvmlDeviceGetInforomVersion( devices[i], NVML_INFOROM_ECC, inforomECC, 16 );
			if ( NVML_SUCCESS != ret ) {
				SUBDBG("nvmlDeviceGetInforomVersion carps %s\n", nvmlErrorString(ret) );
				isFermi = 0;
			}
			ret = nvmlDeviceGetInforomVersion( devices[i], NVML_INFOROM_POWER, inforomPower, 16 );
			if ( NVML_SUCCESS != ret ) {
				/* This implies the card is older than Fermi */
				SUBDBG("nvmlDeviceGetInforomVersion carps %s\n", nvmlErrorString(ret) );
				SUBDBG("Based upon the return of nvmlDeviceGetInforomVersion, we conclude this card is older than Fermi.\n");
				isFermi = 0;
			}

			ecc_version = strtof( inforomECC, NULL );
			power_version = strtof( inforomPower, NULL );

			ret = nvmlDeviceGetName( devices[i], name, 64 );
			isTesla = ( NULL == strstr(name, "Tesla") ) ? 0 : 1;

			/* For Tesla and Quadro products from Fermi and Kepler families. */
			if ( isFermi ) {
				features[i] |= FEATURE_CLOCK_INFO;
				num_events += 3;
			}

			/* For Tesla and Quadro products from Fermi and Kepler families.
			   requires NVML_INFOROM_ECC 2.0 or higher for location-based counts
			   requires NVML_INFOROM_ECC 1.0 or higher for all other ECC counts
			   requires ECC mode to be enabled.
			*/
			if ( isFermi ) {
				ret = nvmlDeviceGetEccMode( devices[i], &mode, NULL );
				if ( NVML_FEATURE_ENABLED == mode ) {
					if ( ecc_version >= 2.0 ) {
						features[i] |= FEATURE_ECC_LOCAL_ERRORS;
						num_events += 8; /* {single bit, two bit errors} x { reg, l1, l2, memory } */
					}
					if ( ecc_version >= 1.0 ) {
						features[i] |= FEATURE_ECC_TOTAL_ERRORS;
						num_events += 2; /* single bit errors, double bit errors */
					}
				}
			}

			/* For all discrete products with dedicated fans */
			features[i] |= FEATURE_FAN_SPEED;
			num_events++;

			/* For Tesla and Quadro products from Fermi and Kepler families. */
			if ( isFermi ) {
				features[i] |= FEATURE_MAX_CLOCK;
				num_events += 3;
			}

			/* For all products */
			features[i] |= FEATURE_MEMORY_INFO;
			num_events += 3; /* total, free, used */

			/* For Tesla and Quadro products from the Fermi and Kepler families. */
			if ( isFermi ) {
				features[i] |= FEATURE_PERF_STATES;
				num_events++;
			}

			/* For "GF11x" Tesla and Quadro products from the Fermi family
			   requires NVML_INFOROM_POWER 3.0 or higher
			   For Tesla and Quadro products from the Kepler family
			   does not require NVML_INFOROM_POWER */
			if ( isFermi ) {
				ret = nvmlDeviceGetPowerUsage( devices[i], &temp );
				if ( NVML_SUCCESS == ret ) {
					features[i] |= FEATURE_POWER;
					num_events++;
				}
			}

			/* For all discrete and S-class products. */
			features[i] |= FEATURE_TEMP;
			num_events++;

			/* For Tesla and Quadro products from the Fermi and Kepler families */
			if ( isFermi ) {
				features[i] |= FEATURE_UTILIZATION;
				num_events += 2;
			}

			strncpy( names[i], name, 64 );
		}
	}
	return PAPI_OK;
}