Beispiel #1
/*
 * Fills in the per-device information held by `mon`.
 *
 * Visible steps: stores the host name in mon->hostname (gethostname is given
 * a length of 64), allocates mon->devices with calloc for mon->dev_count
 * entries, and for each index builds a zeroed local `struct device`, asks
 * NVML for its handle, serial, UUID, PCI info and memory info, and copies the
 * local struct into mon->devices[i]. mon->last_update is set to time(NULL).
 *
 * NOTE(review): this excerpt has lost its braces and some statements; the
 * calls on the nvmlDeviceGetName and nvmlDeviceGetMaxClockInfo lines are
 * truncated. The comments below describe only what is still visible.
 * NOTE(review): no assignment to mon->dev_count is visible here; presumably
 * it is set before the calloc - confirm.
 * NOTE(review): NVML_TRY is defined elsewhere; its result is compared with 0
 * below, so it presumably yields the NVML return code - confirm.
 */
static void init_device_info(struct monitor* mon)
  gethostname(mon->hostname, 64);



  /* NOTE(review): no NULL check on the calloc result is visible. */
  mon->devices = calloc(mon->dev_count, sizeof(struct device));

  for(unsigned i = 0; i < mon->dev_count; ++i) {
    /* Work on a zero-initialised local copy; it is stored into the array
     * by the assignment to mon->devices[i] further down. */
    struct device dev;
    memset(&dev, 0, sizeof(struct device));

    dev.index = i;

    NVML_TRY(nvmlDeviceGetHandleByIndex(i, &dev.handle));

    NVML_TRY(nvmlDeviceGetName(dev.handle,, sizeof(;
    NVML_TRY(nvmlDeviceGetSerial(dev.handle, dev.serial, sizeof(dev.serial)));
    NVML_TRY(nvmlDeviceGetUUID(dev.handle, dev.uuid, sizeof(dev.uuid)));

    NVML_TRY(nvmlDeviceGetPciInfo(dev.handle, &dev.pci));
    NVML_TRY(nvmlDeviceGetMemoryInfo(dev.handle, &dev.memory));

    /* Register for the supported event types when the query reports 0;
     * otherwise the device gets a NULL event set. */
    unsigned long long event_types;
    if(0 == NVML_TRY(nvmlDeviceGetSupportedEventTypes(dev.handle, &event_types))) {
      NVML_TRY(nvmlDeviceRegisterEvents(dev.handle, event_types, dev.event_set));
    } else {
      dev.event_set = NULL;

    /* Iterates over the clock types from NVML_CLOCK_GRAPHICS up to (not
     * including) NVML_CLOCK_COUNT; the loop body is truncated here. */
    for(nvmlClockType_t type = NVML_CLOCK_GRAPHICS; type < NVML_CLOCK_COUNT;
        ++type) {
      if(NVML_TRY(nvmlDeviceGetMaxClockInfo(dev.handle, type,


    mon->devices[i] = dev;

  mon->last_update = time(NULL);
Beispiel #2
 * Class:     org_apache_hadoop_yarn_server_nodemanager_containermanager_launcher_GPUMonitor
 * Method:    initnvml
 * Signature: ()Ljava/lang/String;
// JNI entry point: initialises NVML, looks up device 0 and returns a Java
// string built from the accumulated messages in `err`.
//
// Visible behaviour:
//   - nvmlInit() failure: the message is printed and appended to `err`.
//   - nvmlDeviceGetHandleByIndex(0, ...) or nvmlDeviceGetName failure: the
//     message is printed and appended, nvmlShutdown() is called and the
//     function returns 0.
//   - otherwise "Device : <name>\n" is printed, appended to `err`, and `err`
//     is returned through env->NewStringUTF.
//
// NOTE(review): braces and some lines are missing from this excerpt; no
// declarations of `device` and `name` are visible.
// NOTE(review): `i` is printed in the error messages but no assignment to it
// is visible; the handle lookup always uses index 0.
// NOTE(review): sprintf into the 200-byte `sentence` buffer is unbounded.
JNIEXPORT jstring JNICALL Java_org_apache_hadoop_yarn_server_nodemanager_containermanager_launcher_GPUMonitor_initnvml
  (JNIEnv *env, jobject)
    nvmlReturn_t result;
    unsigned int device_count, i;
	char sentence[200];
	std::string err = "";

    result = nvmlInit();
    if (NVML_SUCCESS != result) { 
        printf("Failed to initialize NVML: %s\n", nvmlErrorString(result));
        sprintf(sentence, "Failed to initialize NVML: %s\n", nvmlErrorString(result));
		err.append( (std::string)sentence );
	// Only the first device (index 0) is queried.
	result = nvmlDeviceGetHandleByIndex(0, &device);
	if (NVML_SUCCESS != result) { 
		printf("Failed to get handle for device %i: %s\n", i, nvmlErrorString(result));
		sprintf(sentence,"Failed to get handle for device %i: %s\n", i, nvmlErrorString(result));
		err.append( (std::string)sentence );
		// The text appended to `err` is not returned on this path; 0 is.
		result = nvmlShutdown();
		return 0;
	result = nvmlDeviceGetName(device, name, NVML_DEVICE_NAME_BUFFER_SIZE);
	if (NVML_SUCCESS != result) { 
		printf("Failed to get name of device %i: %s\n", i, nvmlErrorString(result));
		sprintf(sentence,"Failed to get name of device %i: %s\n", i, nvmlErrorString(result));
		err.append( (std::string)sentence );
		result = nvmlShutdown();
		return 0;
	printf("Device : %s\n",name);
	sprintf(sentence,"Device : %s\n",name);
	err.append( (std::string)sentence );
	return env->NewStringUTF( err.c_str() );
Beispiel #3
		/*
		 * Builds nvml_native_table: allocates num_events entries with
		 * papi_malloc, zeroes them, and then, for every device whose name has
		 * not been seen on an earlier device, writes one entry (name,
		 * description, type and, where applicable, options) per event of each
		 * feature flagged in features[i]. Spaces in the device name are
		 * replaced by underscores before it is used in event names.
		 *
		 * NOTE(review): this excerpt has lost its braces and some statements.
		 * In particular no advance of `entry` is visible between consecutive
		 * events, and the compound literals assigned to options.ecc_opts are
		 * not closed - confirm against the complete source.
		 * NOTE(review): the papi_malloc result and the nvmlDeviceGetName
		 * return code are not checked in the visible lines.
		 */
		static void
createNativeEvents( )
		char name[64];
		char sanitized_name[PAPI_MAX_STR_LEN];
		char names[device_count][64];

		int i, nameLen = 0, j;
		int isUnique = 1;

		nvml_native_event_entry_t* entry;
		nvmlReturn_t ret;

		nvml_native_table = (nvml_native_event_entry_t*) papi_malloc( 
						sizeof(nvml_native_event_entry_t) * num_events ); 	
		memset( nvml_native_table, 0x0, sizeof(nvml_native_event_entry_t) * num_events );
		entry = &nvml_native_table[0];

		for (i=0; i < device_count; i++ ) {
				memset( names[i], 0x0, 64 );
				isUnique = 1;
				ret = nvmlDeviceGetName( devices[i], name, 64 );

				/* A device is "unique" when no earlier device has the same name. */
				for (j=0; j < i; j++ ) 
						if ( 0 == strncmp( name, names[j], 64 ) )
								isUnique = 0;

				if ( isUnique ) {
						/* Event names must not contain spaces: map ' ' to '_'. */
						nameLen = strlen(name);
						strncpy(sanitized_name, name, PAPI_MAX_STR_LEN );
						for (j=0; j < nameLen; j++)
								if ( ' ' == sanitized_name[j] )
										sanitized_name[j] = '_';

						/* Current clocks: graphics, SM and memory. */
						if ( HAS_FEATURE( features[i], FEATURE_CLOCK_INFO ) ) {
								sprintf( entry->name, "NVML.%s.graphics_clock", sanitized_name );
								strncpy(entry->description,"Graphics clock domain (MHz).", PAPI_MAX_STR_LEN );
								entry->options.clock = NVML_CLOCK_GRAPHICS;
								entry->type = FEATURE_CLOCK_INFO;

								sprintf( entry->name, "NVML.%s.sm_clock", sanitized_name);
								strncpy(entry->description,"SM clock domain (MHz).", PAPI_MAX_STR_LEN);
								entry->options.clock = NVML_CLOCK_SM;
								entry->type = FEATURE_CLOCK_INFO;

								sprintf( entry->name, "NVML.%s.memory_clock", sanitized_name);
								strncpy(entry->description,"Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
								entry->options.clock = NVML_CLOCK_MEM;
								entry->type = FEATURE_CLOCK_INFO;

						/* Location-based ECC counters: {single, double} bit x {L1, L2, memory, register file}. */
						if ( HAS_FEATURE( features[i], FEATURE_ECC_LOCAL_ERRORS ) ) { 
								sprintf(entry->name, "NVML.%s.l1_single_ecc_errors", sanitized_name);
								strncpy(entry->description,"L1 cache single bit ECC", PAPI_MAX_STR_LEN);
								entry->options.ecc_opts = (struct local_ecc){
										.bits = NVML_SINGLE_BIT_ECC,
												.which_one = LOCAL_ECC_L1,
								entry->type = FEATURE_ECC_LOCAL_ERRORS;

								sprintf(entry->name, "NVML.%s.l2_single_ecc_errors", sanitized_name);
								strncpy(entry->description,"L2 cache single bit ECC", PAPI_MAX_STR_LEN);
								entry->options.ecc_opts = (struct local_ecc){
										.bits = NVML_SINGLE_BIT_ECC,
												.which_one = LOCAL_ECC_L2,
								entry->type = FEATURE_ECC_LOCAL_ERRORS;

								sprintf(entry->name, "NVML.%s.memory_single_ecc_errors", sanitized_name);
								strncpy(entry->description,"Device memory single bit ECC", PAPI_MAX_STR_LEN);
								entry->options.ecc_opts = (struct local_ecc){
										.bits = NVML_SINGLE_BIT_ECC,
												.which_one = LOCAL_ECC_MEM,
								entry->type = FEATURE_ECC_LOCAL_ERRORS;

								sprintf(entry->name, "NVML.%s.regfile_single_ecc_errors", sanitized_name);
								strncpy(entry->description,"Register file single bit ECC", PAPI_MAX_STR_LEN);
								entry->options.ecc_opts = (struct local_ecc){
										.bits = NVML_SINGLE_BIT_ECC,
												.which_one = LOCAL_ECC_REGFILE,
								entry->type = FEATURE_ECC_LOCAL_ERRORS;

								/* NOTE(review): "1l" in the event name below looks like a
								 * typo for "l1" (compare "l1_single_ecc_errors") - confirm. */
								sprintf(entry->name, "NVML.%s.1l_double_ecc_errors", sanitized_name);
								strncpy(entry->description,"L1 cache double bit ECC", PAPI_MAX_STR_LEN);
								entry->options.ecc_opts = (struct local_ecc){
										.bits = NVML_DOUBLE_BIT_ECC,
												.which_one = LOCAL_ECC_L1,
								entry->type = FEATURE_ECC_LOCAL_ERRORS;

								sprintf(entry->name, "NVML.%s.l2_double_ecc_errors", sanitized_name);
								strncpy(entry->description,"L2 cache double bit ECC", PAPI_MAX_STR_LEN);
								entry->options.ecc_opts = (struct local_ecc){
										.bits = NVML_DOUBLE_BIT_ECC,
												.which_one = LOCAL_ECC_L2,
								entry->type = FEATURE_ECC_LOCAL_ERRORS;

								sprintf(entry->name, "NVML.%s.memory_double_ecc_errors", sanitized_name);
								strncpy(entry->description,"Device memory double bit ECC", PAPI_MAX_STR_LEN);
								entry->options.ecc_opts = (struct local_ecc){
										.bits = NVML_DOUBLE_BIT_ECC,
												.which_one = LOCAL_ECC_MEM,
								entry->type = FEATURE_ECC_LOCAL_ERRORS;

								sprintf(entry->name, "NVML.%s.regfile_double_ecc_errors", sanitized_name);
								strncpy(entry->description,"Register file double bit ECC", PAPI_MAX_STR_LEN);
								entry->options.ecc_opts = (struct local_ecc){
										.bits = NVML_DOUBLE_BIT_ECC,
												.which_one = LOCAL_ECC_REGFILE,
								entry->type = FEATURE_ECC_LOCAL_ERRORS;

						if ( HAS_FEATURE( features[i], FEATURE_FAN_SPEED ) ) {
								sprintf( entry->name, "NVML.%s.fan_speed", sanitized_name);
								strncpy(entry->description,"The fan speed expressed as a percent of the maximum, i.e. full speed is 100%", PAPI_MAX_STR_LEN);
								entry->type = FEATURE_FAN_SPEED;

						/* Maximum clocks: graphics, SM and memory. */
						if ( HAS_FEATURE( features[i], FEATURE_MAX_CLOCK ) ) {
								sprintf( entry->name, "NVML.%s.graphics_max_clock", sanitized_name);
								strncpy(entry->description,"Maximal Graphics clock domain (MHz).", PAPI_MAX_STR_LEN);
								entry->options.clock = NVML_CLOCK_GRAPHICS;
								entry->type = FEATURE_MAX_CLOCK;

								sprintf( entry->name, "NVML.%s.sm_max_clock", sanitized_name);
								strncpy(entry->description,"Maximal SM clock domain (MHz).", PAPI_MAX_STR_LEN);
								entry->options.clock = NVML_CLOCK_SM;
								entry->type = FEATURE_MAX_CLOCK;

								sprintf( entry->name, "NVML.%s.memory_max_clock", sanitized_name);
								strncpy(entry->description,"Maximal Memory clock domain (MHz).", PAPI_MAX_STR_LEN);
								entry->options.clock = NVML_CLOCK_MEM;
								entry->type = FEATURE_MAX_CLOCK;

						/* Frame-buffer memory: total, unallocated and allocated. */
						if ( HAS_FEATURE( features[i], FEATURE_MEMORY_INFO ) ) {
								sprintf( entry->name, "NVML.%s.total_memory", sanitized_name);
								strncpy(entry->description,"Total installed FB memory (in bytes).", PAPI_MAX_STR_LEN);
								entry->options.which_one = MEMINFO_TOTAL_MEMORY;
								entry->type = FEATURE_MEMORY_INFO;

								sprintf( entry->name, "NVML.%s.unallocated_memory", sanitized_name);
								strncpy(entry->description,"Uncallocated FB memory (in bytes).", PAPI_MAX_STR_LEN);
								entry->options.which_one = MEMINFO_UNALLOCED;
								entry->type = FEATURE_MEMORY_INFO;

								sprintf( entry->name, "NVML.%s.allocated_memory", sanitized_name);
								strncpy(entry->description,	"Allocated FB memory (in bytes). Note that the driver/GPU always sets aside a small amount of memory for bookkeeping.", PAPI_MAX_STR_LEN);
								entry->options.which_one = MEMINFO_ALLOCED;
								entry->type = FEATURE_MEMORY_INFO;

						if ( HAS_FEATURE( features[i], FEATURE_PERF_STATES ) ) {
								sprintf( entry->name, "NVML.%s.pstate", sanitized_name);
								strncpy(entry->description,"The performance state of the device.", PAPI_MAX_STR_LEN);
								entry->type = FEATURE_PERF_STATES;

						if ( HAS_FEATURE( features[i], FEATURE_POWER ) ) {
								sprintf( entry->name, "NVML.%s.power", sanitized_name);
								strncpy(entry->description,"Power usage reading for the device, in miliwatts. This is the power draw for the entire board, including GPU, memory, etc.\n The reading is accurate to within a range of +/-5 watts.", PAPI_MAX_STR_LEN);
								entry->type = FEATURE_POWER;

						if ( HAS_FEATURE( features[i], FEATURE_TEMP ) ) {
								sprintf( entry->name, "NVML.%s.temperature", sanitized_name);
								strncpy(entry->description,"Current temperature readings for the device, in degrees C.", PAPI_MAX_STR_LEN);
								entry->type = FEATURE_TEMP;

						/* NOTE(review): the single-bit and double-bit totals below are
						 * both given the name "total_ecc_errors"; only the description
						 * and the .bits option differ - confirm whether distinct names
						 * were intended. */
						if ( HAS_FEATURE( features[i], FEATURE_ECC_TOTAL_ERRORS ) ) {
								sprintf( entry->name, "NVML.%s.total_ecc_errors", sanitized_name);
								strncpy(entry->description,"Total single bit errors.", PAPI_MAX_STR_LEN);
								entry->options.ecc_opts = (struct local_ecc){ 
										.bits = NVML_SINGLE_BIT_ECC, 
								entry->type = FEATURE_ECC_TOTAL_ERRORS;

								sprintf( entry->name, "NVML.%s.total_ecc_errors", sanitized_name);
								strncpy(entry->description,"Total double bit errors.", PAPI_MAX_STR_LEN);
								entry->options.ecc_opts = (struct local_ecc){ 
										.bits = NVML_DOUBLE_BIT_ECC, 
								entry->type = FEATURE_ECC_TOTAL_ERRORS;

						if ( HAS_FEATURE( features[i], FEATURE_UTILIZATION ) ) {
								sprintf( entry->name, "NVML.%s.gpu_utilization", sanitized_name);
								strncpy(entry->description,"Percent of time over the past second during which one or more kernels was executing on the GPU.", PAPI_MAX_STR_LEN);
								entry->options.which_one = GPU_UTILIZATION;
								entry->type = FEATURE_UTILIZATION;

								sprintf( entry->name, "NVML.%s.memory_utilization", sanitized_name);
								strncpy(entry->description,"Percent of time over the past second during which global (device) memory was being read or written.", PAPI_MAX_STR_LEN);
								entry->options.which_one = MEMORY_UTILIZATION;
								entry->type = FEATURE_UTILIZATION;
						/* Remember this device's name for the uniqueness check of later devices. */
						strncpy( names[i], name, 64); 
Beispiel #4
		/*
		 * Maps NVML devices to CUDA device indices and records, per device, which
		 * feature groups can be queried.
		 *
		 * Visible steps:
		 *   1. Collect the PCI bus id of every NVML device into nvml_busIds.
		 *   2. For each CUDA index i, find the NVML index j with the same bus id
		 *      and store that NVML handle in devices[i].
		 *   3. For each device, either copy features[] from an earlier device with
		 *      the same name, or probe the card and OR feature flags into
		 *      features[i], adding to num_events for some of them.
		 *
		 * Returns PAPI_OK at the end; the visible failure paths return PAPI_ESYS.
		 *
		 * NOTE(review): braces and some lines are missing from this excerpt, so
		 * the exact nesting of the statements cannot be confirmed here.
		 * NOTE(review): isFermi is initialised to 0 and only ever assigned 0 in
		 * the visible lines, so the `if ( isFermi )` branches cannot run as shown;
		 * confirm the intended initial value.
		 * NOTE(review): isTesla and power_version are assigned but not read in the
		 * visible lines.
		 */
		static int 
detectDevices( ) 
		nvmlReturn_t ret;
		nvmlEnableState_t mode = NVML_FEATURE_DISABLED;
		nvmlDevice_t handle;
		nvmlPciInfo_t info;

		cudaError_t cuerr;

		char busId[16];
		char name[64];
		char inforomECC[16];
		char inforomPower[16];
		char names[device_count][64];
		char nvml_busIds[device_count][16];

		float ecc_version = 0.0, power_version = 0.0;

		int i = 0,
			j = 0;
		int isTesla = 0;
		int isFermi	= 0;
		int isUnique = 1;

		unsigned int temp = 0;

		/* list of nvml pci_busids */
	for (i=0; i < device_count; i++) {
		ret = nvmlDeviceGetHandleByIndex( i, &handle );	
		if ( NVML_SUCCESS != ret ) {
			SUBDBG("nvmlDeviceGetHandleByIndex(%d) failed\n", i);
			return PAPI_ESYS;

		ret = nvmlDeviceGetPciInfo( handle, &info );
		if ( NVML_SUCCESS != ret ) {
			SUBDBG("nvmlDeviceGetPciInfo() failed %s\n", nvmlErrorString(ret) );
			return PAPI_ESYS;

		strncpy(nvml_busIds[i], info.busId, 16);

	/* We want to key our list of nvmlDevice_ts by each device's cuda index */
	for (i=0; i < device_count; i++) {
			cuerr = cudaDeviceGetPCIBusId( busId, 16, i );
			if ( CUDA_SUCCESS != cuerr ) {
				SUBDBG("cudaDeviceGetPCIBusId failed.\n");
				return PAPI_ESYS;
			for (j=0; j < device_count; j++ ) {
					if ( !strncmp( busId, nvml_busIds[j], 16) ) {
							ret = nvmlDeviceGetHandleByIndex(j, &devices[i] );
							/* NOTE(review): unlike the other checks, this `if` has no
							 * opening brace on its line; if the original really has no
							 * braces here, the return below is unconditional - confirm. */
							if ( NVML_SUCCESS != ret )
								SUBDBG("nvmlDeviceGetHandleByIndex(%d, &devices[%d]) failed.\n", j, i);
								return PAPI_ESYS;

		memset(names, 0x0, device_count*64);
		/* So for each card, check whats querable */
		for (i=0; i < device_count; i++ ) {
				isUnique = 1;
				features[i] = 0;

				ret = nvmlDeviceGetName( devices[i], name, 64 );
				if ( NVML_SUCCESS != ret) {
					SUBDBG("nvmlDeviceGetName failed \n");
					return PAPI_ESYS;

				for (j=0; j < i; j++ ) 
						if ( 0 == strncmp( name, names[j], 64 ) ) {
								/* if we have a match, and IF everything is sane, 
								 * devices with the same name eg Tesla C2075 share features */
								isUnique = 0;
								features[i] = features[j];


				if ( isUnique ) {
						ret = nvmlDeviceGetInforomVersion( devices[i], NVML_INFOROM_ECC, inforomECC, 16);
						if ( NVML_SUCCESS != ret ) {
								SUBDBG("nvmlGetInforomVersion carps %s\n", nvmlErrorString(ret ) );
								isFermi = 0;
						ret = nvmlDeviceGetInforomVersion( devices[i], NVML_INFOROM_POWER, inforomPower, 16);
						if ( NVML_SUCCESS != ret ) {
								/* This implies the card is older then Fermi */
								SUBDBG("nvmlGetInforomVersion carps %s\n", nvmlErrorString(ret ) );
								SUBDBG("Based upon the return to nvmlGetInforomVersion, we conclude this card is older then Fermi.\n");
								isFermi = 0;

						/* NOTE(review): the inforom buffers are parsed even when the
						 * queries above failed, as far as the visible lines show. */
						ecc_version = strtof(inforomECC, NULL );
						power_version = strtof( inforomPower, NULL);

						ret = nvmlDeviceGetName( devices[i], name, 64 );
						isTesla = ( NULL == strstr(name, "Tesla") ) ? 0:1;

						/* For Tesla and Quadro products from Fermi and Kepler families. */
						if ( isFermi ) {
								features[i] |= FEATURE_CLOCK_INFO;
								num_events += 3;

						/* 	For Tesla and Quadro products from Fermi and Kepler families. 
							requires NVML_INFOROM_ECC 2.0 or higher for location-based counts
							requires NVML_INFOROM_ECC 1.0 or higher for all other ECC counts
							requires ECC mode to be enabled. */
						if ( isFermi ) {
								ret = nvmlDeviceGetEccMode( devices[i], &mode, NULL );
								if ( NVML_FEATURE_ENABLED == mode) {
										if ( ecc_version >= 2.0 ) {
												features[i] |= FEATURE_ECC_LOCAL_ERRORS;
												num_events += 8; /* {single bit, two bit errors} x { reg, l1, l2, memory } */
										if ( ecc_version >= 1.0 ) {
												features[i] |= FEATURE_ECC_TOTAL_ERRORS;
												num_events += 2; /* single bit errors, double bit errors */

						/* For all discrete products with dedicated fans */
						/* NOTE(review): no num_events increment is visible for the fan
						 * speed, performance state, power and temperature features. */
						features[i] |= FEATURE_FAN_SPEED;

						/* For Tesla and Quadro products from Fermi and Kepler families. */
						if ( isFermi ) {
								features[i] |= FEATURE_MAX_CLOCK;
								num_events += 3;

						/* For all products */
						features[i] |= FEATURE_MEMORY_INFO;
						num_events += 3; /* total, free, used */

						/* For Tesla and Quadro products from the Fermi and Kepler families. */
						if ( isFermi ) {
								features[i] |= FEATURE_PERF_STATES;

						/* 	For "GF11x" Tesla and Quadro products from the Fermi family
							requires NVML_INFOROM_POWER 3.0 or higher
							For Tesla and Quadro products from the Kepler family
							does not require NVML_INFOROM_POWER */
						if ( isFermi ) {
								/* Power support is probed by attempting a reading. */
								ret = nvmlDeviceGetPowerUsage( devices[i], &temp);
								if ( NVML_SUCCESS == ret ) {
										features[i] |= FEATURE_POWER;

						/* For all discrete and S-class products. */
						features[i] |= FEATURE_TEMP;

						/* For Tesla and Quadro products from the Fermi and Kepler families */
						if (isFermi) {
								features[i] |= FEATURE_UTILIZATION;
								num_events += 2;

						/* Remember the name so later devices with the same name reuse these features. */
						strncpy( names[i], name, 64); 

		return PAPI_OK;

Beispiel #5
/*
 * NVML discovery callback: adds one OS-device object per NVML device to the
 * backend's topology.
 *
 * Returns 0 without adding anything when neither I/O flag is set on the
 * topology, when the topology is not "thissystem", when nvmlInit() fails, or
 * when the device count query fails or reports zero devices. Otherwise the
 * last visible statement returns nb, the NVML device count.
 *
 * Each object is named "nvml<i>", typed HWLOC_OBJ_OSDEV_GPU, annotated with
 * Backend/GPUVendor/GPUModel and, when non-empty, NVIDIASerial/NVIDIAUUID
 * info strings, and inserted under the PCI parent found from the device's PCI
 * info, or under the root object when no parent is found.
 *
 * NOTE(review): braces and some lines are missing from this excerpt; no
 * nvmlShutdown() call is visible.
 */
static int
hwloc_nvml_discover(struct hwloc_backend *backend)
  struct hwloc_topology *topology = backend->topology;
  nvmlReturn_t ret;
  unsigned nb, i;

  if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO)))
    return 0;

  if (!hwloc_topology_is_thissystem(topology)) {
    hwloc_debug("%s", "\nno NVML detection (not thissystem)\n");
    return 0;

  ret = nvmlInit();
  if (NVML_SUCCESS != ret)
    return 0;
  ret = nvmlDeviceGetCount(&nb);
  if (NVML_SUCCESS != ret || !nb) {
    return 0;

  for(i=0; i<nb; i++) {
    nvmlPciInfo_t pci;
    nvmlDevice_t device;
    hwloc_obj_t osdev, parent;
    char buffer[64];

    ret = nvmlDeviceGetHandleByIndex(i, &device);
    assert(ret == NVML_SUCCESS);

    osdev = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1);
    snprintf(buffer, sizeof(buffer), "nvml%d", i);
    osdev->name = strdup(buffer);
    osdev->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN;
    osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU;

    hwloc_obj_add_info(osdev, "Backend", "NVML");
    hwloc_obj_add_info(osdev, "GPUVendor", "NVIDIA Corporation");

    /* The buffer is emptied before each query; the return code is stored
     * but not tested, and the model is added even if it is empty. */
    buffer[0] = '\0';
    ret = nvmlDeviceGetName(device, buffer, sizeof(buffer));
    hwloc_obj_add_info(osdev, "GPUModel", buffer);

    /* these may fail with NVML_ERROR_NOT_SUPPORTED on old devices */
    buffer[0] = '\0';
    ret = nvmlDeviceGetSerial(device, buffer, sizeof(buffer));
    if (buffer[0] != '\0')
      hwloc_obj_add_info(osdev, "NVIDIASerial", buffer);

    buffer[0] = '\0';
    ret = nvmlDeviceGetUUID(device, buffer, sizeof(buffer));
    if (buffer[0] != '\0')
      hwloc_obj_add_info(osdev, "NVIDIAUUID", buffer);

    /* Locate the PCI object this GPU sits on: first below the root, then
     * via the bus-id parent lookup. */
    parent = NULL;
    if (NVML_SUCCESS == nvmlDeviceGetPciInfo(device, &pci)) {
      parent = hwloc_pci_belowroot_find_by_busid(topology, pci.domain, pci.bus, pci.device, 0);
      if (!parent)
	parent = hwloc_pci_find_busid_parent(topology, pci.domain, pci.bus, pci.device, 0);
      if (parent && parent->type == HWLOC_OBJ_PCI_DEVICE) {
	unsigned maxwidth = 0, maxgen = 0;
	float lanespeed;
	nvmlDeviceGetMaxPcieLinkWidth(device, &maxwidth);
	nvmlDeviceGetMaxPcieLinkGeneration(device, &maxgen);
	/* NOTE(review): the PCIe comment that follows has lost its closing
	 * marker in this excerpt, so the lanespeed assignment is textually part
	 * of that comment up to the terminator after "Gbit/s per lane" and
	 * lanespeed would be read uninitialised - restore the terminator. */
	/* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding    = 0.25GB/s data-rate per lane
	 * PCIe Gen2 = 5  GT/s signal-rate per lane with 8/10 encoding    = 0.5 GB/s data-rate per lane
	 * PCIe Gen3 = 8  GT/s signal-rate per lane with 128/130 encoding = 1   GB/s data-rate per lane
	lanespeed = maxgen <= 2 ? 2.5 * maxgen * 0.8 : 8.0 * 128/130; /* Gbit/s per lane */
	if (lanespeed * maxwidth)
	  /* we found the max link speed, replace the current link speed found by pci (or none) */
	  parent->attr->pcidev.linkspeed = lanespeed * maxwidth / 8; /* GB/s */
    /* No PCI parent found: attach the OS device to the topology root. */
    if (!parent)
      parent = hwloc_get_root_obj(topology);

    hwloc_insert_object_by_parent(topology, parent, osdev);

  return nb;
Beispiel #6
	// Initialises NVML for a single GPU and logs its capabilities.
	//
	// Visible steps: nvmlInit, device-count query (more than one device is
	// reported as an error), handle lookup for device 0 into mDevice, then
	// queries for name, PCI info, power-management mode, performance state,
	// supported memory clocks, supported graphics clocks at the minimum and
	// maximum memory clock, and memory info. The gpu_management helper is
	// invoked through exec_gpu_mgmt with "-p 1", and again with "-c"/"-m"
	// arguments when mGpuFrequency is GPU_FREQUENCY_MIN or GPU_FREQUENCY_MAX.
	//
	// NOTE(review): this excerpt has lost its braces and several lines: the
	// template header, the stream objects in front of the bare `<<` lines, the
	// case labels of the switch, the declaration of `name`, and part of the
	// memory_total expression are not visible. No action after the error logs
	// (return/exit) is visible either.
	void CMeasureNVML<TSkipMs, TVariant>::init(void) {
		if(TVariant == VARIANT_FULL) {
			<< ">>> 'nvml' (full version)" << std::endl;
		} else {
			<< ">>> 'nvml' (light version)" << std::endl;
		nvmlReturn_t result;
		int32_t rv;
		char const* args_set_pm[] = {"gpu_management", "-p 1", NULL};
		uint32_t device_count;
		nvmlPciInfo_t pci;
		nvmlEnableState_t mode;
		std::string modes[2] = {"disabled", "enabled"};
		std::stringstream clk_gpu_str;
		std::stringstream clk_mem_str;
		nvmlPstates_t power_state;
		nvmlMemory_t memory;
		// `count` is the capacity of the clock arrays below; the *_cnt
		// variables start at that capacity and are passed by address to NVML.
		const uint32_t count			= 32;
		uint32_t clk_mem_cnt			= count;
		uint32_t clk_mem[count];
		uint32_t clk_mem_max			= 0;
		uint32_t clk_mem_min			= 0xffffffff;
		uint32_t clk_mem_set			= 0;
		uint32_t clk_gpu_min_arr_cnt	= count;
		uint32_t clk_gpu_min_arr[clk_gpu_min_arr_cnt];
		uint32_t clk_gpu_min			= 0xffffffff;
		uint32_t clk_gpu_max_arr_cnt	= count;
		uint32_t clk_gpu_max_arr[clk_gpu_max_arr_cnt];
		uint32_t clk_gpu_max			= 0;
		uint32_t clk_gpu_set			= 0;
		uint32_t memory_total			= 0;
		result = nvmlInit();
		if (NVML_SUCCESS != result) {
			mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot initialize nvml library. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl;
		result = nvmlDeviceGetCount(&device_count);
		if (NVML_SUCCESS != result) {
			mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot query device count. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl;
		if (device_count > 1) {
			mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: this software has be rewritten if you want to support more than 1 device. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl;
		mrLog() << ">>> 'nvml' (thread main): get gpu device handler...";
		result = nvmlDeviceGetHandleByIndex(0, &mDevice);
		if (NVML_SUCCESS != result) {
			mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot get device handler. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl;
		mrLog() << " done!" << std::endl;
		result = nvmlDeviceGetName(mDevice, name, NVML_DEVICE_NAME_BUFFER_SIZE);
		if (NVML_SUCCESS != result) {
			mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot get device name. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl;
		result = nvmlDeviceGetPciInfo(mDevice, &pci);
		if (NVML_SUCCESS != result) {
			mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot get pci information. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl;
		result = nvmlDeviceGetPowerManagementMode(mDevice, &mode);
		if (NVML_SUCCESS != result) {
			mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: no power managment supported. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl;
		result = nvmlDeviceGetPerformanceState(mDevice, &power_state);
		if (NVML_SUCCESS != result) {
			mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: no performance state reading possible. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl;
		result = nvmlDeviceGetSupportedMemoryClocks(mDevice, &clk_mem_cnt, clk_mem);
		if (NVML_SUCCESS != result) {
			mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot obtain memory clock. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl;
		// Scan the supported memory clocks for their minimum and maximum.
		for (int i=0; i<(int32_t)clk_mem_cnt; ++i) {
			clk_mem_min = (clk_mem[i]<clk_mem_min) ? clk_mem[i] : clk_mem_min;
			clk_mem_max = (clk_mem[i]>clk_mem_max) ? clk_mem[i] : clk_mem_max;
		// Lowest graphics clock among those supported at the minimum memory clock.
		result = nvmlDeviceGetSupportedGraphicsClocks(mDevice, clk_mem_min, &clk_gpu_min_arr_cnt, clk_gpu_min_arr);
		if (NVML_SUCCESS != result) {
			mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot obtain graphics clock. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl;
		for (int32_t i=0; i<(int32_t)clk_gpu_min_arr_cnt; ++i) {
			clk_gpu_min = (clk_gpu_min_arr[i]<clk_gpu_min) ? clk_gpu_min_arr[i] : clk_gpu_min;
		// Highest graphics clock among those supported at the maximum memory clock.
		result = nvmlDeviceGetSupportedGraphicsClocks(mDevice, clk_mem_max, &clk_gpu_max_arr_cnt, clk_gpu_max_arr);
		if (NVML_SUCCESS != result) {
			mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot obtain graphics clock. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl;
		for (int32_t i=0; i<(int32_t)clk_gpu_max_arr_cnt; ++i) {
			clk_gpu_max = (clk_gpu_max_arr[i]>clk_gpu_max) ? clk_gpu_max_arr[i] : clk_gpu_max;
		result = nvmlDeviceGetMemoryInfo(mDevice, &memory);
		if (NVML_SUCCESS != result) {
			mrLog(CLogger::scErr) << "!!! 'nvml thread' (thread main): Error: cannot obtain memory informations. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl;
		// NOTE(review): the operand of the shift is missing in this excerpt;
		// the value is later printed with a " MiB" suffix.
		memory_total = (uint32_t)( >> 20);
		rv = exec_gpu_mgmt((char**)args_set_pm);
		if (rv) {
			mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: in gpu_management tool. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl;
		<< ">>> 'nvml' (thread main): persistence mode enabled." << std::endl;
		// NOTE(review): modes[] has two elements and is indexed directly with
		// the nvmlEnableState_t value below; no range check is visible.
		<< ">>> 'nvml' (thread main):" << std::endl
		<< "     device         : " << name << std::endl
		<< "     pcie           : " << pci.busId << std::endl
		<< "     power mgmt mode: " << modes[mode] << std::endl
		<< "     power state cur: " << power_state << std::endl
		<< "     power state min: " << NVML_PSTATE_15 << std::endl
		<< "     power state max: " << NVML_PSTATE_0 << std::endl
		<< "     memory total   : " << memory_total << " MiB" << std::endl
		<< "     avail mem clks : ";
		for (int i=0; i<(int32_t)clk_mem_cnt; ++i) {
			if (i<(int32_t)clk_mem_cnt-1) {
				mrLog() << clk_mem[i] << " MHz, ";
			} else {
				mrLog() << clk_mem[i] << " MHz" << std::endl;
		<< "     memory clk min : " << clk_mem_min << " MHz" << std::endl
		<< "     avail core clks: ";
		for (int32_t i=0; i<(int32_t)clk_gpu_min_arr_cnt; ++i) {
			if (i<(int32_t)clk_gpu_min_arr_cnt-1) {
				mrLog() << clk_gpu_min_arr[i] << " MHz, ";
			} else {
				mrLog() << clk_gpu_min_arr[i] << " MHz" << std::endl;
		<< "     core clk min   : " << clk_gpu_min << " MHz" << std::endl;
		<< "     memory clk max : " << clk_mem_max << " MHz" << std::endl
		<< "     avail core clks: ";
		for (int32_t i=0; i<(int32_t)clk_gpu_max_arr_cnt; ++i) {
			if (i<(int32_t)clk_gpu_max_arr_cnt-1) {
				mrLog() << clk_gpu_max_arr[i] << " MHz, ";
			} else {
				mrLog() << clk_gpu_max_arr[i] << " MHz" << std::endl;
		<< "     core clk max   : " << clk_gpu_max << " MHz" << std::endl;
		// Chooses the clk_*_set pair from mGpuFrequency. The case labels are
		// missing here; the three visible groups assign the minimum clocks,
		// the maximum clocks, and the clocks currently reported by NVML.
		switch (mGpuFrequency) {
				clk_mem_set = clk_mem_min;
				clk_gpu_set = clk_gpu_min;
				clk_mem_set = clk_mem_max;
				clk_gpu_set = clk_gpu_max;
				clk_mem_set = 0;
				clk_gpu_set = 0;
				result = nvmlDeviceGetClockInfo(mDevice, NVML_CLOCK_MEM, &clk_mem_set);
				if (NVML_SUCCESS != result) {
					mrLog(CLogger::scErr) << "!!! 'nvml thread' (thread main): Error: cannot read frequency. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl;
				result = nvmlDeviceGetClockInfo(mDevice, NVML_CLOCK_GRAPHICS, &clk_gpu_set);
				if (NVML_SUCCESS != result) {
					mrLog(CLogger::scErr) << "!!! 'nvml thread' (thread main): Error: cannot read frequency. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl;
		if (mGpuFrequency == GPU_FREQUENCY_MIN || mGpuFrequency == GPU_FREQUENCY_MAX) {
			// In these cases we actually set the GPU frequencies either to the maximum or minimum value.
			clk_gpu_str << "-c " << clk_gpu_set;
			clk_mem_str << "-m " << clk_mem_set;
			// NOTE(review): str() returns a temporary std::string, so the
			// c_str() pointers stored in args_set_clk are dangling once this
			// declaration ends, i.e. before exec_gpu_mgmt reads them. Keep the
			// strings in named variables instead.
			char const* args_set_clk[] = {"gpu_management", clk_gpu_str.str().c_str() , clk_mem_str.str().c_str(), NULL};
			rv = exec_gpu_mgmt((char**)args_set_clk);
			if (rv) {
				mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: in gpu_management tool. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl;
			<< ">>> 'nvml' (thread main): set core clk to " << clk_gpu_set << " MHz and mem clk to " << clk_mem_set << " MHz." << std::endl;
		} else {
			// We name the values *_set, but we don't set the frequency. We just print the current GPU frequency.
			<< ">>> 'nvml' (thread main): current core clk is " << clk_gpu_set << " MHz and mem clk is " << clk_mem_set << " MHz." << std::endl;
		// NOTE(review): a 15 s wait is announced here, but no sleep call is
		// visible in this excerpt.
		<< ">>> 'nvml' (thread main): wait for 15s to throttle gpu clocks." << std::endl;
		<< ">>> 'nvml' (thread main): initialization done." << std::endl
		<< std::endl;
Beispiel #7
// Sample program: initialises NVML, lists every device with its name and PCI
// bus id, and for each device tries to switch the compute mode to
// NVML_COMPUTEMODE_PROHIBITED and then restore the original mode.
//
// Visible exit paths: returns 1 when nvmlInit() fails and after the shutdown
// sequence at the end of the function, and returns 0 after the normal
// shutdown. Most other failures jump to the `Error` label via goto.
//
// NOTE(review): braces and some lines are missing from this excerpt. The
// `Error:` label itself is not visible (presumably it precedes the final
// nvmlShutdown block), the two printf calls that report the compute-mode
// change are cut off after their format string, and no input read follows
// the "Press ENTER to continue..." prompts.
int main()
    nvmlReturn_t result;
    unsigned int device_count, i;

    // First initialize NVML library
    result = nvmlInit();
    if (NVML_SUCCESS != result)
        printf("Failed to initialize NVML: %s\n", nvmlErrorString(result));

        printf("Press ENTER to continue...\n");
        return 1;

    result = nvmlDeviceGetCount(&device_count);
    if (NVML_SUCCESS != result)
        printf("Failed to query device count: %s\n", nvmlErrorString(result));
        goto Error;
    printf("Found %d device%s\n\n", device_count, device_count != 1 ? "s" : "");

    printf("Listing devices:\n");    
    for (i = 0; i < device_count; i++)
        nvmlDevice_t device;
        char name[NVML_DEVICE_NAME_BUFFER_SIZE];
        nvmlPciInfo_t pci;
        nvmlComputeMode_t compute_mode;

        // Query for device handle to perform operations on a device
        // You can also query device handle by other features like:
        // nvmlDeviceGetHandleBySerial
        // nvmlDeviceGetHandleByPciBusId
        result = nvmlDeviceGetHandleByIndex(i, &device);
        if (NVML_SUCCESS != result)
            printf("Failed to get handle for device %i: %s\n", i, nvmlErrorString(result));
            goto Error;

        result = nvmlDeviceGetName(device, name, NVML_DEVICE_NAME_BUFFER_SIZE);
        if (NVML_SUCCESS != result)
            printf("Failed to get name of device %i: %s\n", i, nvmlErrorString(result));
            goto Error;
        // pci.busId is very useful to know which device physically you're talking to
        // Using PCI identifier you can also match nvmlDevice handle to CUDA device.
        result = nvmlDeviceGetPciInfo(device, &pci);
        if (NVML_SUCCESS != result)
            printf("Failed to get pci info for device %i: %s\n", i, nvmlErrorString(result));
            goto Error;

        printf("%d. %s [%s]\n", i, name, pci.busId);

        // This is a simple example on how you can modify GPU's state
        result = nvmlDeviceGetComputeMode(device, &compute_mode);
        if (NVML_ERROR_NOT_SUPPORTED == result)
            printf("\t This is not CUDA capable device\n");
        else if (NVML_SUCCESS != result)
            printf("Failed to get compute mode for device %i: %s\n", i, nvmlErrorString(result));
            goto Error;
            // try to change compute mode
            printf("\t Changing device's compute mode from '%s' to '%s'\n", 

            // NO_PERMISSION and NOT_SUPPORTED are only reported; any other
            // failure goes to the Error path.
            result = nvmlDeviceSetComputeMode(device, NVML_COMPUTEMODE_PROHIBITED);
            if (NVML_ERROR_NO_PERMISSION == result)
                printf("\t\t Need root privileges to do that: %s\n", nvmlErrorString(result));
            else if (NVML_ERROR_NOT_SUPPORTED == result)
                printf("\t\t Compute mode prohibited not supported. You might be running on\n"
                       "\t\t windows in WDDM driver model or on non-CUDA capable GPU.\n");
            else if (NVML_SUCCESS != result)
                printf("\t\t Failed to set compute mode for device %i: %s\n", i, nvmlErrorString(result));
                goto Error;
                printf("\t Restoring device's compute mode back to '%s'\n", 
                result = nvmlDeviceSetComputeMode(device, compute_mode);
                if (NVML_SUCCESS != result)
                    printf("\t\t Failed to restore compute mode for device %i: %s\n", i, nvmlErrorString(result));
                    goto Error;

    // Normal exit: shut NVML down and report success.
    result = nvmlShutdown();
    if (NVML_SUCCESS != result)
        printf("Failed to shutdown NVML: %s\n", nvmlErrorString(result));

    printf("All done.\n");

    printf("Press ENTER to continue...\n");
    return 0;

    // Failure exit (presumably the target of `goto Error`): shut NVML down
    // and report failure.
    result = nvmlShutdown();
    if (NVML_SUCCESS != result)
        printf("Failed to shutdown NVML: %s\n", nvmlErrorString(result));

    printf("Press ENTER to continue...\n");
    return 1;