Ejemplo n.º 1
0
// Build the set of device features
static void get_device_features(struct device* dev)
{
  if(nvmlDeviceGetTemperature(dev->handle, NVML_TEMPERATURE_GPU,
                              &dev->temperature) == NVML_SUCCESS) {
    dev->feature_support |= TEMPERATURE;
  }

  if(nvmlDeviceGetMemoryInfo(dev->handle, &dev->memory) == NVML_SUCCESS) {
    dev->feature_support |= MEMORY_INFO;
  }

  if(nvmlDeviceGetPowerUsage(dev->handle, &dev->power_usage) == NVML_SUCCESS) {
    dev->feature_support |= POWER_USAGE;
  }

  if(nvmlDeviceGetClockInfo(dev->handle, NVML_CLOCK_GRAPHICS,
                            &dev->clock[NVML_CLOCK_GRAPHICS]) == NVML_SUCCESS &&

     nvmlDeviceGetClockInfo(dev->handle, NVML_CLOCK_SM,
                            &dev->clock[NVML_CLOCK_SM]) == NVML_SUCCESS &&

     nvmlDeviceGetClockInfo(dev->handle, NVML_CLOCK_COUNT,
                            &dev->clock[NVML_CLOCK_COUNT]) == NVML_SUCCESS) {

    dev->feature_support |= CLOCK_INFO;
  }

  if(nvmlDeviceGetFanSpeed(dev->handle, &dev->fan) == NVML_SUCCESS) {
    dev->feature_support |= FAN_INFO;
  }

  if(nvmlDeviceGetUtilizationRates(dev->handle, &dev->util) == NVML_SUCCESS) {
    dev->feature_support |= UTILIZATION_INFO;
  }
}
Ejemplo n.º 2
0
/*
 * Take a one-shot snapshot of every GPU that NVML reports.
 *
 * Initialises NVML, allocates one zero-filled devstat per device, fills in
 * the device handle, memory info, utilization rates and encoder/decoder
 * utilization, then shuts NVML down again.
 *
 * stats: out-parameter that receives the calloc'ed array (one entry per
 *        device). The caller owns it and releases it with free(). It is
 *        NULL when the allocation fails; with zero devices it is whatever
 *        calloc(0, ...) returns.
 *
 * Returns the number of devices, or -1 when the array could not be
 * allocated. Failures of nvmlInit / nvmlDeviceGetCount / nvmlShutdown are
 * handed to CHK_NVML, which is defined elsewhere.
 */
int probe_gpustats(devstat**stats)
{
    unsigned int n_dev = 0;
    nvmlReturn_t nvret;

    nvret = nvmlInit();
    CHK_NVML(nvret, "Init NVML");

    nvret = nvmlDeviceGetCount(&n_dev);
    CHK_NVML(nvret, "getCount");

    devstat *pstats = (devstat*)calloc(n_dev, sizeof(devstat));
    *stats = pstats;
    if (pstats == NULL && n_dev > 0) {
        /* Out of memory: release NVML and report it instead of letting the
         * queries below write through a NULL-based pointer. */
        nvmlShutdown();
        return -1;
    }

    for (unsigned int i = 0; i < n_dev; i++) {
        devstat *dev = &pstats[i];
        unsigned int sampling_period; /* required by the API, not used here */

        /* Without a handle no other query can succeed; the entry keeps the
         * zeros calloc put there. */
        if (nvmlDeviceGetHandleByIndex(i, &dev->handler) != NVML_SUCCESS) {
            continue;
        }

        /* Best effort: the result of each query is deliberately not checked,
         * so a failed query does not abort the snapshot. */
        nvmlDeviceGetMemoryInfo(dev->handler, &dev->meminfo);
        nvmlDeviceGetUtilizationRates(dev->handler, &dev->utils);
        nvmlDeviceGetEncoderUtilization(dev->handler, &dev->encutil,
                                        &sampling_period);
        nvmlDeviceGetDecoderUtilization(dev->handler, &dev->decutil,
                                        &sampling_period);
    }

    nvret = nvmlShutdown();
    CHK_NVML(nvret, "Shutdown NVML");

    return (int)n_dev;
}
Ejemplo n.º 3
0
/*
 * Class:     org_apache_hadoop_yarn_server_nodemanager_containermanager_launcher_GPUMonitor
 * Method:    getState
 * Signature: ()I
 *
 * Packs a snapshot of the monitored GPU into one integer:
 *   bits  0- 7  utilization.gpu
 *   bits  8-15  utilization.memory
 *   bits 16-23  number of compute processes reported by NVML
 *   bits 24-31  always zero
 * Each field is truncated to its low 8 bits. A field whose NVML query fails
 * is reported as 0 rather than as leftover or uninitialised data.
 *
 * `device` and `pinfos` are defined elsewhere in this file.
 */
JNIEXPORT jint JNICALL Java_org_apache_hadoop_yarn_server_nodemanager_containermanager_launcher_GPUMonitor_getState
  (JNIEnv *, jobject)
{
	// NOTE(review): on input this count tells NVML how many entries pinfos
	// can hold; UINT_MAX claims it is unbounded. Confirm against the real
	// size of pinfos and pass that instead.
	unsigned int infoCount = (unsigned int)-1;
	if (nvmlDeviceGetComputeRunningProcesses(device, &infoCount, pinfos) != NVML_SUCCESS) {
		// Do not let the UINT_MAX seed leak out as "255 processes".
		infoCount = 0;
	}

	nvmlUtilization_t utilization = {0, 0};
	if (nvmlDeviceGetUtilizationRates(device, &utilization) != NVML_SUCCESS) {
		utilization.gpu = 0;
		utilization.memory = 0;
	}

	//            infoCount memory    gpu
	//	0000 0000 0000 0000 0000 0000 0000 0000
	unsigned int return_value = 0;
	return_value  = (utilization.gpu        ) & 0x000000FF;
	return_value |= (utilization.memory << 8) & 0x0000FF00;
	return_value |= (infoCount          <<16) & 0x00FF0000;
	return (jint)return_value;
}
Ejemplo n.º 4
0
/*
 * Read one utilization figure for a device from NVML.
 *
 * dev:       NVML device handle to query.
 * which_one: GPU_UTILIZATION selects util.gpu, MEMORY_UTILIZATION selects
 *            util.memory. (The original note lists these as 0 and 1; the
 *            constants themselves are defined elsewhere.)
 *
 * Returns the selected value, or (unsigned long long) -1 when the NVML query
 * fails or which_one is neither selector.
 */
unsigned long long
getUtilization( nvmlDevice_t dev, int which_one )
{
	nvmlUtilization_t util;
	nvmlReturn_t bad;

	bad = nvmlDeviceGetUtilizationRates( dev, &util );
	if ( NVML_SUCCESS != bad ) {
		SUBDBG( "something went wrong %s\n", nvmlErrorString(bad));
		/* util was not filled in; reading it would yield garbage. */
		return (unsigned long long) -1;
	}

	switch (which_one) {
		case GPU_UTILIZATION:
			return (unsigned long long) util.gpu;
		case MEMORY_UTILIZATION:
			return (unsigned long long) util.memory;
		default:
			break;
	}

	return (unsigned long long) -1;
}
Ejemplo n.º 5
0
  /*_________________---------------------------__________________
    _________________     nvml_tick             __________________
    -----------------___________________________------------------
    Invoked once per second. Walks the GPUs counted in sp->nvml.gpu_count
    and adds this tick's utilization and power readings to the running
    totals kept in sp->nvml. A GPU whose handle or reading cannot be
    obtained contributes nothing for that reading on this tick.
  */
  void nvml_tick(HSP *sp) {
    unsigned int dev_index;

    if (!(sp->nvml.gpu_count > 0)) {
      return;
    }

    for (dev_index = 0; dev_index < sp->nvml.gpu_count; ++dev_index) {
      nvmlDevice_t handle;
      nvmlUtilization_t rates;
      unsigned int milliwatts;

      if (nvmlDeviceGetHandleByIndex(dev_index, &handle) != NVML_SUCCESS) {
        continue;
      }

      if (nvmlDeviceGetUtilizationRates(handle, &rates) == NVML_SUCCESS) {
        // utilization value * 10, accumulated as mS for this tick
        sp->nvml.nvml_gpu_time += rates.gpu * 10;
        sp->nvml.nvml_mem_time += rates.memory * 10;
      }

      if (nvmlDeviceGetPowerUsage(handle, &milliwatts) == NVML_SUCCESS) {
        // power reading for a one-second tick, accumulated as mJ
        sp->nvml.nvml_energy += milliwatts;
      }
    }
  }