/*
 * Load the NVML shared object and then forward to nvmlInit().
 *
 * The dlopen() result is stored in `handle`, which is declared outside this
 * function.  Returns NVML_ERROR_LIBRARY_NOT_FOUND when the library cannot be
 * opened, otherwise whatever nvmlInit() returns.
 */
nvmlReturn_t NVML_DL(nvmlInit)(void)
{
    handle = dlopen("libnvidia-ml.so.1", RTLD_LAZY | RTLD_GLOBAL);

    if (handle != NULL) {
        return (nvmlInit());
    }

    return (NVML_ERROR_LIBRARY_NOT_FOUND);
}
/*
 * Take a one-shot snapshot of every GPU that NVML can see.
 *
 * On return *stats points to a calloc()'ed array with one devstat per device
 * (the caller owns it and must free() it).  For each device the handle,
 * memory info, utilization rates and encoder/decoder utilization are queried
 * on a best-effort basis: a failing query leaves the corresponding fields
 * zeroed.
 *
 * Returns the number of devices, or -1 if the result array could not be
 * allocated.  Failures of nvmlInit/nvmlDeviceGetCount/nvmlShutdown are
 * handled by the CHK_NVML macro (defined elsewhere).
 */
int probe_gpustats(devstat **stats)
{
    unsigned int n_dev = 0;
    unsigned int i;
    unsigned int sampp; /* sampling period reported by NVML; not used further */
    nvmlReturn_t nvret;
    devstat *pstats;

    nvret = nvmlInit();
    CHK_NVML(nvret, "Init NVML");

    nvret = nvmlDeviceGetCount(&n_dev);
    CHK_NVML(nvret, "getCount");

    pstats = (devstat *)calloc(n_dev, sizeof(devstat));
    *stats = pstats;
    if (pstats == NULL && n_dev > 0) {
        /* Out of memory: do not dereference a NULL array below. */
        nvmlShutdown();
        return -1;
    }

    for (i = 0; i < n_dev; i++) {
        /* Without a valid handle none of the queries below can succeed. */
        if (nvmlDeviceGetHandleByIndex(i, &pstats[i].handler) != NVML_SUCCESS)
            continue;

        nvmlDeviceGetMemoryInfo(pstats[i].handler, &pstats[i].meminfo);
        nvmlDeviceGetUtilizationRates(pstats[i].handler, &pstats[i].utils);
        nvmlDeviceGetEncoderUtilization(pstats[i].handler, &pstats[i].encutil, &sampp);
        nvmlDeviceGetDecoderUtilization(pstats[i].handler, &pstats[i].decutil, &sampp);
    }

#if 0
    int maxfreeind=0;
    int maxfree=0;
    for(i=0;i<n_dev;i++){
        print_devstats(&pstats[i]);
        int free=pstats[i].meminfo.free;
        // fprintf(stderr,"<%d\n",free);
        if(free>maxfree){
            maxfree=free;
            maxfreeind=i;
        }
    }
#endif

    nvret = nvmlShutdown();
    CHK_NVML(nvret, "Shutdown NVML");

    return (int)n_dev;
}
static int get_mem_info(unsigned int*ncores,unsigned int*usedarray) { nvmlReturn_t ret; ret=nvmlInit(); if(ret!=NVML_SUCCESS) { fprintf(stderr,"ERROR:: Initialize NVML{%s}..\n",nvmlErrorString(ret)); return -1; } unsigned int c; ret=nvmlDeviceGetCount(&c); if(ret!=NVML_SUCCESS) { fprintf(stderr,"ERROR:: Device Get Count{%s}..\n",nvmlErrorString(ret)); return -1; } *ncores=c; nvmlDevice_t devs[NDEV]; nvmlMemory_t meminfo; int i; for(i=0; i<c; i++) { ret=nvmlDeviceGetHandleByIndex(i,&devs[i]); if(ret!=NVML_SUCCESS) { fprintf(stderr,"ERROR:: Device Get Handle{%s}..\n",nvmlErrorString(ret)); return -1; } ret=nvmlDeviceGetMemoryInfo(devs[i],&meminfo); if(ret!=NVML_SUCCESS) { fprintf(stderr,"ERROR:: GetMemoryInfo{%s}..\n",nvmlErrorString(ret)); return -1; } usedarray[i]=meminfo.used; } ret=nvmlShutdown(); if(ret!=NVML_SUCCESS) { fprintf(stderr,"ERROR:: Shutdown NVML{%s}..\n",nvmlErrorString(ret)); return -1; } return 0; }
/*_________________---------------------------__________________
  _________________        nvml_init          __________________
  -----------------___________________________------------------

  Called at startup.  Initializes NVML and records the number of GPUs in
  sp->nvml.gpu_count.  If either NVML call fails the function returns
  without writing to sp.
*/
void nvml_init(HSP *sp)
{
    unsigned int n_gpus;

    /* Short-circuit: the device count is only queried when init succeeded. */
    if (nvmlInit() != NVML_SUCCESS
        || nvmlDeviceGetCount(&n_gpus) != NVML_SUCCESS) {
        return;
    }

    sp->nvml.gpu_count = n_gpus;
}
// NVIDIA NVML library function wrapper for GPU DVFS. int SetGPUFreq(unsigned int clock_mem, unsigned int clock_core) { nvmlDevice_t device;//int device; nvmlReturn_t result; result = nvmlInit(); result = nvmlDeviceGetHandleByIndex(0, &device);//cudaGetDevice(&device); result = nvmlDeviceSetApplicationsClocks(device, clock_mem, clock_core);//(nvmlDevice_t)device if(result != NVML_SUCCESS) { printf("Failed to set GPU core and memory frequencies: %s\n", nvmlErrorString(result)); return 1; } else { nvmlDeviceGetApplicationsClock(device, NVML_CLOCK_GRAPHICS, &clock_core); nvmlDeviceGetApplicationsClock(device, NVML_CLOCK_MEM, &clock_mem); ////printf("GPU core frequency is now set to %d MHz; GPU memory frequency is now set to %d MHz", clock_core, clock_mem); return 0; } }
/*
 * Class:     org_apache_hadoop_yarn_server_nodemanager_containermanager_launcher_GPUMonitor
 * Method:    initnvml
 * Signature: ()Ljava/lang/String;
 *
 * Initializes NVML, fetches the handle of GPU 0 into `device` (declared
 * outside this function) and returns "Device : <name>\n" as a Java string.
 * Every failure is reported on stdout and yields a null jstring (0); NVML
 * is shut down again when a query fails after a successful init.
 */
JNIEXPORT jstring JNICALL Java_org_apache_hadoop_yarn_server_nodemanager_containermanager_launcher_GPUMonitor_initnvml
  (JNIEnv *env, jobject)
{
    const unsigned int device_index = 0;
    char name[NVML_DEVICE_NAME_BUFFER_SIZE];
    nvmlReturn_t result;

    result = nvmlInit();
    if (NVML_SUCCESS != result)
    {
        // Nothing else can work without NVML, so stop here.
        printf("Failed to initialize NVML: %s\n", nvmlErrorString(result));
        return 0;
    }

    result = nvmlDeviceGetHandleByIndex(device_index, &device);
    if (NVML_SUCCESS != result)
    {
        printf("Failed to get handle for device %u: %s\n", device_index, nvmlErrorString(result));
        nvmlShutdown();
        return 0;
    }

    result = nvmlDeviceGetName(device, name, NVML_DEVICE_NAME_BUFFER_SIZE);
    if (NVML_SUCCESS != result)
    {
        printf("Failed to get name of device %u: %s\n", device_index, nvmlErrorString(result));
        nvmlShutdown();
        return 0;
    }

    printf("Device : %s\n", name);

    // Build the reply with std::string so no fixed-size buffer can overflow.
    std::string report = "Device : ";
    report += name;
    report += "\n";

    return env->NewStringUTF( report.c_str() );
}
int main(int argc,char* argv[]){ /**Initialize signal**/ signal(SIGINT ,_end_server); signal(SIGUSR1,_end_server); /**Initialize struct proc**/ init_proc(); init_cons(); /**Process becomes dem**/ pid_t process_id = 0; pid_t sid = 0; if(argc >= 2){ process_id = fork(); if(process_id < 0){ printf("fork failed ..\n"); exit(1); } if(process_id > 0){ exit(0); } umask(0); sid = setsid(); if(sid < 0){ exit(1); } close(STDIN_FILENO); close(STDOUT_FILENO); close(STDERR_FILENO); }else{ sid = getpid(); } /**Setup the log file**/ char log[32]; sprintf(log,"log.%u",sid); // fp = fopen(log,"w+"); /**Start Initialize nvidia management library from Here!!**/ nvmlReturn_t nres; int i; nres = nvmlInit(); if(nres != NVML_SUCCESS){ perror("Failed to initialize Nvidia Managerment Library...\n"); exit(-1); } nres = nvmlDeviceGetCount(&dem.ndev); if(nres != NVML_SUCCESS){ perror("Failed to get num of device...\n"); exit(-1); } dem.devs = (nvmlDevice_t*)malloc(sizeof(nvmlDevice_t)*dem.ndev); dem.flags = (dflag*)malloc(sizeof(dflag)*dem.ndev); MAXPROC = dem.ndev * 4; for(i = 0 ; i < dem.ndev ; i ++){ nres = nvmlDeviceGetHandleByIndex(i,&dem.devs[i]); if(nres != NVML_SUCCESS){ perror("Failed to get device handle\n"); exit(-1); } dem.flags[i].sd = -1; dem.flags[i].flag = 0; dem.flags[i].stayed = 0; dem.flags[i].reserved = 0; } dem.procCounter = 0; /**Setup the socket**/ int len,rc,on = 1; int listen_sd,max_sd,new_sd; int desc_ready; int close_conn; struct sockaddr_un addr; struct timeval timeout; fd_set master_set,working_set; listen_sd = socket(AF_UNIX,SOCK_STREAM,0); if(listen_sd < 0){ perror("socket() failed\n"); exit(-1); } rc = setsockopt(listen_sd, SOL_SOCKET, SO_REUSEADDR, (char*)&on,sizeof(on)); if(rc < 0){ perror("setsockopt() failed\n"); exit(-1); } unlink("mocu_server"); memset(&addr,0,sizeof(addr)); addr.sun_family = AF_UNIX; strcpy(addr.sun_path,"mocu_server"); rc = bind(listen_sd,(struct sockaddr*)&addr,sizeof(addr)); if(rc < 0){ perror("bind() failed"); close(listen_sd); 
exit(-1); } rc = listen(listen_sd,SOMAXCONN); if(rc < 0){ perror("listen() failed"); close(listen_sd); exit(-1); } FD_ZERO(&master_set); max_sd = listen_sd; FD_SET(listen_sd,&master_set); timeout.tv_sec = 3*60; timeout.tv_usec = 0; long counter = 0; /**Entering main loop**/ proc_data* receivedProc = (proc_data*)malloc(sizeof(proc_data)); mocu_check(); do{ memcpy(&working_set,&master_set,sizeof(master_set)); rc = select(max_sd+1, &working_set, NULL, NULL, NULL); if(rc < 0){ perror("select() failed\n"); break; } if(rc == 0){ printf("select() time out. End program.\n"); break; } desc_ready = rc; for(i = 0 ; i < max_sd+1 && desc_ready > 0 ; ++i){ if(FD_ISSET(i,&working_set)){ desc_ready = -1; if(i == listen_sd){ new_sd = accept(listen_sd,NULL,NULL); if(new_sd < 0){ printf("accept() failed"); end_server = TRUE; } FD_SET(new_sd,&master_set); if(new_sd > max_sd){ max_sd = new_sd; } }else{ rc = recv(i,receivedProc,sizeof(proc_data),0); if(rc <= 0){ FD_CLR(i,&master_set); _FIN(i); }else{ if(receivedProc->REQUEST == CONNECT){ _CONNECT(i,receivedProc); }else if(receivedProc->REQUEST == RENEW){ _RENEW(i,receivedProc); }else if(receivedProc->REQUEST == MIGDONE){ _MIGDONE(i,receivedProc); }else if(receivedProc->REQUEST == CANRECEIVE){ _CANRECEIVE(i,receivedProc); }else if(receivedProc->REQUEST == FAILEDTOALLOC){ _FAILEDTOALLOC(i,receivedProc); exit(-1);//TEST }else if(receivedProc->REQUEST == MALLOCDONE){ _MALLOCDONE(i,receivedProc); }else if(receivedProc->REQUEST == CUDAMALLOC){ _CUDAMALLOC(i,receivedProc); }else if(receivedProc->REQUEST == BACKUPED){ _BACKUPED(i,receivedProc); }else if(receivedProc->REQUEST == CONTEXT_CHECK){ _CONTEXT_CHECK(i,receivedProc); }else if(receivedProc->REQUEST == CREATE_CONTEXT){ _CREATE_CONTEXT(i); }else if(receivedProc->REQUEST == CONSOLE){ _CONSOLE(i); }else{ printf("Unkown request...\n"); exit(-1); } } } } } mocu_check(); }while(end_server == FALSE); int closed = 0; for(i = 0 ; i < max_sd ; i ++){ if(FD_ISSET(i,&master_set)){ close(i); closed = 
1; } } // fclose(fp); return 0; }
/** Initialize hardware counters, setup the function vector table * and get hardware information, this routine is called when the * PAPI process is initialized (IE PAPI_library_init) */ int _papi_nvml_init_substrate( int cidx ) { nvmlReturn_t ret; cudaError_t cuerr; int cuda_count = 0; unsigned int nvml_count = 0; ret = nvmlInit(); if ( NVML_SUCCESS != ret ) { strcpy(_nvml_vector.cmp_info.disabled_reason, "The NVIDIA managament library failed to initialize."); goto disable; } cuerr = cuInit( 0 ); if ( CUDA_SUCCESS != cuerr ) { strcpy(_nvml_vector.cmp_info.disabled_reason, "The CUDA library failed to initialize."); goto disable; } /* Figure out the number of CUDA devices in the system */ ret = nvmlDeviceGetCount( &nvml_count ); if ( NVML_SUCCESS != ret ) { strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a count of devices from the NVIDIA managament library."); goto disable; } cuerr = cudaGetDeviceCount( &cuda_count ); if ( CUDA_SUCCESS != cuerr ) { strcpy(_nvml_vector.cmp_info.disabled_reason, "Unable to get a device count from CUDA."); goto disable; } /* We can probably recover from this, when we're clever */ if ( nvml_count != cuda_count ) { strcpy(_nvml_vector.cmp_info.disabled_reason, "Cuda and the NVIDIA managament library have different device counts."); goto disable; } device_count = cuda_count; /* A per device representation of what events are present */ features = (int*)papi_malloc(sizeof(int) * device_count ); /* Handles to each device */ devices = (nvmlDevice_t*)papi_malloc(sizeof(nvmlDevice_t) * device_count); /* Figure out what events are supported on each card. */ if ( (papi_errorcode = detectDevices( ) ) != PAPI_OK ) { papi_free(features); papi_free(devices); sprintf(_nvml_vector.cmp_info.disabled_reason, "An error occured in device feature detection, please check your NVIDIA Management Library and CUDA install." 
); goto disable; } /* The assumption is that if everything went swimmingly in detectDevices, all nvml calls here should be fine. */ createNativeEvents( ); /* Export the total number of events available */ _nvml_vector.cmp_info.num_native_events = num_events; /* Export the component id */ _nvml_vector.cmp_info.CmpIdx = cidx; /* Export the number of 'counters' */ _nvml_vector.cmp_info.num_cntrs = num_events; return PAPI_OK; disable: _nvml_vector.cmp_info.num_cntrs = 0; return PAPI_OK; }
static int hwloc_nvml_discover(struct hwloc_backend *backend) { struct hwloc_topology *topology = backend->topology; nvmlReturn_t ret; unsigned nb, i; if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO))) return 0; if (!hwloc_topology_is_thissystem(topology)) { hwloc_debug("%s", "\nno NVML detection (not thissystem)\n"); return 0; } ret = nvmlInit(); if (NVML_SUCCESS != ret) return 0; ret = nvmlDeviceGetCount(&nb); if (NVML_SUCCESS != ret || !nb) { nvmlShutdown(); return 0; } for(i=0; i<nb; i++) { nvmlPciInfo_t pci; nvmlDevice_t device; hwloc_obj_t osdev, parent; char buffer[64]; ret = nvmlDeviceGetHandleByIndex(i, &device); assert(ret == NVML_SUCCESS); osdev = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1); snprintf(buffer, sizeof(buffer), "nvml%d", i); osdev->name = strdup(buffer); osdev->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN; osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU; hwloc_obj_add_info(osdev, "Backend", "NVML"); hwloc_obj_add_info(osdev, "GPUVendor", "NVIDIA Corporation"); buffer[0] = '\0'; ret = nvmlDeviceGetName(device, buffer, sizeof(buffer)); hwloc_obj_add_info(osdev, "GPUModel", buffer); /* these may fail with NVML_ERROR_NOT_SUPPORTED on old devices */ buffer[0] = '\0'; ret = nvmlDeviceGetSerial(device, buffer, sizeof(buffer)); if (buffer[0] != '\0') hwloc_obj_add_info(osdev, "NVIDIASerial", buffer); buffer[0] = '\0'; ret = nvmlDeviceGetUUID(device, buffer, sizeof(buffer)); if (buffer[0] != '\0') hwloc_obj_add_info(osdev, "NVIDIAUUID", buffer); parent = NULL; if (NVML_SUCCESS == nvmlDeviceGetPciInfo(device, &pci)) { parent = hwloc_pci_belowroot_find_by_busid(topology, pci.domain, pci.bus, pci.device, 0); if (!parent) parent = hwloc_pci_find_busid_parent(topology, pci.domain, pci.bus, pci.device, 0); #if HAVE_DECL_NVMLDEVICEGETMAXPCIELINKGENERATION if (parent && parent->type == HWLOC_OBJ_PCI_DEVICE) { unsigned maxwidth = 0, maxgen = 0; float lanespeed; nvmlDeviceGetMaxPcieLinkWidth(device, 
&maxwidth); nvmlDeviceGetMaxPcieLinkGeneration(device, &maxgen); /* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding = 0.25GB/s data-rate per lane * PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane * PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane */ lanespeed = maxgen <= 2 ? 2.5 * maxgen * 0.8 : 8.0 * 128/130; /* Gbit/s per lane */ if (lanespeed * maxwidth) /* we found the max link speed, replace the current link speed found by pci (or none) */ parent->attr->pcidev.linkspeed = lanespeed * maxwidth / 8; /* GB/s */ } #endif } if (!parent) parent = hwloc_get_root_obj(topology); hwloc_insert_object_by_parent(topology, parent, osdev); } nvmlShutdown(); return nb; }
int main() { nvml_void nvmlInit = NULL; nvml_void nvmlShutdown = NULL; // The actual filtering is done by the startd on the basis // of the SlotMergeConstraint we set for each ad we emit. #if defined(WINDOWS) putenv( "CUDA_VISIBLE_DEVICES=" ); #else unsetenv( "CUDA_VISIBLE_DEVICES" ); #endif void * nvml_handle = NULL; const char * nvml_library = "libnvidia-ml.so"; nvml_handle = dlopen( nvml_library, RTLD_LAZY ); if(! nvml_handle) { fprintf( stderr, "Unable to load %s, aborting.\n", nvml_library ); fail(); } dlerror(); nvmlInit = (nvml_void)dlsym( nvml_handle, "nvmlInit" ); nvmlShutdown = (nvml_void)dlsym( nvml_handle, "nvmlShutdown" ); nvmlDeviceGetCount = (nvml_unsigned_int)dlsym( nvml_handle, "nvmlDeviceGetCount" ); nvmlDeviceGetSamples = (nvml_dgs)dlsym( nvml_handle, "nvmlDeviceGetSamples" ); nvmlDeviceGetMemoryInfo = (nvml_dm)dlsym( nvml_handle, "nvmlDeviceGetMemoryInfo" ); nvmlDeviceGetHandleByIndex = (nvml_dghbi)dlsym( nvml_handle, "nvmlDeviceGetHandleByIndex" ); nvmlErrorString = (cc_nvml)dlsym( nvml_handle, "nvmlErrorString" ); nvmlReturn_t r = nvmlInit(); if( r != NVML_SUCCESS ) { fprintf( stderr, "nvmlInit() failed, aborting.\n" ); fail(); } unsigned int deviceCount; r = nvmlDeviceGetCount( &deviceCount ); if( r != NVML_SUCCESS ) { fprintf( stderr, "nvmlDeviceGetCount() failed, aborting.\n" ); fail(); } if( deviceCount <= 0 ) { fprintf( stderr, "Found 0 or fewer devices, aborting.\n" ); fail(); } nvmlDevice_t devices[deviceCount]; unsigned maxSampleCounts[deviceCount]; unsigned long long memoryUsage[deviceCount]; unsigned long long lastSamples[deviceCount]; unsigned long long firstSamples[deviceCount]; unsigned long long elapsedTimes[deviceCount]; for( unsigned i = 0; i < deviceCount; ++i ) { r = nvmlDeviceGetHandleByIndex( i, &(devices[i]) ); if( r != NVML_SUCCESS ) { fprintf( stderr, "nvmlGetDeviceHandleByIndex(%u) failed (%d: %s), aborting.\n", i, r, nvmlErrorString( r ) ); fail(); } lastSamples[i] = 0; firstSamples[i] = 0; elapsedTimes[i] = 0; 
nvmlValueType_t sampleValueType; r = nvmlDeviceGetSamples( devices[i], NVML_GPU_UTILIZATION_SAMPLES, 0, & sampleValueType, & maxSampleCounts[i], NULL ); if( r != NVML_SUCCESS ) { fprintf( stderr, "nvmlDeviceGetSamples(%u) failed while querying for the max sample count (%d: %s), aborting.\n", i, r, nvmlErrorString( r ) ); fail(); } if( sampleValueType != NVML_VALUE_TYPE_UNSIGNED_INT ) { fprintf( stderr, "nvmlDeviceGetSamples(%u) returned an unexpected type (%d) of sample when querying for the max sample count, aborting.\n", i, sampleValueType ); fail(); } memoryUsage[i] = 0; } // We deliberately ignore the first set of samples. Partly, I think we // can claim that they're from before we started monitoring. More // importantly, at least on a Tesla K40c (driver 384.81), initializing // the NVML library causes a one-second 99% usage spike on an other- // wise idle GPU. So we'll ignore as much of that as we easily can. for( unsigned i = 0; i < deviceCount; ++i ) { getElapsedTimeForDevice( devices[i], &lastSamples[i], &elapsedTimes[i], maxSampleCounts[i] ); firstSamples[i] = lastSamples[i]; elapsedTimes[i] = 0; } time_t lastReport = time( NULL ); while( 1 ) { usleep( 100000 ); for( unsigned i = 0; i < deviceCount; ++i ) { r = getElapsedTimeForDevice( devices[i], &lastSamples[i], &elapsedTimes[i], maxSampleCounts[i] ); if( r != NVML_SUCCESS ) { fprintf( stderr, "getElapsedTimeForDevice(%u) failed (%d: %s), aborting.\n", i, r, nvmlErrorString( r ) ); fail(); } if( debug ) { fprintf( stdout, "device %u: %llu / %llu = %u%%.\n", i, elapsedTimes[i], lastSamples[i] - firstSamples[i], (unsigned)(((double)elapsedTimes[i]) / (lastSamples[i] - firstSamples[i]) * 100) ); } nvmlMemory_t mi = { 0, 0, 0 }; r = nvmlDeviceGetMemoryInfo( devices[i], &mi ); if( r != NVML_SUCCESS ) { fprintf( stderr, "getDeviceGetMemoryInfo(%u) failed (%d: %s), aborting.\n", i, r, nvmlErrorString( r ) ); fail(); } if( mi.used > memoryUsage[i] ) { memoryUsage[i] = mi.used; } } if( time( NULL ) - lastReport 
>= reportInterval ) { for( unsigned i = 0; i < deviceCount; ++i ) { fprintf( stdout, "SlotMergeConstraint = StringListMember( \"CUDA%u\", AssignedGPUs )\n", i ); fprintf( stdout, "UptimeGPUsSeconds = %.6f\n", elapsedTimes[i] / 1000000.0 ); fprintf( stdout, "UptimeGPUsMemoryPeakUsage = %llu\n", (memoryUsage[i] + (1024 * 1024) -1) / (1024 * 1024) ); fprintf( stdout, "- GPUsSlot%u\n", i ); fflush( stdout ); // Report only the usage for each reporting period. elapsedTimes[i] = 0; firstSamples[i] = lastSamples[i]; memoryUsage[i] = 0; } lastReport = time( NULL ); } } r = nvmlShutdown(); if( r != NVML_SUCCESS ) { fprintf( stderr, "nvmlShutdown() failed (%d: %s), aborting.\n", r, nvmlErrorString( r ) ); return 1; } dlclose( nvml_handle ); return 0; }
static int get_process_info(unsigned int*ncores,unsigned int *valarray) { nvmlReturn_t ret; ret=nvmlInit(); if(ret!=NVML_SUCCESS) { fprintf(stderr,"ERROR:: Initialize NVML{%s}..\n",nvmlErrorString(ret)); return -1; } unsigned int c; ret=nvmlDeviceGetCount(&c); if(ret!=NVML_SUCCESS) { fprintf(stderr,"ERROR:: Device Get Count{%s}..\n",nvmlErrorString(ret)); return -1; } *ncores=c; /* if(c!=NDEV){ fprintf(stderr,"ERROR:: Current number of Cores is [%d],not %d....YOU NEED RECOMPILE THIS ROUTINE\n",c,NDEV); return -2; } */ nvmlDevice_t devs[NDEV]; nvmlProcessInfo_t pis[MAXPROC]; int i; for(i=0; i<c; i++) { ret=nvmlDeviceGetHandleByIndex(i,&devs[i]); if(ret!=NVML_SUCCESS) { fprintf(stderr,"ERROR:: Device Get Handle{%s}..\n",nvmlErrorString(ret)); return -1; } unsigned int np=MAXPROC; ret=nvmlDeviceGetComputeRunningProcesses(devs[i],&np,pis); if(ret!=NVML_SUCCESS) { fprintf(stderr,"ERROR:: GetRunningProcess{%s}..\n",nvmlErrorString(ret)); return -1; } valarray[i]=np; } ret=nvmlShutdown(); if(ret!=NVML_SUCCESS) { fprintf(stderr,"ERROR:: Shutdown NVML{%s}..\n",nvmlErrorString(ret)); return -1; } return 0; }
void CMeasureNVML<TSkipMs, TVariant>::init(void) { if(TVariant == VARIANT_FULL) { mrLog() << ">>> 'nvml' (full version)" << std::endl; } else { mrLog() << ">>> 'nvml' (light version)" << std::endl; } nvmlReturn_t result; int32_t rv; char const* args_set_pm[] = {"gpu_management", "-p 1", NULL}; uint32_t device_count; char name[NVML_DEVICE_NAME_BUFFER_SIZE]; nvmlPciInfo_t pci; nvmlEnableState_t mode; std::string modes[2] = {"disabled", "enabled"}; std::stringstream clk_gpu_str; std::stringstream clk_mem_str; nvmlPstates_t power_state; nvmlMemory_t memory; const uint32_t count = 32; uint32_t clk_mem_cnt = count; uint32_t clk_mem[count]; uint32_t clk_mem_max = 0; uint32_t clk_mem_min = 0xffffffff; uint32_t clk_mem_set = 0; uint32_t clk_gpu_min_arr_cnt = count; uint32_t clk_gpu_min_arr[clk_gpu_min_arr_cnt]; uint32_t clk_gpu_min = 0xffffffff; uint32_t clk_gpu_max_arr_cnt = count; uint32_t clk_gpu_max_arr[clk_gpu_max_arr_cnt]; uint32_t clk_gpu_max = 0; uint32_t clk_gpu_set = 0; uint32_t memory_total = 0; result = nvmlInit(); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot initialize nvml library. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } result = nvmlDeviceGetCount(&device_count); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot query device count. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } if (device_count > 1) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: this software has be rewritten if you want to support more than 1 device. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } mrLog() << ">>> 'nvml' (thread main): get gpu device handler..."; mrLog.flush(); result = nvmlDeviceGetHandleByIndex(0, &mDevice); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot get device handler. 
(file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } mrLog() << " done!" << std::endl; result = nvmlDeviceGetName(mDevice, name, NVML_DEVICE_NAME_BUFFER_SIZE); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot get device name. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } result = nvmlDeviceGetPciInfo(mDevice, &pci); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot get pci information. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } result = nvmlDeviceGetPowerManagementMode(mDevice, &mode); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: no power managment supported. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } result = nvmlDeviceGetPerformanceState(mDevice, &power_state); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: no performance state reading possible. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } result = nvmlDeviceGetSupportedMemoryClocks(mDevice, &clk_mem_cnt, clk_mem); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot obtain memory clock. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } for (int i=0; i<(int32_t)clk_mem_cnt; ++i) { clk_mem_min = (clk_mem[i]<clk_mem_min) ? clk_mem[i] : clk_mem_min; clk_mem_max = (clk_mem[i]>clk_mem_max) ? clk_mem[i] : clk_mem_max; } result = nvmlDeviceGetSupportedGraphicsClocks(mDevice, clk_mem_min, &clk_gpu_min_arr_cnt, clk_gpu_min_arr); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot obtain graphics clock. 
(file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } for (int32_t i=0; i<(int32_t)clk_gpu_min_arr_cnt; ++i) { clk_gpu_min = (clk_gpu_min_arr[i]<clk_gpu_min) ? clk_gpu_min_arr[i] : clk_gpu_min; } result = nvmlDeviceGetSupportedGraphicsClocks(mDevice, clk_mem_max, &clk_gpu_max_arr_cnt, clk_gpu_max_arr); if (NVML_SUCCESS != result) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: cannot obtain graphics clock. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } for (int32_t i=0; i<(int32_t)clk_gpu_max_arr_cnt; ++i) { clk_gpu_max = (clk_gpu_max_arr[i]>clk_gpu_max) ? clk_gpu_max_arr[i] : clk_gpu_max; } result = nvmlDeviceGetMemoryInfo(mDevice, &memory); if (NVML_SUCCESS != result) { mrLog.lock(); mrLog(CLogger::scErr) << "!!! 'nvml thread' (thread main): Error: cannot obtain memory informations. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; mrLog.unlock(); exit(EXIT_FAILURE); } memory_total = (uint32_t)(memory.total >> 20); rv = exec_gpu_mgmt((char**)args_set_pm); if (rv) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: in gpu_management tool. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } mrLog() << ">>> 'nvml' (thread main): persistence mode enabled." 
<< std::endl; mrLog() << ">>> 'nvml' (thread main):" << std::endl << " device : " << name << std::endl << " pcie : " << pci.busId << std::endl << " power mgmt mode: " << modes[mode] << std::endl << " power state cur: " << power_state << std::endl << " power state min: " << NVML_PSTATE_15 << std::endl << " power state max: " << NVML_PSTATE_0 << std::endl << " memory total : " << memory_total << " MiB" << std::endl << " avail mem clks : "; for (int i=0; i<(int32_t)clk_mem_cnt; ++i) { if (i<(int32_t)clk_mem_cnt-1) { mrLog() << clk_mem[i] << " MHz, "; } else { mrLog() << clk_mem[i] << " MHz" << std::endl; } } mrLog() << " memory clk min : " << clk_mem_min << " MHz" << std::endl << " avail core clks: "; for (int32_t i=0; i<(int32_t)clk_gpu_min_arr_cnt; ++i) { if (i<(int32_t)clk_gpu_min_arr_cnt-1) { mrLog() << clk_gpu_min_arr[i] << " MHz, "; } else { mrLog() << clk_gpu_min_arr[i] << " MHz" << std::endl; } } mrLog() << " core clk min : " << clk_gpu_min << " MHz" << std::endl; mrLog() << " memory clk max : " << clk_mem_max << " MHz" << std::endl << " avail core clks: "; for (int32_t i=0; i<(int32_t)clk_gpu_max_arr_cnt; ++i) { if (i<(int32_t)clk_gpu_max_arr_cnt-1) { mrLog() << clk_gpu_max_arr[i] << " MHz, "; } else { mrLog() << clk_gpu_max_arr[i] << " MHz" << std::endl; } } mrLog() << " core clk max : " << clk_gpu_max << " MHz" << std::endl; switch (mGpuFrequency) { case GPU_FREQUENCY_MIN: clk_mem_set = clk_mem_min; clk_gpu_set = clk_gpu_min; break; case GPU_FREQUENCY_MAX: clk_mem_set = clk_mem_max; clk_gpu_set = clk_gpu_max; break; case GPU_FREQUENCY_CUR: default: clk_mem_set = 0; clk_gpu_set = 0; result = nvmlDeviceGetClockInfo(mDevice, NVML_CLOCK_MEM, &clk_mem_set); if (NVML_SUCCESS != result) { mrLog.lock(); mrLog(CLogger::scErr) << "!!! 'nvml thread' (thread main): Error: cannot read frequency. 
(file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; mrLog.unlock(); exit(EXIT_FAILURE); } result = nvmlDeviceGetClockInfo(mDevice, NVML_CLOCK_GRAPHICS, &clk_gpu_set); if (NVML_SUCCESS != result) { mrLog.lock(); mrLog(CLogger::scErr) << "!!! 'nvml thread' (thread main): Error: cannot read frequency. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; mrLog.unlock(); exit(EXIT_FAILURE); } break; } if (mGpuFrequency == GPU_FREQUENCY_MIN || mGpuFrequency == GPU_FREQUENCY_MAX) { // In these cases we actually set the GPU frequencies either to the maximum or minimum value. clk_gpu_str << "-c " << clk_gpu_set; clk_mem_str << "-m " << clk_mem_set; char const* args_set_clk[] = {"gpu_management", clk_gpu_str.str().c_str() , clk_mem_str.str().c_str(), NULL}; rv = exec_gpu_mgmt((char**)args_set_clk); if (rv) { mrLog(CLogger::scErr) << "!!! 'nvml' (thread main): Error: in gpu_management tool. (file: " << __FILE__ << ", line: " << __LINE__ << ")" << std::endl; exit(EXIT_FAILURE); } mrLog() << ">>> 'nvml' (thread main): set core clk to " << clk_gpu_set << " MHz and mem clk to " << clk_mem_set << " MHz." << std::endl; } else { // We name the values *_set, but we don't set the frequency. We just print the current GPU frequency. mrLog() << ">>> 'nvml' (thread main): current core clk is " << clk_gpu_set << " MHz and mem clk is " << clk_mem_set << " MHz." << std::endl; } mrLog() << ">>> 'nvml' (thread main): wait for 15s to throttle gpu clocks." << std::endl; sleep(15); mrLog() << ">>> 'nvml' (thread main): initialization done." << std::endl << std::endl; }
/*
 * Registers every NVML-visible GPU that can be located in the hwloc topology.
 *
 * For each device index the NVML handle is looked up, the matching hwloc
 * object is searched (first as an NVML OS device, then through
 * get_non_nvml_device()), and - if it has a machine ancestor - a PCI_Device
 * is initialized for it and handed to store_device_on_appropriate_chip().
 *
 * NVML errors are logged through log_nvml_error(); the function always
 * returns PBSE_NONE.  machine_obj is not used by this implementation.
 */
int Machine::initializeNVIDIADevices(hwloc_obj_t machine_obj, hwloc_topology_t topology)
  {
  nvmlReturn_t rc;

  /* Initialize the NVML handle.
   *
   * nvmlInit should be called once before invoking any other methods in the NVML library.
   * A reference count of the number of initializations is maintained. Shutdown only occurs
   * when the reference count reaches zero.
   * */
  rc = nvmlInit();
  if (rc != NVML_SUCCESS && rc != NVML_ERROR_ALREADY_INITIALIZED)
    {
    log_nvml_error(rc, NULL, __func__);
    return(PBSE_NONE);
    }

  unsigned int device_count = 0;

  /* Get the device count. */
  rc = nvmlDeviceGetCount(&device_count);
  if (rc == NVML_SUCCESS)
    {
    nvmlDevice_t gpu;

    /* Get the nvml device handle at each index */
    for (unsigned int idx = 0; idx < device_count; idx++)
      {
      rc = nvmlDeviceGetHandleByIndex(idx, &gpu);

      if (rc != NVML_SUCCESS)
        {
        /* TODO: get gpuid from nvmlDevice_t struct */
        log_nvml_error(rc, NULL, __func__);

        /* gpu is uninitialized (or still holds the previous device), so
         * nothing below can be trusted for this index. */
        continue;
        }

      /* Use the hwloc library to determine device locality */
      hwloc_obj_t gpu_obj;
      hwloc_obj_t ancestor_obj;

      gpu_obj = hwloc_nvml_get_device_osdev(topology, gpu);
      if (gpu_obj == NULL)
        {
        /* This was not an nvml device. We will look for a "card" device (GeForce or Quadra) */
        gpu_obj = this->get_non_nvml_device(topology, gpu);
        if (gpu_obj == NULL)
          continue;
        }

      /* The ancestor was not a numa chip. Is it the machine? */
      ancestor_obj = hwloc_get_ancestor_obj_by_type(topology, HWLOC_OBJ_MACHINE, gpu_obj);
      if (ancestor_obj != NULL)
        {
        PCI_Device new_device;

        new_device.initializePCIDevice(gpu_obj, idx, topology);

        store_device_on_appropriate_chip(new_device);
        }
      }
    }
  else
    {
    log_nvml_error(rc, NULL, __func__);
    }

  /* Shutdown the NVML handle.
   *
   * nvmlShutdown should be called after NVML work is done, once for each call to nvmlInit()
   * A reference count of the number of initializations is maintained. Shutdown only occurs when
   * the reference count reaches zero. For backwards compatibility, no error is reported if
   * nvmlShutdown() is called more times than nvmlInit().
   * */
  rc = nvmlShutdown();
  if (rc != NVML_SUCCESS)
    {
    log_nvml_error(rc, NULL, __func__);
    }

  return(PBSE_NONE);
  }
int main() { nvmlReturn_t result; unsigned int device_count, i; // First initialize NVML library result = nvmlInit(); if (NVML_SUCCESS != result) { printf("Failed to initialize NVML: %s\n", nvmlErrorString(result)); printf("Press ENTER to continue...\n"); getchar(); return 1; } result = nvmlDeviceGetCount(&device_count); if (NVML_SUCCESS != result) { printf("Failed to query device count: %s\n", nvmlErrorString(result)); goto Error; } printf("Found %d device%s\n\n", device_count, device_count != 1 ? "s" : ""); printf("Listing devices:\n"); for (i = 0; i < device_count; i++) { nvmlDevice_t device; char name[NVML_DEVICE_NAME_BUFFER_SIZE]; nvmlPciInfo_t pci; nvmlComputeMode_t compute_mode; // Query for device handle to perform operations on a device // You can also query device handle by other features like: // nvmlDeviceGetHandleBySerial // nvmlDeviceGetHandleByPciBusId result = nvmlDeviceGetHandleByIndex(i, &device); if (NVML_SUCCESS != result) { printf("Failed to get handle for device %i: %s\n", i, nvmlErrorString(result)); goto Error; } result = nvmlDeviceGetName(device, name, NVML_DEVICE_NAME_BUFFER_SIZE); if (NVML_SUCCESS != result) { printf("Failed to get name of device %i: %s\n", i, nvmlErrorString(result)); goto Error; } // pci.busId is very useful to know which device physically you're talking to // Using PCI identifier you can also match nvmlDevice handle to CUDA device. result = nvmlDeviceGetPciInfo(device, &pci); if (NVML_SUCCESS != result) { printf("Failed to get pci info for device %i: %s\n", i, nvmlErrorString(result)); goto Error; } printf("%d. 
%s [%s]\n", i, name, pci.busId); // This is a simple example on how you can modify GPU's state result = nvmlDeviceGetComputeMode(device, &compute_mode); if (NVML_ERROR_NOT_SUPPORTED == result) printf("\t This is not CUDA capable device\n"); else if (NVML_SUCCESS != result) { printf("Failed to get compute mode for device %i: %s\n", i, nvmlErrorString(result)); goto Error; } else { // try to change compute mode printf("\t Changing device's compute mode from '%s' to '%s'\n", convertToComputeModeString(compute_mode), convertToComputeModeString(NVML_COMPUTEMODE_PROHIBITED)); result = nvmlDeviceSetComputeMode(device, NVML_COMPUTEMODE_PROHIBITED); if (NVML_ERROR_NO_PERMISSION == result) printf("\t\t Need root privileges to do that: %s\n", nvmlErrorString(result)); else if (NVML_ERROR_NOT_SUPPORTED == result) printf("\t\t Compute mode prohibited not supported. You might be running on\n" "\t\t windows in WDDM driver model or on non-CUDA capable GPU.\n"); else if (NVML_SUCCESS != result) { printf("\t\t Failed to set compute mode for device %i: %s\n", i, nvmlErrorString(result)); goto Error; } else { printf("\t Restoring device's compute mode back to '%s'\n", convertToComputeModeString(compute_mode)); result = nvmlDeviceSetComputeMode(device, compute_mode); if (NVML_SUCCESS != result) { printf("\t\t Failed to restore compute mode for device %i: %s\n", i, nvmlErrorString(result)); goto Error; } } } } result = nvmlShutdown(); if (NVML_SUCCESS != result) printf("Failed to shutdown NVML: %s\n", nvmlErrorString(result)); printf("All done.\n"); printf("Press ENTER to continue...\n"); getchar(); return 0; Error: result = nvmlShutdown(); if (NVML_SUCCESS != result) printf("Failed to shutdown NVML: %s\n", nvmlErrorString(result)); printf("Press ENTER to continue...\n"); getchar(); return 1; }