/*
 * Initialize the CUPTI events data of the given VampirTrace CUPTI context.
 *
 * The compute capability of the context's device is looked up in the global
 * capability list. If there is no matching entry, the function returns
 * without attaching any events data to the context. Otherwise a
 * vt_cupti_events_t is allocated, attached to the context, the event groups
 * are added, the counter read buffers are allocated and event capturing is
 * started.
 *
 * @param vtcuptiCtx pointer to the VampirTrace CUPTI context
 */
void vt_cupti_events_initContext(vt_cupti_ctx_t *vtcuptiCtx)
{
  vt_cupti_events_t *vtcuptiEvtCtx = NULL;

  vt_cntl_msg(2, "[CUPTI Events] Initializing VampirTrace CUPTI events context");

  /* get a pointer to eventIDArray */
  {
    CUresult cuErr = CUDA_SUCCESS;
    int dev_major, dev_minor;
    vt_cupti_device_t *cuptiDev;

    /* TODO: do not trace this driver API function call */
    cuErr = cuDeviceComputeCapability(&dev_major, &dev_minor,
                                      vtcuptiCtx->cuDev);
    VT_CUDRV_CALL(cuErr, "cuDeviceComputeCapability");

    /* check if device capability already listed
       (only the read of the list head is done under the lock) */
    VT_CUPTI_LOCK();
    cuptiDev = vtcuptievtCapList;
    VT_CUPTI_UNLOCK();

    cuptiDev = vt_cupti_checkMetricList(cuptiDev, dev_major, dev_minor);
    if(cuptiDev == NULL){
      /* no list entry for this compute capability: nothing to set up */
      return;
    }

    /* allocate the VampirTrace CUPTI events context */
    vtcuptiEvtCtx = (vt_cupti_events_t *)malloc(sizeof(vt_cupti_events_t));
    if(vtcuptiEvtCtx == NULL)
      vt_error_msg("[CUPTI Events] malloc(sizeof(vt_cupti_events_t)) failed!");

    vtcuptiEvtCtx->vtDevCap = cuptiDev;
    vtcuptiEvtCtx->vtGrpList = NULL;
    vtcuptiEvtCtx->counterData = NULL;
    vtcuptiEvtCtx->cuptiEvtIDs = NULL;

    vtcuptiCtx->events = vtcuptiEvtCtx;
  }

  /* create and add the VampirTrace CUPTI groups to the context */
  vt_cupti_addEvtGrpsToCtx(vtcuptiCtx);

  /* allocate memory for CUPTI counter reads */
  {
    /* NOTE(review): assumes vt_cupti_addEvtGrpsToCtx() left a non-NULL group
       list in the events context - TODO confirm */
    size_t allocSize = vtcuptiEvtCtx->vtGrpList->evtNum;

    vtcuptiEvtCtx->counterData =
            (uint64_t *)malloc(allocSize*sizeof(uint64_t));
    vtcuptiEvtCtx->cuptiEvtIDs =
            (CUpti_EventID *)malloc(allocSize*sizeof(CUpti_EventID));

    /* malloc(0) may legitimately return NULL, so only a failed non-empty
       request is an error. vt_error_msg() is presumably fatal: the context
       allocation above relies on that, too. */
    if(allocSize > 0 &&
       (vtcuptiEvtCtx->counterData == NULL ||
        vtcuptiEvtCtx->cuptiEvtIDs == NULL))
      vt_error_msg("[CUPTI Events] malloc of counter read buffers failed!");
  }

  vt_cuptievt_start(vtcuptiEvtCtx);
}
/* * Setup a list of devices with different device capabilities and add the * metrics, which are specified by the user. * * @return a list of CUDA devices with different device capabilities */ static vt_cupti_device_t* vt_cuptievt_setupMetricList(void) { CUresult err; int deviceCount, id; vt_cupti_device_t *capList = NULL; VT_SUSPEND_MALLOC_TRACING(VT_CURRENT_THREAD); /* CUDA initialization */ VT_CUDRV_CALL(cuInit(0), "cuInit"); /* How many GPGPU devices do we have? */ err = cuDeviceGetCount( &deviceCount ); VT_CUDRV_CALL(err, "cuDeviceGetCount"); if(deviceCount == 0){ vt_error_msg("[CUPTI Events] There is no device supporting CUDA available."); } /* create list with available compute capabilities */ for(id = 0; id < deviceCount; id++){ CUdevice cuDev; vt_cupti_device_t *cuptiDev; int dev_major, dev_minor; err = cuDeviceGet(&cuDev, id); VT_CUDRV_CALL(err, "cuDeviceGet"); err = cuDeviceComputeCapability(&dev_major, &dev_minor, cuDev); VT_CUDRV_CALL(err, "cuDeviceComputeCapability"); /* check if device capability already listed */ cuptiDev = vt_cupti_checkMetricList(capList, dev_major, dev_minor); if(cuptiDev == NULL){ /* allocate memory for device list entry */ cuptiDev = (vt_cupti_device_t *)malloc(sizeof(vt_cupti_device_t)); cuptiDev->dev_major = dev_major; cuptiDev->dev_minor = dev_minor; cuptiDev->cuDev = cuDev; cuptiDev->vtcuptiEvtList = NULL; cuptiDev->evtNum = 0; cuptiDev->next = NULL; /* prepend to list */ cuptiDev->next = capList; capList = cuptiDev; } } vt_cupti_fillMetricList(capList); /* cleanup list: remove entries, which don't have metrics */ { vt_cupti_device_t *curr = capList; vt_cupti_device_t *last = capList; while(curr != NULL){ vt_cupti_device_t *freeDev = curr; curr = curr->next; if(freeDev->evtNum == 0){ /* first element */ if(freeDev == capList){ capList = capList->next; }else{ last->next = freeDev->next; } free(freeDev); }else last = freeDev; } } VT_RESUME_MALLOC_TRACING(VT_CURRENT_THREAD); return capList; }
/*
 * Parse the environment variable for CUPTI metrics (including CUDA device
 * capabilities) and fill the capability metric list.
 *
 * The metric string is split at the configured separator(s). A token of the
 * form "<major>.<minor>_<event>" is added only to the list entry with that
 * compute capability; any other token is tried on every entry of the list.
 * Events unknown to CUPTI are skipped with a warning; a token starting with
 * "help" additionally triggers vt_cupti_showAllCounters().
 *
 * Note: strtok() modifies the string returned by vt_env_cupti_events().
 *
 * @param capList points to the first element of the capability metric list
 */
static void vt_cupti_fillMetricList(vt_cupti_device_t *capList)
{
  char *metricString = vt_env_cupti_events();
  char *metric_sep = vt_env_metrics_sep();
  char *metric, *metric_cap;

  /* nothing to parse; strtok() must not be called with a NULL string and no
     previous state, nor with a NULL delimiter set */
  if(metricString == NULL || metric_sep == NULL) return;

  for(metric = strtok(metricString, metric_sep); metric != NULL;
      metric = strtok(NULL, metric_sep)){
    CUptiResult cuptiErr = CUPTI_SUCCESS;
    vt_cupti_device_t *cuptiDev = NULL;
    vt_cupti_evtctr_t *vtcuptiEvt = NULL;
    int metr_major = 0;
    int metr_minor = 0;

    /* try to get CUDA device capability parsed from metric
       (strtok() never returns an empty token, hence metric+1 is valid) */
    metr_major = atoi(metric);
    metric_cap = strchr(metric+1, '.');
    if(metric_cap){
      metr_minor = atoi(metric_cap+1);
      metric_cap = strchr(metric_cap+1, '_');
    }

    /* check whether device capability is given or not */
    if(metric_cap){
      /* skip the "<major>.<minor>_" prefix */
      metric = metric_cap + 1;

      vt_cntl_msg(2, "Metric '%s', %d.%d", metric, metr_major, metr_minor);

      cuptiDev = vt_cupti_checkMetricList(capList, metr_major, metr_minor);
      if(cuptiDev == NULL) continue;

      vtcuptiEvt = (vt_cupti_evtctr_t*)malloc(sizeof(vt_cupti_evtctr_t));
      if(vtcuptiEvt == NULL){
        vt_warning("[CUPTI Events] malloc(sizeof(vt_cupti_evtctr_t)) failed!");
        return;
      }

      cuptiErr = cuptiEventGetIdFromName(cuptiDev->cuDev, metric,
                                         &vtcuptiEvt->cuptiEvtID);
      if(cuptiErr != CUPTI_SUCCESS){
        /* the entry is not linked into any list: release it */
        free(vtcuptiEvt);

        if(!strncmp(metric, "help", 4)) vt_cupti_showAllCounters(capList);
        vt_warning("[CUPTI Events] Skipping invalid event '%s' for device %d",
                   metric, cuptiDev->cuDev);
        continue;
      }

      /* create VampirTrace counter ID */
#if (defined(VT_MT) || defined(VT_HYB))
      VTTHRD_LOCK_IDS();
#endif
      vtcuptiEvt->vtCID = vt_def_counter(VT_MASTER_THREAD, metric, "#",
            VT_CNTR_ABS | VT_CNTR_LAST | VT_CNTR_UNSIGNED, vt_cuptievt_cgid, 0);
#if (defined(VT_MT) || defined(VT_HYB))
      VTTHRD_UNLOCK_IDS();
#endif

      /* prepend the event to the device's event list */
      cuptiDev->evtNum++;
      vtcuptiEvt->next = cuptiDev->vtcuptiEvtList;
      cuptiDev->vtcuptiEvtList = vtcuptiEvt;
    }else{
      /* device capability is not given. Try to add metric to all devices */
      uint32_t cid_metric = VT_NO_ID;

      for(cuptiDev = capList; cuptiDev != NULL; cuptiDev = cuptiDev->next){
        vtcuptiEvt = (vt_cupti_evtctr_t*)malloc(sizeof(vt_cupti_evtctr_t));
        if(vtcuptiEvt == NULL){
          vt_warning("[CUPTI Events] malloc(sizeof(vt_cupti_evtctr_t)) failed!");
          return;
        }

        cuptiErr = cuptiEventGetIdFromName(cuptiDev->cuDev, metric,
                                           &vtcuptiEvt->cuptiEvtID);
        if(cuptiErr != CUPTI_SUCCESS){
          /* the entry is not linked into any list: release it */
          free(vtcuptiEvt);

          if(!strncmp(metric, "help", 4)) vt_cupti_showAllCounters(capList);
          vt_warning("[CUPTI Events] Skipping invalid event '%s' for device %d",
                     metric, cuptiDev->cuDev);
          continue;
        }

        /* create VampirTrace counter ID, if not yet done for other device */
        if(cid_metric == VT_NO_ID){
#if (defined(VT_MT) || defined(VT_HYB))
          VTTHRD_LOCK_IDS();
#endif
          cid_metric = vt_def_counter(VT_MASTER_THREAD, metric, "#",
            VT_CNTR_ABS | VT_CNTR_LAST | VT_CNTR_UNSIGNED, vt_cuptievt_cgid, 0);
#if (defined(VT_MT) || defined(VT_HYB))
          VTTHRD_UNLOCK_IDS();
#endif
        }

        /* prepend the event to the device's event list */
        cuptiDev->evtNum++;
        vtcuptiEvt->vtCID = cid_metric;
        vtcuptiEvt->next = cuptiDev->vtcuptiEvtList;
        cuptiDev->vtcuptiEvtList = vtcuptiEvt;
      }
    }
  }
}
static vt_cupti_dev_t* vt_cupti_setupMetricList(void) { CUresult err; int deviceCount, id; vt_cupti_dev_t *capList = NULL; /* CUDA initialization */ err = cuInit( 0 ); if ( err != CUDA_SUCCESS ) { printf( "Initialization of CUDA library failed.\n" ); exit( EXIT_FAILURE ); } /* How many gpgpu devices do we have? */ err = cuDeviceGetCount( &deviceCount ); CHECK_CU_ERROR(err, "cuDeviceGetCount"); if(deviceCount == 0){ printf("[CUPTI]There is no device supporting CUDA.\n"); exit(EXIT_FAILURE); } /* create list with available compute capabilities */ for(id = 0; id < deviceCount; id++){ CUdevice cuDev; vt_cupti_dev_t *cuptiDev; int dev_major, dev_minor; err = cuDeviceGet(&cuDev, id); CHECK_CU_ERROR(err, "cuDeviceGet"); err = cuDeviceComputeCapability(&dev_major, &dev_minor, cuDev); CHECK_CU_ERROR(err, "cuDeviceComputeCapability"); /* check if device capability already listed */ cuptiDev = vt_cupti_checkMetricList(capList, dev_major, dev_minor); if(cuptiDev == NULL){ /* allocate memory for device list entry */ cuptiDev = (vt_cupti_dev_t *)malloc(sizeof(vt_cupti_dev_t)); cuptiDev->dev_major = dev_major; cuptiDev->dev_minor = dev_minor; cuptiDev->cuDev = cuDev; cuptiDev->vtcuptiEvtList = NULL; cuptiDev->evtNum = 0; cuptiDev->next = NULL; /* prepend to list */ cuptiDev->next = capList; capList = cuptiDev; } } vt_cupti_fillMetricList(capList); /* cleanup list: remove entries, which don't have metrics */ { vt_cupti_dev_t *curr = capList; vt_cupti_dev_t *last = capList; while(curr != NULL){ vt_cupti_dev_t *freeDev = curr; curr = curr->next; if(freeDev->evtNum == 0){ /* first element */ if(freeDev == capList){ capList = capList->next; }else{ last->next = freeDev->next; } free(freeDev); }else last = freeDev; } } return capList; }
/*
 * Create and initialize a VampirTrace CUPTI context for a CUDA context and
 * create its event groups.
 *
 * The function is enclosed in an enter/exit record of the region
 * rid_cupti_init on the given thread. If the compute capability of the
 * context's device is not contained in the capability list, no context is
 * created. Otherwise the new context gets its event groups and counter read
 * buffers and is prepended to the global list of VampirTrace CUPTI contexts.
 *
 * @param ptid the VampirTrace thread id
 * @param cuCtx optionally given CUDA context (if NULL, the current CUDA
 *              context of the calling thread is used)
 *
 * @return the created VampirTrace CUPTI context or NULL, if there is no
 *         capability list entry for the device
 */
static vt_cupti_ctx_t* vt_cupti_initCtx(uint32_t ptid, CUcontext cuCtx)
{
  vt_cupti_ctx_t *vtcuptiCtx = NULL;
  uint64_t time;

  vt_cntl_msg(2, "[CUPTI] Initializing VampirTrace CUPTI context (ptid=%d)",
              ptid);

  time = vt_pform_wtime();
  vt_enter(ptid, &time, rid_cupti_init);

  /* do not trace CUDA functions invoked here */
  VT_SUSPEND_CUDA_TRACING(ptid);

  /* initialize CUDA driver API, if necessary and get context handle */
  if(cuCtx == NULL){
#if (defined(CUDA_VERSION) && (CUDA_VERSION < 4000))
    /* pop and re-push the current context to obtain its handle */
    CHECK_CU_ERROR(cuCtxPopCurrent(&cuCtx), "cuCtxPopCurrent");
    CHECK_CU_ERROR(cuCtxPushCurrent(cuCtx), "cuCtxPushCurrent");
#else
    CHECK_CU_ERROR(cuCtxGetCurrent(&cuCtx), "cuCtxGetCurrent");
#endif
  }

  /* get a pointer to eventIDArray */
  {
    CUresult cuErr = CUDA_SUCCESS;
    int dev_major, dev_minor;
    CUdevice cuDev = 0;
    vt_cupti_dev_t *cuptiDev;

    CHECK_CU_ERROR(cuCtxGetDevice(&cuDev), "cuCtxGetDevice");

    cuErr = cuDeviceComputeCapability(&dev_major, &dev_minor, cuDev);
    CHECK_CU_ERROR(cuErr, "cuDeviceComputeCapability");

    /* check if device capability already listed
       (only the read of the list head is done under the lock) */
    CUPTI_LOCK();
    cuptiDev = vt_cupti_capList;
    CUPTI_UNLOCK();

    cuptiDev = vt_cupti_checkMetricList(cuptiDev, dev_major, dev_minor);
    if(cuptiDev == NULL){
      /* no list entry for this compute capability: close the region and
         re-enable CUDA tracing before leaving */
      time = vt_pform_wtime();
      vt_exit(ptid, &time);
      VT_RESUME_CUDA_TRACING(ptid);
      return NULL;
    }

    vtcuptiCtx = (vt_cupti_ctx_t*)malloc(sizeof(vt_cupti_ctx_t));
    if(vtcuptiCtx == NULL)
      vt_error_msg("malloc(sizeof(VTCUPTIhostThrd)) failed!");

    vtcuptiCtx->cuCtx = cuCtx;
    vtcuptiCtx->vtDevCap = cuptiDev;
    vtcuptiCtx->vtGrpList = NULL;
    vtcuptiCtx->counterData = NULL;
    vtcuptiCtx->cuptiEvtIDs = NULL;
    vtcuptiCtx->next = NULL;
  }

  VT_RESUME_CUDA_TRACING(ptid);

  /* create and add the VampirTrace CUPTI groups to the context */
  vt_cupti_addEvtGrpsToCtx(vtcuptiCtx);

  /* allocate memory for CUPTI counter reads */
  {
    /* NOTE(review): assumes vt_cupti_addEvtGrpsToCtx() left a non-NULL group
       list in the context - TODO confirm */
    size_t allocSize = vtcuptiCtx->vtGrpList->evtNum;

    vtcuptiCtx->counterData = (uint64_t *)malloc(allocSize*sizeof(uint64_t));
    vtcuptiCtx->cuptiEvtIDs =
            (CUpti_EventID *)malloc(allocSize*sizeof(CUpti_EventID));

    /* malloc(0) may legitimately return NULL, so only a failed non-empty
       request is an error. vt_error_msg() is presumably fatal: the context
       allocation above relies on that, too. */
    if(allocSize > 0 &&
       (vtcuptiCtx->counterData == NULL || vtcuptiCtx->cuptiEvtIDs == NULL))
      vt_error_msg("[CUPTI] malloc of counter read buffers failed!");
  }

  /* add VampirTrace CUPTI context entry to list (as first element) */
  CUPTI_LOCK();
  vtcuptiCtx->next = vtcuptiCtxlist;
  vtcuptiCtxlist = vtcuptiCtx;
  CUPTI_UNLOCK();

  time = vt_pform_wtime();
  vt_exit(ptid, &time);

  return vtcuptiCtx;
}