/*
 * Initialize the CUPTI events data of the given VampirTrace CUPTI context.
 *
 * Queries the compute capability of vtcuptiCtx->cuDev and looks up the
 * matching device capability entry via vt_cupti_checkMetricList(). If no
 * entry is returned, the function returns without setting vtcuptiCtx->events.
 * Otherwise a vt_cupti_events_t is allocated, stored in vtcuptiCtx->events,
 * populated with event groups and counter buffers, and passed to
 * vt_cuptievt_start().
 *
 * Allocation failures are reported with vt_error_msg().
 * NOTE(review): vt_error_msg() is assumed not to return (the code following
 * each check dereferences the checked pointer) -- confirm.
 *
 * @param vtcuptiCtx pointer to the VampirTrace CUPTI context
 */
void vt_cupti_events_initContext(vt_cupti_ctx_t *vtcuptiCtx)
{
  vt_cupti_events_t *vtcuptiEvtCtx = NULL;

  vt_cntl_msg(2, "[CUPTI Events] Initializing VampirTrace CUPTI events context");

  /* get a pointer to eventIDArray */
  {
    CUresult cuErr = CUDA_SUCCESS;
    int dev_major, dev_minor;
    vt_cupti_device_t *cuptiDev;

    /* TODO: do not trace this driver API function call */
    cuErr = cuDeviceComputeCapability(&dev_major, &dev_minor, vtcuptiCtx->cuDev);
    VT_CUDRV_CALL(cuErr, "cuDeviceComputeCapability");

    /* check if device capability already listed */
    VT_CUPTI_LOCK();
      cuptiDev = vtcuptievtCapList;
    VT_CUPTI_UNLOCK();

    cuptiDev = vt_cupti_checkMetricList(cuptiDev, dev_major, dev_minor);
    if(cuptiDev){
      /* allocate the VampirTrace CUPTI events context */
      vtcuptiEvtCtx = (vt_cupti_events_t *)malloc(sizeof(vt_cupti_events_t));
      if(vtcuptiEvtCtx == NULL)
        vt_error_msg("[CUPTI Events] malloc(sizeof(vt_cupti_events_t)) failed!");

      vtcuptiEvtCtx->vtDevCap = cuptiDev;
      vtcuptiEvtCtx->vtGrpList = NULL;
      vtcuptiEvtCtx->counterData = NULL;
      vtcuptiEvtCtx->cuptiEvtIDs = NULL;

      vtcuptiCtx->events = vtcuptiEvtCtx;
    }else{
      /* no capability entry for this device: leave the context untouched */
      return;
    }
  }

  /* create and add the VampirTrace CUPTI groups to the context */
  vt_cupti_addEvtGrpsToCtx(vtcuptiCtx);

  /* The group list was initialized to NULL above and is dereferenced below;
     report a missing group explicitly instead of crashing on a NULL access. */
  if(vtcuptiEvtCtx->vtGrpList == NULL)
    vt_error_msg("[CUPTI Events] No event group available for the context!");

  /* allocate memory for CUPTI counter reads */
  {
    /* buffers are sized by the event count of the first group in the list */
    size_t allocSize = vtcuptiEvtCtx->vtGrpList->evtNum;

    vtcuptiEvtCtx->counterData =
      (uint64_t *)malloc(allocSize*sizeof(uint64_t));
    vtcuptiEvtCtx->cuptiEvtIDs =
      (CUpti_EventID *)malloc(allocSize*sizeof(CUpti_EventID));

    /* malloc(0) may legitimately return NULL, so only a failed non-empty
       request is treated as an error */
    if(allocSize > 0 && vtcuptiEvtCtx->counterData == NULL)
      vt_error_msg("[CUPTI Events] malloc of counter data buffer failed!");

    if(allocSize > 0 && vtcuptiEvtCtx->cuptiEvtIDs == NULL)
      vt_error_msg("[CUPTI Events] malloc of event ID buffer failed!");
  }

  vt_cuptievt_start(vtcuptiEvtCtx);
}
/*
 * Initializes a CUPTI host thread and create the event group.
 *
 * The work is bracketed by vt_enter()/vt_exit() on region rid_cupti_init.
 * CUDA tracing is suspended while the CUDA driver is queried for the current
 * context (if none was given), its device and the device's compute
 * capability. On success the new context gets its event groups and counter
 * buffers and is prepended to the global list vtcuptiCtxlist.
 *
 * Allocation failures are reported with vt_error_msg().
 * NOTE(review): vt_error_msg() is assumed not to return (the code following
 * each check dereferences the checked pointer) -- confirm.
 *
 * @param ptid the VampirTrace thread id
 * @param cuCtx optionally given CUDA context (NULL: use the current context)
 *
 * @return the created VampirTrace CUPTI host thread structure, or NULL if
 *         vt_cupti_checkMetricList() returns no device capability entry
 */
static vt_cupti_ctx_t* vt_cupti_initCtx(uint32_t ptid, CUcontext cuCtx)
{
  vt_cupti_ctx_t *vtcuptiCtx = NULL;
  uint64_t time;

  vt_cntl_msg(2, "[CUPTI] Initializing VampirTrace CUPTI context (ptid=%d)",
              ptid);

  time = vt_pform_wtime();
  vt_enter(ptid, &time, rid_cupti_init);

  /* do not trace CUDA functions invoked here */
  VT_SUSPEND_CUDA_TRACING(ptid);

  /* initialize CUDA driver API, if necessary and get context handle */
  if(cuCtx == NULL){
#if (defined(CUDA_VERSION) && (CUDA_VERSION < 4000))
    /* pop and immediately push back to obtain the current context handle */
    CHECK_CU_ERROR(cuCtxPopCurrent(&cuCtx), "cuCtxPopCurrent");
    CHECK_CU_ERROR(cuCtxPushCurrent(cuCtx), "cuCtxPushCurrent");
#else
    CHECK_CU_ERROR(cuCtxGetCurrent(&cuCtx), "cuCtxGetCurrent");
#endif
  }

  /* get a pointer to eventIDArray */
  {
    CUresult cuErr = CUDA_SUCCESS;
    int dev_major, dev_minor;
    CUdevice cuDev = 0;
    vt_cupti_dev_t *cuptiDev;

    CHECK_CU_ERROR(cuCtxGetDevice(&cuDev), "cuCtxGetDevice");

    cuErr = cuDeviceComputeCapability(&dev_major, &dev_minor, cuDev);
    CHECK_CU_ERROR(cuErr, "cuDeviceComputeCapability");

    /* check if device capability already listed */
    CUPTI_LOCK();
      cuptiDev = vt_cupti_capList;
    CUPTI_UNLOCK();

    cuptiDev = vt_cupti_checkMetricList(cuptiDev, dev_major, dev_minor);
    if(cuptiDev){
      vtcuptiCtx = (vt_cupti_ctx_t*)malloc(sizeof(vt_cupti_ctx_t));
      if(vtcuptiCtx == NULL)
        vt_error_msg("malloc(sizeof(VTCUPTIhostThrd)) failed!");

      vtcuptiCtx->cuCtx = cuCtx;
      vtcuptiCtx->vtDevCap = cuptiDev;
      vtcuptiCtx->vtGrpList = NULL;
      vtcuptiCtx->counterData = NULL;
      vtcuptiCtx->cuptiEvtIDs = NULL;
      vtcuptiCtx->next = NULL;
    }else{
      /* no capability entry: close the region, resume tracing and give up */
      time = vt_pform_wtime();
      vt_exit(ptid, &time);
      VT_RESUME_CUDA_TRACING(ptid);
      return NULL;
    }
  }

  VT_RESUME_CUDA_TRACING(ptid);

  /* create and add the VampirTrace CUPTI groups to the context */
  vt_cupti_addEvtGrpsToCtx(vtcuptiCtx);

  /* The group list was initialized to NULL above and is dereferenced below;
     report a missing group explicitly instead of crashing on a NULL access. */
  if(vtcuptiCtx->vtGrpList == NULL)
    vt_error_msg("[CUPTI] No event group available for the context!");

  /* allocate memory for CUPTI counter reads */
  {
    /* buffers are sized by the event count of the first group in the list */
    size_t allocSize = vtcuptiCtx->vtGrpList->evtNum;

    vtcuptiCtx->counterData =
      (uint64_t *)malloc(allocSize*sizeof(uint64_t));
    vtcuptiCtx->cuptiEvtIDs =
      (CUpti_EventID *)malloc(allocSize*sizeof(CUpti_EventID));

    /* malloc(0) may legitimately return NULL, so only a failed non-empty
       request is treated as an error */
    if(allocSize > 0 && vtcuptiCtx->counterData == NULL)
      vt_error_msg("[CUPTI] malloc of counter data buffer failed!");

    if(allocSize > 0 && vtcuptiCtx->cuptiEvtIDs == NULL)
      vt_error_msg("[CUPTI] malloc of event ID buffer failed!");
  }

  /* add VampirTrace CUPTI context entry to list (as first element) */
  CUPTI_LOCK();
    vtcuptiCtx->next = vtcuptiCtxlist;
    vtcuptiCtxlist = vtcuptiCtx;
  CUPTI_UNLOCK();

  time = vt_pform_wtime();
  vt_exit(ptid, &time);

  return vtcuptiCtx;
}