/* * Initialize VampirTrace IDs and registers the finalize function. * This may be done implicitly by vt_cupti_count(). */ void vt_cupti_events_init() { if(!vt_cuptievt_initialized){ /* fast check without lock */ vt_cupti_init(); VT_CUPTI_LOCK(); if(!vt_cuptievt_initialized){ vt_cntl_msg(2, "[CUPTI Events] Initializing ... "); /* create VampirTrace counter group ID only once */ #if (defined(VT_MT) || defined(VT_HYB)) VTTHRD_LOCK_IDS(); #endif vt_cuptievt_rid_init = vt_def_region(VT_MASTER_THREAD, "vtcuptiHostThreadInit", VT_NO_ID, VT_NO_LNO, VT_NO_LNO, "VT_CUPTI", VT_FUNCTION); vt_cuptievt_cgid = vt_def_counter_group(VT_MASTER_THREAD, "CUPTI"); #if (defined(VT_MT) || defined(VT_HYB)) VTTHRD_UNLOCK_IDS(); #endif vt_cupti_events_sampling = (uint8_t)vt_env_cupti_sampling(); vtcuptievtCapList = vt_cuptievt_setupMetricList(); if(NULL == vtcuptievtCapList){ vt_cupti_events_enabled = 0; }else{ /* register the finalize function of VampirTrace CUPTI to be called before * the program exits */ atexit(vt_cupti_events_finalize); } vt_cuptievt_initialized = 1; VT_CUPTI_UNLOCK(); } } }
/*
 * Retrieve the VampirTrace CUPTI context from the CUDA context.
 *
 * Performs a linear search of the global context list under CUPTI_LOCK();
 * on a miss, creates and starts a new VampirTrace CUPTI context outside the
 * lock. If creation fails, the thread is flagged with VTGPU_NO_PC so GPU
 * counters are not attempted again for it.
 *
 * @param cuCtx the CUDA context
 * @param ptid the active VampirTrace thread id
 *
 * @return the VampirTrace CUPTI context, or NULL if GPU counters are
 *         disabled for this thread or initialization failed
 */
static vt_cupti_ctx_t* vt_cupti_getCtx(CUcontext cuCtx, uint32_t ptid)
{
  vt_cupti_ctx_t *vtcuptiCtx = NULL;

  /* check, if there has been at least one VampirTrace CUPTI context created */
  if(vtcuptiCtxlist == NULL) vt_cupti_init();

  /* check, if the current VampirTrace thread is enabled for GPU counters */
  if((vt_gpu_prop[ptid] & VTGPU_NO_PC) == VTGPU_NO_PC)
    return NULL;

  /* check if CUDA context is listed (linear search) */
  CUPTI_LOCK();
  vtcuptiCtx = vtcuptiCtxlist;
  while(vtcuptiCtx != NULL){
    if(vtcuptiCtx->cuCtx == cuCtx){
      /* found: release the lock before returning the matching entry */
      CUPTI_UNLOCK();
      /*vt_cntl_msg(1, "[CUPTI] host thread %d (MPI rank %d)", ptid, vt_my_trace);*/
      return vtcuptiCtx;
    }
    vtcuptiCtx = vtcuptiCtx->next;
  }
  CUPTI_UNLOCK();

  /* not found: create a new VampirTrace CUPTI context for this CUDA context.
   * NOTE(review): creation happens after dropping the lock — presumably
   * vt_cupti_initCtx() handles concurrent creation itself; verify. */
  vt_cntl_msg(2, "[CUPTI] Context for VT tid %d unknown! Creating ... ", ptid);

  vtcuptiCtx = vt_cupti_initCtx(ptid, cuCtx);
  if(vtcuptiCtx != NULL){
    vt_cupti_start(vtcuptiCtx);
  }else{
    /* no performance counters for this thread available */
    vt_gpu_prop[ptid] |= VTGPU_NO_PC;
    vt_cntl_msg(2, "[CUPTI] Could not initialize!");
  }

  return vtcuptiCtx;
}
/*
 * Returns the VampirTrace CUPTI context for the CUDA context associated with
 * the calling host thread.
 *
 * @param ptid the VampirTrace thread id of the calling host thread
 *
 * @return the VampirTrace CUPTI context, or NULL if no CUDA context is bound
 *         to the calling host thread
 */
vt_cupti_ctx_t* vt_cupti_getCurrentContext(uint32_t ptid)
{
  CUcontext cuCtx = NULL;

  if(!vt_cupti_initialized) vt_cupti_init();

  /* suspend CUDA tracing so the driver API calls below are not recorded */
  VT_SUSPEND_CUDA_TRACING(ptid);

# if (defined(CUDA_VERSION) && (CUDA_VERSION < 4000))
  /* CUDA < 4.0 has no cuCtxGetCurrent(): pop the current context to read it,
   * then push it back to leave the thread's context stack unchanged */
  CHECK_CU_ERROR(cuCtxPopCurrent(&cuCtx), "cuCtxPopCurrent");
  CHECK_CU_ERROR(cuCtxPushCurrent(cuCtx), "cuCtxPushCurrent");
# else
  CHECK_CU_ERROR(cuCtxGetCurrent(&cuCtx), "cuCtxGetCurrent");
# endif

  VT_RESUME_CUDA_TRACING(ptid);

  if(cuCtx == NULL)
  {
    /* BUGFIX: removed the stray 'cuCtx' argument — the format string contains
     * no conversion specifier for it (leftover from a removed %p) */
    vt_cntl_msg(2, "[CUPTI] No context is bound to the calling CPU thread");
    return NULL;
  }

  return vt_cupti_getCtx(cuCtx, ptid);
}