/*
 * Create a VampirTrace CUPTI Activity stream.
 *
 * Allocates and initializes the stream object and registers a VampirTrace
 * thread for it. The thread is named "CUDA", optionally followed by
 * "[<devID>]" (stream reuse enabled and device ID known), "[?:<strmID>]"
 * (no stream reuse, device ID unknown) or "[<devID>:<strmID>]".
 * If the activity stream list of the context is still empty and GPU idle
 * tracing is enabled, the GPU idle region is entered on the new thread.
 *
 * The created stream is not inserted into any list by this function.
 * vtCtx and vtCtx->activity are dereferenced without a NULL check.
 *
 * @param vtCtx VampirTrace CUPTI context the stream is created for
 * @param strmID ID of the CUDA stream
 *
 * @return pointer to created VampirTrace CUPTI Activity stream
 */
static vt_cuptiact_strm_t* vt_cuptiact_createStream(vt_cupti_ctx_t *vtCtx,
                                                    uint32_t strmID)
{
  vt_cuptiact_strm_t *vtStrm = NULL;

  vtStrm = malloc(sizeof *vtStrm);
  if(vtStrm == NULL){
    vt_error_msg("[CUPTI Activity] Could not allocate memory for stream!");
    /* only reached if vt_error_msg() returns; never dereference NULL below */
    return NULL;
  }

  vtStrm->strmID = strmID;
  /* taken before vt_gpu_init_time is possibly raised to vt_start_time below */
  vtStrm->vtLastTime = vt_gpu_init_time;
  vtStrm->destroyed = 0;
  vtStrm->next = NULL;

  /* create VT-User-Thread with name and parent id and get its id */
  {
    char thread_name[16] = "CUDA";
    /* the suffix is written directly behind the 4 characters of "CUDA" */
    const size_t prefixLen = 4;
    const size_t suffixCap = sizeof(thread_name) - prefixLen;
    int written = 0;

    if(vt_gpu_stream_reuse){
      /* one thread per device: the stream ID is not part of the name */
      if(vtCtx->devID != VT_NO_ID){
        written = snprintf(thread_name + prefixLen, suffixCap,
                           "[%d]", vtCtx->devID);
      }
    }else if(vtCtx->devID == VT_NO_ID){
      written = snprintf(thread_name + prefixLen, suffixCap,
                         "[?:%d]", strmID);
    }else{
      written = snprintf(thread_name + prefixLen, suffixCap,
                         "[%d:%d]", vtCtx->devID, strmID);
    }

    /*
     * snprintf() returns a negative value on error and the untruncated
     * length otherwise; a value >= suffixCap means the suffix was cut off.
     */
    if(written < 0){
      vt_cntl_msg(1, "Could not create thread name for CUDA thread!");
    }else if((size_t)written >= suffixCap){
      vt_cntl_msg(1, "Thread name for CUDA thread was truncated!");
    }

    VT_CHECK_THREAD;
    vt_gpu_registerThread(thread_name, VT_MY_THREAD, &(vtStrm->vtThrdID));
  }

  /* no activity stream exists yet for this context: this is the first one */
  if(vtCtx->activity->strmList == NULL){
    /* write enter event for GPU_IDLE on first stream */
    if(vt_gpu_trace_idle == 1){
      /* the idle enter record must not be older than the trace start */
      if(vt_gpu_init_time < vt_start_time){
        vt_gpu_init_time = vt_start_time;
      }

      vt_enter(vtStrm->vtThrdID, &vt_gpu_init_time, vt_gpu_rid_idle);
      vtCtx->activity->gpuIdleOn = 1;
    }
  }

  return vtStrm;
}
/*
 * Create a VampirTrace CUPTI stream.
 *
 * Allocates and initializes the stream object, determines its stream ID and
 * registers a VampirTrace thread for it. The thread is named "CUDA",
 * optionally followed by "[<devID>]" (stream reuse enabled and device ID
 * known), "[?:<strmID>]" (no stream reuse, device ID unknown) or
 * "[<devID>:<strmID>]".
 *
 * With VT_CUPTI_ACTIVITY defined, a stream ID of VT_CUPTI_NO_STREAM_ID is
 * resolved from cuStrm via cuptiGetStreamId(). Otherwise, if the context has
 * callback data, the stream ID is taken from (and increments) its
 * streamsCreated counter.
 *
 * If the stream list of the context is still empty, the GPU idle region is
 * entered and the memory usage counter is set to 0 on the new thread,
 * depending on the respective trace settings.
 *
 * The created stream is NOT linked into vtCtx->strmList by this function.
 *
 * @param vtCtx VampirTrace CUPTI context
 * @param cuStrm CUDA stream
 * @param strmID ID of the CUDA stream
 *
 * @return pointer to created VampirTrace CUPTI stream, or NULL if no context
 *         was given or (VT_CUPTI_ACTIVITY only) neither a CUDA stream nor a
 *         stream ID was given
 */
vt_cupti_strm_t* vt_cupti_createStream(vt_cupti_ctx_t *vtCtx,
                                       CUstream cuStrm, uint32_t strmID)
{
  vt_cupti_strm_t *vtStrm = NULL;

  if(vtCtx == NULL){
    vt_warning("[CUPTI] Cannot create stream without VampirTrace CUPTI context");
    return NULL;
  }

  vtStrm = malloc(sizeof *vtStrm);
  if(vtStrm == NULL){
    vt_error_msg("[CUPTI] Could not allocate memory for stream!");
    /* only reached if vt_error_msg() returns; never dereference NULL below */
    return NULL;
  }

  vtStrm->cuStrm = cuStrm;
  /* taken before vt_gpu_init_time is possibly raised to vt_start_time below */
  vtStrm->vtLastTime = vt_gpu_init_time;
  vtStrm->destroyed = 0;
  vtStrm->next = NULL;

#if defined(VT_CUPTI_ACTIVITY)
  /* create stream by VT CUPTI callbacks implementation (CUstream is given) */
  if(strmID == VT_CUPTI_NO_STREAM_ID){
    if(cuStrm != VT_CUPTI_NO_STREAM){
      VT_CUPTI_CALL(cuptiGetStreamId(vtCtx->cuCtx, cuStrm, &strmID),
                    "cuptiGetStreamId");
    }else{
      vt_warning("[CUPTI] Neither CUDA stream nor stream ID given!");
      free(vtStrm);
      return NULL;
    }
  }
#else /* only VT_CUPTI_CALLBACKS is defined */
  if(vtCtx->callbacks != NULL){
    strmID = vtCtx->callbacks->streamsCreated;
    vtCtx->callbacks->streamsCreated++;
  }
#endif

  vtStrm->cuStrmID = strmID;

  /* create VampirTrace thread */
  {
    char thread_name[16] = "CUDA";
    /* the suffix is written directly behind the 4 characters of "CUDA" */
    const size_t prefixLen = 4;
    const size_t suffixCap = sizeof(thread_name) - prefixLen;
    int written = 0;

    if(vt_gpu_stream_reuse){
      /* one thread per device: the stream ID is not part of the name */
      if(vtCtx->devID != VT_NO_ID){
        written = snprintf(thread_name + prefixLen, suffixCap,
                           "[%d]", vtCtx->devID);
      }
    }else if(vtCtx->devID == VT_NO_ID){
      written = snprintf(thread_name + prefixLen, suffixCap,
                         "[?:%d]", strmID);
    }else{
      written = snprintf(thread_name + prefixLen, suffixCap,
                         "[%d:%d]", vtCtx->devID, strmID);
    }

    /*
     * snprintf() returns a negative value on error and the untruncated
     * length otherwise; a value >= suffixCap means the suffix was cut off.
     */
    if(written < 0){
      vt_cntl_msg(1, "Could not create thread name for CUDA thread!");
    }else if((size_t)written >= suffixCap){
      vt_cntl_msg(1, "Thread name for CUDA thread was truncated!");
    }

    VT_CHECK_THREAD;
    vt_gpu_registerThread(thread_name, VT_MY_THREAD, &(vtStrm->vtThrdID));
  }

  /* records written below must not be older than the trace start */
  if(vt_gpu_init_time < vt_start_time){
    vt_gpu_init_time = vt_start_time;
  }

  /* for the first stream created for this context */
  if(vtCtx->strmList == NULL){
    if(vt_gpu_trace_idle > 0){
      /* write enter event for GPU_IDLE on first stream */
      vt_enter(vtStrm->vtThrdID, &vt_gpu_init_time, vt_gpu_rid_idle);

#if defined(VT_CUPTI_ACTIVITY)
      if(vtCtx->activity != NULL){
        vtCtx->activity->gpuIdleOn = 1;
      }
#endif
    }

    /* set the counter value for cudaMalloc to 0 on first stream */
    if(vt_gpu_trace_memusage > 0){
      vt_count(vtStrm->vtThrdID, &vt_gpu_init_time, vt_gpu_cid_memusage, 0);
    }
  }

  if(vt_gpu_trace_kernels > 1){
    /* set count values to zero */
    vt_count(vtStrm->vtThrdID, &vt_gpu_init_time, vt_cupti_cid_blocksPerGrid, 0);
    vt_count(vtStrm->vtThrdID, &vt_gpu_init_time, vt_cupti_cid_threadsPerBlock, 0);
    vt_count(vtStrm->vtThrdID, &vt_gpu_init_time, vt_cupti_cid_threadsPerKernel, 0);
  }

  /*
   * The stream is intentionally not prepended to vtCtx->strmList here.
   * NOTE(review): presumably the caller inserts it - confirm.
   */

  return vtStrm;
}