/**
 * Check whether the top activity buffer of the queue identified by
 * 'context' and 'streamId' is completely filled.
 *
 * A full buffer is flushed via dump() and handed back to the caller;
 * a non-full (or empty) buffer yields NULL. Any query result other
 * than "success", "queue empty" or "buffer full" is reported through
 * CUPTI_CALL.
 *
 * @param context   CUDA context owning the activity queue
 * @param streamId  CUPTI stream identifier of the queue
 *
 * @return the dumped buffer if it was full, NULL otherwise
 */
static uint8_t *dumpIfFull(CUcontext context, uint32_t streamId)
{
  size_t usedBytes;
  CUptiResult result;

  result = cuptiActivityQueryBuffer(context, streamId, &usedBytes);

  /* CUPTI signals a completely filled buffer with MAX_LIMIT_REACHED */
  if (CUPTI_ERROR_MAX_LIMIT_REACHED == result)
    return dump(context, streamId);

  /* an empty queue is a normal condition; everything else is an error */
  if ((CUPTI_SUCCESS != result) && (CUPTI_ERROR_QUEUE_EMPTY != result))
    CUPTI_CALL(result);

  return NULL;
}
/**
 * Flush all buffered CUPTI activity records of the given VampirTrace
 * CUPTI context: dequeue the context's global activity buffer, write
 * every contained record via vt_cuptiact_writeRecord(), warn about
 * dropped records, re-enqueue the buffer and set a new host/GPU time
 * synchronization point. The flush itself is exposed in the trace as
 * a measurement-overhead region (vt_cuptiact_rid_flush).
 *
 * @param vtCtx  VampirTrace CUPTI context to flush; ignored (with a
 *               warning) if NULL or without an activity sub-context
 */
void vt_cuptiact_flushCtxActivities(vt_cupti_ctx_t *vtCtx)
{
  CUptiResult status;
  uint8_t *buffer = NULL;
  size_t bufSize;
  CUpti_Activity *record = NULL;
  uint64_t hostStop, gpuStop;
  uint32_t ptid = VT_NO_ID;
  vt_cupti_activity_t *vtcuptiActivity = NULL;

  /* check for VampirTrace CUPTI context */
  if(vtCtx == NULL || vtCtx->activity == NULL){
    vt_warning("[CUPTI Activity] Context not found!");
    return;
  }

  vtcuptiActivity = vtCtx->activity;

  /* check if the buffer contains records; only proceed on SUCCESS or
     MAX_LIMIT_REACHED (buffer full) */
  status = cuptiActivityQueryBuffer(vtCtx->cuCtx, 0, &bufSize);
  if(status != CUPTI_SUCCESS){
    /* NOTE(review): the QUEUE_EMPTY clause is redundant — any status other
       than MAX_LIMIT_REACHED (including QUEUE_EMPTY) returns here, and other
       query errors are silently swallowed; confirm this is intentional */
    if(CUPTI_ERROR_QUEUE_EMPTY == status ||
       CUPTI_ERROR_MAX_LIMIT_REACHED != status){
      return;
    }
  }

  /* expose VampirTrace CUPTI activity flush as measurement overhead */
  VT_CHECK_THREAD;
  ptid = VT_MY_THREAD;
  hostStop = vt_pform_wtime();
  vt_enter(ptid, &hostStop, vt_cuptiact_rid_flush);

  /* NOTE(review): vtCtx->cuCtx is a CUcontext (pointer-like handle) printed
     with %d — format/argument mismatch, confirm against vt_cntl_msg */
  vt_cntl_msg(2,"[CUPTI Activity] Handle context %d activities",
              vtCtx->cuCtx);

  /* lock the whole buffer flush
  VT_CUPTI_LOCK();*/

  /* dump the contents of the global queue (stream id 0) */
  VT_CUPTI_CALL(cuptiActivityDequeueBuffer(vtCtx->cuCtx, 0, &buffer,
                                           &bufSize),
                "cuptiActivityDequeueBuffer");

  /*
   * Get time synchronization factor between host and GPU time for measured
   * period (linear interpolation between the start and stop sync points)
   */
  {
    VT_CUPTI_CALL(cuptiGetTimestamp(&gpuStop), "cuptiGetTimestamp");
    hostStop = vt_pform_wtime();
    vtcuptiActivity->sync.hostStop = hostStop;

    vtcuptiActivity->sync.factor =
        (double)(hostStop - vtcuptiActivity->sync.hostStart)
       /(double)(gpuStop - vtcuptiActivity->sync.gpuStart);
  }

  /*vt_cntl_msg(1, "hostStop: %llu , gpuStop: %llu", hostStopTS, gpuStopTS);
  vt_cntl_msg(1, "factor: %lf", syncFactor);*/

  /* iterate over all records in the dequeued buffer;
     MAX_LIMIT_REACHED marks the end of valid records */
  do{
    status = cuptiActivityGetNextRecord(buffer, bufSize, &record);

    if(status == CUPTI_SUCCESS){
      vt_cuptiact_writeRecord(record, vtCtx);
    }else if(status == CUPTI_ERROR_MAX_LIMIT_REACHED){
      break;
    }else{
      VT_CUPTI_CALL(status, "cuptiActivityGetNextRecord");
    }
  }while(1);

  /* report any records dropped from the global queue */
  {
    size_t dropped;

    VT_CUPTI_CALL(cuptiActivityGetNumDroppedRecords(vtCtx->cuCtx, 0,
                                                    &dropped),
                  "cuptiActivityGetNumDroppedRecords");
    if(dropped != 0)
      vt_warning("[CUPTI Activity] Dropped %u records. Current buffer size: %llu bytes\n"
                 "To avoid dropping of records increase the buffer size!\n"
                 "Proposed minimum VT_CUDATRACE_BUFFER_SIZE=%llu",
                 (unsigned int)dropped, vt_cuptiact_bufSize,
                 vt_cuptiact_bufSize + dropped/2 *
                 (sizeof(CUpti_ActivityKernel) + sizeof(CUpti_ActivityMemcpy)));
  }

  /* enter GPU idle region after last kernel, if exited before */
  if(vtcuptiActivity->gpuIdleOn == 0){
    vt_enter(vtcuptiActivity->strmList->vtThrdID,
             &(vtcuptiActivity->vtLastGPUTime), vt_gpu_rid_idle);
    vtcuptiActivity->gpuIdleOn = 1;
    /*vt_warning("IDLfente: %llu (%d)", vtCtx->vtLastGPUTime, vtCtx->strmList->vtThrdID);*/
  }

  /* enqueue buffer again so CUPTI can keep collecting activities */
  VT_CUPTI_CALL(cuptiActivityEnqueueBuffer(vtCtx->cuCtx, 0, buffer,
                                           vt_cuptiact_bufSize),
                "cuptiActivityEnqueueBuffer");

  /* set new synchronization point for the next measurement period */
  vtcuptiActivity->sync.hostStart = hostStop;
  vtcuptiActivity->sync.gpuStart = gpuStop;

  /*VT_CUPTI_UNLOCK();*/

  /* use local variable hostStop to write exit event for activity flush */
  hostStop = vt_pform_wtime();
  vt_exit(ptid, &hostStop);
}