/**
 * Handle a filled CUPTI activity buffer.
 *
 * Prints every activity record contained in 'buffer', reports the number
 * of records dropped for 'ctx' / 'streamId', and then prints the stall
 * summary table built from the file-level 'stall_name' and 'val' arrays.
 *
 * @param ctx        context the buffer belongs to
 * @param streamId   stream the buffer belongs to
 * @param buffer     buffer holding the activity records
 * @param size       total buffer size (not used here)
 * @param validSize  number of bytes in 'buffer' that hold valid records
 *
 * NOTE(review): the buffer is not released in this function; confirm who
 * owns it against the code that hands buffers to CUPTI.
 */
static void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize)
{
    CUptiResult status;
    CUpti_Activity *rec = NULL;
    size_t dropped;
    int slot;

    /* Walk the records until CUPTI reports that none are left. */
    for (;;) {
        status = cuptiActivityGetNextRecord(buffer, validSize, &rec);
        if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) {
            break;
        }
        if (status == CUPTI_SUCCESS) {
            printActivity(rec);
        } else {
            /* any other result is handed to the error-checking macro */
            CUPTI_CALL(status);
        }
    }

    /* Report records that were dropped for this context/stream. */
    CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped));
    if (dropped != 0) {
        printf("Dropped %u activity records\n", (unsigned int)dropped);
    }

    /* Stall summary: only slots that have a name are printed. */
    printf("\n\n\n\n\n\n");
    printf("************* STALL SUMMARY ********************\n");
    for (slot = 0; slot < 12; ++slot) {
        if (stall_name[slot] == NULL) {
            continue;
        }
        printf("%s = %d \n", stall_name[slot], val[slot]);
    }
    printf("*************************************************\n\n");
}
/**
 * Dequeue the top buffer of the activity queue selected by 'context' and
 * 'streamId', print every record it contains and report dropped records.
 *
 * @param context   context whose queue is dumped; NULL selects the
 *                  "global" wording in the log messages
 * @param streamId  stream whose queue is dumped; 0 omits the stream from
 *                  the log messages
 * @return the dequeued buffer, or NULL if the queue was empty
 */
static uint8_t *
dump(CUcontext context, uint32_t streamId)
{
    uint8_t *top = NULL;
    size_t validBytes;
    size_t dropped;
    CUptiResult status;
    CUpti_Activity *rec = NULL;

    status = cuptiActivityDequeueBuffer(context, streamId, &top, &validBytes);
    if (status == CUPTI_ERROR_QUEUE_EMPTY) {
        return NULL;
    }
    CUPTI_CALL(status);

    /* Announce which queue is being dumped. */
    if (context != NULL && streamId != 0) {
        printf("[CUPTI] Starting dump for context %p, stream %u\n", context, streamId);
    } else if (context != NULL) {
        printf("[CUPTI] Starting dump for context %p\n", context);
    } else {
        printf("[CUPTI] Starting dump for global\n");
    }

    /* Print records until CUPTI reports that the buffer is exhausted. */
    for (;;) {
        status = cuptiActivityGetNextRecord(top, validBytes, &rec);
        if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) {
            printf ("[CUPTI] CUPTI_ERROR_MAX_LIMIT_REACHED\n");
            break;
        }
        if (status == CUPTI_SUCCESS) {
            printActivity(rec);
        } else {
            /* any other result is handed to the error-checking macro */
            CUPTI_CALL(status);
        }
    }

    /* Report any records dropped from the queue. */
    CUPTI_CALL(cuptiActivityGetNumDroppedRecords(context, streamId, &dropped));
    if (dropped != 0) {
        printf("Dropped %u activity records\n", (unsigned int)dropped);
    }

    /* Closing message mirrors the opening one. */
    if (context != NULL && streamId != 0) {
        printf("[CUPTI] Finished dump for context %p, stream %u\n", context, streamId);
    } else if (context != NULL) {
        printf("[CUPTI] Finished dump for context %p \n", context);
    } else {
        printf("[CUPTI] Finished dump for global\n");
    }

    return top;
}
/**
 * Flush the CUPTI activity buffer of a VampirTrace CUPTI context.
 *
 * Dequeues the context's activity buffer (stream 0), refreshes the
 * host/GPU time synchronization factor, writes every record via
 * vt_cuptiact_writeRecord(), warns about dropped records, re-enters the
 * GPU idle region if it was left, and enqueues the buffer again.
 * The whole flush is recorded as a region (vt_cuptiact_rid_flush) on the
 * calling thread so that it shows up as measurement overhead.
 *
 * Returns early (after a warning) if 'vtCtx' or its activity data is
 * NULL, and silently if the buffer query reports anything other than
 * success or CUPTI_ERROR_MAX_LIMIT_REACHED.
 *
 * NOTE(review): the flush is not protected by a lock here; the lock /
 * unlock calls were disabled in the original code.
 *
 * @param vtCtx  VampirTrace CUPTI context whose activities are flushed
 */
void vt_cuptiact_flushCtxActivities(vt_cupti_ctx_t *vtCtx)
{
  CUptiResult status;
  uint8_t *buffer = NULL;
  size_t bufSize;
  CUpti_Activity *record = NULL;
  uint64_t hostStop, gpuStop;
  uint32_t ptid = VT_NO_ID;
  vt_cupti_activity_t *vtcuptiActivity = NULL;

  /* check for VampirTrace CUPTI context */
  if(vtCtx == NULL || vtCtx->activity == NULL){
    vt_warning("[CUPTI Activity] Context not found!");
    return;
  }

  vtcuptiActivity = vtCtx->activity;

  /*
   * Check if the buffer contains records. Only success and
   * CUPTI_ERROR_MAX_LIMIT_REACHED let the flush proceed; every other
   * result (including an empty queue) means there is nothing to do.
   */
  status = cuptiActivityQueryBuffer(vtCtx->cuCtx, 0, &bufSize);
  if(status != CUPTI_SUCCESS && status != CUPTI_ERROR_MAX_LIMIT_REACHED){
    return;
  }

  /* expose VampirTrace CUPTI activity flush as measurement overhead */
  VT_CHECK_THREAD;
  ptid = VT_MY_THREAD;
  hostStop = vt_pform_wtime();
  vt_enter(ptid, &hostStop, vt_cuptiact_rid_flush);

  /* the context handle is a pointer, so it is printed with %p */
  vt_cntl_msg(2,"[CUPTI Activity] Handle context %p activities",
              (void *)vtCtx->cuCtx);

  /* dump the contents of the context's queue */
  VT_CUPTI_CALL(cuptiActivityDequeueBuffer(vtCtx->cuCtx, 0, &buffer, &bufSize),
                "cuptiActivityDequeueBuffer");

  /*
   * Get time synchronization factor between host and GPU time for measured
   * period
   */
  {
    VT_CUPTI_CALL(cuptiGetTimestamp(&gpuStop), "cuptiGetTimestamp");
    hostStop = vt_pform_wtime();
    vtcuptiActivity->sync.hostStop = hostStop;

    vtcuptiActivity->sync.factor =
      (double)(hostStop - vtcuptiActivity->sync.hostStart)
      /(double)(gpuStop - vtcuptiActivity->sync.gpuStart);
  }

  /* write all records until CUPTI reports that the buffer is exhausted */
  do{
    status = cuptiActivityGetNextRecord(buffer, bufSize, &record);
    if(status == CUPTI_SUCCESS) {
      vt_cuptiact_writeRecord(record, vtCtx);
    }else if(status == CUPTI_ERROR_MAX_LIMIT_REACHED){
      break;
    }else{
      VT_CUPTI_CALL(status, "cuptiActivityGetNextRecord");
    }
  }while(1);

  /* report any records dropped from the queue */
  {
    size_t dropped;

    VT_CUPTI_CALL(cuptiActivityGetNumDroppedRecords(vtCtx->cuCtx, 0, &dropped),
                  "cuptiActivityGetNumDroppedRecords");
    if(dropped != 0){
      /* %llu requires unsigned long long, so the size values are cast */
      vt_warning("[CUPTI Activity] Dropped %u records. Current buffer size: %llu bytes\n"
                 "To avoid dropping of records increase the buffer size!\n"
                 "Proposed minimum VT_CUDATRACE_BUFFER_SIZE=%llu",
                 (unsigned int)dropped,
                 (unsigned long long)vt_cuptiact_bufSize,
                 (unsigned long long)(vt_cuptiact_bufSize + dropped/2 *
                   (sizeof(CUpti_ActivityKernel) + sizeof(CUpti_ActivityMemcpy))));
    }
  }

  /* enter GPU idle region after last kernel, if exited before */
  if(vtcuptiActivity->gpuIdleOn == 0){
    vt_enter(vtcuptiActivity->strmList->vtThrdID,
             &(vtcuptiActivity->vtLastGPUTime), vt_gpu_rid_idle);
    vtcuptiActivity->gpuIdleOn = 1;
  }

  /* enqueue buffer again */
  VT_CUPTI_CALL(cuptiActivityEnqueueBuffer(vtCtx->cuCtx, 0, buffer,
                                           vt_cuptiact_bufSize),
                "cuptiActivityEnqueueBuffer");

  /* set new synchronization point */
  vtcuptiActivity->sync.hostStart = hostStop;
  vtcuptiActivity->sync.gpuStart = gpuStop;

  /* use local variable hostStop to write exit event for activity flush */
  hostStop = vt_pform_wtime();
  vt_exit(ptid, &hostStop);
}