Example #1
0
/*
 * Resource callback handler: manages activity buffers as contexts and
 * streams are created and destroyed.
 *
 * On a "created" callback two new buffers are queued for the context (stream
 * ID 0) or for the stream's CUPTI stream ID. On a "destroy starting" callback
 * dump() is called repeatedly for the same queue until it returns NULL.
 * Every other callback ID is ignored.
 *
 * cbid          resource callback ID to handle
 * resourceData  callback data; supplies the context and, for the stream
 *               callbacks, the stream handle
 */
static void
handleResource(CUpti_CallbackId cbid, const CUpti_ResourceData *resourceData)
{
  switch (cbid) {
  case CUPTI_CBID_RESOURCE_CONTEXT_CREATED:
    // a freshly created context gets two buffers on its own queue
    queueNewBuffer(resourceData->context, 0);
    queueNewBuffer(resourceData->context, 0);
    break;

  case CUPTI_CBID_RESOURCE_CONTEXT_DESTROY_STARTING:
    // drain the context queue: keep dumping until nothing is returned
    for (;;) {
      if (dump(resourceData->context, 0) == NULL) {
        break;
      }
    }
    break;

  case CUPTI_CBID_RESOURCE_STREAM_CREATED: {
    // a freshly created stream gets two buffers on its own queue
    uint32_t streamId;
    CUPTI_CALL(cuptiGetStreamId(resourceData->context, resourceData->resourceHandle.stream, &streamId));
    queueNewBuffer(resourceData->context, streamId);
    queueNewBuffer(resourceData->context, streamId);
    break;
  }

  case CUPTI_CBID_RESOURCE_STREAM_DESTROY_STARTING: {
    // drain the stream queue: keep dumping until nothing is returned
    uint32_t streamId;
    CUPTI_CALL(cuptiGetStreamId(resourceData->context, resourceData->resourceHandle.stream, &streamId));
    for (;;) {
      if (dump(resourceData->context, streamId) == NULL) {
        break;
      }
    }
    break;
  }

  default:
    // not a resource event this handler cares about
    break;
  }
}
/*
 * Allocate and initialize a VampirTrace CUPTI activity context.
 *
 * The list pointers and the buffer start out as NULL, the allocated GPU
 * memory counter as 0, the last GPU time as vt_gpu_init_time and the GPU idle
 * flag as 1. The GPU timestamp and the host time are sampled back to back as
 * the starting point of the host/GPU time synchronization.
 *
 * @param cuCtx CUDA context used to query the default CUPTI stream ID
 *
 * @return pointer to the created VampirTrace CUPTI activity context
 */
static vt_cupti_activity_t* vt_cuptiact_createCtxActivity(CUcontext cuCtx)
{
  vt_cupti_activity_t* vtCtxAct = malloc(sizeof(*vtCtxAct));

  /* NOTE(review): the fields are written unconditionally below, so
     vt_error_msg is presumably fatal -- confirm */
  if(NULL == vtCtxAct){
    vt_error_msg("[CUPTI Activity] Could not allocate memory for activity context!");
  }

  /* bookkeeping starts out empty */
  vtCtxAct->buffer = NULL;
  vtCtxAct->strmList = NULL;
  vtCtxAct->gpuMemList = NULL;
  vtCtxAct->gpuMemAllocated = 0;

  vtCtxAct->gpuIdleOn = 1;
  vtCtxAct->vtLastGPUTime = vt_gpu_init_time;

  /* sample GPU and host time right after each other: start of the interval
     used for host/GPU time synchronization */
  VT_CUPTI_CALL(cuptiGetTimestamp(&(vtCtxAct->sync.gpuStart)), "cuptiGetTimestamp");
  vtCtxAct->sync.hostStart = vt_pform_wtime();

  /* the default CUPTI stream ID is needed for memory usage and idle tracing */
  VT_CUPTI_CALL(cuptiGetStreamId(cuCtx, NULL, &(vtCtxAct->defaultStrmID)), 
                                 "cuptiGetStreamId");

  return vtCtxAct;
}
Example #3
0
/*
 * Synchronization callback handler: dumps full activity buffers and hands
 * every dumped buffer straight back to CUPTI.
 *
 * The global queue (NULL context, stream ID 0) is checked on every call.
 * In addition, a context synchronize checks the context's queue and a stream
 * synchronize checks the queue of the synchronized stream.
 *
 * cbid      synchronize callback ID to handle
 * syncData  callback data; supplies the context and, for a stream
 *           synchronize, the stream
 */
static void
handleSync(CUpti_CallbackId cbid, const CUpti_SynchronizeData *syncData)
{
  uint8_t *buffer;

  // global queue first: a dumped buffer is re-enqueued right away
  buffer = dumpIfFull(NULL, 0);
  if (buffer != NULL) {
    CUPTI_CALL(cuptiActivityEnqueueBuffer(NULL, 0, buffer, BUF_SIZE));
  }

  switch (cbid) {
  case CUPTI_CBID_SYNCHRONIZE_CONTEXT_SYNCHRONIZED:
    // context sync: same treatment for the context's own queue
    buffer = dumpIfFull(syncData->context, 0);
    if (buffer != NULL) {
      CUPTI_CALL(cuptiActivityEnqueueBuffer(syncData->context, 0, buffer, BUF_SIZE));
    }
    break;

  case CUPTI_CBID_SYNCHRONIZE_STREAM_SYNCHRONIZED: {
    // stream sync: resolve the stream ID, then treat that stream's queue
    uint32_t streamId;
    CUPTI_CALL(cuptiGetStreamId(syncData->context, syncData->stream, &streamId));
    buffer = dumpIfFull(syncData->context, streamId);
    if (buffer != NULL) {
      CUPTI_CALL(cuptiActivityEnqueueBuffer(syncData->context, streamId, buffer, BUF_SIZE));
    }
    break;
  }

  default:
    // no per-context or per-stream work for other callback IDs
    break;
  }
}
/*
 * Allocate and initialize a VampirTrace CUPTI Activity context.
 * 
 * The new context is not linked into any list (its next pointer is NULL).
 * 
 * @param ctxID ID of the CUDA context
 * @param cuCtx CUDA context; if NULL, the current CUDA context is queried
 *              via cuCtxGetCurrent()
 * @param devID ID of the CUDA device; if (uint32_t)-1, the device is queried
 *              via cuCtxGetDevice() and VT_NO_ID is stored when that fails
 * 
 * @return pointer to created VampirTrace CUPTI Activity context
 */
static vt_cuptiact_ctx_t* vt_cuptiact_createContext(uint32_t ctxID, 
                                                    CUcontext cuCtx, 
                                                    uint32_t devID)
{
  vt_cuptiact_ctx_t* vtCtx = malloc(sizeof(*vtCtx));

  /* NOTE(review): the fields are written unconditionally below, so
     vt_error_msg is presumably fatal -- confirm */
  if(NULL == vtCtx){
    vt_error_msg("[CUPTI Activity] Could not allocate memory for context!");
  }

  /* identity and list linkage */
  vtCtx->ctxID = ctxID;
  vtCtx->next = NULL;

  /* bookkeeping starts out empty */
  vtCtx->buffer = NULL;
  vtCtx->strmList = NULL;
  vtCtx->gpuMemList = NULL;
  vtCtx->gpuMemAllocated = 0;

  vtCtx->gpuIdleOn = 1;
  vtCtx->vtLastGPUTime = vt_gpu_init_time;

  /* sample GPU and host time right after each other: start of the interval
     used for host/GPU time synchronization */
  VT_CUPTI_CALL(cuptiGetTimestamp(&(vtCtx->sync.gpuStart)), "cuptiGetTimestamp");
  vtCtx->sync.hostStart = vt_pform_wtime();

  /* remember the calling VampirTrace thread */
  VT_CHECK_THREAD;
  vtCtx->ptid = VT_MY_THREAD;

  /* fall back to the current CUDA context if none was passed in */
  if(cuCtx == NULL){
    CHECK_CU_ERROR(cuCtxGetCurrent(&cuCtx), NULL);
  }
  vtCtx->cuCtx = cuCtx;

  /* the default CUPTI stream ID is needed for memory usage and idle tracing */
  VT_CUPTI_CALL(cuptiGetStreamId(vtCtx->cuCtx, NULL, &(vtCtx->defaultStrmID)), 
                                 "cuptiGetStreamId");

  /* no device ID given: ask the driver for the device of the current context.
     Original author's note: in driver API programs cuDev is correct although
     the call reports 201 (invalid context); such a failure still yields
     VT_NO_ID here. */
  if((uint32_t)-1 == devID){
    CUdevice cuDev;

    devID = (CUDA_SUCCESS == cuCtxGetDevice(&cuDev)) ? (uint32_t)cuDev
                                                     : VT_NO_ID;
  }

  /* both device fields receive the same value */
  vtCtx->devID = devID;
  vtCtx->cuDev = devID;

  return vtCtx;
}
/*
 * Create a VampirTrace CUPTI stream.
 * 
 * Allocates the stream object, determines its stream ID, registers a
 * VampirTrace GPU thread named "CUDA", "CUDA[dev]", "CUDA[dev:strm]" or
 * "CUDA[?:strm]" for it and writes the initial idle/counter records.
 * 
 * The returned stream is NOT linked into vtCtx->strmList (the prepend code
 * is disabled at the end of this function); presumably the caller is
 * responsible for that -- confirm against the call sites.
 * 
 * @param vtCtx VampirTrace CUPTI context
 * @param cuStrm CUDA stream
 * @param strmID ID of the CUDA stream. With VT_CUPTI_ACTIVITY defined,
 *               VT_CUPTI_NO_STREAM_ID makes the function query the ID from
 *               CUPTI using cuStrm. Without it, the ID is taken from the
 *               context's callbacks stream counter when callbacks are set.
 * 
 * @return pointer to created VampirTrace CUPTI stream, or NULL if vtCtx is
 *         NULL or (VT_CUPTI_ACTIVITY only) neither a CUDA stream nor a
 *         stream ID was given
 */
vt_cupti_strm_t* vt_cupti_createStream(vt_cupti_ctx_t *vtCtx, 
                                       CUstream cuStrm, uint32_t strmID)
{
  vt_cupti_strm_t *vtStrm = NULL;
  
  if(vtCtx == NULL){
    vt_warning("[CUPTI] Cannot create stream without VampirTrace CUPTI context");
    return NULL;
  }
  
  vtStrm = (vt_cupti_strm_t *)malloc(sizeof(vt_cupti_strm_t));
  /* NOTE(review): vtStrm is written unconditionally below, so vt_error_msg
     is presumably fatal -- confirm */
  if(vtStrm == NULL)
    vt_error_msg("[CUPTI] Could not allocate memory for stream!");
  vtStrm->cuStrm = cuStrm;
  vtStrm->vtLastTime = vt_gpu_init_time;
  vtStrm->destroyed = 0;
  vtStrm->next = NULL;
  
#if defined(VT_CUPTI_ACTIVITY)
  /* create stream by VT CUPTI callbacks implementation (CUstream is given) */
  if(strmID == VT_CUPTI_NO_STREAM_ID){
    if(cuStrm != VT_CUPTI_NO_STREAM){
      VT_CUPTI_CALL(cuptiGetStreamId(vtCtx->cuCtx, cuStrm, &strmID), 
                                     "cuptiGetStreamId");
    }else{
      vt_warning("[CUPTI] Neither CUDA stream nor stream ID given!");
      free(vtStrm);
      return NULL;
    }
  }
#else /* only VT_CUPTI_CALLBACKS is defined */
  if(vtCtx->callbacks != NULL){
    strmID = vtCtx->callbacks->streamsCreated;
    vtCtx->callbacks->streamsCreated++;
  }
#endif

  vtStrm->cuStrmID = strmID;
  
  /* create VampirTrace thread */
  {
    char thread_name[16] = "CUDA";
    /* the ID suffix is written directly behind the "CUDA" prefix */
    char *suffix = thread_name + 4;
    const size_t suffixSize = sizeof(thread_name) - 4;
    /* stays 0 when no suffix is written (stream reuse without device ID) */
    int written = 0;

    /* The IDs are unsigned 32-bit values, hence %u with an explicit cast
       instead of %d. */
    if(vt_gpu_stream_reuse){
      if(vtCtx->devID != VT_NO_ID)
        written = snprintf(suffix, suffixSize, "[%u]", 
                           (unsigned int)vtCtx->devID);
    }else if(vtCtx->devID == VT_NO_ID){
      written = snprintf(suffix, suffixSize, "[?:%u]", (unsigned int)strmID);
    }else{
      written = snprintf(suffix, suffixSize, "[%u:%u]", 
                         (unsigned int)vtCtx->devID, (unsigned int)strmID);
    }

    /* snprintf reports an encoding error with a negative value and a
       truncated result with a value >= the buffer size; catch both */
    if(written < 0 || (size_t)written >= suffixSize)
      vt_cntl_msg(1, "Could not create thread name for CUDA thread!");
    
    VT_CHECK_THREAD;
    vt_gpu_registerThread(thread_name, VT_MY_THREAD, &(vtStrm->vtThrdID));
  }
  
  /* the GPU init time must not lie before the VampirTrace start time */
  if(vt_gpu_init_time < vt_start_time)
      vt_gpu_init_time = vt_start_time;
  
  /* for the first stream created for this context */
  if(vtCtx->strmList == NULL){
    if(vt_gpu_trace_idle > 0){      
      /* write enter event for GPU_IDLE on first stream */
      vt_enter(vtStrm->vtThrdID, &vt_gpu_init_time, vt_gpu_rid_idle);
      
#if defined(VT_CUPTI_ACTIVITY)
      if(vtCtx->activity != NULL)
        vtCtx->activity->gpuIdleOn = 1;
#endif
    }
    
    /* set the counter value for cudaMalloc to 0 on first stream */
    if(vt_gpu_trace_memusage > 0)
      vt_count(vtStrm->vtThrdID, &vt_gpu_init_time, vt_gpu_cid_memusage, 0);
  }

  if(vt_gpu_trace_kernels > 1){
    /* set count values to zero */
    vt_count(vtStrm->vtThrdID, &vt_gpu_init_time, vt_cupti_cid_blocksPerGrid, 0);
    vt_count(vtStrm->vtThrdID, &vt_gpu_init_time, vt_cupti_cid_threadsPerBlock, 0);
    vt_count(vtStrm->vtThrdID, &vt_gpu_init_time, vt_cupti_cid_threadsPerKernel, 0);
  }
  
  /* prepending the stream to the context's list is disabled here:
  vtStrm->next = vtCtx->strmList;
  vtCtx->strmList = vtStrm;*/
  
  return vtStrm;
}