Пример #1
0
/*
 * Create a VampirTrace CUPTI Activity stream.
 *
 * Allocates and initializes the stream object and registers a VampirTrace
 * thread for it. The thread is named "CUDA[<dev>]" when vt_gpu_stream_reuse is
 * set (plain "CUDA" if the device is unknown), otherwise "CUDA[<dev>:<strm>]"
 * or "CUDA[?:<strm>]" if the device is unknown.
 *
 * If the activity stream list of the context is still empty and GPU idle
 * tracing is enabled (vt_gpu_trace_idle == 1), the GPU_IDLE enter event is
 * written on the new stream's thread and gpuIdleOn is set.
 *
 * The new stream is NOT inserted into vtCtx->activity->strmList by this
 * function; its `next` pointer is NULL on return.
 *
 * @param vtCtx  VampirTrace CUPTI context the stream belongs to
 * @param strmID ID of the CUDA stream
 *
 * @return pointer to created VampirTrace CUPTI Activity stream, or NULL if
 *         no context was given or the allocation failed
 */
static vt_cuptiact_strm_t* vt_cuptiact_createStream(vt_cupti_ctx_t *vtCtx, 
                                                    uint32_t strmID)
{
  vt_cuptiact_strm_t *vtStrm = NULL;

  /* same guard as vt_cupti_createStream(): nothing to do without a context */
  if(vtCtx == NULL){
    vt_cntl_msg(1, "[CUPTI Activity] Cannot create stream without "
                   "VampirTrace CUPTI context");
    return NULL;
  }

  vtStrm = malloc(sizeof(*vtStrm));
  if(vtStrm == NULL){
    vt_error_msg("[CUPTI Activity] Could not allocate memory for stream!");
    /* presumably vt_error_msg() does not return; bail out anyway so that a
       NULL pointer is never dereferenced below */
    return NULL;
  }

  vtStrm->strmID = strmID;
  vtStrm->vtLastTime = vt_gpu_init_time;
  vtStrm->destroyed = 0;
  vtStrm->next = NULL;

  /* create VT-User-Thread with name and parent id and get its id */
  {
    char thread_name[16] = "CUDA";
    const size_t prefixLen = sizeof("CUDA") - 1;
    char *suffix = thread_name + prefixLen;
    const size_t suffixSize = sizeof(thread_name) - prefixLen;
    int written = 0;

    /* NOTE(review): devID is printed with %d as before; its declared type is
       not visible here - confirm the specifier matches. */
    if(vt_gpu_stream_reuse){
      if(vtCtx->devID != VT_NO_ID){
        written = snprintf(suffix, suffixSize, "[%d]", vtCtx->devID);
      }
    }else if(vtCtx->devID == VT_NO_ID){
      written = snprintf(suffix, suffixSize, "[?:%u]", (unsigned int)strmID);
    }else{
      written = snprintf(suffix, suffixSize, "[%d:%u]", vtCtx->devID,
                         (unsigned int)strmID);
    }

    /* snprintf() returns the length it wanted to write, so a value >= the
       buffer size means the name was cut off (the buffer stays terminated) */
    if(written < 0){
      vt_cntl_msg(1, "Could not create thread name for CUDA thread!");
    }else if((size_t)written >= suffixSize){
      vt_cntl_msg(1, "Thread name for CUDA thread has been truncated!");
    }

    VT_CHECK_THREAD;
    vt_gpu_registerThread(thread_name, VT_MY_THREAD, &(vtStrm->vtThrdID));
  }

  /* if first stream created for this device, make it the default stream */
  if(vtCtx->activity != NULL && vtCtx->activity->strmList == NULL){
    /* write enter event for GPU_IDLE on first stream */
    if(vt_gpu_trace_idle == 1){
      /* the idle region must not start before the trace itself */
      if(vt_gpu_init_time < vt_start_time){
        vt_gpu_init_time = vt_start_time;
      }

      vt_enter(vtStrm->vtThrdID, &vt_gpu_init_time, vt_gpu_rid_idle);
      vtCtx->activity->gpuIdleOn = 1;
    }
  }

  return vtStrm;
}
/*
 * Create a VampirTrace CUPTI stream.
 *
 * Allocates and initializes the stream object, determines its stream ID and
 * registers a VampirTrace thread for it. The thread is named "CUDA[<dev>]"
 * when vt_gpu_stream_reuse is set (plain "CUDA" if the device is unknown),
 * otherwise "CUDA[<dev>:<strm>]" or "CUDA[?:<strm>]" if the device is unknown.
 *
 * Stream ID resolution:
 *  - with VT_CUPTI_ACTIVITY: if strmID is VT_CUPTI_NO_STREAM_ID, the ID is
 *    queried via cuptiGetStreamId() from cuStrm; if cuStrm is
 *    VT_CUPTI_NO_STREAM as well, the call fails and NULL is returned.
 *  - otherwise: if the context has callbacks data, the ID is taken from (and
 *    advances) its streamsCreated counter; else the given strmID is used.
 *
 * For the first stream of the context (vtCtx->strmList == NULL) the GPU_IDLE
 * enter event and the initial memory usage counter value are written if the
 * respective features are enabled. If vt_gpu_trace_kernels > 1, the kernel
 * counters are initialized to zero on the new thread.
 *
 * The new stream is NOT inserted into vtCtx->strmList by this function; its
 * `next` pointer is NULL on return.
 *
 * @param vtCtx  VampirTrace CUPTI context
 * @param cuStrm CUDA stream
 * @param strmID ID of the CUDA stream
 *
 * @return pointer to created VampirTrace CUPTI stream, or NULL if no context
 *         was given, the stream could not be identified or the allocation
 *         failed
 */
vt_cupti_strm_t* vt_cupti_createStream(vt_cupti_ctx_t *vtCtx, 
                                       CUstream cuStrm, uint32_t strmID)
{
  vt_cupti_strm_t *vtStrm = NULL;

  if(vtCtx == NULL){
    vt_warning("[CUPTI] Cannot create stream without VampirTrace CUPTI context");
    return NULL;
  }

  vtStrm = malloc(sizeof(*vtStrm));
  if(vtStrm == NULL){
    vt_error_msg("[CUPTI] Could not allocate memory for stream!");
    /* presumably vt_error_msg() does not return; bail out anyway so that a
       NULL pointer is never dereferenced below */
    return NULL;
  }

  vtStrm->cuStrm = cuStrm;
  vtStrm->vtLastTime = vt_gpu_init_time;
  vtStrm->destroyed = 0;
  vtStrm->next = NULL;

#if defined(VT_CUPTI_ACTIVITY)
  /* create stream by VT CUPTI callbacks implementation (CUstream is given) */
  if(strmID == VT_CUPTI_NO_STREAM_ID){
    if(cuStrm != VT_CUPTI_NO_STREAM){
      VT_CUPTI_CALL(cuptiGetStreamId(vtCtx->cuCtx, cuStrm, &strmID), 
                                     "cuptiGetStreamId");
    }else{
      vt_warning("[CUPTI] Neither CUDA stream nor stream ID given!");
      free(vtStrm);
      return NULL;
    }
  }
#else /* only VT_CUPTI_CALLBACKS is defined */
  /* number the streams in creation order */
  if(vtCtx->callbacks != NULL){
    strmID = vtCtx->callbacks->streamsCreated;
    vtCtx->callbacks->streamsCreated++;
  }
#endif

  vtStrm->cuStrmID = strmID;

  /* create VampirTrace thread */
  {
    char thread_name[16] = "CUDA";
    const size_t prefixLen = sizeof("CUDA") - 1;
    char *suffix = thread_name + prefixLen;
    const size_t suffixSize = sizeof(thread_name) - prefixLen;
    int written = 0;

    /* NOTE(review): devID is printed with %d as before; its declared type is
       not visible here - confirm the specifier matches. */
    if(vt_gpu_stream_reuse){
      if(vtCtx->devID != VT_NO_ID){
        written = snprintf(suffix, suffixSize, "[%d]", vtCtx->devID);
      }
    }else if(vtCtx->devID == VT_NO_ID){
      written = snprintf(suffix, suffixSize, "[?:%u]", (unsigned int)strmID);
    }else{
      written = snprintf(suffix, suffixSize, "[%d:%u]", vtCtx->devID,
                         (unsigned int)strmID);
    }

    /* snprintf() returns the length it wanted to write, so a value >= the
       buffer size means the name was cut off (the buffer stays terminated) */
    if(written < 0){
      vt_cntl_msg(1, "Could not create thread name for CUDA thread!");
    }else if((size_t)written >= suffixSize){
      vt_cntl_msg(1, "Thread name for CUDA thread has been truncated!");
    }

    VT_CHECK_THREAD;
    vt_gpu_registerThread(thread_name, VT_MY_THREAD, &(vtStrm->vtThrdID));
  }

  /* events on the new thread must not be written before the trace start */
  if(vt_gpu_init_time < vt_start_time){
    vt_gpu_init_time = vt_start_time;
  }

  /* for the first stream created for this context */
  if(vtCtx->strmList == NULL){
    if(vt_gpu_trace_idle > 0){
      /* write enter event for GPU_IDLE on first stream */
      vt_enter(vtStrm->vtThrdID, &vt_gpu_init_time, vt_gpu_rid_idle);

#if defined(VT_CUPTI_ACTIVITY)
      if(vtCtx->activity != NULL){
        vtCtx->activity->gpuIdleOn = 1;
      }
#endif
    }

    /* set the counter value for cudaMalloc to 0 on first stream */
    if(vt_gpu_trace_memusage > 0){
      vt_count(vtStrm->vtThrdID, &vt_gpu_init_time, vt_gpu_cid_memusage, 0);
    }
  }

  if(vt_gpu_trace_kernels > 1){
    /* set count values to zero */
    vt_count(vtStrm->vtThrdID, &vt_gpu_init_time, vt_cupti_cid_blocksPerGrid, 0);
    vt_count(vtStrm->vtThrdID, &vt_gpu_init_time, vt_cupti_cid_threadsPerBlock, 0);
    vt_count(vtStrm->vtThrdID, &vt_gpu_init_time, vt_cupti_cid_threadsPerKernel, 0);
  }

  /* The stream is not prepended to vtCtx->strmList here; presumably the
     caller links it into the list - TODO confirm. */

  return vtStrm;
}