/*
 * Initialize the CUPTI events data of the given VampirTrace CUPTI context.
 *
 * The compute capability of the context's device is looked up in the global
 * capability list (vtcuptievtCapList). If no entry matches, the function
 * returns without attaching any events data to the context. Otherwise a
 * vt_cupti_events_t is allocated and stored in vtcuptiCtx->events, the event
 * groups are added via vt_cupti_addEvtGrpsToCtx(), the buffers for counter
 * reads are allocated and vt_cuptievt_start() is called.
 *
 * @param vtcuptiCtx pointer to the VampirTrace CUPTI context
 */
void vt_cupti_events_initContext(vt_cupti_ctx_t *vtcuptiCtx)
{
  vt_cupti_events_t *vtcuptiEvtCtx = NULL;
  
  vt_cntl_msg(2, "[CUPTI Events] Initializing VampirTrace CUPTI events context");

  /* find the capability list entry that matches the context's device */
  {
    CUresult cuErr = CUDA_SUCCESS;
    int dev_major, dev_minor;
    vt_cupti_device_t *cuptiDev;

    /* TODO: do not trace this driver API function call */
    cuErr = cuDeviceComputeCapability(&dev_major, &dev_minor, vtcuptiCtx->cuDev);
    VT_CUDRV_CALL(cuErr, "cuDeviceComputeCapability");

    /* read the head of the global capability list under the CUPTI lock */
    VT_CUPTI_LOCK();
      cuptiDev = vtcuptievtCapList;
    VT_CUPTI_UNLOCK();
    
    cuptiDev = vt_cupti_checkMetricList(cuptiDev, dev_major, dev_minor);
    if(cuptiDev == NULL){
      /* no list entry for this compute capability: nothing to set up */
      return;
    }

    /* allocate the VampirTrace CUPTI events context */
    vtcuptiEvtCtx = (vt_cupti_events_t *)malloc(sizeof(vt_cupti_events_t));
    if(vtcuptiEvtCtx == NULL)
      vt_error_msg("[CUPTI Events] malloc(sizeof(vt_cupti_events_t)) failed!");
    
    vtcuptiEvtCtx->vtDevCap = cuptiDev;
    vtcuptiEvtCtx->vtGrpList = NULL;
    vtcuptiEvtCtx->counterData = NULL;
    vtcuptiEvtCtx->cuptiEvtIDs = NULL;
    
    vtcuptiCtx->events = vtcuptiEvtCtx;
  }

  /* create and add the VampirTrace CUPTI groups to the context */
  vt_cupti_addEvtGrpsToCtx(vtcuptiCtx);

  /* the buffer size below is read from the first group; make sure it exists */
  if(vtcuptiEvtCtx->vtGrpList == NULL)
    vt_error_msg("[CUPTI Events] No event group available for the context!");

  /* allocate memory for CUPTI counter reads */
  {
    size_t allocSize = vtcuptiEvtCtx->vtGrpList->evtNum;
    
    vtcuptiEvtCtx->counterData = 
            (uint64_t *)malloc(allocSize*sizeof(uint64_t));
    vtcuptiEvtCtx->cuptiEvtIDs = 
            (CUpti_EventID *)malloc(allocSize*sizeof(CUpti_EventID));

    /* malloc(0) may legitimately return NULL, so only check real requests */
    if(allocSize > 0 && (vtcuptiEvtCtx->counterData == NULL ||
                         vtcuptiEvtCtx->cuptiEvtIDs == NULL))
      vt_error_msg("[CUPTI Events] Allocation of counter read buffers failed!");
  }
  
  vt_cuptievt_start(vtcuptiEvtCtx);
}
/*
 * Setup a list of devices with different device capabilities and add the 
 * metrics, which are specified by the user.
 * 
 * @return a list of CUDA devices with different device capabilities
 */
static vt_cupti_device_t* vt_cuptievt_setupMetricList(void)
{
  CUresult err;
  int deviceCount, id;
  vt_cupti_device_t *capList = NULL;
  
  VT_SUSPEND_MALLOC_TRACING(VT_CURRENT_THREAD);
  
  /* CUDA initialization */
	VT_CUDRV_CALL(cuInit(0), "cuInit");
  
  /* How many GPGPU devices do we have? */
	err = cuDeviceGetCount( &deviceCount );
	VT_CUDRV_CALL(err, "cuDeviceGetCount");
	if(deviceCount == 0){
		vt_error_msg("[CUPTI Events] There is no device supporting CUDA available.");
	}

  /* create list with available compute capabilities */
  for(id = 0; id < deviceCount; id++){
    CUdevice cuDev;
    vt_cupti_device_t *cuptiDev;
    int dev_major, dev_minor;

    err = cuDeviceGet(&cuDev, id);
		VT_CUDRV_CALL(err, "cuDeviceGet");

    err = cuDeviceComputeCapability(&dev_major, &dev_minor, cuDev);
    VT_CUDRV_CALL(err, "cuDeviceComputeCapability");

    /* check if device capability already listed */
    cuptiDev = vt_cupti_checkMetricList(capList, dev_major, dev_minor);

    if(cuptiDev == NULL){
      /* allocate memory for device list entry */
      cuptiDev = (vt_cupti_device_t *)malloc(sizeof(vt_cupti_device_t));
      cuptiDev->dev_major = dev_major;
      cuptiDev->dev_minor = dev_minor;
      cuptiDev->cuDev = cuDev;
      cuptiDev->vtcuptiEvtList = NULL;
      cuptiDev->evtNum = 0;
      cuptiDev->next = NULL;

      /* prepend to list */
      cuptiDev->next = capList;
      capList = cuptiDev;
    }
  }

  vt_cupti_fillMetricList(capList);

  /* cleanup list: remove entries, which don't have metrics */
  {
    vt_cupti_device_t *curr = capList;
    vt_cupti_device_t *last = capList;

    while(curr != NULL){
      vt_cupti_device_t *freeDev = curr;
      curr = curr->next;

      if(freeDev->evtNum == 0){
        /* first element */
        if(freeDev == capList){
          capList = capList->next;
        }else{
          last->next = freeDev->next;
        }
        free(freeDev);
      }else last = freeDev;
    }
  }
  
  VT_RESUME_MALLOC_TRACING(VT_CURRENT_THREAD);

  return capList;
}
/*
 * Parse the environment variable for CUPTI metrics (including CUDA device
 * capabilities) and fill the capability metric list.
 *
 * The string returned by vt_env_cupti_events() is split with strtok() using
 * the separators from vt_env_metrics_sep(); strtok() modifies that string.
 * A token of the form "<major>.<minor>_<name>" adds the event <name> only to
 * the list entry with that compute capability. Any other token is tried on
 * every entry of the list and shares one VampirTrace counter ID across them.
 * Events that CUPTI does not know are skipped with a warning; if the event
 * name starts with "help", vt_cupti_showAllCounters() is called first.
 *
 * @param capList points to the first element of the capability metric list
 */
static void vt_cupti_fillMetricList(vt_cupti_device_t *capList)
{
  char *metricString = vt_env_cupti_events();
  char *metric_sep = vt_env_metrics_sep();
  char *metric, *metric_cap;

  /* strtok() must not be started with a NULL string or NULL separators */
  if(metricString == NULL || metric_sep == NULL) return;

  for(metric = strtok(metricString, metric_sep); metric != NULL;
      metric = strtok(NULL, metric_sep)){
    CUptiResult cuptiErr = CUPTI_SUCCESS;
    vt_cupti_device_t *cuptiDev = NULL;
    vt_cupti_evtctr_t *vtcuptiEvt = NULL;
    int metr_major = 0;
    int metr_minor = 0;

    /* try to get CUDA device capability parsed from metric */
    metr_major = atoi(metric);
    metric_cap = strchr(metric+1, '.');
    if(metric_cap){
      metr_minor = atoi(metric_cap+1);
      metric_cap = strchr(metric_cap+1, '_');
    }
    
    /* check whether device capability is given or not */
    if(metric_cap){
      /* the event name starts behind the '_' of the capability prefix */
      metric = metric_cap + 1;

      vt_cntl_msg(2, "Metric '%s', %d.%d", metric, metr_major, metr_minor);

      /* silently ignore metrics for capabilities, which are not listed */
      cuptiDev = vt_cupti_checkMetricList(capList, metr_major, metr_minor);
      if(cuptiDev == NULL) continue;
      
      vtcuptiEvt = (vt_cupti_evtctr_t*)malloc(sizeof(vt_cupti_evtctr_t));
      if(vtcuptiEvt == NULL){
        vt_warning("[CUPTI Events] Out of memory, skipping event '%s'", metric);
        continue;
      }

      cuptiErr = cuptiEventGetIdFromName(cuptiDev->cuDev, metric,
                                         &vtcuptiEvt->cuptiEvtID);
      if(cuptiErr != CUPTI_SUCCESS){
        /* the entry is not linked into any list: release it right away */
        free(vtcuptiEvt);

        if(!strncmp(metric, "help", 4)) vt_cupti_showAllCounters(capList);
        vt_warning("[CUPTI Events] Skipping invalid event '%s' for device %d",
                   metric, cuptiDev->cuDev);
        continue;
      }

      /* create VampirTrace counter ID */
#if (defined(VT_MT) || defined(VT_HYB))
      VTTHRD_LOCK_IDS();
#endif
      vtcuptiEvt->vtCID = vt_def_counter(VT_MASTER_THREAD, metric, "#",
            VT_CNTR_ABS | VT_CNTR_LAST | VT_CNTR_UNSIGNED, vt_cuptievt_cgid, 0);
#if (defined(VT_MT) || defined(VT_HYB))
      VTTHRD_UNLOCK_IDS();
#endif

      /* prepend the event to the device's event list */
      cuptiDev->evtNum++;
      vtcuptiEvt->next = cuptiDev->vtcuptiEvtList;
      cuptiDev->vtcuptiEvtList = vtcuptiEvt;
    }else{ 
      /* device capability is not given. Try to add metric to all devices */
      uint32_t cid_metric = VT_NO_ID;

      for(cuptiDev = capList; cuptiDev != NULL; cuptiDev = cuptiDev->next){
        vtcuptiEvt = (vt_cupti_evtctr_t*)malloc(sizeof(vt_cupti_evtctr_t));
        if(vtcuptiEvt == NULL){
          vt_warning("[CUPTI Events] Out of memory, skipping event '%s'",
                     metric);
          break;
        }

        cuptiErr = cuptiEventGetIdFromName(cuptiDev->cuDev, metric,
                                           &vtcuptiEvt->cuptiEvtID);

        if(cuptiErr != CUPTI_SUCCESS){
          /* the entry is not linked into any list: release it right away */
          free(vtcuptiEvt);

          if(!strncmp(metric, "help", 4)) vt_cupti_showAllCounters(capList);
          vt_warning("[CUPTI Events] Skipping invalid event '%s' for device %d",
                     metric, cuptiDev->cuDev);
          continue;
        }

        /* create VampirTrace counter ID, if not yet done for other device */
        if(cid_metric == VT_NO_ID){
#if (defined(VT_MT) || defined(VT_HYB))
          VTTHRD_LOCK_IDS();
#endif
          cid_metric = vt_def_counter(VT_MASTER_THREAD, metric, "#", 
            VT_CNTR_ABS | VT_CNTR_LAST | VT_CNTR_UNSIGNED, vt_cuptievt_cgid, 0);
#if (defined(VT_MT) || defined(VT_HYB))
          VTTHRD_UNLOCK_IDS();
#endif
        }
        
        /* prepend the event to the device's event list */
        cuptiDev->evtNum++;
        vtcuptiEvt->vtCID = cid_metric;
        vtcuptiEvt->next = cuptiDev->vtcuptiEvtList;
        cuptiDev->vtcuptiEvtList = vtcuptiEvt;
      }
    }
  }
}
예제 #4
0
static vt_cupti_dev_t* vt_cupti_setupMetricList(void)
{
  CUresult err;
  int deviceCount, id;
  vt_cupti_dev_t *capList = NULL;

  /* CUDA initialization */
	err = cuInit( 0 );
	if ( err != CUDA_SUCCESS ) {
		printf( "Initialization of CUDA library failed.\n" );
		exit( EXIT_FAILURE );
	}

  /* How many gpgpu devices do we have? */
	err = cuDeviceGetCount( &deviceCount );
	CHECK_CU_ERROR(err, "cuDeviceGetCount");
	if(deviceCount == 0){
		printf("[CUPTI]There is no device supporting CUDA.\n");
		exit(EXIT_FAILURE);
	}

  /* create list with available compute capabilities */
  for(id = 0; id < deviceCount; id++){
    CUdevice cuDev;
    vt_cupti_dev_t *cuptiDev;
    int dev_major, dev_minor;

    err = cuDeviceGet(&cuDev, id);
		CHECK_CU_ERROR(err, "cuDeviceGet");

    err = cuDeviceComputeCapability(&dev_major, &dev_minor, cuDev);
    CHECK_CU_ERROR(err, "cuDeviceComputeCapability");

    /* check if device capability already listed */
    cuptiDev = vt_cupti_checkMetricList(capList, dev_major, dev_minor);

    if(cuptiDev == NULL){
      /* allocate memory for device list entry */
      cuptiDev = (vt_cupti_dev_t *)malloc(sizeof(vt_cupti_dev_t));
      cuptiDev->dev_major = dev_major;
      cuptiDev->dev_minor = dev_minor;
      cuptiDev->cuDev = cuDev;
      cuptiDev->vtcuptiEvtList = NULL;
      cuptiDev->evtNum = 0;
      cuptiDev->next = NULL;

      /* prepend to list */
      cuptiDev->next = capList;
      capList = cuptiDev;
    }
  }

  vt_cupti_fillMetricList(capList);

  /* cleanup list: remove entries, which don't have metrics */
  {
    vt_cupti_dev_t *curr = capList;
    vt_cupti_dev_t *last = capList;

    while(curr != NULL){
      vt_cupti_dev_t *freeDev = curr;
      curr = curr->next;

      if(freeDev->evtNum == 0){
        /* first element */
        if(freeDev == capList){
          capList = capList->next;
        }else{
          last->next = freeDev->next;
        }
        free(freeDev);
      }else last = freeDev;
    }
  }

  return capList;
}
예제 #5
0
/*
 * Initializes a VampirTrace CUPTI context and creates its event groups.
 *
 * The call is recorded as a region (rid_cupti_init) on the given thread.
 * CUDA tracing is suspended while the CUDA context and the compute
 * capability of its device are queried. If the capability is found in the
 * global capability list (vt_cupti_capList), a new context structure is
 * allocated, its event groups and counter read buffers are created and the
 * structure is prepended to the global context list (vtcuptiCtxlist).
 *
 * @param ptid the VampirTrace thread id
 * @param cuCtx optionally given CUDA context (if NULL, the current CUDA
 *              context is queried)
 *
 * @return the created VampirTrace CUPTI context structure or NULL, if the
 *         compute capability of the device is not in the capability list
 */
static vt_cupti_ctx_t* vt_cupti_initCtx(uint32_t ptid, CUcontext cuCtx)
{
  vt_cupti_ctx_t *vtcuptiCtx = NULL;
  uint64_t time;

  vt_cntl_msg(2, "[CUPTI] Initializing VampirTrace CUPTI context (ptid=%d)",
              ptid);
  
  time = vt_pform_wtime();
  vt_enter(ptid, &time, rid_cupti_init);

  /* do not trace CUDA functions invoked here */
  VT_SUSPEND_CUDA_TRACING(ptid);

  /* initialize CUDA driver API, if necessary and get context handle */
  if(cuCtx == NULL){
#if (defined(CUDA_VERSION) && (CUDA_VERSION < 4000))
    /* pop and re-push the current context to obtain its handle */
    CHECK_CU_ERROR(cuCtxPopCurrent(&cuCtx), "cuCtxPopCurrent");
    CHECK_CU_ERROR(cuCtxPushCurrent(cuCtx), "cuCtxPushCurrent");
#else
    CHECK_CU_ERROR(cuCtxGetCurrent(&cuCtx), "cuCtxGetCurrent");
#endif
  }

  /* find the capability list entry that matches the context's device */
  {
    CUresult cuErr = CUDA_SUCCESS;
    int dev_major, dev_minor;
    CUdevice cuDev = 0;
    vt_cupti_dev_t *cuptiDev;

    CHECK_CU_ERROR(cuCtxGetDevice(&cuDev), "cuCtxGetDevice");

    cuErr = cuDeviceComputeCapability(&dev_major, &dev_minor, cuDev);
    CHECK_CU_ERROR(cuErr, "cuDeviceComputeCapability");

    /* read the head of the global capability list under the CUPTI lock */
    CUPTI_LOCK();
      cuptiDev = vt_cupti_capList;
    CUPTI_UNLOCK();
    
    cuptiDev = vt_cupti_checkMetricList(cuptiDev, dev_major, dev_minor);
    if(cuptiDev == NULL){
      /* no list entry for this compute capability: leave the region and
         hand back NULL */
      time = vt_pform_wtime();
      vt_exit(ptid, &time);
      VT_RESUME_CUDA_TRACING(ptid);
      return NULL;
    }

    vtcuptiCtx = (vt_cupti_ctx_t*)malloc(sizeof(vt_cupti_ctx_t));
    if(vtcuptiCtx == NULL)
      vt_error_msg("malloc(sizeof(VTCUPTIhostThrd)) failed!");
    vtcuptiCtx->cuCtx = cuCtx;
    vtcuptiCtx->vtDevCap = cuptiDev;
    vtcuptiCtx->vtGrpList = NULL;
    vtcuptiCtx->counterData = NULL;
    vtcuptiCtx->cuptiEvtIDs = NULL;
    vtcuptiCtx->next = NULL;
  }

  VT_RESUME_CUDA_TRACING(ptid);

  /* create and add the VampirTrace CUPTI groups to the context */
  vt_cupti_addEvtGrpsToCtx(vtcuptiCtx);

  /* the buffer size below is read from the first group; make sure it exists */
  if(vtcuptiCtx->vtGrpList == NULL)
    vt_error_msg("[CUPTI] No event group available for the context!");

  /* allocate memory for CUPTI counter reads */
  {
    size_t allocSize = vtcuptiCtx->vtGrpList->evtNum;
    
    vtcuptiCtx->counterData = (uint64_t *)malloc(allocSize*sizeof(uint64_t));
    vtcuptiCtx->cuptiEvtIDs = (CUpti_EventID *)malloc(allocSize*sizeof(CUpti_EventID));

    /* malloc(0) may legitimately return NULL, so only check real requests */
    if(allocSize > 0 && (vtcuptiCtx->counterData == NULL ||
                         vtcuptiCtx->cuptiEvtIDs == NULL))
      vt_error_msg("[CUPTI] Allocation of counter read buffers failed!");
  }

  /* add VampirTrace CUPTI context entry to list (as first element) */
  CUPTI_LOCK();
    vtcuptiCtx->next = vtcuptiCtxlist;
    vtcuptiCtxlist = vtcuptiCtx;
  CUPTI_UNLOCK();

  time = vt_pform_wtime();
  vt_exit(ptid, &time);

  return vtcuptiCtx;
}