Exemplo n.º 1
0
/*
 * List the CUPTI event counters of a CUDA device.
 *
 * The device's event domains are queried and each of them is handed to
 * enumEvents().
 *
 * @param cuDev the CUDA device
 */
static void vt_cupti_showAllCounters(CUdevice cuDev)
{
  CUptiResult status = CUPTI_SUCCESS;
  CUpti_EventDomainID *domains = NULL;
  uint32_t numDomains = 0;
  uint32_t d = 0;
  size_t bytes = 0;

  /* how many event domains does the device expose? */
  status = cuptiDeviceGetNumEventDomains(cuDev, &numDomains);
  CHECK_CUPTI_ERROR(status, "cuptiDeviceGetNumEventDomains");

  if(0 == numDomains){
    vt_cntl_msg(1, "[CUPTI] No domain is exposed by dev = %d\n", cuDev);
    return;
  }

  /* zero-initialized scratch array receiving the domain IDs */
  bytes = numDomains * sizeof(CUpti_EventDomainID);
  domains = (CUpti_EventDomainID*)malloc(bytes);
  if(NULL == domains){
    vt_cntl_msg(1, "[CUPTI] Failed to allocate memory to domain ID");
    return;
  }
  memset(domains, 0, bytes);

  status = cuptiDeviceEnumEventDomains(cuDev, &bytes, domains);
  CHECK_CUPTI_ERROR(status, "cuptiDeviceEnumEventDomains");

  /* walk over all domains */
  while(d < numDomains){
    enumEvents(cuDev, domains[d]);
    d++;
  }

  free(domains);
}
Exemplo n.º 2
0
/*
 * Shut down CUPTI event capturing for a VampirTrace CUPTI context while
 * leaving the context object itself alive.
 *
 * Does nothing when no context is given or when vt_gpu_debug is set.
 *
 * @param vtcuptiCtx pointer to the VampirTrace CUPTI context
 */
static void vt_cupti_finish(vt_cupti_ctx_t *vtcuptiCtx)
{
  CUptiResult status = CUPTI_SUCCESS;
  vt_cupti_grp_t *grp = NULL;

  if(vt_gpu_debug || NULL == vtcuptiCtx) return;

  /* first stop the counter capturing ... */
  vt_cupti_stop(vtcuptiCtx);

  /* ... then empty and destroy every event group of the context */
  for(grp = vtcuptiCtx->vtGrpList; grp != NULL; grp = grp->next){
    status = cuptiEventGroupRemoveAllEvents(grp->evtGrp);
    CHECK_CUPTI_ERROR(status, "cuptiEventGroupRemoveAllEvents");

    status = cuptiEventGroupDestroy(grp->evtGrp);
    CHECK_CUPTI_ERROR(status, "cuptiEventGroupDestroy");
  }
}
Exemplo n.º 3
0
/*
 * Constructor routine (GCC `constructor` attribute): subscribes
 * getTimestampCallback to CUPTI with &trace as user data, enables callbacks
 * for the CUDA runtime API domain and registers Trace_end() with atexit().
 *
 * Uses the file-level variables cuptierr, subscriber and trace.
 */
__attribute__((constructor)) void Trace_start()
{

	/* the subscription must exist before a domain can be enabled for it */
	cuptierr = cuptiSubscribe(&subscriber, (CUpti_CallbackFunc)getTimestampCallback , &trace);
	CHECK_CUPTI_ERROR(cuptierr, "cuptiSubscribe");
	cuptierr = cuptiEnableDomain(1, subscriber, CUPTI_CB_DOMAIN_RUNTIME_API);
	CHECK_CUPTI_ERROR(cuptierr, "cuptiEnableDomain");

	/* Trace_end() is invoked at normal process termination */
	printf("<-----------register Trace_end--------------->\n");
	atexit (Trace_end);
}
Exemplo n.º 4
0
/*
 * Finalizes CUPTI for the CUDA context that is current on the calling thread.
 *
 * The matching VampirTrace CUPTI context is taken from the context list,
 * optionally cleaned up (event groups stopped, emptied and destroyed) and
 * then freed.
 *
 * @param ptid      thread id passed to the CUDA tracing suspend/resume macros
 * @param cleanExit 1 to cleanup CUPTI event group, otherwise 0
 */
void vt_cupti_finalize_device(uint32_t ptid, uint8_t cleanExit){
  CUptiResult cuptiErr = CUPTI_SUCCESS;
  vt_cupti_ctx_t *vtcuptiCtx = NULL;

  vt_cntl_msg(2, "[CUPTI] Finalize device ... ");

  {
    CUcontext cuCtx = NULL;

    VT_SUSPEND_CUDA_TRACING(ptid);
    
#if (defined(CUDA_VERSION) && (CUDA_VERSION < 4000))
    /* no cuCtxGetCurrent() here: pop the context and push it back */
    CHECK_CU_ERROR(cuCtxPopCurrent(&cuCtx), "cuCtxPopCurrent");
    CHECK_CU_ERROR(cuCtxPushCurrent(cuCtx), "cuCtxPushCurrent");
#else
    CHECK_CU_ERROR(cuCtxGetCurrent(&cuCtx), "cuCtxGetCurrent");
#endif
    
    VT_RESUME_CUDA_TRACING(ptid);

    vtcuptiCtx = vt_cupti_takeCtxFromList(cuCtx);
    if(vtcuptiCtx == NULL) return;
  }

  /* Clean up the CUPTI event groups only outside of GPU debug mode. The
   * previous test (vt_gpu_debug != 0) was inverted with respect to
   * vt_cupti_stop(), which returns immediately when vt_gpu_debug is set:
   * the groups were destroyed in debug mode only, and never disabled. */
  if(cleanExit && vt_gpu_debug == 0){
    /* stop CUPTI counter capturing */
    vt_cupti_stop(vtcuptiCtx);

    /* destroy all CUPTI event groups, which have been created */
    {
      vt_cupti_grp_t *vtcuptiGrp = vtcuptiCtx->vtGrpList;

      while(vtcuptiGrp != NULL){
        cuptiErr = cuptiEventGroupRemoveAllEvents(vtcuptiGrp->evtGrp);
        CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGroupRemoveAllEvents");

        cuptiErr = cuptiEventGroupDestroy(vtcuptiGrp->evtGrp);
        CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGroupDestroy");

        vtcuptiGrp = vtcuptiGrp->next;
      }
    }
  }

  /* free VampirTrace CUPTI context */
  vt_cupti_freeCtx(vtcuptiCtx);
}
Exemplo n.º 5
0
/*
 * Rebuild the CuPTI eventGroup of a control state from a list of native
 * events.
 *
 * The eventGroup is disabled and emptied first; afterwards every entry of
 * `native` is recorded in addedEvents and added to the eventGroup.
 *
 * @param ptr    control state, used as a CUDA_control_state_t
 * @param native events to add; ni_position of each entry is set to its ni_event
 * @param count  number of entries in native
 * @param ctx    unused
 * @return PAPI_OK on success; PAPI_ENOSUPP if the name of an event does not
 *         start with the name of the current device
 */
int
CUDA_update_control_state( hwd_control_state_t * ptr,
						   NativeInfo_t * native, int count,
						   hwd_context_t * ctx )
{
	( void ) ctx;
	CUDA_control_state_t * CUDA_ptr = ( CUDA_control_state_t * ) ptr;
	int index, i;
	CUptiResult cuptiErr = CUPTI_SUCCESS;

	/* Disable the CUDA eventGroup;
	   it also frees the perfmon hardware on the GPU */
	cuptiErr = (*cuptiEventGroupDisablePtr)( CUDA_ptr->eventGroup );
	CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupDisable" );

	cuptiErr = (*cuptiEventGroupRemoveAllEventsPtr)( CUDA_ptr->eventGroup );
	CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupRemoveAllEvents" );

	/* The eventGroup is empty now, so the bookkeeping must reflect the new
	   event count even when count == 0. Assigning it inside the loop left a
	   stale count behind in that case. */
	CUDA_ptr->addedEvents.count = count;

	/* add the requested events to the eventset */
	for ( i = 0; i < count; i++ ) {

		index = native[i].ni_event;
		native[i].ni_position = index;

		/* store events, that have been added to the CuPTI eventGroup
		   in a separate place (addedEvents).
		   Needed, so that we can read the values for the added events only */
		CUDA_ptr->addedEvents.list[i] = index;

		/* if this device name is different from the actual device the code is running on, then exit */
		if ( 0 != strncmp( device[currentDeviceID].name,
						   cuda_native_table[index].name,
						   strlen( device[currentDeviceID].name ) ) ) {
			fprintf( stderr, "Device %s is used -- BUT event %s is collected. \n ---> ERROR: Specify events for the device that is used!\n\n",
				  device[currentDeviceID].name, cuda_native_table[index].name );

			return ( PAPI_ENOSUPP );	// Not supported
		}

		/* Add events to the CuPTI eventGroup */
		cuptiErr =
			(*cuptiEventGroupAddEventPtr)( CUDA_ptr->eventGroup,
									 cuda_native_table[index].resources.
									 eventId );
		CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupAddEvent" );
	}

	return ( PAPI_OK );
}
Exemplo n.º 6
0
/*
 * Control of counters (Reading/Writing/Starting/Stopping/Setup)
 * functions
 */

/*
 * Prepare a control state: allocate and zero the list of added events and
 * create the CuPTI eventGroup for the context cuCtx.
 *
 * Returns PAPI_OK on success and PAPI_ENOSUPP if the list cannot be allocated.
 */
int
CUDA_init_control_state( hwd_control_state_t * ctrl )
{
	CUDA_control_state_t * state = ( CUDA_control_state_t * ) ctrl;
	CUptiResult status = CUPTI_SUCCESS;
	int slot = 0;

	/* storage for the events that get added to the CuPTI eventGroup */
	state->addedEvents.list = malloc( NUM_EVENTS * sizeof ( int ) );
	if ( NULL == state->addedEvents.list ) {
		perror
		( "malloc(): Failed to allocate memory to table of events that are added to CuPTI eventGroup" );
		return ( PAPI_ENOSUPP );
	}

	/* every slot of the list starts out as 0 */
	while ( slot < NUM_EVENTS ) {
		state->addedEvents.list[slot] = 0;
		slot++;
	}

	status = (*cuptiEventGroupCreatePtr)( cuCtx, &state->eventGroup, 0 );
	CHECK_CUPTI_ERROR( status, "cuptiEventGroupCreate" );

	return PAPI_OK;
}
Exemplo n.º 7
0
/*
 * Write a zero value for every active CUPTI counter to the VampirTrace
 * stream and reset the events of each CUPTI event group.
 *
 * If no context is passed, the current context of the calling thread is
 * looked up; the function returns silently if there is none.
 *
 * @param vtcuptiCtx pointer to the VampirTrace CUPTI context (may be NULL)
 * @param strmid the stream id for the counter values
 * @param time the VampirTrace timestamps
 */
void vt_cupti_resetCounter(vt_cupti_ctx_t *vtcuptiCtx, uint32_t strmid,
                           uint64_t *time)
{
  vt_cupti_grp_t *grp = NULL;
  size_t evt;

  if(NULL == vtcuptiCtx){
    VT_CHECK_THREAD;
    vtcuptiCtx = vt_cupti_getCurrentContext(VT_MY_THREAD);
    if(NULL == vtcuptiCtx) return;
  }

  for(grp = vtcuptiCtx->vtGrpList; grp != NULL; grp = grp->next){
    /* emit a zero for each counter of the group */
    for(evt = 0; evt < grp->evtNum; evt++){
      vt_count(strmid, time, grp->vtCIDs[evt], 0);
    }

    /* let CUPTI reset the counter values of the group as well */
    CHECK_CUPTI_ERROR(cuptiEventGroupResetAllEvents(grp->evtGrp),
                      "cuptiEventGroupResetAllEvents");
  }
}
Exemplo n.º 8
0
/*
 * Start counting: clear the accumulated counts of the control state, enable
 * its CuPTI eventGroup and reset the events of that group.
 *
 * ctx is unused. Returns PAPI_OK.
 */
int
CUDA_start( hwd_context_t * ctx, hwd_control_state_t * ctrl )
{
	CUDA_control_state_t * state = ( CUDA_control_state_t * ) ctrl;
	CUptiResult status = CUPTI_SUCCESS;
	int slot = 0;

	( void ) ctx;

	/* all accumulated event values start from 0 again */
	while ( slot < NUM_EVENTS ) {
		state->counts[slot] = 0;
		slot++;
	}

	status = (*cuptiEventGroupEnablePtr)( state->eventGroup );
	CHECK_CUPTI_ERROR( status, "cuptiEventGroupEnable" );

	/* bring every event in the CuPTI eventGroup back to zero */
	status = (*cuptiEventGroupResetAllEventsPtr)( state->eventGroup );
	CHECK_CUPTI_ERROR( status, "cuptiEventGroupResetAllEvents" );

	return ( PAPI_OK );
}
Exemplo n.º 9
0
/*
 * Intended to disable and destroy the CUDA eventGroup.
 *
 * Currently a no-op: the teardown code is compiled out with `#if 0` (see the
 * TODO below), ctrl is unused and PAPI_OK is always returned. */
int
CUDA_cleanup_eventset( hwd_control_state_t * ctrl )
{
    ( void ) ctrl;
    
    // TODO: after cleanup_eventset() which destroys the eventset, update_control_state()
    // is called, which operates on the already destroyed eventset. Bad!
#if 0
	CUDA_control_state_t * CUDA_ctrl = ( CUDA_control_state_t * ) ctrl;
	CUptiResult cuptiErr = CUPTI_SUCCESS;

	/* Disable the CUDA eventGroup;
	   it also frees the perfmon hardware on the GPU */
	cuptiErr = (*cuptiEventGroupDisablePtr)( CUDA_ctrl->eventGroup );
	CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupDisable" );

	/* Call the CuPTI cleaning function before leaving */
	cuptiErr = (*cuptiEventGroupDestroyPtr)( CUDA_ctrl->eventGroup );
	CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupDestroy" );
#endif
	return ( PAPI_OK );
}
Exemplo n.º 10
0
/*
 * Reset all events of the control state's CuPTI eventGroup to zero.
 *
 * ctx is unused. Returns PAPI_OK.
 */
int
CUDA_reset( hwd_context_t * ctx, hwd_control_state_t * ctrl )
{
	CUDA_control_state_t * state = ( CUDA_control_state_t * ) ctrl;
	CUptiResult status;

	( void ) ctx;

	/* the result is kept in a variable before it is handed to the check macro */
	status = (*cuptiEventGroupResetAllEventsPtr)( state->eventGroup );
	CHECK_CUPTI_ERROR( status, "cuptiEventGroupResetAllEvents" );

	return ( PAPI_OK );
}
Exemplo n.º 11
0
/*
 * Reads all event values from the CuPTI eventGroup and accumulates them
 * into counts.
 *
 * @param counts      accumulated values, indexed by the entries of
 *                    addedEvents.list
 * @param eventGroup  CuPTI eventGroup to read
 * @param addedEvents events that were added to the eventGroup
 * @return 0 on success; -1 if memory could not be allocated, the read failed
 *         or the number of events read differs from addedEvents.count
 *
 * The two scratch buffers are released on every exit path.
 */
static int
getEventValue( long long *counts, CUpti_EventGroup eventGroup, AddedEvents_t addedEvents )
{
	CUptiResult cuptiErr = CUPTI_SUCCESS;
	size_t events_read = 0, bufferSizeBytes, arraySizeBytes, i;
	uint64_t *counterDataBuffer = NULL;
	CUpti_EventID *eventIDArray = NULL;
	int j;

	bufferSizeBytes = addedEvents.count * sizeof ( uint64_t );
	counterDataBuffer = ( uint64_t * ) malloc( bufferSizeBytes );

	arraySizeBytes = addedEvents.count * sizeof ( CUpti_EventID );
	eventIDArray = ( CUpti_EventID * ) malloc( arraySizeBytes );

	/* malloc(0) may legitimately return NULL, so only a failed non-empty
	   request counts as an error */
	if ( addedEvents.count != 0 &&
		 ( counterDataBuffer == NULL || eventIDArray == NULL ) ) {
		free( counterDataBuffer );
		free( eventIDArray );
		return -1;
	}

	/* read counter data for the specified event from the CuPTI eventGroup */
	cuptiErr = (*cuptiEventGroupReadAllEventsPtr)( eventGroup,
											 CUPTI_EVENT_READ_FLAG_NONE,
											 &bufferSizeBytes,
											 counterDataBuffer, &arraySizeBytes,
											 eventIDArray, &events_read );
	if ( cuptiErr != CUPTI_SUCCESS ) {
		/* release the buffers before the error is reported, because the
		   check macro may leave this function */
		free( counterDataBuffer );
		free( eventIDArray );
		CHECK_CUPTI_ERROR( cuptiErr, "cuptiEventGroupReadAllEvents" );
		return -1;
	}

	if ( events_read != ( size_t ) addedEvents.count ) {
		free( counterDataBuffer );
		free( eventIDArray );
		return -1;
	}

	/* Since there is no guarantee that returned counter values are in the same
	   order as the counters in the PAPI addedEvents.list, we need to map the
	   CUpti_EventID to PAPI event ID values.
	   According to CuPTI doc: counter return values of counterDataBuffer
	   correspond to the return event IDs in eventIDArray */
	for ( i = 0; i < events_read; i++ )
		for ( j = 0; j < addedEvents.count; j++ )
			if ( cuda_native_table[addedEvents.list[j]].resources.eventId ==
				 eventIDArray[i] )
				// since cuptiEventGroupReadAllEvents() resets counter values to 0;
				// we have to accumulate ourselves
				counts[addedEvents.list[j]] = counts[addedEvents.list[j]] + counterDataBuffer[i];

	free( counterDataBuffer );
	free( eventIDArray );
	return 0;
}
Exemplo n.º 12
0
/*
 * Allocate a VampirTrace CUPTI event group and create the underlying CUPTI
 * event group in the CUDA context of vtcuptiCtx.
 *
 * The arrays for CUPTI event IDs and VampirTrace counter IDs are sized by
 * the number of events of the device capability (vtDevCap->evtNum).
 *
 * @param vtcuptiCtx pointer to the VampirTrace CUPTI context
 *
 * @return the new event group, or NULL if the group structure could not be
 *         allocated
 */
static vt_cupti_grp_t* vt_cupti_createEvtGrp(vt_cupti_ctx_t *vtcuptiCtx)
{
  CUptiResult cuptiErr = CUPTI_SUCCESS;
  vt_cupti_grp_t *vtcuptiGrp = NULL;

  vtcuptiGrp = (vt_cupti_grp_t*)malloc(sizeof(vt_cupti_grp_t));
  /* the structure is written to right away, so a failed allocation must
   * not be dereferenced */
  if(vtcuptiGrp == NULL) return NULL;

  vtcuptiGrp->evtNum = 0;
  vtcuptiGrp->enabled = 0;
  vtcuptiGrp->next = NULL;

  /* create initial CUPTI counter group */
  cuptiErr = cuptiEventGroupCreate(vtcuptiCtx->cuCtx, &(vtcuptiGrp->evtGrp), 0);
  CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGroupCreate");

  /* NOTE(review): the two array allocations below are still unchecked;
   * callers must cope with NULL arrays if they fail */
  vtcuptiGrp->cuptiEvtIDs = (CUpti_EventID *)malloc(
                            vtcuptiCtx->vtDevCap->evtNum*sizeof(CUpti_EventID));
  vtcuptiGrp->vtCIDs = (uint32_t *)malloc(
                       vtcuptiCtx->vtDevCap->evtNum*sizeof(uint32_t));

  return vtcuptiGrp;
}
Exemplo n.º 13
0
/*
 * Halt CUPTI counter capturing: every enabled CUPTI event group of the
 * context is disabled and marked as such.
 *
 * Does nothing when no context is given or when vt_gpu_debug is set.
 *
 * @param vtcuptiCtx pointer to the VampirTrace CUPTI context
 */
static void vt_cupti_stop(vt_cupti_ctx_t *vtcuptiCtx)
{
  vt_cupti_grp_t *grp = NULL;

  if(vt_gpu_debug || NULL == vtcuptiCtx) return;

  /* visit all groups, skipping those that are not enabled */
  for(grp = vtcuptiCtx->vtGrpList; grp != NULL; grp = grp->next){
    CUptiResult status = CUPTI_SUCCESS;

    if(!grp->enabled) continue;

    status = cuptiEventGroupDisable(grp->evtGrp);
    CHECK_CUPTI_ERROR(status, "cuptiEventGroupDisable");

    grp->enabled = 0;
  }
}
Exemplo n.º 14
0
/*
 * Hands the collected trace to displayTimestamps() and then removes the
 * CUPTI subscription.
 *
 * Uses the file-level variables trace, cuptierr and subscriber.
 */
void Trace_end ()
{
	displayTimestamps(trace);
	cuptierr = cuptiUnsubscribe(subscriber);
	CHECK_CUPTI_ERROR(cuptierr, "cuptiUnsubscribe");
}
Exemplo n.º 15
0
/*
 * Request the CUPTI counter values and write them to the given VampirTrace
 * stream with the given timestamps.
 *
 * If no context is passed, the current context of the calling thread is
 * looked up; the function returns silently if there is none. Only enabled
 * event groups are read.
 *
 * @param vtcuptiCtx pointer to the VampirTrace CUPTI context (may be NULL)
 * @param strmid the stream id for the counter values
 * @param time the VampirTrace timestamps
 */
void vt_cupti_writeCounter(vt_cupti_ctx_t *vtcuptiCtx, uint32_t strmid,
                           uint64_t *time)
{
  CUptiResult cuptiErr = CUPTI_SUCCESS;
  vt_cupti_grp_t *vtcuptiGrp = NULL;

  size_t bufferSizeBytes;
  size_t arraySizeBytes;
  size_t numCountersRead;

  if(vtcuptiCtx == NULL){
    VT_CHECK_THREAD;
    vtcuptiCtx = vt_cupti_getCurrentContext(VT_MY_THREAD);
    if(vtcuptiCtx == NULL) return;
  }

  vtcuptiGrp = vtcuptiCtx->vtGrpList;
  while(vtcuptiGrp != NULL){
    /* read events only, if the event group is enabled */
    if(vtcuptiGrp->enabled){

      bufferSizeBytes = vtcuptiGrp->evtNum * sizeof(uint64_t);
      arraySizeBytes = vtcuptiGrp->evtNum * sizeof(CUpti_EventID);
      /* defined value in case the read below does not set it */
      numCountersRead = 0;

      /* read events */
      cuptiErr = cuptiEventGroupReadAllEvents(vtcuptiGrp->evtGrp,
                                              CUPTI_EVENT_READ_FLAG_NONE,
                                              &bufferSizeBytes, vtcuptiCtx->counterData,
                                              &arraySizeBytes, vtcuptiCtx->cuptiEvtIDs,
                                              &numCountersRead);
      CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGroupReadAllEvents");

      if(vtcuptiGrp->evtNum != numCountersRead){
        /* casts make the arguments match the %d conversions */
        vt_error_msg("[CUPTI] %d counter reads, %d metrics specified in "
                   "VT_CUPTI_METRICS!", (int)numCountersRead,
                   (int)vtcuptiGrp->evtNum);
      }

      /* For all events of the event group: map added event IDs to just read event
       * IDs, as the order may not be the same. For small numbers of counter reads
       * this simple mapping should be fast enough.
       */
      {
        size_t j;

        for(j = 0; j < numCountersRead; j++){
          size_t i;
          for(i = 0; i < vtcuptiGrp->evtNum; i++){
            if(vtcuptiCtx->cuptiEvtIDs[j] == *(vtcuptiGrp->cuptiEvtIDs+i)){
              /* write the counter value as VampirTrace counter; the value
               * belongs to the j-th read event, i only selects the counter ID */
              vt_count(strmid, time, *(vtcuptiGrp->vtCIDs+i), vtcuptiCtx->counterData[j]);
            }
          }
        }
      }

    }

    vtcuptiGrp = vtcuptiGrp->next;
  }
  
}
Exemplo n.º 16
0
/*
 * Detect supported domains for specified device
 *
 * Fills device[deviceId].domainCount and device[deviceId].domain (id, name
 * and event count of every domain) and enumerates the events of each domain
 * through enumEvents(). On success the domain count is added to
 * totalDomainCount.
 *
 * @param dev      CUDA device handle
 * @param deviceId index into the device table
 * @return 0 on success; -1 if the device exposes no domain, an allocation
 *         fails or enumEvents() fails
 *
 * The temporary CuPTI domain ID array is released on the error returns of
 * this function as well as on success.
 */
static int
enumEventDomains( CUdevice dev, int deviceId )
{
	CUptiResult err = CUPTI_SUCCESS;
	CUpti_EventDomainID *domainId = NULL;
	uint32_t id = 0;
	size_t size = 0;

	device[deviceId].domainCount = 0;

	/* get number of domains for device dev */
	err = (*cuptiDeviceGetNumEventDomainsPtr)( dev, &device[deviceId].domainCount );
	CHECK_CUPTI_ERROR( err, "cuptiDeviceGetNumEventDomains" );

	if ( device[deviceId].domainCount == 0 ) {
		printf( "No domain is exposed by dev = %d\n", dev );
		return -1;
	}

	/* CuPTI domain struct */
	size = sizeof ( CUpti_EventDomainID ) * device[deviceId].domainCount;
	domainId = ( CUpti_EventDomainID * ) malloc( size );
	if ( domainId == NULL ) {
		perror( "malloc(): Failed to allocate memory to CuPTI domain ID" );
		return -1;
	}
	memset( domainId, 0, size );

	/* PAPI domain struct */
	device[deviceId].domain =
		( DomainData_t * ) malloc( sizeof ( DomainData_t ) *
								   device[deviceId].domainCount );
	if ( device[deviceId].domain == NULL ) {
		perror( "malloc(): Failed to allocate memory to PAPI domain struct" );
		free(domainId);
		return -1;
	}

	/* Enumerates the event domains for a device dev */
	err = (*cuptiDeviceEnumEventDomainsPtr)( dev, &size, domainId );
	CHECK_CUPTI_ERROR( err, "cuptiDeviceEnumEventDomains" );

	/* enum domains */
	for ( id = 0; id < device[deviceId].domainCount; id++ ) {
		device[deviceId].domain[id].domainId = domainId[id];

		/* query domain name */
		size = PAPI_MIN_STR_LEN;
#ifdef CUDA_4_0
		err = cuptiEventDomainGetAttribute( dev,
										   device[deviceId].domain[id].
										   domainId,
										   CUPTI_EVENT_DOMAIN_ATTR_NAME, &size,
										   ( void * ) device[deviceId].
										   domain[id].name );
		CHECK_CUPTI_ERROR( err, "cuptiEventDomainGetAttribute" );

		/* query num of events available in the domain */
		size = sizeof ( device[deviceId].domain[id].eventCount );
		err = cuptiEventDomainGetAttribute( dev,
										   device[deviceId].domain[id].
										   domainId,
										   CUPTI_EVENT_DOMAIN_MAX_EVENTS,
										   &size,
										   ( void * ) &device[deviceId].
										   domain[id].eventCount );
		CHECK_CUPTI_ERROR( err, "cuptiEventDomainGetAttribute" );

		/* enumerate the events for the domain[id] on the device dev */
		if ( 0 != enumEvents( dev, deviceId, id ) ) {
			/* do not leak the temporary domain ID array */
			free( domainId );
			return -1;
		}
#else
		err = (*cuptiDeviceGetEventDomainAttributePtr)( dev,
												  device[deviceId].domain[id].domainId,
												  CUPTI_EVENT_DOMAIN_ATTR_NAME, &size,
												  ( void * ) device[deviceId].domain[id].name );
		CHECK_CUPTI_ERROR( err, "cuptiDeviceGetEventDomainAttribute" );

		/* query num of events available in the domain */
		err = (*cuptiEventDomainGetNumEventsPtr)( device[deviceId].domain[id].domainId,
										    &device[deviceId].domain[id].eventCount );
		CHECK_CUPTI_ERROR( err, "cuptiEventDomainGetNumEvents" );

		/* enumerate the events for the domain[id] on the device deviceId */
		if ( 0 != enumEvents( deviceId, id ) ) {
			/* do not leak the temporary domain ID array */
			free( domainId );
			return -1;
		}
#endif
	}

	totalDomainCount += device[deviceId].domainCount;
	free( domainId );
	return 0;
}
Exemplo n.º 17
0
/*
 * Enumerate the events of domain[domainId] on device[deviceId]: allocates the
 * PAPI event table of the domain, stores id, name and short description of
 * every event in it and adds the domain's event count to totalEventCount.
 *
 * Returns 0 on success and -1 if one of the two allocations fails.
 *
 * NOTE(review): this excerpt starts in the middle of a preprocessor
 * conditional. The `#endif` right after the signature closes an `#if` that is
 * not visible here, and the CUDA_4_0 branches below use `dev`, which is not a
 * parameter of the signature shown - presumably the hidden alternative
 * signature declares it; confirm against the full file.
 */
static int
enumEvents( int deviceId, int domainId )
#endif
{
	CUptiResult err = CUPTI_SUCCESS;
	CUpti_EventID *eventId = NULL;
	size_t size = 0;
	uint32_t id = 0;

	/* CuPTI event struct (temporary array of event IDs, freed before return) */
	size =
		sizeof ( CUpti_EventID ) * device[deviceId].domain[domainId].eventCount;
	eventId = ( CUpti_EventID * ) malloc( size );
	if ( eventId == NULL ) {
		perror( "malloc(): Failed to allocate memory to CuPTI event ID" );
		return -1;
	}
	memset( eventId, 0, size );

	/* PAPI event struct (stays attached to the domain entry) */
	device[deviceId].domain[domainId].event =
		( EventData_t * ) malloc( sizeof ( EventData_t ) *
								  device[deviceId].domain[domainId].
								  eventCount );
	if ( device[deviceId].domain[domainId].event == NULL ) {
		perror( "malloc(): Failed to allocate memory to PAPI event struct" );
		free(eventId);
		return -1;
	}

	/* enumerate the events for the domain[domainId] on the device[deviceId] */
#ifdef CUDA_4_0
	err =
		(*cuptiEventDomainEnumEventsPtr)( dev,
							   ( CUpti_EventDomainID ) device[deviceId].
							   domain[domainId].domainId, &size, eventId );
#else
	err =
		(*cuptiEventDomainEnumEventsPtr)( ( CUpti_EventDomainID ) device[deviceId].
									domain[domainId].domainId, &size, eventId );
#endif
	CHECK_CUPTI_ERROR( err, "cuptiEventDomainEnumEvents" );

	/* query event info */
	for ( id = 0; id < device[deviceId].domain[domainId].eventCount; id++ ) {
		device[deviceId].domain[domainId].event[id].eventId = eventId[id];

		/* query event name; size is passed by address with the value PAPI_MIN_STR_LEN */
		size = PAPI_MIN_STR_LEN;
#ifdef CUDA_4_0
		err = (*cuptiEventGetAttributePtr)( dev,
									 device[deviceId].domain[domainId].
									 event[id].eventId, CUPTI_EVENT_ATTR_NAME,
									 &size,
									 ( uint8_t * ) device[deviceId].
									 domain[domainId].event[id].name );		
#else
		err = (*cuptiEventGetAttributePtr)( device[deviceId].domain[domainId].
									  event[id].eventId, CUPTI_EVENT_ATTR_NAME,
									  &size,
									  ( uint8_t * ) device[deviceId].
									  domain[domainId].event[id].name );
#endif
		CHECK_CUPTI_ERROR( err, "cuptiEventGetAttribute" );

		/* query event description; size is passed by address with the value PAPI_2MAX_STR_LEN */
		size = PAPI_2MAX_STR_LEN;
#ifdef CUDA_4_0
		err = (*cuptiEventGetAttributePtr)( dev,
									 device[deviceId].domain[domainId].
									 event[id].eventId,
									 CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &size,
									 ( uint8_t * ) device[deviceId].
									 domain[domainId].event[id].desc );		
#else
		err = (*cuptiEventGetAttributePtr)( device[deviceId].domain[domainId].
									  event[id].eventId,
									  CUPTI_EVENT_ATTR_SHORT_DESCRIPTION, &size,
									  ( uint8_t * ) device[deviceId].
									  domain[domainId].event[id].desc );
#endif
		CHECK_CUPTI_ERROR( err, "cuptiEventGetAttribute" );
	}

	totalEventCount += device[deviceId].domain[domainId].eventCount;
	free( eventId );
	return 0;
}
Exemplo n.º 18
0
/*
 * Enumerate/Print the available CUPTI events for a given CUDA device and
 * domain.
 *
 * For every event of the domain one "Id:Name" line is reported via
 * vt_cntl_msg(). A domain without events produces no output.
 *
 * @param cuDev the CUDA device
 * @param domainId the CUPTI event domain
 */
static void enumEvents(CUdevice cuDev, CUpti_EventDomainID domainId)
{
  CUptiResult cuptiErr = CUPTI_SUCCESS;
  CUpti_EventID *eventId = NULL;
  uint32_t maxEvents = 0;
  uint32_t i = 0;
  size_t size = 0;

  /* query num of events available in the domain */
  cuptiErr = cuptiEventDomainGetNumEvents(cuDev,
                                          (CUpti_EventDomainID)domainId,
                                          &maxEvents);
  if(cuptiErr == CUPTI_ERROR_INVALID_EVENT_DOMAIN_ID){
    vt_error_msg("Domain Id %d is not supported by device", domainId);
    /* nothing to enumerate, should the error handler return */
    return;
  }
  CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventDomainGetNumEvents");

  /* An empty domain needs no buffer. malloc(0) may return NULL, which would
   * otherwise be reported as an allocation failure. */
  if(maxEvents == 0) return;

  size = sizeof(CUpti_EventID) * maxEvents;
  eventId = (CUpti_EventID*)malloc(size);
  if(eventId == NULL){
    vt_error_msg("Failed to allocate memory to event ID");
    /* never pass NULL on to memset(), should the error handler return */
    return;
  }
  memset(eventId, 0, size);

  cuptiErr = cuptiEventDomainEnumEvents(cuDev,
                                        (CUpti_EventDomainID)domainId,
                                        &size,
                                        eventId);
  CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventDomainEnumEvents");

  /* query event info */
  {
    /* fixed-size name buffer on the stack; no allocation that could fail */
    char eventname[32];

    for(i = 0; i < maxEvents; i++){
      /* in/out parameter: restore the buffer size before every query */
      size_t nameSize = sizeof(eventname);

      /* keep the buffer printable even if the query fails */
      eventname[0] = '\0';
      cuptiErr = cuptiEventGetAttribute(cuDev,
                                        eventId[i],
                                        CUPTI_EVENT_ATTR_NAME,
                                        &nameSize,
                                        eventname);
      CHECK_CUPTI_ERROR(cuptiErr, "cuptiEventGetAttribute");
      eventname[sizeof(eventname) - 1] = '\0';

      vt_cntl_msg(1, "Id:Name   = %d: %s", eventId[i], eventname);
    }
  }

  free(eventId);
}
Exemplo n.º 19
0
/*
 * CUPTI callback that records device timestamps for selected CUDA runtime
 * API calls (cudaLaunch, cudaThreadSynchronize and cudaMemcpy).
 *
 * userdata is used as an array of RuntimeApiTrace_t records; the record is
 * selected by the API (KERNEL, THREAD_SYNC, or MEMCPY_H2D1 plus the number
 * of memory transfers completed so far). On API entry the function name,
 * the cudaMemcpy parameters and the start timestamp are stored; on API exit
 * the end timestamp is stored.
 *
 * NOTE(review): the memory transfer count is never bounded here, so the
 * caller's array must provide a record for every cudaMemcpy that occurs -
 * confirm against the definition of the trace array.
 *
 * @param userdata pointer to the first RuntimeApiTrace_t record
 * @param domain   callback domain (unused)
 * @param cbid     id of the runtime API function
 * @param cbInfo   callback data (call site, names, parameters, context)
 */
void CUPTIAPI
getTimestampCallback(void *userdata, CUpti_CallbackDomain domain,
                     CUpti_CallbackId cbid, const CUpti_CallbackData *cbInfo)
{
  static int memTransCount = 0;
  uint64_t startTimestamp = 0;
  uint64_t endTimestamp = 0;
  RuntimeApiTrace_t *traceData = (RuntimeApiTrace_t*)userdata;
  CUptiResult cuptiErr;

  (void)domain;

  printf ("<------------getTimestampCallback--------------->\n");

  // Data is collected only for the following API; the record pointer
  // depends on the API
  if (cbid == CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_v3020)
    traceData = traceData + KERNEL;
  else if (cbid == CUPTI_RUNTIME_TRACE_CBID_cudaThreadSynchronize_v3020)
    traceData = traceData + THREAD_SYNC;
  else if (cbid == CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_v3020)
    traceData = traceData + MEMCPY_H2D1 + memTransCount;
  else
    return;

  if (cbInfo->callbackSite == CUPTI_API_ENTER) {
    // for a kernel launch report the kernel name, otherwise use the API
    // function name.
    if (cbid == CUPTI_RUNTIME_TRACE_CBID_cudaLaunch_v3020) {
      traceData->functionName = cbInfo->symbolName;
    }
    else {
      traceData->functionName = cbInfo->functionName;
    }
    printf ("%s\t",traceData->functionName);

    // Store parameters passed to cudaMemcpy
    if (cbid == CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_v3020) {
      traceData->memcpy_bytes = ((cudaMemcpy_v3020_params *)(cbInfo->functionParams))->count;
      traceData->memcpy_kind = ((cudaMemcpy_v3020_params *)(cbInfo->functionParams))->kind;
    }

    // Collect timestamp for API start
    cuptiErr = cuptiDeviceGetTimestamp(cbInfo->context, &startTimestamp);
    CHECK_CUPTI_ERROR(cuptiErr, "cuptiDeviceGetTimestamp");

    traceData->startTimestamp = startTimestamp;
    // the cast makes the argument match the %llu conversion
    printf ("%llu\n", (unsigned long long)traceData->startTimestamp);
  }

  if (cbInfo->callbackSite == CUPTI_API_EXIT) {
    // Collect timestamp for API exit
    cuptiErr = cuptiDeviceGetTimestamp(cbInfo->context, &endTimestamp);
    CHECK_CUPTI_ERROR(cuptiErr, "cuptiDeviceGetTimestamp");

    traceData->endTimestamp = endTimestamp;

    // Advance to the next memory transfer operation
    if (cbid == CUPTI_RUNTIME_TRACE_CBID_cudaMemcpy_v3020) {
      memTransCount++;
    }
  }
}