Пример #1
0
bool LPS::detectDeviceAndAddress(deviceType device, sa0State sa0)
{
  if (sa0 == sa0_auto || sa0 == sa0_high)
  {
    address = SA0_HIGH_ADDRESS;
    if (detectDevice(device)) return true;
  }
  if (sa0 == sa0_auto || sa0 == sa0_low)
  {
    address = SA0_LOW_ADDRESS;
    if (detectDevice(device)) return true;
  }

  return false;
}
Пример #2
0
/* Initialize hardware counters, setup the function vector table
 * and get hardware information, this routine is called when the 
 * PAPI process is initialized (IE PAPI_library_init)
 *
 * NOTE: only called by main thread (not by every thread) !!!
 *
 * Starting in CUDA 4.0, multiple CPU threads can access the same CUDA context.
 * This is a much easier programming model then pre-4.0 as threads - using the 
 * same context - can share memory, data, etc. 
 * It's possible to create a different context for each thread, but then we are
 * likely running into a limitation that only one context can be profiled at a time.
 * ==> and we don't want this. That's why CUDA context creation is done in 
 * CUDA_init_component() (called only by main thread) rather than CUDA_init() 
 * or CUDA_init_control_state() (both called by each thread).
 */
int
CUDA_init_component( int cidx )
{
	SUBDBG ("Entry: cidx: %d\n", cidx);
	CUresult cuErr = CUDA_SUCCESS;

	/* link in all the cuda libraries and resolve the symbols we need to use */
	if (linkCudaLibraries() != PAPI_OK) {
		SUBDBG ("Dynamic link of CUDA libraries failed, component will be disabled.\n");
		SUBDBG ("See disable reason in papi_component_avail output for more details.\n");
		return (PAPI_ENOSUPP);
	}

	/* Create dynamic event table */
	NUM_EVENTS = detectDevice(  );
	if (NUM_EVENTS < 0) {
		strncpy(_cuda_vector.cmp_info.disabled_reason, "Call to detectDevice failed.",PAPI_MAX_STR_LEN);
		return (PAPI_ENOSUPP);
	}
	/* TODO: works only for one device right now;
	 need to find out if user can use 2 or more devices at same time */

	/* want create a CUDA context for either the default device or
	 the device specified with cudaSetDevice() in user code */
	if ( CUDA_SUCCESS != (*cudaGetDevicePtr)( &currentDeviceID ) ) {
		strncpy(_cuda_vector.cmp_info.disabled_reason, "No NVIDIA GPU's found.",PAPI_MAX_STR_LEN);
		return ( PAPI_ENOSUPP );
	}
	
	if ( getenv( "PAPI_VERBOSE" ) ) {
		printf( "DEVICE USED: %s (%d)\n", device[currentDeviceID].name,
			   currentDeviceID );
	}
	
	/* get the CUDA context from the calling CPU thread */
	cuErr = (*cuCtxGetCurrentPtr)( &cuCtx );

	/* if no CUDA context is bound to the calling CPU thread yet, create one */
	if ( cuErr != CUDA_SUCCESS || cuCtx == NULL ) {
		cuErr = (*cuCtxCreatePtr)( &cuCtx, 0, device[currentDeviceID].dev );
		CHECK_CU_ERROR( cuErr, "cuCtxCreate" );
	}

	/* cuCtxGetCurrent() can return a non-null context that is not valid 
	   because the context has not yet been initialized.
	   Here is a workaround: 
	   cudaFree(NULL) forces the context to be initialized
	   if cudaFree(NULL) returns success then we are able to use the context in subsequent calls
	   if cudaFree(NULL) returns an error (or subsequent cupti* calls) then the context is not usable,
	   and will never be useable */
	if ( CUDA_SUCCESS != (*cudaFreePtr)( NULL ) ) {
		strncpy(_cuda_vector.cmp_info.disabled_reason, "Problem initializing CUDA context.",PAPI_MAX_STR_LEN);
		return ( PAPI_ENOSUPP );
	}

	/* Create dynamic event table */
	cuda_native_table = ( CUDA_native_event_entry_t * )
		malloc( sizeof ( CUDA_native_event_entry_t ) * NUM_EVENTS );
	if ( cuda_native_table == NULL ) {
		perror( "malloc(): Failed to allocate memory to events table" );
		strncpy(_cuda_vector.cmp_info.disabled_reason, "Failed to allocate memory to events table.",PAPI_MAX_STR_LEN);
		return ( PAPI_ENOSUPP );
	}

	if ( NUM_EVENTS != createNativeEvents(  ) ) {
		strncpy(_cuda_vector.cmp_info.disabled_reason, "Error creating CUDA event list.",PAPI_MAX_STR_LEN);
		return ( PAPI_ENOSUPP );
	}
	
	/* Export the component id */
	_cuda_vector.cmp_info.CmpIdx = cidx;

	return ( PAPI_OK );
}