コード例 #1
0
/**
 * Configures and launches `kernel` through the legacy CUDA driver launch API.
 *
 * @param kernel     Function handle to launch; fail() is invoked when it is null.
 * @param blockSize  Block dimensions (x, y); the z extent is fixed to 1.
 * @param gridSize   Grid dimensions (x, y).
 * @param async      Request a launch via cuLaunchGridAsync on `stream`. Honoured
 *                   only when the cuLaunchGridAsync entry point is non-null;
 *                   otherwise cuLaunchGrid is used.
 * @param stream     Stream handed to cuLaunchGridAsync; unused on the
 *                   cuLaunchGrid path.
 *
 * Every driver call result is routed through checkError().
 */
void CudaModule::launchKernel(CUfunction kernel, const Vec2i& blockSize, 
                              const Vec2i& gridSize, bool async, 
                              CUstream stream)
{
  if (!kernel)
    fail("CudaModule: No kernel specified!");

#if (CUDA_VERSION >= 3000)
  // Cache preference is only applied when the entry point is available.
  if (cuFuncSetCacheConfig != NULL)
  {
    CUfunc_cache cachePref = CU_FUNC_CACHE_PREFER_SHARED;
    if (s_preferL1)
      cachePref = CU_FUNC_CACHE_PREFER_L1;

    checkError("cuFuncSetCacheConfig", cuFuncSetCacheConfig(kernel, cachePref));
  }
#endif

  // Refresh module state before the block shape is set and the grid is launched.
  updateGlobals();
  updateTexRefs(kernel);

  checkError("cuFuncSetBlockShape",
             cuFuncSetBlockShape(kernel, blockSize.x, blockSize.y, 1));

  // Fall back to the plain launch when async was not requested or the
  // async entry point is missing.
  if (!async || (cuLaunchGridAsync == NULL))
  {
    checkError("cuLaunchGrid",
               cuLaunchGrid(kernel, gridSize.x, gridSize.y));
  }
  else
  {
    checkError("cuLaunchGridAsync",
               cuLaunchGridAsync(kernel, gridSize.x, gridSize.y, stream));
  }
}
コード例 #2
0
ファイル: CudaRuntime2.c プロジェクト: Chiru2015/rootbeer1
/*
 * Class:     edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2
 * Method:    loadFunction
 * Signature: (JLjava/lang/String;I)V
 *
 * Loads the CUDA module stored in the file named by `filename` into the
 * file-level `cuModule`, resolves the entry kernel into `cuFunction`, selects
 * the L1-preferring cache configuration and binds the kernel's parameter
 * block (seven CUdeviceptr-sized values followed by one int).
 *
 *   heap_end_ptr  stored into the file-level `heapEndPtr`.
 *   filename      path handed to cuModuleLoad.
 *   num_blocks    value bound as the trailing int kernel parameter.
 *
 * CHECK_STATUS is defined elsewhere; it is assumed to report the failure to
 * Java and possibly leave this function early, so no resource may be held
 * across it.
 */
JNIEXPORT void JNICALL Java_edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2_loadFunction
  (JNIEnv *env, jobject this_obj, jlong heap_end_ptr, jstring filename, jint num_blocks){

  int offset;
  CUresult status;
  const char * native_filename;
  heapEndPtr = heap_end_ptr;
  
  native_filename = (*env)->GetStringUTFChars(env, filename, 0);
  if (!native_filename) {
    /* GetStringUTFChars returns NULL on failure with an exception pending. */
    return;
  }
  status = cuModuleLoad(&cuModule, native_filename);
  /* Release the UTF buffer before the status check so that an early exit
   * taken by CHECK_STATUS cannot leak it. */
  (*env)->ReleaseStringUTFChars(env, filename, native_filename);
  CHECK_STATUS(env, "error in cuModuleLoad", status);

  status = cuModuleGetFunction(&cuFunction, cuModule, "_Z5entryPcS_PiPxS1_S0_S0_i"); 
  CHECK_STATUS(env,"error in cuModuleGetFunction",status)

  status = cuFuncSetCacheConfig(cuFunction, CU_FUNC_CACHE_PREFER_L1);
  CHECK_STATUS(env,"error in cuFuncSetCacheConfig",status)

  /* Total parameter block size: seven device pointers plus the int. */
  status = cuParamSetSize(cuFunction, (7 * sizeof(CUdeviceptr) + sizeof(int))); 
  CHECK_STATUS(env,"error in cuParamSetSize",status)

  /* Parameters are packed back to back in kernel-argument order. */
  offset = 0;
  status = cuParamSetv(cuFunction, offset, (void *) &gcInfoSpace, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gcInfoSpace",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuToSpace, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuToSpace",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuHandlesMemory, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuHandlesMemory %",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuHeapEndPtr, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuHeapEndPtr",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuBufferSize, sizeof(CUdeviceptr));
  CHECK_STATUS(env,"error in cuParamSetv gpuBufferSize",status)
  offset += sizeof(CUdeviceptr); 

  status = cuParamSetv(cuFunction, offset, (void *) &gpuExceptionsMemory, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuExceptionsMemory",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuClassMemory, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuClassMemory",status)
  offset += sizeof(CUdeviceptr);

  /* Last parameter: the int block count. */
  status = cuParamSeti(cuFunction, offset, num_blocks); 
  CHECK_STATUS(env,"error in cuParamSeti num_blocks",status)
}
コード例 #3
0
ファイル: CudaRuntime2.c プロジェクト: breznak/rootbeer1
/*
 * Class:     edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2
 * Method:    loadFunction
 * Signature: ()V
 *   NOTE(review): the "()V" descriptor above does not match the native
 *   parameter list below (jlong, jobject, jint, jint, jint) - regenerate or
 *   confirm against the Java declaration.
 *
 * Builds a module image via readCubinFileFromBuffers(), loads it into the
 * file-level `cuModule`, resolves the entry kernel into `cuFunction`, selects
 * the L1-preferring cache configuration and binds the kernel's parameter
 * block (six CUdeviceptr-sized values followed by one int).
 *
 *   heap_end_ptr              stored into the file-level `heapEndPtr`.
 *   buffers, size, total_size forwarded unchanged to readCubinFileFromBuffers().
 *   num_blocks                value bound as the trailing int kernel parameter.
 *
 * CHECK_STATUS is defined elsewhere; it is assumed to report the failure to
 * Java and possibly leave this function early, so no resource may be held
 * across it.
 */
JNIEXPORT void JNICALL Java_edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2_loadFunction
  (JNIEnv *env, jobject this_obj, jlong heap_end_ptr, jobject buffers, jint size, 
   jint total_size, jint num_blocks){

  void * cubin_file;
  int offset;
  CUresult status;
  heapEndPtr = heap_end_ptr;
  
  cubin_file = readCubinFileFromBuffers(env, buffers, size, total_size);
  status = cuModuleLoadData(&cuModule, cubin_file);
  /* The image buffer is not used after the load call; free it before the
   * status check so that an early exit taken by CHECK_STATUS cannot leak it. */
  free(cubin_file);
  CHECK_STATUS(env,"error in cuModuleLoadData",status)

  status = cuModuleGetFunction(&cuFunction, cuModule, "_Z5entryPcS_PiPxS1_S0_i"); 
  CHECK_STATUS(env,"error in cuModuleGetFunction",status)

  status = cuFuncSetCacheConfig(cuFunction, CU_FUNC_CACHE_PREFER_L1);
  CHECK_STATUS(env,"error in cuFuncSetCacheConfig",status)

  /* Total parameter block size: six device pointers plus the int. */
  status = cuParamSetSize(cuFunction, (6 * sizeof(CUdeviceptr) + sizeof(int))); 
  CHECK_STATUS(env,"error in cuParamSetSize",status)

  /* Parameters are packed back to back in kernel-argument order. */
  offset = 0;
  status = cuParamSetv(cuFunction, offset, (void *) &gcInfoSpace, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gcInfoSpace",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuToSpace, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuToSpace",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuHandlesMemory, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuHandlesMemory %",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuHeapEndPtr, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuHeapEndPtr",status)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(cuFunction, offset, (void *) &gpuBufferSize, sizeof(CUdeviceptr));
  CHECK_STATUS(env,"error in cuParamSetv gpuBufferSize",status)
  offset += sizeof(CUdeviceptr); 

  status = cuParamSetv(cuFunction, offset, (void *) &gpuExceptionsMemory, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env,"error in cuParamSetv gpuExceptionsMemory",status)
  offset += sizeof(CUdeviceptr);

  /* Last parameter: the int block count. */
  status = cuParamSeti(cuFunction, offset, num_blocks); 
  CHECK_STATUS(env,"error in cuParamSeti num_blocks",status)
}
コード例 #4
0
ファイル: autoFunction.c プロジェクト: PachoAlvarez/RCUDA
/*
 * R entry point wrapping cuFuncSetCacheConfig().
 *
 *   r_hfunc   R object from which getRReference() extracts the CUfunction.
 *   r_config  R integer vector; its first element is cast to CUfunc_cache.
 *
 * Returns the CUresult of the driver call converted by
 * Renum_convert_CUresult().
 */
SEXP R_auto_cuFuncSetCacheConfig(SEXP r_hfunc, SEXP r_config)
{
    CUfunction fn = (CUfunction) getRReference(r_hfunc);
    CUfunc_cache cachePref = (CUfunc_cache) INTEGER(r_config)[0];
    CUresult status = cuFuncSetCacheConfig(fn, cachePref);

    return Renum_convert_CUresult(status);
}
コード例 #5
0
ファイル: swanlib_nv.c プロジェクト: liyuming1978/swan
/*
 * Resolves the kernel called `kernel`, packs its arguments into the
 * function's parameter block and launches it with cuLaunchGridAsync.
 *
 *   kernel  name of the kernel; matched against state.func_names and, on a
 *           miss, looked up with cuModuleGetFunction in each state.mods entry
 *           (a hit is appended to state.funcs / state.func_names).
 *   grid    grid dimensions; grid.z must be 1.
 *   block   block dimensions passed to cuFuncSetBlockShape.
 *   shmem   value passed to cuFuncSetSharedSize.
 *   flags   not referenced directly in this function body.
 *   ptrs    one entry per argument; for SWAN_PTR the entry points at the
 *           value that is copied into the parameter block.
 *   types   SWAN_* tag per argument, terminated by SWAN_END.
 *
 * Returns without doing anything when any grid or block component is 0.
 * A failed launch prints a diagnostic and terminates the process.
 */
void swanRunKernelAsync( const char *kernel,  block_config_t grid , block_config_t block, size_t shmem, int flags, void *ptrs[], int *types  ) {
	// find the kernel

	if( !grid.x || !grid.y || !grid.z || !block.x || !block.y || !block.z ) { return; } // suppress launch of kernel if any of the launch dims are 0

	CUfunction f = NULL;
	int i;
	int offset = 0; // running byte offset into the kernel parameter block
	CUresult err;

	int type;
	int idx=0; // index into ptrs[] / types[]
	try_init();
	// First look for an already-stored handle with a matching name.
	for( i=0; i < state.num_funcs; i++ ) {
		if( !strcmp( state.func_names[i], kernel ) ) {
			f = state.funcs[i];
			break;
		}
	}

	// Not stored yet: ask each entry of state.mods for the function.
	if( f == NULL ) {
		for( i=0; i < state.num_mods; i++ ) {
			cuModuleGetFunction( &f, state.mods[i], kernel );
			if( f!= NULL ) { 
				// found a kernel. store it for future use
				int j = state.num_funcs;
				state.num_funcs++;
				// NOTE(review): the realloc/malloc results below are used unchecked.
				state.funcs      = (CUfunction*) realloc( state.funcs, sizeof(CUfunction) * state.num_funcs );
				state.funcs[j]   = f;
				state.func_names = (char**)      realloc( state.func_names, sizeof(char*) * state.num_funcs );
				state.func_names[j] = (char*) malloc( strlen(kernel) + 1 );
				strcpy( state.func_names[j], kernel );
				break; 
			}
		}
	}

	// NOTE(review): execution continues past error() if that helper returns;
	// presumably it does not - confirm against its definition.
	if( f== NULL ) {
		fprintf(stderr, "Error running kernel [%s] : \n", kernel );
		error( "No kernel found" );
	}

	// cuLaunchGridAsync below only takes x and y grid extents.
	if( grid.z != 1 ) {
		printf("Kernel [%s] launched with (%d %d %d)(%d %d %d)\n", kernel, grid.x, grid.y, grid.z, block.x, block.y, block.z );
		error( "grid.z needs to be 1" );
	}

//printf("Running kernel [%s]\n", kernel );

	// Walk the SWAN_END-terminated tag list, adding one argument per entry.
	type = types[idx];
	while( type != SWAN_END ) {
		void *ptr = ptrs[idx];
		switch( type ) {
			// DEBLOCK is a macro defined elsewhere. Presumably each use expands
			// to a `case` for the given tag that aligns `offset` to the third
			// argument and copies one value of the given type from `ptr` -
			// TODO confirm against the macro definition.
//			DEBLOCK( SWAN_uchar, uchar,  1 );
			DEBLOCK( SWAN_uchar2, uchar2,  2 );
			DEBLOCK( SWAN_uchar3, uchar3,  1 );
			DEBLOCK( SWAN_uchar4, uchar4,  4 );
			DEBLOCK( SWAN_char , int,  1 );
//			DEBLOCK( SWAN_char1 , char1,  1 );
			DEBLOCK( SWAN_char2 , char2,  2 );
			DEBLOCK( SWAN_char3 , char3,  1 );
			DEBLOCK( SWAN_char4 , char4,  4 );
			DEBLOCK( SWAN_int, int,  4 );
//			DEBLOCK( SWAN_int1, int1,  4 );
			DEBLOCK( SWAN_int2, int2,  8 );
			DEBLOCK( SWAN_int3, int3,  4 );
			DEBLOCK( SWAN_int4, int4,  16 );
//			DEBLOCK( SWAN_float, double,  4 );
//			DEBLOCK( SWAN_float1, float1,  4 );
			DEBLOCK( SWAN_float2, float2,  8 );
			DEBLOCK( SWAN_float3, float3,  4 );
			DEBLOCK( SWAN_float4, float4,  16 );

			DEBLOCK( SWAN_uint, uint,  4 );
			DEBLOCK( SWAN_uint2, uint2,  8 );
			DEBLOCK( SWAN_uint3, uint3,  4 );
			DEBLOCK( SWAN_uint4, uint4,  16 );
			DEBLOCK( SWAN_float, float,  4 );


//#define DEBLOCK(swan_type,type,OFFSET) 
			// Pointer arguments: CUDA 3.2 and later copy sizeof(void*) bytes,
			// older toolkits copy sizeof(int) bytes.
#if ( CUDA_MAJOR == 3 && CUDA_MINOR >= 2 ) || CUDA_MAJOR >= 4
			case SWAN_PTR: 
				{
//printf("PTR as NATIVE\n");
				ALIGN_UP( offset, (sizeof(void*)));
				cuParamSetv( f, offset, ptr, sizeof(void*) );
				offset += sizeof(void*); }
			break;
#else
			case SWAN_PTR: 
				{
//printf("PTR as INT\n");
				ALIGN_UP( offset, (sizeof(int)));
				cuParamSetv( f, offset, ptr, sizeof(int) );
				offset += sizeof(int); }
			break;
#endif



			// Unknown tag: print its numeric value, then report the error.
			default:
        printf("%d\n", type );
				error("Parameter type not handled\n");


		}
		idx++;
		type = types[idx];
	}

//printf("Launching kernel [%s] [%X]  with (%d %d %d) (%d %d %d)\n", kernel, f, grid.x, grid.y, grid.z, block.x, block.y, block.z );
//printf(" TOTAL OFFSET %d\n", offset );
	// `offset` now holds the total size of the packed parameter block.
	CU_SAFE_CALL_NO_SYNC( cuParamSetSize( f, offset ) );
	CU_SAFE_CALL_NO_SYNC( cuFuncSetBlockShape( f, block.x, block.y, block.z ) );
	CU_SAFE_CALL_NO_SYNC( cuFuncSetSharedSize( f, shmem ) );
#if (CUDA_MAJOR ==3 && CUDA_MINOR >=1 ) || CUDA_MAJOR>=4
	// NOTE(review): the return value of this call is not checked.
	cuFuncSetCacheConfig( f, CU_FUNC_CACHE_PREFER_SHARED ); // This seems to be better in every case for acemd
#endif

	// Launched on the NULL stream; the state.stream argument is commented out.
	err = cuLaunchGridAsync( f, grid.x, grid.y, NULL ) ; //state.stream ) ;

	if( err != CUDA_SUCCESS ) {
		fprintf( stderr , "SWAN : FATAL : Failure executing kernel [%s] [%d] [%d,%d,%d][%d,%d,%d]\n", kernel, err, grid.x ,grid.y, grid.z, block.x, block.y, block.z );
	// assert(0) stops builds with assertions enabled; exit(-99) covers NDEBUG builds.
	assert(0);
		exit(-99);
	}

//printf("Kernel completed\n" );
}