Example #1
/*
 * Select the CUDA device with the most multiprocessors; the chosen device,
 * its maximum grid dimension and its multiprocessor count are stored in the
 * file-scope globals cuDevice, maxGridDim and numMultiProcessors.
 */
void getBestDevice(JNIEnv *env){
  int num_devices;
  int status;
  int i;
  CUdevice temp_device;
  int curr_multiprocessors;
  int max_multiprocessors = -1;
  int max_i = -1;
  
  status = cuDeviceGetCount(&num_devices);
  CHECK_STATUS(env,"error in cuDeviceGetCount",status)
          
  if(num_devices == 0)
      throw_cuda_errror_exception(env,"0 Cuda Devices were found",0);
  
  for(i = 0; i < num_devices; ++i){
    status = cuDeviceGet(&temp_device, i);
    CHECK_STATUS(env,"error in cuDeviceGet",status)
            
    status = cuDeviceGetAttribute(&curr_multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, temp_device);    
    CHECK_STATUS(env,"error in cuDeviceGetAttribute",status)
            
    if(curr_multiprocessors > max_multiprocessors)
    {
      max_multiprocessors = curr_multiprocessors;
      max_i = i;
    }
  }

  status = cuDeviceGet(&cuDevice, max_i); 
  CHECK_STATUS(env,"error in cuDeviceGet",status)
          
  status = cuDeviceGetAttribute(&maxGridDim, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, cuDevice);    
  CHECK_STATUS(env,"error in cuDeviceGetAttribute",status)
          
  numMultiProcessors = max_multiprocessors;

}
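
The CHECK_STATUS macro and the throw_cuda_errror_exception helper (spelled as in the calls above) are used throughout these listings but are not shown. A minimal sketch, assuming a plain java/lang/RuntimeException is acceptable (the real project may use its own exception class and message format), could look like this:

#include <stdio.h>
#include <jni.h>
#include <cuda.h>

//hypothetical helper: raise a Java exception that carries the CUDA status code
void throw_cuda_errror_exception(JNIEnv *env, const char *message, int status){
  char msg[1024];
  jclass exp_class;
  sprintf(msg, "%s (CUDA status %d)", message, status);
  exp_class = (*env)->FindClass(env, "java/lang/RuntimeException");
  (*env)->ThrowNew(env, exp_class, msg);
}

//hypothetical macro: abort the calling function on any CUDA error
#define CHECK_STATUS(env,msg,status)                \
  if(CUDA_SUCCESS != status){                       \
    throw_cuda_errror_exception(env, msg, status);  \
    return;                                         \
  }

Note that the bare return only works in void functions such as getBestDevice and setup; findReserveMem returns a jlong, so it would need a variant of the macro that returns a value.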
Example #2
/*
 * Class:     edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2
 * Method:    findReserveMem
 * Signature: (II)J
 */
JNIEXPORT jlong JNICALL Java_edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2_findReserveMem
  (JNIEnv * env, jobject this_ref, jint max_blocks_per_proc, jint max_threads_per_block)
{
  size_t to_space_size;
  size_t temp_size;
  int status;
  jlong i;
  jint num_blocks;

  status = cuInit(0);
  CHECK_STATUS(env,"error in cuInit",status)

  printf("automatically determining CUDA reserve space...\n");
  
  to_space_size = initContext(env, max_blocks_per_proc, max_threads_per_block);

  //space for 100 types in the scene
  classMemSize = sizeof(jint)*100;

  //one handle slot and one exception slot is reserved per thread
  num_blocks = numMultiProcessors * max_threads_per_block * max_blocks_per_proc;

  gc_space_size = 1024;
  to_space_size -= (num_blocks * sizeof(jlong));  //handles
  to_space_size -= (num_blocks * sizeof(jlong));  //exceptions
  to_space_size -= gc_space_size;
  to_space_size -= classMemSize;
  
  //grow the reserved amount in 100MB steps until every allocation succeeds
  for(i = 1024L*1024L; i < to_space_size; i += 100L*1024L*1024L){
    temp_size = to_space_size - i;

    printf("attempting allocation with temp_size: %zu to_space_size: %zu i: %lld\n", temp_size, to_space_size, (long long) i);
 
    status = cuMemHostAlloc(&toSpace, temp_size, 0);  
    if(status != CUDA_SUCCESS){
	    cuCtxDestroy(cuContext);
	    initContext(env, max_blocks_per_proc, max_threads_per_block);
      continue;
    }
    
    status = cuMemAlloc(&gpuToSpace, temp_size);
    if(status != CUDA_SUCCESS){
      cuCtxDestroy(cuContext);
      initContext(env, max_blocks_per_proc, max_threads_per_block);
      continue;
    } 

    status = cuMemAlloc(&gpuClassMemory, classMemSize);
    if(status != CUDA_SUCCESS){
      cuCtxDestroy(cuContext);
      initContext(env, max_blocks_per_proc, max_threads_per_block);
      continue;
    } 

    status = cuMemHostAlloc(&handlesMemory, num_blocks * sizeof(jlong), CU_MEMHOSTALLOC_WRITECOMBINED); 
    if(status != CUDA_SUCCESS){
      cuCtxDestroy(cuContext);
      initContext(env, max_blocks_per_proc, max_threads_per_block);
      continue;
    } 

    status = cuMemAlloc(&gpuHandlesMemory, num_blocks * sizeof(jlong)); 
    if(status != CUDA_SUCCESS){
      cuCtxDestroy(cuContext);
      initContext(env, max_blocks_per_proc, max_threads_per_block);
      continue;
    } 

    status = cuMemHostAlloc(&exceptionsMemory, num_blocks * sizeof(jlong), 0); 
    if(status != CUDA_SUCCESS){
      cuCtxDestroy(cuContext);
      initContext(env, max_blocks_per_proc, max_threads_per_block);
      continue;
    } 

    status = cuMemAlloc(&gpuExceptionsMemory, num_blocks * sizeof(jlong)); 
    if(status != CUDA_SUCCESS){
      cuCtxDestroy(cuContext);
      initContext(env, max_blocks_per_proc, max_threads_per_block);
      continue;
    } 

    status = cuMemAlloc(&gcInfoSpace, gc_space_size);  
    if(status != CUDA_SUCCESS){
      cuCtxDestroy(cuContext);
      initContext(env, max_blocks_per_proc, max_threads_per_block);
      continue;
    } 

    status = cuMemAlloc(&gpuHeapEndPtr, 8);
    if(status != CUDA_SUCCESS){
      cuCtxDestroy(cuContext);
      initContext(env, max_blocks_per_proc, max_threads_per_block);
      continue;
    } 

    status = cuMemAlloc(&gpuBufferSize, 8);
    if(status != CUDA_SUCCESS){
      cuCtxDestroy(cuContext);
      initContext(env, max_blocks_per_proc, max_threads_per_block);
      continue;
    } 

    //every allocation succeeded; free everything and report the reserve that worked
    cuMemFree(gpuToSpace);
    cuMemFree(gpuClassMemory);
    cuMemFree(gpuHandlesMemory);
    cuMemFree(gpuExceptionsMemory);
    cuMemFree(gcInfoSpace);
    cuMemFree(gpuHeapEndPtr);
    cuMemFree(gpuBufferSize);

    cuMemFreeHost(toSpace);
    cuMemFreeHost(handlesMemory);
    cuMemFreeHost(exceptionsMemory);

    return i;
  }
  throw_cuda_errror_exception(env, "unable to find enough space using CUDA", 0); 
  return 0;
}
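
initContext is called repeatedly above to rebuild the CUDA context after a failed allocation attempt, but its definition is not included. Judging from the setup routine in Example #3, it plausibly recreates the context on the previously selected device and returns the amount of free device memory; the sketch below is an assumption along those lines, reusing the file-scope globals cuContext and cuDevice and the exception helper sketched after Example #1 (the real implementation may size the to-space differently):

//hypothetical reconstruction of initContext
size_t initContext(JNIEnv *env, jint max_blocks_per_proc, jint max_threads_per_block){
  size_t f_mem;
  size_t t_mem;
  int status;

  //the block/thread limits are presumably used elsewhere; not needed in this sketch
  (void) max_blocks_per_proc;
  (void) max_threads_per_block;

  status = cuCtxCreate(&cuContext, CU_CTX_MAP_HOST, cuDevice);
  if(CUDA_SUCCESS != status){
    throw_cuda_errror_exception(env, "error in cuCtxCreate", status);
    return 0;
  }

  //report how much device memory is currently free
  cuMemGetInfo(&f_mem, &t_mem);
  return f_mem;
}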
Example #3
/*
 * Class:     edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2
 * Method:    setup
 * Signature: (III)V
 */
JNIEXPORT void JNICALL Java_edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2_setup
  (JNIEnv *env, jobject this_ref, jint max_blocks_per_proc, jint max_threads_per_block, jint free_space)
{
  int status;
  jint num_blocks;
  int deviceCount = 0;
  size_t f_mem;
  size_t t_mem;
  size_t to_space_size;
  //size_t free_space = 1530L*1024L*1024L;
  textureMemSize = 1;
  
  status = cuInit(0);
  if (CUDA_SUCCESS != status) 
  {
    printf("error in cuInit\n");
  }
  
  status = cuDeviceGetCount(&deviceCount);
  if (CUDA_SUCCESS != status) 
  {
    printf("error in cuDeviceGet\n");
  }

  getBestDevice(env);
  
  status = cuCtxCreate(&cuContext, CU_CTX_MAP_HOST, cuDevice);  
  if (CUDA_SUCCESS != status) 
  {
    printf("error in cuCtxCreate %d\n", status);
  }
  
  // ddb - not using this as this returns the total memory not the free memory
  //to_space_size = memSize();
  
  cuMemGetInfo(&f_mem, &t_mem);
  
  to_space_size = f_mem;
  
  num_blocks = numMultiProcessors * max_threads_per_block * max_blocks_per_proc;
  
#if DEBUG

  printf("Memory: %i(MB)/%i(MB) (Free/Total)\n",f_mem/1024/1024, t_mem/1024/1024);
  
  printf("num_blocks = %i\n",num_blocks);
  printf("numMultiProcessors = %i\n",numMultiProcessors);
  printf("max_threads_per_block = %i\n",max_threads_per_block);
  printf("max_blocks_per_proc = %i\n",max_blocks_per_proc);
  fflush(stdout);
#endif
  
  gc_space_size = 1024;
  to_space_size -= (num_blocks * sizeof(jlong));  //handles
  to_space_size -= (num_blocks * sizeof(jlong));  //exceptions
  to_space_size -= gc_space_size;
  to_space_size -= free_space;
  //to_space_size -= textureMemSize;
  bufferSize = to_space_size;

  status = cuMemHostAlloc(&toSpace, to_space_size, 0);  
  
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "toSpace memory allocation failed", status);
    return;
  }
  
  status = cuMemAlloc(&gpuToSpace, to_space_size);
  
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gpuToSpace memory allocation failed", status);
    return;
  }
  
/*
  status = cuMemHostAlloc(&textureMemory, textureMemSize, 0);  
  if (CUDA_SUCCESS != status) 
  {
    printf("error in cuMemHostAlloc textureMemory %d\n", status);
  }

  status = cuMemAlloc(&gpuTexture, textureMemSize);
  if (CUDA_SUCCESS != status) 
  {
    printf("error in cuMemAlloc gpuTexture %d\n", status);
  }
*/
  status = cuMemHostAlloc(&handlesMemory, num_blocks * sizeof(jlong), CU_MEMHOSTALLOC_WRITECOMBINED); 
  
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "handlesMemory memory allocation failed", status);
    return;
  }

  status = cuMemAlloc(&gpuHandlesMemory, num_blocks * sizeof(jlong)); 
  
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gpuHandlesMemory memory allocation failed", status);
    return;
  }

  status = cuMemHostAlloc(&exceptionsMemory, num_blocks * sizeof(jlong), 0); 
  
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "exceptionsMemory memory allocation failed", status);
    return;
  }

  status = cuMemAlloc(&gpuExceptionsMemory, num_blocks * sizeof(jlong)); 
 
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gpuExceptionsMemory memory allocation failed", status);
    return;
  }

  status = cuMemAlloc(&gcInfoSpace, gc_space_size);  
  
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gcInfoSpace memory allocation failed", status);
    return;
  }

  status = cuMemAlloc(&gpuHeapEndPtr, 8);
  
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gpuHeapEndPtr memory allocation failed", status);
    return;
  }

  status = cuMemAlloc(&gpuBufferSize, 8);
  
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gpuBufferSize memory allocation failed", status);
    return;
  }

  thisRefClass = (*env)->GetObjectClass(env, this_ref);
  setLongField(env, this_ref, "m_ToSpaceAddr", (jlong) toSpace);
  setLongField(env, this_ref, "m_GpuToSpaceAddr", (jlong) gpuToSpace);
  setLongField(env, this_ref, "m_TextureAddr", (jlong) textureMemory);
  setLongField(env, this_ref, "m_GpuTextureAddr", (jlong) gpuTexture);
  setLongField(env, this_ref, "m_HandlesAddr", (jlong) handlesMemory);
  setLongField(env, this_ref, "m_GpuHandlesAddr", (jlong) gpuHandlesMemory);
  setLongField(env, this_ref, "m_ExceptionsHandlesAddr", (jlong) exceptionsMemory);
  setLongField(env, this_ref, "m_GpuExceptionsHandlesAddr", (jlong) gpuExceptionsMemory);
  setLongField(env, this_ref, "m_ToSpaceSize", (jlong) bufferSize);
  setLongField(env, this_ref, "m_MaxGridDim", (jlong) maxGridDim);
  setLongField(env, this_ref, "m_NumMultiProcessors", (jlong) numMultiProcessors);
}
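
setLongField is used at the end of setup to publish the native addresses and sizes back to the Java object, but it is not defined in these listings. A minimal sketch, assuming it simply looks up a jlong field by name on the class cached in thisRefClass, follows (the real helper may cache field IDs or add error checking):

//hypothetical helper: store a jlong into the named field of the Java object
//("J" is the JNI descriptor for a long field)
void setLongField(JNIEnv *env, jobject obj, const char *name, jlong value){
  jfieldID fid = (*env)->GetFieldID(env, thisRefClass, name, "J");
  (*env)->SetLongField(env, obj, fid, value);
}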