Ejemplo n.º 1
0
/*
 * Creates the CUDA context and computes how many bytes of free device
 * memory are left over after the runtime's fixed reservations.
 *
 * Parameters:
 *   env                   - JNI environment; passed to getBestDevice() and to
 *                           the CHECK_STATUS_RTN error macro.
 *   max_blocks_per_proc   - multiplied with max_threads_per_block and
 *   max_threads_per_block   numMultiProcessors to size the per-slot arrays.
 *
 * Returns:
 *   Free device memory (as reported by cuMemGetInfo) minus two arrays of
 *   num_blocks jlong values, gc_space_size and classMemSize.  Returns 0 when
 *   a CUDA call fails or when the reservations exceed the free memory.
 *
 * Side effects:
 *   Writes the globals cuContext (through cuCtxCreate), classMemSize and
 *   gc_space_size.
 *
 * NOTE(review): cuDevice and numMultiProcessors are presumably set by
 * getBestDevice(); that function is not visible here - confirm.
 * NOTE(review): CHECK_STATUS_RTN is defined elsewhere; from its usage it is
 * assumed to report the error and return its last argument when status is
 * not CUDA_SUCCESS - confirm.
 */
size_t initContext(JNIEnv * env, jint max_blocks_per_proc, jint max_threads_per_block)
{
  size_t to_space_size;
  int status;
  int deviceCount = 0;
  size_t f_mem;
  size_t t_mem;
  jint num_blocks;

  status = cuDeviceGetCount(&deviceCount);
  CHECK_STATUS_RTN(env,"error in cuDeviceGetCount",status, 0)

  getBestDevice(env);

  status = cuCtxCreate(&cuContext, CU_CTX_MAP_HOST, cuDevice);
  CHECK_STATUS_RTN(env,"error in cuCtxCreate",status, 0)

  // f_mem = free bytes, t_mem = total bytes; only the free amount is used
  status = cuMemGetInfo (&f_mem, &t_mem);
  CHECK_STATUS_RTN(env,"error in cuMemGetInfo",status, 0)

  to_space_size = f_mem;

  //space for 100 types in the scene
  classMemSize = sizeof(jint)*100;

  num_blocks = numMultiProcessors * max_threads_per_block * max_blocks_per_proc;

  gc_space_size = 1024;
  // two arrays with one jlong per slot
  // (presumably the handles and exceptions buffers - confirm)
  to_space_size -= (num_blocks * sizeof(jlong));
  to_space_size -= (num_blocks * sizeof(jlong));
  to_space_size -= gc_space_size;
  to_space_size -= classMemSize;

  // size_t arithmetic wraps instead of going negative: a result larger than
  // what we started with means the reservations did not fit in free memory.
  if (to_space_size > f_mem) {
    status = CUDA_ERROR_OUT_OF_MEMORY;
    CHECK_STATUS_RTN(env,"not enough free GPU memory for the reserved buffers",status, 0)
    return 0; // fallback in case the macro does not return by itself
  }

  return to_space_size;
}
Ejemplo n.º 2
0
/*
 * Class:     edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2
 * Method:    setup
 * Signature: (III)V
 *
 * Initializes the CUDA driver, creates a context on the device chosen by
 * getBestDevice(), allocates the host and device buffers used by the runtime
 * and publishes their addresses and sizes into long fields of this_ref.
 *
 * Parameters:
 *   env                   - JNI environment.
 *   this_ref              - the Java object whose m_* long fields are filled in.
 *   max_blocks_per_proc   - multiplied with max_threads_per_block and
 *   max_threads_per_block   numMultiProcessors to size the per-slot arrays.
 *   free_space            - bytes of device memory to leave unallocated.
 *
 * Errors:
 *   Any failing CUDA call raises a Java exception through
 *   throw_cuda_errror_exception() and returns immediately; the object's
 *   fields are only written when every step succeeded.
 *
 * NOTE(review): buffers allocated before a later failure are not released
 * here; whether another routine cleans them up is not visible in this block.
 * NOTE(review): cuDevice, numMultiProcessors and maxGridDim are presumably
 * set by getBestDevice() - confirm.
 */
JNIEXPORT void JNICALL Java_edu_syr_pcpratts_rootbeer_runtime2_cuda_CudaRuntime2_setup
  (JNIEnv *env, jobject this_ref, jint max_blocks_per_proc, jint max_threads_per_block, jint free_space)
{
  int status;
  jint num_blocks;
  int deviceCount = 0;
  size_t f_mem;
  size_t t_mem;
  size_t to_space_size;
  //size_t free_space = 1530L*1024L*1024L;
  textureMemSize = 1;

  // Nothing below can work without an initialized driver, a device and a
  // context, so these failures abort the setup instead of only being logged.
  status = cuInit(0);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "error in cuInit", status);
    return;
  }

  status = cuDeviceGetCount(&deviceCount);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "error in cuDeviceGetCount", status);
    return;
  }

  getBestDevice();

  status = cuCtxCreate(&cuContext, CU_CTX_MAP_HOST, cuDevice);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "error in cuCtxCreate", status);
    return;
  }

  // ddb - not using this as this returns the total memory not the free memory
  //to_space_size = memSize();

  // f_mem is only defined when the call succeeds, so the status must be checked
  status = cuMemGetInfo(&f_mem, &t_mem);
  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "error in cuMemGetInfo", status);
    return;
  }

  to_space_size = f_mem;

  num_blocks = numMultiProcessors * max_threads_per_block * max_blocks_per_proc;

#if DEBUG

  // size_t must not be printed with %i; the MB counts fit in unsigned long
  printf("Memory: %lu(MB)/%lu(MB) (Free/Total)\n",
         (unsigned long)(f_mem/1024/1024), (unsigned long)(t_mem/1024/1024));

  printf("num_blocks = %i\n",(int) num_blocks);
  printf("numMultiProcessors = %i\n",numMultiProcessors);
  printf("max_threads_per_block = %i\n",(int) max_threads_per_block);
  printf("max_blocks_per_proc = %i\n",(int) max_blocks_per_proc);
  fflush(stdout);
#endif

  gc_space_size = 1024;
  // one jlong per slot for the handles array and one for the exceptions array
  to_space_size -= (num_blocks * sizeof(jlong));
  to_space_size -= (num_blocks * sizeof(jlong));
  to_space_size -= gc_space_size;
  to_space_size -= free_space;
  //to_space_size -= textureMemSize;

  // size_t arithmetic wraps instead of going negative: a result larger than
  // the starting value means the reservations did not fit in free memory.
  if (to_space_size > f_mem) {
    throw_cuda_errror_exception(env, "not enough free GPU memory for the requested configuration", CUDA_ERROR_OUT_OF_MEMORY);
    return;
  }

  bufferSize = to_space_size;

  // host-side copy of the to-space and its device counterpart
  status = cuMemHostAlloc(&toSpace, to_space_size, 0);

  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "toSpace memory allocation failed", status);
    return;
  }

  status = cuMemAlloc(&gpuToSpace, to_space_size);

  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gpuToSpace memory allocation failed", status);
    return;
  }

  // Texture buffers are currently not allocated; textureMemory and gpuTexture
  // are still published below with whatever value they already hold.
/*
  status = cuMemHostAlloc(&textureMemory, textureMemSize, 0);  
  if (CUDA_SUCCESS != status) 
  {
    printf("error in cuMemHostAlloc textureMemory %d\n", status);
  }

  status = cuMemAlloc(&gpuTexture, textureMemSize);
  if (CUDA_SUCCESS != status) 
  {
    printf("error in cuMemAlloc gpuTexture %d\n", status);
  }
*/
  status = cuMemHostAlloc(&handlesMemory, num_blocks * sizeof(jlong), CU_MEMHOSTALLOC_WRITECOMBINED);

  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "handlesMemory memory allocation failed", status);
    return;
  }

  status = cuMemAlloc(&gpuHandlesMemory, num_blocks * sizeof(jlong));

  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gpuHandlesMemory memory allocation failed", status);
    return;
  }

  status = cuMemHostAlloc(&exceptionsMemory, num_blocks * sizeof(jlong), 0);

  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "exceptionsMemory memory allocation failed", status);
    return;
  }

  status = cuMemAlloc(&gpuExceptionsMemory, num_blocks * sizeof(jlong));

  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gpuExceptionsMemory memory allocation failed", status);
    return;
  }

  status = cuMemAlloc(&gcInfoSpace, gc_space_size);

  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gcInfoSpace memory allocation failed", status);
    return;
  }

  // two 8-byte device cells
  status = cuMemAlloc(&gpuHeapEndPtr, 8);

  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gpuHeapEndPtr memory allocation failed", status);
    return;
  }

  status = cuMemAlloc(&gpuBufferSize, 8);

  if (CUDA_SUCCESS != status) {
    throw_cuda_errror_exception(env, "gpuBufferSize memory allocation failed", status);
    return;
  }

  // hand the addresses and sizes over to the Java object
  thisRefClass = (*env)->GetObjectClass(env, this_ref);
  setLongField(env, this_ref, "m_ToSpaceAddr", (jlong) toSpace);
  setLongField(env, this_ref, "m_GpuToSpaceAddr", (jlong) gpuToSpace);
  setLongField(env, this_ref, "m_TextureAddr", (jlong) textureMemory);
  setLongField(env, this_ref, "m_GpuTextureAddr", (jlong) gpuTexture);
  setLongField(env, this_ref, "m_HandlesAddr", (jlong) handlesMemory);
  setLongField(env, this_ref, "m_GpuHandlesAddr", (jlong) gpuHandlesMemory);
  setLongField(env, this_ref, "m_ExceptionsHandlesAddr", (jlong) exceptionsMemory);
  setLongField(env, this_ref, "m_GpuExceptionsHandlesAddr", (jlong) gpuExceptionsMemory);
  setLongField(env, this_ref, "m_ToSpaceSize", (jlong) bufferSize);
  setLongField(env, this_ref, "m_MaxGridDim", (jlong) maxGridDim);
  setLongField(env, this_ref, "m_NumMultiProcessors", (jlong) numMultiProcessors);
}
Ejemplo n.º 3
0
/*
 * Creates the CUDA context on the device chosen by getBestDevice(), allocates
 * the host and device buffers used by the runtime and publishes their
 * addresses and sizes into long fields of this_ref.
 *
 * Parameters:
 *   env                   - JNI environment; also passed to the CHECK_STATUS
 *                           error macro.
 *   this_ref              - the Java object whose m_* long fields are filled in.
 *   max_blocks_per_proc   - multiplied with max_threads_per_block and
 *   max_threads_per_block   numMultiProcessors to size the per-slot arrays.
 *   free_space            - bytes of device memory to leave unallocated.
 *
 * NOTE(review): CHECK_STATUS is defined elsewhere; from its usage it is
 * assumed to report the error and return from this function when status is
 * not CUDA_SUCCESS - confirm.
 * NOTE(review): buffers allocated before a later failure are not released
 * here; whether another routine cleans them up is not visible in this block.
 * NOTE(review): cuDevice, numMultiProcessors and maxGridDim are presumably
 * set by getBestDevice() - confirm.
 */
void initDevice(JNIEnv * env, jobject this_ref, jint max_blocks_per_proc, jint max_threads_per_block, jlong free_space)
{
  int status;
  jint num_blocks;
  int deviceCount = 0;
  size_t f_mem;
  size_t t_mem;
  size_t to_space_size;
  textureMemSize = 1;

  status = cuDeviceGetCount(&deviceCount);
  CHECK_STATUS(env,"error in cuDeviceGetCount",status)

  getBestDevice(env);

  status = cuCtxCreate(&cuContext, CU_CTX_MAP_HOST, cuDevice);
  CHECK_STATUS(env,"error in cuCtxCreate",status)

  // f_mem = free bytes, t_mem = total bytes
  status = cuMemGetInfo (&f_mem, &t_mem);
  CHECK_STATUS(env,"error in cuMemGetInfo",status)

  to_space_size = f_mem;

  num_blocks = numMultiProcessors * max_threads_per_block * max_blocks_per_proc;

#if DEBUG

  // size_t must not be printed with %i; the MB counts fit in unsigned long
  printf("Memory: %lu(MB)/%lu(MB) (Free/Total)\n",
         (unsigned long)(f_mem/1024/1024), (unsigned long)(t_mem/1024/1024));

  printf("num_blocks = %i\n",(int) num_blocks);
  printf("numMultiProcessors = %i\n",numMultiProcessors);
  printf("max_threads_per_block = %i\n",(int) max_threads_per_block);
  printf("max_blocks_per_proc = %i\n",(int) max_blocks_per_proc);
  fflush(stdout);
#endif

  //space for 100 types in the scene
  classMemSize = sizeof(jint)*100;

  gc_space_size = 1024;
  // one jlong per slot for the handles array and one for the exceptions array
  to_space_size -= (num_blocks * sizeof(jlong));
  to_space_size -= (num_blocks * sizeof(jlong));
  to_space_size -= gc_space_size;
  to_space_size -= free_space;
  to_space_size -= classMemSize;

  //to_space_size -= textureMemSize;

  // size_t arithmetic wraps instead of going negative: a result larger than
  // the starting value means the reservations did not fit in free memory.
  if (to_space_size > f_mem) {
    status = CUDA_ERROR_OUT_OF_MEMORY;
    CHECK_STATUS(env,"not enough free GPU memory for the requested configuration",status)
    return; // fallback in case the macro does not return by itself
  }

  bufferSize = to_space_size;

  // host-side copy of the to-space and its device counterpart
  status = cuMemHostAlloc(&toSpace, to_space_size, 0);
  CHECK_STATUS(env,"toSpace memory allocation failed",status)

  status = cuMemAlloc(&gpuToSpace, to_space_size);
  CHECK_STATUS(env,"gpuToSpace memory allocation failed",status)

  status = cuMemAlloc(&gpuClassMemory, classMemSize);
  CHECK_STATUS(env,"gpuClassMemory memory allocation failed",status)

  // Texture buffers are currently not allocated; textureMemory and gpuTexture
  // are still published below with whatever value they already hold.
/*
  status = cuMemHostAlloc(&textureMemory, textureMemSize, 0);  
  if (CUDA_SUCCESS != status) 
  {
    printf("error in cuMemHostAlloc textureMemory %d\n", status);
  }

  status = cuMemAlloc(&gpuTexture, textureMemSize);
  if (CUDA_SUCCESS != status) 
  {
    printf("error in cuMemAlloc gpuTexture %d\n", status);
  }
*/

  status = cuMemHostAlloc(&handlesMemory, num_blocks * sizeof(jlong), CU_MEMHOSTALLOC_WRITECOMBINED);
  CHECK_STATUS(env,"handlesMemory memory allocation failed",status)

  status = cuMemAlloc(&gpuHandlesMemory, num_blocks * sizeof(jlong));
  CHECK_STATUS(env,"gpuHandlesMemory memory allocation failed",status)

  status = cuMemHostAlloc(&exceptionsMemory, num_blocks * sizeof(jlong), 0);
  CHECK_STATUS(env,"exceptionsMemory memory allocation failed",status)

  status = cuMemAlloc(&gpuExceptionsMemory, num_blocks * sizeof(jlong));
  CHECK_STATUS(env,"gpuExceptionsMemory memory allocation failed",status)

  status = cuMemAlloc(&gcInfoSpace, gc_space_size);
  CHECK_STATUS(env,"gcInfoSpace memory allocation failed",status)

  // two 8-byte device cells
  status = cuMemAlloc(&gpuHeapEndPtr, 8);
  CHECK_STATUS(env,"gpuHeapEndPtr memory allocation failed",status)

  status = cuMemAlloc(&gpuBufferSize, 8);
  CHECK_STATUS(env,"gpuBufferSize memory allocation failed",status)

  // hand the addresses and sizes over to the Java object
  thisRefClass = (*env)->GetObjectClass(env, this_ref);
  setLongField(env, this_ref, "m_ToSpaceAddr", (jlong) toSpace);
  setLongField(env, this_ref, "m_GpuToSpaceAddr", (jlong) gpuToSpace);
  setLongField(env, this_ref, "m_TextureAddr", (jlong) textureMemory);
  setLongField(env, this_ref, "m_GpuTextureAddr", (jlong) gpuTexture);
  setLongField(env, this_ref, "m_HandlesAddr", (jlong) handlesMemory);
  setLongField(env, this_ref, "m_GpuHandlesAddr", (jlong) gpuHandlesMemory);
  setLongField(env, this_ref, "m_ExceptionsHandlesAddr", (jlong) exceptionsMemory);
  setLongField(env, this_ref, "m_GpuExceptionsHandlesAddr", (jlong) gpuExceptionsMemory);
  setLongField(env, this_ref, "m_ToSpaceSize", (jlong) bufferSize);
  setLongField(env, this_ref, "m_MaxGridDim", (jlong) maxGridDim);
  setLongField(env, this_ref, "m_NumMultiProcessors", (jlong) numMultiProcessors);
}
Ejemplo n.º 4
0
 // Default constructor: does all of its work in the member-initializer list.
 //   device        <- result of getBestDevice()
 //   context       <- built from device
 //   transfer_q    <- built from (context, device)
 //   computation_q <- built from (context, device)
 // NOTE(review): C++ initializes members in class-declaration order, not in
 // the order written here. The list relies on device being declared before
 // context, and context before both queues - confirm against the class
 // definition, which is not visible in this chunk.
 compute_context::compute_context()
     : device(getBestDevice()),
       context(device), 
       transfer_q(context, device),
       computation_q(context, device)
 { }