Esempio n. 1
0
/*
 * R-callable wrapper around the CUDA driver call cuModuleLoadFatBinary().
 *
 * r_fatCubin: R object from which GET_REF extracts the pointer to the fat
 *             binary image.
 *
 * Returns the value of R_cudaErrorInfo(status) when the driver call reports
 * a non-zero status; otherwise returns the loaded module wrapped by
 * R_createRef() under the type name "CUmodule".
 */
SEXP
R_auto_cuModuleLoadFatBinary(SEXP r_fatCubin)
{
    CUmodule loaded;
    const void * image = GET_REF(r_fatCubin, const void );
    CUresult status = cuModuleLoadFatBinary(&loaded, image);

    /* Only one of the two branches is evaluated, as in an if/return pair. */
    return status ? R_cudaErrorInfo(status)
                  : R_createRef(loaded, "CUmodule");
}
Esempio n. 2
0
JNIEXPORT void JNICALL Java_org_trifort_rootbeer_runtime_CUDAContext_cudaRun
  (JNIEnv *env, jobject this_ref, jint device_index, jbyteArray cubin_file, 
   jint cubin_length, jint block_shape_x, jint grid_shape_x, jint num_threads, 
   jobject object_mem, jobject handles_mem, jobject exceptions_mem, 
   jobject class_mem)
{
  CUresult status;
  CUdevice device;
  CUcontext context;
  CUmodule module;
  CUfunction function;
  void * fatcubin;
  int offset;
  int info_space_size;

  CUdeviceptr gpu_info_space;
  CUdeviceptr gpu_object_mem;
  CUdeviceptr gpu_handles_mem;
  CUdeviceptr gpu_exceptions_mem;
  CUdeviceptr gpu_class_mem;
  CUdeviceptr gpu_heap_end;
  CUdeviceptr gpu_buffer_size;

  void * cpu_object_mem;
  void * cpu_handles_mem;
  void * cpu_exceptions_mem;
  void * cpu_class_mem;
  jlong cpu_object_mem_size;
  jlong cpu_handles_mem_size;
  jlong cpu_exceptions_mem_size;
  jlong cpu_class_mem_size;
  jlong cpu_heap_end;

  jclass cuda_memory_class;
  jmethodID get_address_method;
  jmethodID get_size_method;
  jmethodID get_heap_end_method;
  
  jlong * info_space;

  //----------------------------------------------------------------------------
  //init device and function
  //----------------------------------------------------------------------------
  status = cuDeviceGet(&device, device_index);
  CHECK_STATUS(env, "Error in cuDeviceGet", status, device)

  status = cuCtxCreate(&context, CU_CTX_MAP_HOST, device);  
  CHECK_STATUS(env,"Error in cuCtxCreate", status, device)

  fatcubin = malloc(cubin_length);
  (*env)->GetByteArrayRegion(env, cubin_file, 0, cubin_length, fatcubin);

  status = cuModuleLoadFatBinary(&module, fatcubin);
  CHECK_STATUS(env, "Error in cuModuleLoad", status, device)
  free(fatcubin);

  status = cuModuleGetFunction(&function, module, "_Z5entryPcS_PiPxS1_S0_S0_i"); 
  CHECK_STATUS(env, "Error in cuModuleGetFunction", status, device)

  //----------------------------------------------------------------------------
  //get handles from java
  //----------------------------------------------------------------------------
  cuda_memory_class = (*env)->FindClass(env, "org/trifort/rootbeer/runtime/FixedMemory");
  get_address_method = (*env)->GetMethodID(env, cuda_memory_class, "getAddress", "()J");
  get_size_method = (*env)->GetMethodID(env, cuda_memory_class, "getSize", "()J");
  get_heap_end_method = (*env)->GetMethodID(env, cuda_memory_class, "getHeapEndPtr", "()J");

  cpu_object_mem = (void *) (*env)->CallLongMethod(env, object_mem, get_address_method);
  cpu_object_mem_size = (*env)->CallLongMethod(env, object_mem, get_size_method);
  cpu_heap_end = (*env)->CallLongMethod(env, object_mem, get_heap_end_method);

  cpu_handles_mem = (void *) (*env)->CallLongMethod(env, handles_mem, get_address_method);
  cpu_handles_mem_size = (*env)->CallLongMethod(env, handles_mem, get_size_method);

  cpu_exceptions_mem = (void *) (*env)->CallLongMethod(env, exceptions_mem, get_address_method);
  cpu_exceptions_mem_size = (*env)->CallLongMethod(env, exceptions_mem, get_size_method);

  cpu_class_mem = (void *) (*env)->CallLongMethod(env, class_mem, get_address_method);
  cpu_class_mem_size = (*env)->CallLongMethod(env, class_mem, get_size_method);

  info_space_size = 1024;
  info_space = (jlong *) malloc(info_space_size);
  info_space[1] = (*env)->CallLongMethod(env, object_mem, get_heap_end_method);

  //----------------------------------------------------------------------------
  //allocate mem
  //----------------------------------------------------------------------------
  status = cuMemAlloc(&gpu_info_space, info_space_size);  
  CHECK_STATUS(env, "Error in cuMemAlloc: gpu_info_mem", status, device)

  status = cuMemAlloc(&gpu_object_mem, cpu_object_mem_size);  
  CHECK_STATUS(env, "Error in cuMemAlloc: gpu_object_mem", status, device)

  status = cuMemAlloc(&gpu_handles_mem, cpu_handles_mem_size); 
  CHECK_STATUS(env, "Error in cuMemAlloc: gpu_handles_mem", status, device)
    
  status = cuMemAlloc(&gpu_exceptions_mem, cpu_exceptions_mem_size); 
  CHECK_STATUS(env, "Error in cuMemAlloc: gpu_exceptions_mem", status, device)

  status = cuMemAlloc(&gpu_class_mem, cpu_class_mem_size);
  CHECK_STATUS(env, "Error in cuMemAlloc: gpu_class_mem", status, device)

  status = cuMemAlloc(&gpu_heap_end, 8);
  CHECK_STATUS(env, "Error in cuMemAlloc: gpu_heap_end", status, device)

  status = cuMemAlloc(&gpu_buffer_size, 8);
  CHECK_STATUS(env, "Error in cuMemAlloc: gpu_buffer_size", status, device)

  //----------------------------------------------------------------------------
  //set function parameters
  //----------------------------------------------------------------------------
  status = cuParamSetSize(function, (7 * sizeof(CUdeviceptr) + sizeof(int))); 
  CHECK_STATUS(env, "Error in cuParamSetSize", status, device)

  offset = 0;
  status = cuParamSetv(function, offset, (void *) &gpu_info_space, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env, "Error in cuParamSetv gpu_info_space", status, device)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(function, offset, (void *) &gpu_object_mem, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env, "Error in cuParamSetv: gpu_object_mem", status, device)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(function, offset, (void *) &gpu_handles_mem, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env, "Error in cuParamSetv: gpu_handles_mem %", status, device)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(function, offset, (void *) &gpu_heap_end, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env, "Error in cuParamSetv: gpu_heap_end", status, device)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(function, offset, (void *) &gpu_buffer_size, sizeof(CUdeviceptr));
  CHECK_STATUS(env, "Error in cuParamSetv: gpu_buffer_size", status, device)
  offset += sizeof(CUdeviceptr); 

  status = cuParamSetv(function, offset, (void *) &gpu_exceptions_mem, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env, "Error in cuParamSetv: gpu_exceptions_mem", status, device)
  offset += sizeof(CUdeviceptr);

  status = cuParamSetv(function, offset, (void *) &gpu_class_mem, sizeof(CUdeviceptr)); 
  CHECK_STATUS(env, "Error in cuParamSetv: gpu_class_mem", status, device)
  offset += sizeof(CUdeviceptr);

  status = cuParamSeti(function, offset, num_threads); 
  CHECK_STATUS(env, "Error in cuParamSetv: num_threads", status, device)
  offset += sizeof(int);

  //----------------------------------------------------------------------------
  //copy data
  //----------------------------------------------------------------------------
  status = cuMemcpyHtoD(gpu_info_space, info_space, info_space_size);
  CHECK_STATUS(env, "Error in cuMemcpyHtoD: info_space", status, device)

  status = cuMemcpyHtoD(gpu_object_mem, cpu_object_mem, cpu_object_mem_size);
  CHECK_STATUS(env, "Error in cuMemcpyHtoD: gpu_object_mem", status, device)

  status = cuMemcpyHtoD(gpu_handles_mem, cpu_handles_mem, cpu_handles_mem_size);
  CHECK_STATUS(env, "Error in cuMemcpyHtoD: gpu_handles_mem", status, device)

  status = cuMemcpyHtoD(gpu_class_mem, cpu_class_mem, cpu_class_mem_size);
  CHECK_STATUS(env, "Error in cuMemcpyHtoD: gpu_class_mem", status, device)

  status = cuMemcpyHtoD(gpu_heap_end, &cpu_heap_end, sizeof(jlong));
  CHECK_STATUS(env, "Error in cuMemcpyHtoD: gpu_heap_end", status, device)

  status = cuMemcpyHtoD(gpu_buffer_size, &cpu_object_mem_size, sizeof(jlong));
  CHECK_STATUS(env, "Error in cuMemcpyHtoD: gpu_buffer_size", status, device)

  status = cuMemcpyHtoD(gpu_exceptions_mem, cpu_exceptions_mem, cpu_exceptions_mem_size);
  CHECK_STATUS(env, "Error in cuMemcpyDtoH: gpu_exceptions_mem", status, device)

  //----------------------------------------------------------------------------
  //launch
  //----------------------------------------------------------------------------
  status = cuFuncSetBlockShape(function, block_shape_x, 1, 1);
  CHECK_STATUS(env, "Error in cuFuncSetBlockShape", status, device);

  status = cuLaunchGrid(function, grid_shape_x, 1);
  CHECK_STATUS(env, "Error in cuLaunchGrid", status, device)

  status = cuCtxSynchronize();  
  CHECK_STATUS(env, "Error in cuCtxSynchronize", status, device)

  //----------------------------------------------------------------------------
  //copy data back
  //----------------------------------------------------------------------------
  status = cuMemcpyDtoH(info_space, gpu_info_space, info_space_size);
  CHECK_STATUS(env, "Error in cuMemcpyDtoH: gpu_info_space", status, device)

  cpu_heap_end = info_space[1];

  status = cuMemcpyDtoH(cpu_object_mem, gpu_object_mem, cpu_heap_end);
  CHECK_STATUS(env, "Error in cuMemcpyDtoH: gpu_object_mem", status, device)

  status = cuMemcpyDtoH(cpu_exceptions_mem, gpu_exceptions_mem, cpu_exceptions_mem_size);
  CHECK_STATUS(env, "Error in cuMemcpyDtoH: gpu_exceptions_mem", status, device)

  //----------------------------------------------------------------------------
  //free resources
  //----------------------------------------------------------------------------
  free(info_space);

  cuMemFree(gpu_info_space);
  cuMemFree(gpu_object_mem);
  cuMemFree(gpu_handles_mem);
  cuMemFree(gpu_exceptions_mem);
  cuMemFree(gpu_class_mem);
  cuMemFree(gpu_heap_end);
  cuMemFree(gpu_buffer_size);

  cuCtxDestroy(context);
}