// Allocates a device array of `size` realw values and uploads the host data into it.
//
//   d_array_addr_ptr : receives the device handle (member `ocl` on the OpenCL path,
//                      member `cuda` on the CUDA path)
//   h_array          : host array holding at least `size` realw values
//   size             : number of elements (not bytes)
void gpuCreateCopy_todevice_realw (gpu_realw_mem *d_array_addr_ptr, realw *h_array, int size) {

  TRACE ("gpuCreateCopy_todevice_realw");

#ifdef USE_OPENCL
  if (run_opencl) {
    const size_t nbytes = size * sizeof (realw);
    cl_int errcode;

    // device allocation: read/write buffer, no host pointer attached
    d_array_addr_ptr->ocl = clCreateBuffer (mocl.context, CL_MEM_READ_WRITE, nbytes, NULL, clck_(&errcode));

    // host -> device transfer; CL_TRUE requests a blocking write
    clCheck (clEnqueueWriteBuffer (mocl.command_queue, d_array_addr_ptr->ocl, CL_TRUE, 0,
                                   nbytes, h_array, 0, NULL, NULL));
  }
#endif
#ifdef USE_CUDA
  if (run_cuda) {
    const size_t nbytes = size * sizeof (realw);

    // device allocation
    print_CUDA_error_if_any (cudaMalloc ((void**) &d_array_addr_ptr->cuda, nbytes), 22001);

    // host -> device transfer
    print_CUDA_error_if_any (cudaMemcpy ((realw*) d_array_addr_ptr->cuda, h_array, nbytes,
                                         cudaMemcpyHostToDevice), 22002);
  }
#endif
}
// Allocates a device array of `size` integers and uploads the host data into it.
//
//   d_array_addr_ptr : receives the device handle (member `ocl` on the OpenCL path,
//                      member `cuda` on the CUDA path)
//   h_array          : host array holding at least `size` integers
//   size             : number of elements (not bytes)
void gpuCreateCopy_todevice_int (gpu_int_mem *d_array_addr_ptr, int *h_array, int size) {

  TRACE ("gpuCreateCopy_todevice_int");

#ifdef USE_OPENCL
  if (run_opencl) {
    const size_t nbytes = size * sizeof (int);
    cl_int errcode;

    // device allocation: read/write buffer, no host pointer attached
    d_array_addr_ptr->ocl = clCreateBuffer (mocl.context, CL_MEM_READ_WRITE, nbytes, NULL, clck_(&errcode));

    // host -> device transfer; CL_TRUE requests a blocking write
    clCheck (clEnqueueWriteBuffer (mocl.command_queue, d_array_addr_ptr->ocl, CL_TRUE, 0,
                                   nbytes, h_array, 0, NULL, NULL));
  }
#endif
#ifdef USE_CUDA
  if (run_cuda) {
    const size_t nbytes = size * sizeof (int);

    // device allocation
    //
    // cudaMalloc reports its status through the return value, so the resulting
    // device address comes back through a pointer-to-pointer argument; we hand it
    // the address of our handle, viewed as void**
    print_CUDA_error_if_any (cudaMalloc ((void**) &d_array_addr_ptr->cuda, nbytes), 12001);

    // host -> device transfer
    //
    // cudaMemcpy wants the device pointer itself, hence the handle is passed by
    // value here (cast to the element pointer type)
    print_CUDA_error_if_any (cudaMemcpy ((int*) d_array_addr_ptr->cuda, h_array, nbytes,
                                         cudaMemcpyHostToDevice), 12002);
  }
#endif
}
// Creates on demand, or releases, the OpenCL kernel "memset_uint4".
//
//   do_setup != 0 : on the first call (or the first call after a release) builds
//                   the program from memset_kern_code and creates the kernel;
//                   further calls reuse the existing kernel
//   do_setup == 0 : releases the kernel if one is currently held
//
// Returns the address of the function-static kernel handle. The handle refers to
// a live kernel only between a setup call and the next release call.
cl_kernel *setup_ocl_memset (int do_setup) {

  static int inited = 0;
  static cl_kernel memset_kern;

  if (do_setup) {
    if (!inited) {
      cl_int errcode;

      // creates openCL kernel
      cl_program memset_program = clCreateProgramWithSource (mocl.context, 1,
                                                             memset_kern_code, NULL,
                                                             clck_(&errcode));
      clCheck (clBuildProgram (memset_program, 0, NULL, NULL, NULL, NULL));
      memset_kern = clCreateKernel (memset_program, "memset_uint4", clck_(&errcode));

      // drops our reference to the program: OpenCL keeps the program object
      // alive until the kernels created from it are released, so holding on to
      // it here would only leak it
      clCheck (clReleaseProgram (memset_program));

      inited = 1;
    }
  } else if (inited) {
    // releases kernel
    clCheck (clReleaseKernel (memset_kern));
    // forgets the released handle, so that a repeated teardown does not release
    // it twice and a later setup call builds a fresh kernel
    inited = 0;
  }

  return &memset_kern;
}
// Example no. 4
// 0
void *ocl_setParameterValue (struct ld_kernel_s *ldKernel,
                             struct ld_kern_param_s *ldParam,
                             void *buffer,  size_t size)
{
  cl_mem mem_obj = (void *) -1;
  cl_int errcode_ret;

  if (ldParam->is_pointer) {
    DEFAULT_SIZE(size)
    
    mem_obj = real_clCreateBuffer(ldOclEnv.context, CL_MEM_READ_WRITE, size, NULL, clck_(&errcode_ret));

    clCheck(real_clEnqueueWriteBuffer(ldOclEnv.command_queue,
                                      (cl_mem) mem_obj,
                                      CL_TRUE,
                                      0, size, buffer,
                                      0, NULL, NULL));
    buffer = &mem_obj;
    size = sizeof(cl_mem);
  }

  if (size == 0 && strstr(ldParam->name, "_tex")) {
    cl_image_format format = {CL_R, CL_UNSIGNED_INT32};
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
    mem_obj = clCreateImage2D (ldOclEnv.context, CL_MEM_READ_ONLY, &format, 100, 1, 0, &format, clck_(&errcode_ret));
#pragma GCC diagnostic pop
    buffer = &mem_obj;
    size = sizeof(cl_mem);
  }
  
  clCheck(real_clSetKernelArg ((cl_kernel) ldKernel->handle, ldParam->index,
                               size, buffer));

  return mem_obj;
    
}
// setup functions
// Allocates a device array of `size` realw values and fills it from `array`.
//
//   buffer : receives the device handle (member `ocl` on the OpenCL path,
//            member `cuda` on the CUDA path)
//   size   : number of elements (not bytes)
//   array  : host array holding at least `size` realw values
//
// On the OpenCL path the buffer is created with CL_MEM_READ_ONLY.
void gpuSetConst (gpu_realw_mem *buffer, size_t size, realw *array) {

  TRACE ("gpuSetConst");

  // allocates array on GPU
#ifdef USE_OPENCL
  if (run_opencl) {
    const size_t nbytes = size * sizeof(realw);
    cl_int errcode;

    buffer->ocl = clCreateBuffer (mocl.context, CL_MEM_READ_ONLY, nbytes, NULL, clck_(&errcode));

    // copies values onto GPU (blocking write)
    clCheck (clEnqueueWriteBuffer (mocl.command_queue, buffer->ocl, CL_TRUE, 0, nbytes, array, 0, NULL, NULL));
  }
#endif
#ifdef USE_CUDA
  if (run_cuda) {
    const size_t nbytes = size * sizeof(realw);

    // cudaMalloc is prototyped with a void** argument; the explicit cast keeps
    // this call valid when the file is compiled as plain C
    print_CUDA_error_if_any(cudaMalloc((void**)&buffer->cuda, nbytes), 1400);

    // copies values onto GPU
    print_CUDA_error_if_any(cudaMemcpy(buffer->cuda, array, nbytes, cudaMemcpyHostToDevice),1401);
  }
#endif
}
// Allocates a device array of `size` integers; no data is copied into it.
//
//   buffer : receives the device handle (member `ocl` on the OpenCL path,
//            member `cuda` on the CUDA path)
//   size   : number of elements (not bytes)
void gpuMalloc_int (gpu_int_mem *buffer, int size) {

  TRACE ("gpuMalloc_int");

#ifdef USE_OPENCL
  if (run_opencl) {
    const size_t nbytes = size * sizeof(int);
    cl_int errcode;

    // read/write device buffer, no host pointer attached
    buffer->ocl = clCreateBuffer (mocl.context, CL_MEM_READ_WRITE, nbytes, NULL, clck_(&errcode));
  }
#endif
#ifdef USE_CUDA
  if (run_cuda) {
    const size_t nbytes = size * sizeof(int);

    print_CUDA_error_if_any (cudaMalloc ((void**) &buffer->cuda, nbytes), 44003);
  }
#endif
}