// copies real array from CPU host to GPU device void gpuCreateCopy_todevice_realw (gpu_realw_mem *d_array_addr_ptr, realw *h_array, int size) { TRACE ("gpuCreateCopy_todevice_realw"); // allocates memory on GPU #ifdef USE_OPENCL if (run_opencl) { cl_int errcode; d_array_addr_ptr->ocl = clCreateBuffer (mocl.context, CL_MEM_READ_WRITE, size * sizeof (realw), NULL, clck_(&errcode)); // copies values onto GPU clCheck (clEnqueueWriteBuffer (mocl.command_queue, d_array_addr_ptr->ocl, CL_TRUE, 0, size * sizeof (realw), h_array, 0, NULL, NULL)); } #endif #ifdef USE_CUDA if (run_cuda) { // allocates memory on GPU print_CUDA_error_if_any(cudaMalloc((void**)&d_array_addr_ptr->cuda,size*sizeof(realw)),22001); // copies values onto GPU print_CUDA_error_if_any(cudaMemcpy((realw*) d_array_addr_ptr->cuda,h_array,size*sizeof(realw),cudaMemcpyHostToDevice),22002); } #endif }
// copies integer array from CPU host to GPU device void gpuCreateCopy_todevice_int (gpu_int_mem *d_array_addr_ptr, int *h_array, int size) { TRACE ("gpuCreateCopy_todevice_int"); #ifdef USE_OPENCL if (run_opencl) { cl_int errcode; // allocates memory on GPU d_array_addr_ptr->ocl = clCreateBuffer (mocl.context, CL_MEM_READ_WRITE, size * sizeof (int), NULL, clck_(&errcode)); // copies values onto GPU clCheck (clEnqueueWriteBuffer (mocl.command_queue, d_array_addr_ptr->ocl, CL_TRUE, 0, size*sizeof (int), h_array, 0, NULL, NULL)); } #endif #ifdef USE_CUDA if (run_cuda) { // allocates memory on GPU // // note: cudaMalloc uses a double-pointer, such that it can return an error code in case it fails // we thus pass the address to the pointer above (as void double-pointer) to have it // pointing to the correct pointer of the array here print_CUDA_error_if_any(cudaMalloc((void**)&d_array_addr_ptr->cuda,size*sizeof(int)),12001); // copies values onto GPU // // note: cudaMemcpy uses the pointer to the array, we thus re-cast the value of // the double-pointer above to have the correct pointer to the array print_CUDA_error_if_any(cudaMemcpy((int*) d_array_addr_ptr->cuda,h_array,size*sizeof(int),cudaMemcpyHostToDevice),12002); } #endif }
cl_kernel *setup_ocl_memset (int do_setup) { static int inited = 0; static cl_kernel memset_kern; cl_int errcode; if (do_setup) { if (!inited) { // creates openCL kernel cl_program memset_program = clCreateProgramWithSource(mocl.context, 1, memset_kern_code, 0, clck_(&errcode)); clCheck (clBuildProgram (memset_program, 0, NULL, NULL, NULL, NULL)); memset_kern = clCreateKernel (memset_program, "memset_uint4", clck_(&errcode)); inited = 1; } } else { // releases kernel if (inited) { clCheck(clReleaseKernel (memset_kern)); } } return &memset_kern; }
void *ocl_setParameterValue (struct ld_kernel_s *ldKernel, struct ld_kern_param_s *ldParam, void *buffer, size_t size) { cl_mem mem_obj = (void *) -1; cl_int errcode_ret; if (ldParam->is_pointer) { DEFAULT_SIZE(size) mem_obj = real_clCreateBuffer(ldOclEnv.context, CL_MEM_READ_WRITE, size, NULL, clck_(&errcode_ret)); clCheck(real_clEnqueueWriteBuffer(ldOclEnv.command_queue, (cl_mem) mem_obj, CL_TRUE, 0, size, buffer, 0, NULL, NULL)); buffer = &mem_obj; size = sizeof(cl_mem); } if (size == 0 && strstr(ldParam->name, "_tex")) { cl_image_format format = {CL_R, CL_UNSIGNED_INT32}; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" mem_obj = clCreateImage2D (ldOclEnv.context, CL_MEM_READ_ONLY, &format, 100, 1, 0, &format, clck_(&errcode_ret)); #pragma GCC diagnostic pop buffer = &mem_obj; size = sizeof(cl_mem); } clCheck(real_clSetKernelArg ((cl_kernel) ldKernel->handle, ldParam->index, size, buffer)); return mem_obj; }
// setup functions void gpuSetConst (gpu_realw_mem *buffer, size_t size, realw *array) { TRACE ("gpuSetConst"); // allocates array on GPU #ifdef USE_OPENCL if (run_opencl) { cl_int errcode; buffer->ocl = clCreateBuffer (mocl.context, CL_MEM_READ_ONLY, size * sizeof(realw), NULL, clck_(&errcode)); clCheck (clEnqueueWriteBuffer (mocl.command_queue, buffer->ocl, CL_TRUE, 0, size * sizeof(realw), array, 0, NULL, NULL)); } #endif #ifdef USE_CUDA if (run_cuda) { print_CUDA_error_if_any(cudaMalloc(&buffer->cuda, size * sizeof(realw)), 1400); print_CUDA_error_if_any(cudaMemcpy(buffer->cuda, array, size * sizeof(realw), cudaMemcpyHostToDevice),1401); } #endif }
// creates double array on GPU void gpuMalloc_int (gpu_int_mem *buffer, int size) { TRACE ("gpuMalloc_int"); // allocates array on GPU #ifdef USE_OPENCL if (run_opencl) { cl_int errcode; buffer->ocl = clCreateBuffer (mocl.context, CL_MEM_READ_WRITE, size * sizeof(int), NULL, clck_(&errcode)); } #endif #ifdef USE_CUDA if (run_cuda) { print_CUDA_error_if_any(cudaMalloc((void**)&buffer->cuda, size * sizeof(int)), 44003); } #endif }