int32_t execute_cl(const CLKernelInfo& clkernelinfo, size_t cq_i){ cl_int err; CLInfo* clinfo = CLInfo::instance(); if (!clinfo->initialized() || !clinfo->has_command_queue(cq_i)) return -1; //enqueue the kernel command for execution if (clkernelinfo.work_dim < 1 || clkernelinfo.work_dim > 3) { std::cerr << "Kernel work dimensions not set" << std::endl; return 1; } if (clkernelinfo.arg_count < 0) { std::cerr << "Kernel argument count not set" << std::endl; return 1; } for (int8_t i = 0; i < clkernelinfo.work_dim; ++i) { if (clkernelinfo.global_work_size[i] <= 0) { std::cerr << "Kernel global work size not set" << std::endl; return 1; } } bool local_size_set = true; for (int8_t i = 0; i < clkernelinfo.work_dim; ++i) local_size_set = local_size_set && (clkernelinfo.local_work_size[i] > 0); // std::cerr << "Enqueueing the kernel command for execution" << std::endl; if (local_size_set) err = clEnqueueNDRangeKernel(clinfo->get_command_queue(cq_i), clkernelinfo.kernel, clkernelinfo.work_dim, clkernelinfo.global_work_offset, clkernelinfo.global_work_size, clkernelinfo.local_work_size, 0,NULL,NULL); else err = clEnqueueNDRangeKernel(clinfo->get_command_queue(cq_i), clkernelinfo.kernel, clkernelinfo.work_dim, clkernelinfo.global_work_offset, clkernelinfo.global_work_size, NULL, 0,NULL,NULL); if (error_cl(err, "clEnqueueNDRangeKernel")) return 1; /* finish command queue */ err = clFinish(clinfo->get_command_queue(cq_i)); if (error_cl(err, "clFinish")) return 1; return 0; }
int create_cl_mem_from_gl_tex(const GLuint gl_tex, cl_mem* mem, size_t cq_i) { CLInfo* clinfo = CLInfo::instance(); if (!clinfo->initialized() || !clinfo->has_command_queue(cq_i)) return -1; cl_int err; *mem = clCreateFromGLTexture2D(clinfo->context, CL_MEM_READ_WRITE, GL_TEXTURE_2D,0, gl_tex,&err); if (error_cl(err, "clCreateFromGLTexture2D")) return 1; err = clEnqueueAcquireGLObjects(clinfo->get_command_queue(cq_i), 1, mem, 0,0,0); if (error_cl(err, "clEnqueueAcquireGLObjects")) return 1; err = clFinish(clinfo->get_command_queue(cq_i)); if (error_cl(err, "clFinish")) return 1; return 0; }
int32_t copy_from_cl_mem(uint32_t size, void* values, cl_mem& mem, uint32_t offset, size_t cq_i) { CLInfo* clinfo = CLInfo::instance(); if (!clinfo->initialized() || !clinfo->has_command_queue(cq_i)) return -1; cl_int err; err = clEnqueueReadBuffer(clinfo->get_command_queue(cq_i), mem, true, /* Blocking write */ offset, /* Offset */ size, /* Bytes to read */ values, /* Pointer to write to */ 0, /*Not using event lists for now*/ NULL, NULL); if (error_cl(err, "clEnqueueReadBuffer")) return 1; err = clFinish(clinfo->get_command_queue(cq_i)); if (error_cl(err, "clFinsish")) return 1; return 0; }
void print_cl_image_2d_info(const cl_mem& mem) { // Query image created from texture CLInfo* clinfo = CLInfo::instance(); if (!clinfo->initialized()) return; cl_int err; size_t img_width, img_height, img_depth; cl_image_format img_format; size_t bytes_written; err = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(size_t), &img_width, &bytes_written); if (error_cl(err, "clGetImageInfo CL_IMAGE_WIDTH")) return; err = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(size_t), &img_height, &bytes_written); if (error_cl(err, "clGetImageInfo CL_IMAGE_HEIGHT")) return; err = clGetImageInfo(mem, CL_IMAGE_DEPTH, sizeof(size_t), &img_depth, &bytes_written); if (error_cl(err, "clGetImageInfo CL_IMAGE_DEPTH")) return; err = clGetImageInfo(mem, CL_IMAGE_FORMAT, sizeof(cl_image_format), &img_format, &bytes_written); if (error_cl(err, "clGetImageInfo CL_IMAGE_FORMAT")) return; std::cout << "OpenCL image from texture info: " << std::endl; std::cout << "\tTexture image width: " << img_width << std::endl; std::cout << "\tTexture image height: " << img_height << std::endl; std::cout << "\tTexture image depth: " << img_depth << std::endl; std::cout << "\tTexture image channel order: " << img_format.image_channel_order << std::endl; std::cout << "\tTexture image channel data type: " << img_format.image_channel_data_type << std::endl; }
void print_cl_mem_info(const cl_mem& mem){ CLInfo* clinfo = CLInfo::instance(); if (!clinfo->initialized()) return; size_t mem_size, bytes_written; cl_mem_object_type mem_type; cl_int err; err = clGetMemObjectInfo(mem, CL_MEM_SIZE, sizeof(size_t), &mem_size, &bytes_written); if (error_cl(err, "clGetMemObjectInfo CL_MEM_SIZE")) return; std::cerr << "CL mem object size: " << mem_size << std::endl; err = clGetMemObjectInfo(mem, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, &bytes_written); if (error_cl(err, "clGetMemObjectInfo CL_MEM_TYPE")) return; std::cerr << "CL mem type: "; switch (mem_type) { case CL_MEM_OBJECT_BUFFER: std::cerr << "CL_MEM_OBJECT_BUFFER" << std::endl; break; case CL_MEM_OBJECT_IMAGE2D: std::cerr << "CL_MEM_OBJECT_IMAGE2D" << std::endl; break; case CL_MEM_OBJECT_IMAGE3D: std::cerr << "CL_MEM_OBJECT_IMAGE3D" << std::endl; break; default: std::cerr << "UNKNOWN" << std::endl; break; } }
void* extra_thread_function(void* arg) { (void)arg; static int i = 0; static int dir = 1; std::cout << "In extra thread!!\n" ; print_cl_mem_info(cl_buf_mem); std::cout << "Returned extra thread!!\n" ; while (1) { cl_int err; pthread_barrier_wait(&thread_barrier); cl_int arg = i%STEPS; err = clSetKernelArg(clkernelinfo.kernel,1,sizeof(cl_int),&arg); if (error_cl(err, "clSetKernelArg 1")) exit(1); execute_cl(clkernelinfo); ////////////////// Immediate mode textured quad pthread_barrier_wait(&thread_barrier); i+= dir; if (!(i % (STEPS-1))) dir *= -1; } return NULL; }
int32_t release_gl_tex(cl_mem& tex_mem, size_t cq_i) { CLInfo* clinfo = CLInfo::instance(); if (!clinfo->initialized() || !clinfo->has_command_queue(cq_i)) return -1; cl_int err; err = clEnqueueReleaseGLObjects (clinfo->get_command_queue(cq_i), 1, &tex_mem, 0, NULL, NULL); if (error_cl(err, "clEnqueueReleaseGLObjects ")) return -1; err = clFinish(clinfo->get_command_queue(cq_i)); if (error_cl(err, "clEnqueueReleaseGLObjects -> clFinish")) return -1; return 0; }
size_t cl_mem_size(const cl_mem& mem) { size_t mem_size, bytes_written; cl_int err; err = clGetMemObjectInfo(mem, CL_MEM_SIZE, sizeof(size_t), &mem_size, &bytes_written); if (error_cl(err, "clGetMemObjectInfo CL_MEM_SIZE")) return -1; return mem_size; }
int32_t create_empty_cl_mem(cl_mem_flags flags, uint32_t size, cl_mem* mem) { CLInfo* clinfo = CLInfo::instance(); if (!clinfo->initialized()) return -1; cl_int err; *mem = clCreateBuffer(clinfo->context, flags, size, NULL, &err); if (error_cl(err, "clCreateBuffer")) return 1; return 0; }
int32_t create_host_cl_mem(cl_mem_flags flags, uint32_t size, void* ptr, cl_mem* mem) { CLInfo* clinfo = CLInfo::instance(); if (!clinfo->initialized()) return -1; cl_int err; *mem = clCreateBuffer(clinfo->context, flags | CL_MEM_USE_HOST_PTR, size, ptr, //This is ugly but necessary // Kronos, why u not have a const variant ?!?! &err); if (error_cl(err, "clCreateBuffer")) return 1; return 0; }
cl_int CLInfo::initialize(size_t requested_command_queues) { if (m_initialized || requested_command_queues < 1 || requested_command_queues > 1024) return CL_DEVICE_NOT_FOUND; GLInfo* glinfo = GLInfo::instance(); if (!glinfo->initialized()) return CL_DEVICE_NOT_FOUND; cl_int err; size_t bytes_returned; //retrieve the list of platforms available std::cout << "Retrieving the list of platforms available" << std::endl; err = clGetPlatformIDs(1, &platform_id, &num_of_platforms); if (error_cl(err, "glGetPlatformIDs")) return err; //try to get a supported gpu device std::cout << "Trying to get a supported gpu device" << std::endl; err = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &num_of_devices); if (error_cl(err, "clGetDeviceIDs")) return err; cl_int prop_count = 0; #ifdef _WIN32 properties[prop_count++] = CL_CONTEXT_PLATFORM; properties[prop_count++] = (cl_context_properties) platform_id; properties[prop_count++] = CL_GL_CONTEXT_KHR; properties[prop_count++] = (cl_context_properties)glinfo->renderingContext; properties[prop_count++] = CL_WGL_HDC_KHR; properties[prop_count++] = (cl_context_properties)glinfo->deviceContext; properties[prop_count++] = 0; #elif defined __linux__ properties[prop_count++] = CL_CONTEXT_PLATFORM; properties[prop_count++] = (cl_context_properties)platform_id; properties[prop_count++] = CL_GL_CONTEXT_KHR; properties[prop_count++] = (cl_context_properties)glinfo->renderingContext; properties[prop_count++] = CL_GLX_DISPLAY_KHR; properties[prop_count++] = (cl_context_properties)glinfo->renderingDisplay; properties[prop_count++] = 0; #else #error "UNKNOWN PLATFORM" #endif //properties[0] = CL_CONTEXT_PLATFORM; //properties[1] = (cl_context_properties) platform_id; //properties[2] = 0; //create a context with the GPU device std::cerr << "Creating a context with the GPU device" << std::endl; context = clCreateContext (properties,1,&device_id,NULL,NULL,&err); if (error_cl(err, "clCreateContext")) return err; //create command queues using the context and device std::cerr << "Creating command queues using the context and device" << std::endl; for (size_t i = 0; i < requested_command_queues; ++i) { cl_command_queue cq = clCreateCommandQueue(context, device_id, 0, //CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); if (error_cl(err, "clCreateCommandQueue")) return err; command_queues.push_back(cq); } // Get size of global memory in the device err = clGetDeviceInfo (device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &global_mem_size, &bytes_returned); if (error_cl(err, "clGetDeviceInfo CL_DEVICE_GLOBAL_MEM_SIZE")) return err; // Get size of local memory in the device err = clGetDeviceInfo (device_id, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &local_mem_size, &bytes_returned); if (error_cl(err, "clGetDeviceInfo CL_DEVICE_LOCAL_MEM_SIZE")) return err; // Get bool stating if cl device supports images err = clGetDeviceInfo (device_id, CL_DEVICE_IMAGE_SUPPORT, sizeof(cl_bool), &image_support, &bytes_returned); if (error_cl(err, "clGetDeviceInfo CL_DEVICE_IMAGE_SUPPORT")) return err; // Get max image2d height err = clGetDeviceInfo (device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &image2d_max_height, &bytes_returned); if (error_cl(err, "clGetDeviceInfo CL_DEVICE_IMAGE2D_MAX_HEIGHT")) return err; // Get max image2d width err = clGetDeviceInfo (device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &image2d_max_width, &bytes_returned); if (error_cl(err, "clGetDeviceInfo CL_DEVICE_IMAGE2D_MAX_WIDTH")) return err; // Get amount of compute units in the device err = clGetDeviceInfo (device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &max_compute_units, &bytes_returned); if (error_cl(err, "clGetDeviceInfo CL_DEVICE_MAX_COMPUTE_UNITS")) return err; // Get maximum size of global mem objects for the device err = clGetDeviceInfo (device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &max_mem_alloc_size, &bytes_returned); if (error_cl(err, "clGetDeviceInfo CL_DEVICE_MAX_MEM_ALLOC_SIZE")) return err; // Get maximum size of a work group err = clGetDeviceInfo (device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group_size, &bytes_returned); if (error_cl(err, "clGetDeviceInfo CL_DEVICE_MAX_WORK_GROUP_SIZE")) return err; // Get maximum number of items that can be specified in each dimension err = clGetDeviceInfo (device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_work_item_sizes), &max_work_item_sizes, &bytes_returned); if (error_cl(err, "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES")) return err; m_initialized = true; return CL_SUCCESS; }
int32_t init_cl_kernel(const char* kernel_file, std::string kernel_name, CLKernelInfo* clkernelinfo) { cl_int err; CLInfo* clinfo = CLInfo::instance(); if (!clinfo->initialized()) return -1; clkernelinfo->clinfo = clinfo; //create a program from the kernel source code std::ifstream kernel_source_file(kernel_file); if (!kernel_source_file.good()){ std::cerr << "Error: could not read kernel file." << std::endl; return 1; } std::streampos file_size; kernel_source_file.seekg (0, std::ios::beg); file_size = kernel_source_file.tellg(); kernel_source_file.seekg (0, std::ios::end); file_size = kernel_source_file.tellg() - file_size; char *kernel_source = new char[(int)file_size+1]; std::memset(kernel_source,0,file_size); kernel_source_file.seekg (0, std::ios::beg); kernel_source_file.read(kernel_source,file_size); kernel_source_file.close(); kernel_source[file_size] = 0; clkernelinfo->program = clCreateProgramWithSource(clinfo->context,1, (const char**)&kernel_source,NULL,&err); delete[] kernel_source; if (error_cl(err, "clCreateProgramWithSource")) return 1; err = clBuildProgram(clkernelinfo->program, 0, NULL, NULL, NULL, NULL); if (error_cl(err, "clBuildProgram")){ char build_log[8196]; size_t bytes_returned; err = clGetProgramBuildInfo (clkernelinfo->program, clkernelinfo->clinfo->device_id, CL_PROGRAM_BUILD_LOG, sizeof(build_log), build_log, &bytes_returned); if (!error_cl(err, "clGetProgramBuildInfo")){ std::cerr << "Program build error log:" << std::endl << build_log << std::endl; } return 1; } //specify which kernel from the program to execute clkernelinfo->kernel = clCreateKernel(clkernelinfo->program, kernel_name.c_str(),&err); if (error_cl(err, "clCreateKernel")) return 1; return 0; }
void CLInfo::print_info() { if (!m_initialized) return; cl_int err; cl_uint max_samplers; char device_vendor[128], device_name[128], device_cl_version[128], c_version[128]; size_t bytes_returned; err = clGetDeviceInfo (device_id, CL_DEVICE_VENDOR, sizeof(device_vendor), device_vendor, &bytes_returned); if (error_cl(err, "clGetDeviceInfo CL_DEVICE_VENDOR")) return; err = clGetDeviceInfo (device_id, CL_DEVICE_VERSION, sizeof(device_cl_version), device_cl_version, &bytes_returned); if (error_cl(err, "clGetDeviceInfo CL_DEVICE_VERSION")) return; err = clGetDeviceInfo (device_id, CL_DEVICE_NAME, sizeof(device_name), device_name, &bytes_returned); if (error_cl(err, "clGetDeviceInfo CL_DEVICE_NAME")) return; err = clGetDeviceInfo (device_id, CL_DEVICE_OPENCL_C_VERSION, sizeof(c_version), c_version, &bytes_returned); if (error_cl(err, "clGetDeviceInfo CL_DEVICE_OPENCL_C_VERSION")) return; err = clGetDeviceInfo (device_id, CL_DEVICE_MAX_SAMPLERS, sizeof(max_samplers), &max_samplers, &bytes_returned); if (error_cl(err, "clGetDeviceInfo CL_DEVICE_MAX_SAMPLERS")) return; std::cout << "OpenCL Info:" << std::endl; std::cout << "\tOpenCL device vendor: " << device_vendor << std::endl; std::cout << "\tOpenCL device name: " << device_name << std::endl; std::cout << "\tOpenCL device OpenCL version: " << device_cl_version << std::endl; std::cout << "\tOpenCL device OpenCL C version: " << c_version << std::endl; std::cout << "\tOpenCL device maximum sampler support: " << max_samplers << std::endl; std::cout << "\tOpenCL device global mem size: " << global_mem_size << std::endl; std::cout << "\tOpenCL device local mem size: " << local_mem_size << std::endl; std::cout << "\tOpenCL device maximum mem alloc size: " << max_mem_alloc_size << std::endl; std::cout << "\tOpenCL device image2d max size: " << image2d_max_width << " x " << image2d_max_width << std::endl; std::cout << "\tOpenCL device compute units: " << max_compute_units << std::endl; std::cout << "\tOpenCL device max work group size: " << max_work_group_size << std::endl; std::cout << "\tOpenCL device max work item sizes per dimension: " << max_work_item_sizes[0] << " " << max_work_item_sizes[1] << " " << max_work_item_sizes[2] << std::endl; std::cout << std::endl; }
int main(int argc, char** argv) { size_t window_size[] = {TEX_WIDTH, TEX_HEIGHT}; cl_int err; GLInfo* glinfo = GLInfo::instance(); if (glinfo->initialize(argc,argv, window_size, "OpenCL-OpenGL interop test") != 0){ std::cerr << "Failed to initialize GL" << std::endl; exit(1); } else { std::cout << "Initialized GL succesfully" << std::endl; } CLInfo* clinfo = CLInfo::instance(); if (clinfo->initialize() != CL_SUCCESS){ std::cerr << "Failed to initialize CL" << std::endl; exit(1); } else { std::cout << "Initialized CL succesfully" << std::endl; } clinfo->print_info(); ////////////// Create gl_tex and buf ///////////////// gl_tex = create_tex_gl(TEX_WIDTH,TEX_HEIGHT); gl_buf = create_buf_gl(1024); print_gl_tex_2d_info(gl_tex); ///////////// Create empty cl_mem if (create_empty_cl_mem(CL_MEM_READ_WRITE, 1024, &cl_buf_mem)) { std::cerr << "*** Error creating OpenCL buffer" << std::endl; } else { std::cerr << "Created OpenCl buffer succesfully" << std::endl; print_cl_mem_info(cl_buf_mem); } print_cl_mem_info(cl_buf_mem); ////////////// Create cl_mem from gl_tex if (create_cl_mem_from_gl_tex(gl_tex, &cl_tex_mem)) exit(1); print_cl_image_2d_info(cl_tex_mem); if (init_cl_kernel("src/kernel/clgl-test.cl", "Grad", &clkernelinfo)){ std::cerr << "Failed to initialize CL kernel" << std::endl; exit(1); } else { std::cerr << "Initialized CL kernel succesfully" << std::endl; } clkernelinfo.work_dim = 2; clkernelinfo.arg_count = 2; clkernelinfo.global_work_size[0] = TEX_WIDTH; clkernelinfo.global_work_size[1] = TEX_HEIGHT; std::cout << "Setting texture mem object argument for kernel" << std::endl; err = clSetKernelArg(clkernelinfo.kernel,0,sizeof(cl_mem),&cl_tex_mem); if (error_cl(err, "clSetKernelArg 0")) exit(1); glutKeyboardFunc(gl_key); glutDisplayFunc(gl_loop); glutIdleFunc(gl_loop); #ifdef __linux__ clock_gettime(CLOCK_MONOTONIC, &tp); #elif defined _WIN32 tp = snap_time(); #endif ///////////////////////////// Extra thread std::cout << "Creating thread!\n"; int ret; ret = pthread_create( &extra_thread, NULL, extra_thread_function, NULL); ret = pthread_barrier_init(&thread_barrier, NULL, 2); std::cout << std::endl; glutMainLoop(); return 0; }
void engine_render( Engine * e ) { world_recalculate_dirty( e->world ); cl_int err = 0; cl_int numObj = (cl_int)world_geo_n( e->world ); cl_int numVert = (cl_int)world_vert_n( e->world ); /*Zero old buffers*/ static char * empty_buf = NULL; if ( empty_buf == NULL ) empty_buf = calloc( 1, e->dim_internal[0]*e->dim_internal[1]*4*sizeof(float) ); err = clEnqueueWriteBuffer( e->queue, e->colour_buf, CL_TRUE, 0, e->dim_internal[0]*e->dim_internal[1]*4*sizeof(float), empty_buf, 0, NULL, NULL ); error_cl( __LINE__, err ); err = clEnqueueWriteBuffer( e->queue, e->light_buf, CL_TRUE, 0, e->dim_internal[0]*e->dim_internal[1]*4*sizeof(float), empty_buf, 0, NULL, NULL ); error_cl( __LINE__, err ); /*Recalculate buffers,if needed*/ if ( e->world->dirty ) { if ( e->objects_bb ) { err = clReleaseMemObject( e->objects_bb ); error_cl( __LINE__, err ); } if ( e->vertices ) { err = clReleaseMemObject( e->vertices ); error_cl( __LINE__, err ); } if ( e->objects ) { err = clReleaseMemObject( e->objects ); error_cl( __LINE__, err ); } e->objects_bb = clCreateBuffer( e->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float)*8*(unsigned)numObj, world_serialize_bb( e->world ), &err ); error_cl( __LINE__, err ); e->vertices = clCreateBuffer( e->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_float)*8*(unsigned)numVert, world_serialize_vert( e->world ), &err ); error_cl( __LINE__, err ); e->objects = clCreateBuffer( e->context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(cl_int)*2*(unsigned)numObj, world_serialize_obj( e->world ), &err ); error_cl( __LINE__, err ); e->world->dirty = 0; /*Set kernel args for the newly created buffers*/ err = clSetKernelArg( e->kernels[K_SORT], 1, sizeof( cl_mem ), &e->objects_bb ); error_cl( __LINE__, err ); err = clSetKernelArg( e->kernels[K_SORT], 2, sizeof( cl_int ), &numObj ); error_cl( __LINE__, err ); err = clSetKernelArg( e->kernels[K_LIGHTSORT], 1, sizeof( cl_mem ), &e->objects_bb ); error_cl( __LINE__, err ); err = clSetKernelArg( e->kernels[K_LIGHTSORT], 2, sizeof( cl_int ), &numObj ); error_cl( __LINE__, err ); err = clSetKernelArg( e->kernels[K_TRACE], 2, sizeof( cl_mem ), &e->objects ); error_cl( __LINE__, err ); err = clSetKernelArg( e->kernels[K_TRACE], 3, sizeof( cl_mem ), &e->vertices ); error_cl( __LINE__, err ); err = clSetKernelArg( e->kernels[K_LIGHT], 4, sizeof( cl_mem ), &e->objects ); error_cl( __LINE__, err ); err = clSetKernelArg( e->kernels[K_LIGHT], 5, sizeof( cl_mem ), &e->vertices ); error_cl( __LINE__, err ); } /*Ray generation*/ float time = (float)glfwGetTime(); //float time = 3.441f; cl_float cambuf[16] = { 0.0f }; float pos[3] = { 10.0f*cosf(time), 10.0f*sinf(time), 4.0f }; float dir[3] = { cosf(time + 3.14159f ), sinf(time + 3.14159f ), -0.6f }; float nor[3] = { 0.0f, 0.0f, 1.0f }; rtmath_lookTo( cambuf, pos, dir, nor, 1.0f, (float)e->dim_window[0]/(float)e->dim_window[1] ); err = clSetKernelArg( e->kernels[K_GEN], 0, sizeof( cl_float )*4, &cambuf[0] ); error_cl( __LINE__, err ); err = clSetKernelArg( e->kernels[K_GEN], 1, sizeof( cl_float )*4, &cambuf[4] ); error_cl( __LINE__, err ); err = clSetKernelArg( e->kernels[K_GEN], 2, sizeof( cl_float )*4, &cambuf[8] ); error_cl( __LINE__, err ); err = clSetKernelArg( e->kernels[K_GEN], 3, sizeof( cl_float )*4, &cambuf[12] ); error_cl( __LINE__, err ); err = clEnqueueNDRangeKernel( e->queue, e->kernels[K_GEN], 2, NULL, e->dim_internal, NULL, 0, NULL, NULL ); error_cl( __LINE__, err ); for( int i=0; i<3; i++ ) { /*Sort objects*/ err = clEnqueueNDRangeKernel( e->queue, e->kernels[K_SORT], 2, NULL, e->dim_internal, NULL, 0, NULL, NULL ); error_cl( __LINE__, err ); /*Raytrace*/ err = clEnqueueNDRangeKernel( e->queue, e->kernels[K_TRACE], 2, NULL, e->dim_internal, NULL, 0, NULL, NULL ); error_cl( __LINE__, err ); /*Set light position*/ float lightp[4] = { -4.0f, -4.0f, 4.0f, 0.0f }; err = clSetKernelArg( e->kernels[K_LIGHT], 1, sizeof( cl_float )*4, lightp ); error_cl( __LINE__, err ); err = clSetKernelArg( e->kernels[K_LIGHTSORT], 3, sizeof( cl_float )*4, lightp ); error_cl( __LINE__, err ); /*Sort light*/ err = clEnqueueNDRangeKernel( e->queue, e->kernels[K_LIGHTSORT], 2, NULL, e->dim_internal, NULL, 0, NULL, NULL ); error_cl( __LINE__, err ); /*Calculate light*/ err = clEnqueueNDRangeKernel( e->queue, e->kernels[K_LIGHT], 2, NULL, e->dim_internal, NULL, 0, NULL, NULL ); error_cl( __LINE__, err ); /*Compose image*/ err = clEnqueueNDRangeKernel( e->queue, e->kernels[K_COMPOSE], 2, NULL, e->dim_internal, NULL, 0, NULL, NULL ); error_cl( __LINE__, err ); } /*Acquire the OpenGL texture*/ if ( e->gl_sharing_support == 1 ) { err = clEnqueueAcquireGLObjects( e->queue, 1, &e->output_texture_CL, 0, NULL, NULL ); error_cl( __LINE__, err ); } err = clEnqueueNDRangeKernel( e->queue, e->kernels[K_INTEROP], 2, NULL, e->dim_internal, NULL, 0, NULL, NULL ); error_cl( __LINE__, err ); /*Release the OpenGL texture*/ if ( e->gl_sharing_support == 1 ) { err = clEnqueueReleaseGLObjects( e->queue, 1, &e->output_texture_CL, 0, NULL, NULL ); error_cl( __LINE__, err ); } //clFinish( e->queue ); /*=== OpenGL ===*/ error_GL(); //glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT ); glActiveTexture( GL_TEXTURE0 ); glBindTexture( GL_TEXTURE_2D, e->output_texture_GL ); glBindVertexArray( e->VAO ); glDrawArrays( GL_TRIANGLES, 0, 6 ); glfwSwapBuffers( e->window ); glBindTexture( GL_TEXTURE_2D, 0 ); glfwPollEvents(); //glFinish(); }