int main() { cl_int err; cl_kernel kernel; cl_program program; cl_mem buffer; cl_ulong ans; int i; char *source; size_t localws[3]; size_t globalws[3]; initopencl(); source=getsource("pe001.cl"); /* create a program object and load source code into it */ program=clCreateProgramWithSource(context,1,(const char **)&source,NULL,&err); if(CL_SUCCESS!=err) clerror("error creating program",err); /* builds a program associated with a program object */ if(CL_SUCCESS!=(err=clBuildProgram(program,1,devices+DEVICEID,NULL,NULL,NULL))) clerror("error building program",err); /* create kernel object from built program */ kernel=clCreateKernel(program,"pe001",&err); if(CL_SUCCESS!=err) clerror("error creating kernel",err); free(source); /* create memory buffer: one cl_ulong for each kernel, a total of MAX=N/KERNEL */ buffer=clCreateBuffer(context,CL_MEM_READ_WRITE,MAX*sizeof(cl_ulong),NULL,&err); if(CL_SUCCESS!=err) clerror("error creating buffer",err); /* set up kernel arguments */ if(CL_SUCCESS!=(err=clSetKernelArg(kernel,0,sizeof(cl_mem),&buffer))) clerror("error setting kernel argument 0",err); globalws[0]=MAX; localws[0]=LOCAL; /* run kernel */ if(CL_SUCCESS!=(err=clEnqueueNDRangeKernel(queue,kernel,1,NULL,globalws,localws,0,NULL,NULL))) clerror("error running kernel",err); /* wait until kernel has finished */ if(CL_SUCCESS!=(err=clFinish(queue))) clerror("error waiting for queue",err); /* copy to host memory */ if(CL_SUCCESS!=(err=clEnqueueReadBuffer(queue,buffer,CL_TRUE,0,MAX*sizeof(cl_ulong),a,0,NULL,NULL))) clerror("error copying result to host",err); if(CL_SUCCESS!=(err=clFinish(queue))) clerror("error waiting for queue",err); /* assemble final answer */ ans=0; for(i=0; i<ACTUAL/KERNEL; i++) ans+=a[i]; printf("ans: "LL"\n",ans); /* TODO deallocate buffer */ clReleaseKernel(kernel); clReleaseProgram(program); shutdownopencl(); return 0; }
generic_info memory_object::get_info(cl_uint param_name) const { switch ((cl_mem_info)param_name) { case CL_MEM_TYPE: return pyopencl_get_int_info(cl_mem_object_type, MemObject, PYOPENCL_CL_CASTABLE_THIS, param_name); case CL_MEM_FLAGS: return pyopencl_get_int_info(cl_mem_flags, MemObject, PYOPENCL_CL_CASTABLE_THIS, param_name); case CL_MEM_SIZE: return pyopencl_get_int_info(size_t, MemObject, PYOPENCL_CL_CASTABLE_THIS, param_name); case CL_MEM_HOST_PTR: throw clerror("MemoryObject.get_info", CL_INVALID_VALUE, "Use MemoryObject.get_host_array to get " "host pointer."); case CL_MEM_MAP_COUNT: case CL_MEM_REFERENCE_COUNT: return pyopencl_get_int_info(cl_uint, MemObject, PYOPENCL_CL_CASTABLE_THIS, param_name); case CL_MEM_CONTEXT: return pyopencl_get_opaque_info(context, MemObject, PYOPENCL_CL_CASTABLE_THIS, param_name); #if PYOPENCL_CL_VERSION >= 0x1010 // TODO // case CL_MEM_ASSOCIATED_MEMOBJECT: // { // cl_mem param_value; // PYOPENCL_CALL_GUARDED(clGetMemObjectInfo, (this, param_name, sizeof(param_value), ¶m_value, 0)); // if (param_value == 0) // { // // no associated memory object? no problem. // return py::object(); // } // return create_mem_object_wrapper(param_value); // } case CL_MEM_OFFSET: return pyopencl_get_int_info(size_t, MemObject, PYOPENCL_CL_CASTABLE_THIS, param_name); #endif #if PYOPENCL_CL_VERSION >= 0x2000 case CL_MEM_USES_SVM_POINTER: return pyopencl_get_int_info(cl_bool, MemObject, PYOPENCL_CL_CASTABLE_THIS, param_name); #endif default: throw clerror("MemoryObject.get_info", CL_INVALID_VALUE); } }
cl_context_properties get_apple_cgl_share_group() { #ifdef __APPLE__ #ifdef HAVE_GL CGLContextObj kCGLContext = CGLGetCurrentContext(); CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext); return (cl_context_properties)kCGLShareGroup; #else throw clerror("get_apple_cgl_share_group unavailable: " "GL interop not compiled", CL_INVALID_VALUE); #endif #else throw clerror("get_apple_cgl_share_group unavailable: non-Apple platform", CL_INVALID_VALUE); #endif /* __APPLE__ */ }
generic_info gl_texture::get_gl_texture_info(cl_gl_texture_info param_name) const { switch (param_name) { case CL_GL_TEXTURE_TARGET: return pyopencl_get_int_info(GLenum, GLTexture, PYOPENCL_CL_CASTABLE_THIS, param_name); case CL_GL_MIPMAP_LEVEL: return pyopencl_get_int_info(GLint, GLTexture, PYOPENCL_CL_CASTABLE_THIS, param_name); default: throw clerror("MemoryObject.get_gl_texture_info", CL_INVALID_VALUE); } }
generic_info program::get_info(cl_uint param) const { switch ((cl_program_info)param) { case CL_PROGRAM_CONTEXT: return pyopencl_get_opaque_info(context, Program, this, param); case CL_PROGRAM_REFERENCE_COUNT: case CL_PROGRAM_NUM_DEVICES: return pyopencl_get_int_info(cl_uint, Program, this, param); case CL_PROGRAM_DEVICES: return pyopencl_get_opaque_array_info(device, Program, this, param); case CL_PROGRAM_SOURCE: return pyopencl_get_str_info(Program, this, param); case CL_PROGRAM_BINARY_SIZES: return pyopencl_get_array_info(size_t, Program, this, param); case CL_PROGRAM_BINARIES: { auto sizes = pyopencl_get_vec_info(size_t, Program, this, CL_PROGRAM_BINARY_SIZES); pyopencl_buf<char*> result_ptrs(sizes.len()); for (size_t i = 0;i < sizes.len();i++) { result_ptrs[i] = (char*)malloc(sizes[i]); } try { pyopencl_call_guarded(clGetProgramInfo, this, CL_PROGRAM_BINARIES, sizes.len() * sizeof(char*), result_ptrs.get(), nullptr); } catch (...) { for (size_t i = 0;i < sizes.len();i++) { free(result_ptrs[i]); } } pyopencl_buf<generic_info> gis(sizes.len()); for (size_t i = 0;i < sizes.len();i++) { gis[i].value = result_ptrs[i]; gis[i].dontfree = 0; gis[i].opaque_class = CLASS_NONE; gis[i].type = _copy_str(std::string("char[") + tostring(sizes[i]) + "]"); } return pyopencl_convert_array_info(generic_info, gis); } #if PYOPENCL_CL_VERSION >= 0x1020 case CL_PROGRAM_NUM_KERNELS: return pyopencl_get_int_info(size_t, Program, this, param); case CL_PROGRAM_KERNEL_NAMES: return pyopencl_get_str_info(Program, this, param); #endif default: throw clerror("Program.get_info", CL_INVALID_VALUE); } }
generic_info platform::get_info(cl_uint param_name) const { switch ((cl_platform_info)param_name) { case CL_PLATFORM_PROFILE: case CL_PLATFORM_VERSION: case CL_PLATFORM_NAME: case CL_PLATFORM_VENDOR: #if !(defined(CL_PLATFORM_NVIDIA) && CL_PLATFORM_NVIDIA == 0x3001) case CL_PLATFORM_EXTENSIONS: #endif return pyopencl_get_str_info(Platform, this, param_name); default: throw clerror("Platform.get_info", CL_INVALID_VALUE); } }
PYOPENCL_USE_RESULT generic_info kernel::get_arg_info(cl_uint idx, cl_kernel_arg_info param) const { switch (param) { case CL_KERNEL_ARG_ADDRESS_QUALIFIER: return pyopencl_get_int_info(cl_kernel_arg_address_qualifier, KernelArg, this, idx, param); case CL_KERNEL_ARG_ACCESS_QUALIFIER: return pyopencl_get_int_info(cl_kernel_arg_access_qualifier, KernelArg, this, idx, param); case CL_KERNEL_ARG_TYPE_NAME: case CL_KERNEL_ARG_NAME: return pyopencl_get_str_info(KernelArg, this, idx, param); default: throw clerror("Kernel.get_arg_info", CL_INVALID_VALUE); } }
error* svm_alloc( clobj_t _ctx, cl_mem_flags flags, size_t size, cl_uint alignment, void **result) { #if PYOPENCL_CL_VERSION >= 0x2000 auto ctx = static_cast<context*>(_ctx); return c_handle_retry_mem_error([&] { *result = clSVMAlloc(ctx->data(), flags, size, alignment); if (!*result) throw clerror("clSVMalloc", CL_INVALID_VALUE, "(allocation failure, unspecified reason)"); }); #else PYOPENCL_UNSUPPORTED_BEFORE(clSVMAlloc, "CL 2.0") #endif }
error* memory_object__get_host_array(clobj_t _obj, void **hostptr, size_t *size) { auto obj = static_cast<memory_object*>(_obj); return c_handle_error([&] { cl_mem_flags flags; pyopencl_call_guarded(clGetMemObjectInfo, obj, CL_MEM_FLAGS, size_arg(flags), nullptr); if (!(flags & CL_MEM_USE_HOST_PTR)) throw clerror("MemoryObject.get_host_array", CL_INVALID_VALUE, "Only MemoryObject with USE_HOST_PTR " "is supported."); pyopencl_call_guarded(clGetMemObjectInfo, obj, CL_MEM_HOST_PTR, size_arg(*hostptr), nullptr); pyopencl_call_guarded(clGetMemObjectInfo, obj, CL_MEM_SIZE, size_arg(*size), nullptr); }); }
PYOPENCL_USE_RESULT static gl_texture* create_from_gl_texture(const context *ctx, cl_mem_flags flags, GLenum texture_target, GLint miplevel, GLuint texture, unsigned dims) { if (dims == 2) { cl_mem mem = pyopencl_call_guarded(clCreateFromGLTexture2D, ctx, flags, texture_target, miplevel, texture); return pyopencl_convert_obj(gl_texture, clReleaseMemObject, mem); } else if (dims == 3) { cl_mem mem = pyopencl_call_guarded(clCreateFromGLTexture3D, ctx, flags, texture_target, miplevel, texture); return pyopencl_convert_obj(gl_texture, clReleaseMemObject, mem); } else { throw clerror("Image", CL_INVALID_VALUE, "invalid dimension"); } }
generic_info kernel::get_info(cl_uint param) const { switch ((cl_kernel_info)param) { case CL_KERNEL_FUNCTION_NAME: return pyopencl_get_str_info(Kernel, PYOPENCL_CL_CASTABLE_THIS, param); case CL_KERNEL_NUM_ARGS: case CL_KERNEL_REFERENCE_COUNT: return pyopencl_get_int_info(cl_uint, Kernel, PYOPENCL_CL_CASTABLE_THIS, param); case CL_KERNEL_CONTEXT: return pyopencl_get_opaque_info(context, Kernel, PYOPENCL_CL_CASTABLE_THIS, param); case CL_KERNEL_PROGRAM: return pyopencl_get_opaque_info(program, Kernel, PYOPENCL_CL_CASTABLE_THIS, param); #if PYOPENCL_CL_VERSION >= 0x1020 case CL_KERNEL_ATTRIBUTES: return pyopencl_get_str_info(Kernel, PYOPENCL_CL_CASTABLE_THIS, param); #endif default: throw clerror("Kernel.get_info", CL_INVALID_VALUE); } }
void context::get_version(cl_context ctx, int *major, int *minor) { cl_device_id s_buff[16]; size_t size; pyopencl_buf<cl_device_id> d_buff(0); cl_device_id *devs = s_buff; pyopencl_call_guarded(clGetContextInfo, ctx, CL_CONTEXT_DEVICES, 0, nullptr, buf_arg(size)); if (PYOPENCL_UNLIKELY(!size)) { throw clerror("Context.get_version", CL_INVALID_VALUE, "Cannot get devices from context."); } if (PYOPENCL_UNLIKELY(size > sizeof(s_buff))) { d_buff.resize(size / sizeof(cl_device_id)); devs = d_buff.get(); } pyopencl_call_guarded(clGetContextInfo, ctx, CL_CONTEXT_DEVICES, size_arg(devs, size), buf_arg(size)); device::get_version(devs[0], major, minor); }
generic_info image::get_image_info(cl_image_info param) const { switch (param) { case CL_IMAGE_FORMAT: return pyopencl_get_int_info(cl_image_format, Image, this, param); case CL_IMAGE_ELEMENT_SIZE: case CL_IMAGE_ROW_PITCH: case CL_IMAGE_SLICE_PITCH: case CL_IMAGE_WIDTH: case CL_IMAGE_HEIGHT: case CL_IMAGE_DEPTH: #if PYOPENCL_CL_VERSION >= 0x1020 case CL_IMAGE_ARRAY_SIZE: #endif return pyopencl_get_int_info(size_t, Image, this, param); #if PYOPENCL_CL_VERSION >= 0x1020 // TODO: // case CL_IMAGE_BUFFER: // { // cl_mem param_value; // PYOPENCL_CALL_GUARDED(clGetImageInfo, (this, param, sizeof(param_value), ¶m_value, 0)); // if (param_value == 0) // { // // no associated memory object? no problem. // return py::object(); // } // return create_mem_object_wrapper(param_value); // } case CL_IMAGE_NUM_MIP_LEVELS: case CL_IMAGE_NUM_SAMPLES: return pyopencl_get_int_info(cl_uint, Image, this, param); #endif default: throw clerror("Image.get_image_info", CL_INVALID_VALUE); } }
generic_info kernel::get_work_group_info(cl_kernel_work_group_info param, const device *dev) const { switch (param) { #if PYOPENCL_CL_VERSION >= 0x1010 case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: #endif case CL_KERNEL_WORK_GROUP_SIZE: return pyopencl_get_int_info(size_t, KernelWorkGroup, PYOPENCL_CL_CASTABLE_THIS, dev, param); case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: return pyopencl_get_array_info(size_t, KernelWorkGroup, PYOPENCL_CL_CASTABLE_THIS, dev, param); case CL_KERNEL_LOCAL_MEM_SIZE: #if PYOPENCL_CL_VERSION >= 0x1010 case CL_KERNEL_PRIVATE_MEM_SIZE: #endif return pyopencl_get_int_info(cl_ulong, KernelWorkGroup, PYOPENCL_CL_CASTABLE_THIS, dev, param); default: throw clerror("Kernel.get_work_group_info", CL_INVALID_VALUE); } }
void platform::get_version(cl_platform_id plat, int *major, int *minor) { char s_buff[128]; size_t size; pyopencl_buf<char> d_buff(0); char *name = s_buff; pyopencl_call_guarded(clGetPlatformInfo, plat, CL_PLATFORM_VERSION, 0, nullptr, buf_arg(size)); if (PYOPENCL_UNLIKELY(size > sizeof(s_buff))) { d_buff.resize(size); name = d_buff.get(); } pyopencl_call_guarded(clGetPlatformInfo, plat, CL_PLATFORM_VERSION, size_arg(name, size), buf_arg(size)); std::cmatch ver_match; if (!std::regex_match(name, ver_match, ver_regex)) { throw clerror("Platform.get_version", CL_INVALID_VALUE, "platform returned non-conformant " "platform version string"); } *major = atoi(name + ver_match.position(1)); *minor = atoi(name + ver_match.position(2)); }
generic_info program::get_build_info(const device *dev, cl_program_build_info param) const { switch (param) { case CL_PROGRAM_BUILD_STATUS: return pyopencl_get_int_info(cl_build_status, ProgramBuild, this, dev, param); case CL_PROGRAM_BUILD_OPTIONS: case CL_PROGRAM_BUILD_LOG: return pyopencl_get_str_info(ProgramBuild, this, dev, param); #if PYOPENCL_CL_VERSION >= 0x1020 case CL_PROGRAM_BINARY_TYPE: return pyopencl_get_int_info(cl_program_binary_type, ProgramBuild, this, dev, param); #endif #if PYOPENCL_CL_VERSION >= 0x2000 case CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE: return pyopencl_get_int_info(size_t, ProgramBuild, this, dev, param); #endif default: throw clerror("Program.get_build_info", CL_INVALID_VALUE); } }
/* create context and create command queue, use platform and device as indicated by PLATFORMID and DEVICEID */ void initopencl() { cl_int err; /* get platform ids */ if(CL_SUCCESS!=(err=clGetPlatformIDs(0,NULL,&numplatforms))) clerror("couldn't get number of platforms",err); platforms=malloc(numplatforms*sizeof(cl_platform_id)); if(CL_SUCCESS!=(err=clGetPlatformIDs(numplatforms,platforms,NULL))) clerror("couldn't get platforms",err); /* get device */ if(CL_SUCCESS!=(err=clGetDeviceIDs(platforms[PLATFORMID],CL_DEVICE_TYPE_ALL,0,NULL,&numdevices))) clerror("couldn't get number of devices",err); devices=malloc(numdevices*sizeof(cl_device_id)); if(CL_SUCCESS!=(err=clGetDeviceIDs(platforms[PLATFORMID],CL_DEVICE_TYPE_ALL,numdevices,devices,NULL))) clerror("couldn't get devices",err); /* create context */ context=clCreateContext(NULL,1,devices+DEVICEID,NULL,NULL,&err); if(CL_SUCCESS!=err) clerror("couldn't get context",err); /* create command queue */ queue=clCreateCommandQueue(context,devices[DEVICEID],0,&err); if(CL_SUCCESS!=err) clerror("couldn't create command queue",err); }
generic_info context::get_info(cl_uint param_name) const { switch ((cl_context_info)param_name) { case CL_CONTEXT_REFERENCE_COUNT: return pyopencl_get_int_info(cl_uint, Context, PYOPENCL_CL_CASTABLE_THIS, param_name); case CL_CONTEXT_DEVICES: return pyopencl_get_opaque_array_info(device, Context, PYOPENCL_CL_CASTABLE_THIS, param_name); case CL_CONTEXT_PROPERTIES: { auto result = pyopencl_get_vec_info( cl_context_properties, Context, PYOPENCL_CL_CASTABLE_THIS, param_name); pyopencl_buf<generic_info> py_result(result.len() / 2); size_t i = 0; for (;i < py_result.len();i++) { cl_context_properties key = result[i * 2]; if (key == 0) break; cl_context_properties value = result[i * 2 + 1]; generic_info &info = py_result[i]; info.dontfree = 0; info.opaque_class = CLASS_NONE; switch (key) { case CL_CONTEXT_PLATFORM: info.opaque_class = CLASS_PLATFORM; info.type = "void *"; info.value = new platform( reinterpret_cast<cl_platform_id>(value)); break; #if defined(PYOPENCL_GL_SHARING_VERSION) && (PYOPENCL_GL_SHARING_VERSION >= 1) #if defined(__APPLE__) && defined(HAVE_GL) case CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE: #else case CL_GL_CONTEXT_KHR: case CL_EGL_DISPLAY_KHR: case CL_GLX_DISPLAY_KHR: case CL_WGL_HDC_KHR: case CL_CGL_SHAREGROUP_KHR: #endif info.type = "intptr_t *"; info.value = (void*)value; // we do not own this object info.dontfree = 1; break; #endif default: throw clerror("Context.get_info", CL_INVALID_VALUE, "unknown context_property key encountered"); } } py_result.resize(i); return pyopencl_convert_array_info(generic_info, py_result); } #if PYOPENCL_CL_VERSION >= 0x1010 case CL_CONTEXT_NUM_DEVICES: return pyopencl_get_int_info(cl_uint, Context, PYOPENCL_CL_CASTABLE_THIS, param_name); #endif default: throw clerror("Context.get_info", CL_INVALID_VALUE); } }
int main() { cl_int err; cl_kernel kernel; cl_program program; cl_mem buffer; cl_ulong ans,cur; int i,x3,y3; char *source; size_t localws[3]; size_t globalws[3]; initopencl(); source=getsource("pe184.cl"); /* create a program object and load source code into it */ program=clCreateProgramWithSource(context,1,(const char **)&source,NULL,&err); if(CL_SUCCESS!=err) clerror("error creating program",err); /* builds a program associated with a program object */ if(CL_SUCCESS!=(err=clBuildProgram(program,1,devices+DEVICEID,NULL,NULL,NULL))) clerror("error building program",err); /* create kernel object from built program */ kernel=clCreateKernel(program,"pe184",&err); if(CL_SUCCESS!=err) clerror("error creating kernel",err); free(source); /* create memory buffer: one cl_ulong for each kernel, a total of MAX=N/KERNEL */ buffer=clCreateBuffer(context,CL_MEM_READ_WRITE,N2*N2*sizeof(cl_uint),NULL,&err); if(CL_SUCCESS!=err) clerror("error creating buffer",err); globalws[0]=N2; globalws[1]=N2; globalws[2]=1; localws[0]=N2; localws[1]=1; localws[2]=1; ans=0; for(x3=0;x3<N;x3++) { for(y3=0;y3<N;y3++) if(x3*x3+y3*y3<N*N) { /* set up kernel arguments */ if(CL_SUCCESS!=(err=clSetKernelArg(kernel,0,sizeof(cl_mem),&buffer))) clerror("error setting kernel argument 0",err); if(CL_SUCCESS!=(err=clSetKernelArg(kernel,1,sizeof(cl_int),&x3))) clerror("error setting kernel argument 1",err); if(CL_SUCCESS!=(err=clSetKernelArg(kernel,2,sizeof(cl_int),&y3))) clerror("error setting kernel argument 2",err); /* run kernel */ if(CL_SUCCESS!=(err=clEnqueueNDRangeKernel(queue,kernel,3,NULL,globalws,localws,0,NULL,NULL))) clerror("error running kernel",err); /* wait until kernel has finished */ if(CL_SUCCESS!=(err=clFinish(queue))) clerror("error waiting for queue",err); /* copy to host memory */ if(CL_SUCCESS!=(err=clEnqueueReadBuffer(queue,buffer,CL_TRUE,0,N2*N2*sizeof(cl_uint),a,0,NULL,NULL))) clerror("error copying result to host",err); if(CL_SUCCESS!=(err=clFinish(queue))) clerror("error waiting for queue",err); /* assemble final answer */ for(cur=i=0;i<N2*N2;i++) cur+=a[i]; if(x3) cur*=2; if(y3) cur*=2; ans+=cur; } printf("done x3=%d/%d\n",x3+1,N); } printf("answer: "LL"\n",ans/6); clReleaseMemObject(buffer); clReleaseKernel(kernel); clReleaseProgram(program); shutdownopencl(); return 0; }