void Dilation::execute() { Image::pointer input = getInputData<Image>(); if(input->getDataType() != TYPE_UINT8) { throw Exception("Data type of image given to Dilation must be UINT8"); } Image::pointer output = getOutputData<Image>(); output->createFromImage(input); SceneGraph::setParentNode(output, input); output->fill(0); OpenCLDevice::pointer device = std::dynamic_pointer_cast<OpenCLDevice>(getMainDevice()); cl::CommandQueue queue = device->getCommandQueue(); cl::Program program = getOpenCLProgram(device); cl::Kernel dilateKernel(program, "dilate"); Vector3ui size = input->getSize(); OpenCLImageAccess::pointer access = input->getOpenCLImageAccess(ACCESS_READ, device); dilateKernel.setArg(0, *access->get3DImage()); dilateKernel.setArg(2, mSize/2); if(!device->isWritingTo3DTexturesSupported()) { OpenCLBufferAccess::pointer access2 = output->getOpenCLBufferAccess(ACCESS_READ_WRITE, device); dilateKernel.setArg(1, *access2->get()); queue.enqueueNDRangeKernel( dilateKernel, cl::NullRange, cl::NDRange(size.x(), size.y(), size.z()), cl::NullRange ); } else { OpenCLImageAccess::pointer access2 = output->getOpenCLImageAccess(ACCESS_READ_WRITE, device); dilateKernel.setArg(1, *access2->get3DImage()); queue.enqueueNDRangeKernel( dilateKernel, cl::NullRange, cl::NDRange(size.x(), size.y(), size.z()), cl::NullRange ); } }
/**
 * Builds the OpenCL state needed to run the erode/dilate kernels.
 *
 * Lists the available platforms and devices on stderr, selects a platform
 * (index 0, or the index given in the HPCE_SELECT_PLATFORM environment
 * variable) and a device (index 0, or deviceNumber when it is not -1),
 * compiles the kernel source loaded from `source`, and allocates the device
 * buffers.
 *
 * @param levels       2*abs(levels)+1 device buffers are allocated.
 * @param w            Used with `bits` to size the buffers ((w*bits)/2 bytes)
 *                     and the global range ((w*bits)/32 by 1).
 * @param h            Not used by this function.
 * @param bits         Selects the kernel pair "erode_kernel_<bits>" /
 *                     "dilate_kernel_<bits>"; must be 1, 2, 4, 8, 16 or 32.
 * @param source       Passed to LoadSource() to obtain the kernel source text.
 * @param deviceNumber Index of the device to use, or -1 for device 0.
 *
 * @return Tuple of (erode kernel, dilate kernel, buffers, command queue,
 *         offset, global size, local size). The buffers are allocated with
 *         `new` and are not freed here, so the caller must delete them.
 *
 * @throws std::runtime_error  if no platform or device is found, or if `bits`
 *                             is not one of the supported values.
 * @throws std::out_of_range   if the selected platform/device index is invalid.
 * Any exception raised while building the program is rethrown after the build
 * logs have been written to stderr.
 */
std::tr1::tuple<cl::Kernel,cl::Kernel,std::vector<cl::Buffer*>,cl::CommandQueue,cl::NDRange,cl::NDRange,cl::NDRange> init_cl(int levels, unsigned w, unsigned h, unsigned bits, std::string source, int deviceNumber)
{
    std::vector<cl::Platform> platforms;
    cl::Platform::get(&platforms);
    if(platforms.size()==0)
        throw std::runtime_error("No OpenCL platforms found.");

    std::cerr<<"Found "<<platforms.size()<<" platforms\n";
    for(unsigned i=0;i<platforms.size();i++){
        // Query the i-th platform; indexing with 0 reported the first
        // platform's vendor for every entry.
        std::string vendor=platforms[i].getInfo<CL_PLATFORM_VENDOR>();
        std::cerr<<" Platform "<<i<<" : "<<vendor<<"\n";
    }

    int selectedPlatform=0;
    if(getenv("HPCE_SELECT_PLATFORM")){
        selectedPlatform=atoi(getenv("HPCE_SELECT_PLATFORM"));
    }
    std::cerr<<"Choosing platform "<<selectedPlatform<<"\n";
    cl::Platform platform=platforms.at(selectedPlatform);

    std::vector<cl::Device> devices;
    platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
    if(devices.size()==0){
        throw std::runtime_error("No opencl devices found.\n");
    }

    std::cerr<<"Found "<<devices.size()<<" devices\n";
    for(unsigned i=0;i<devices.size();i++){
        std::string name=devices[i].getInfo<CL_DEVICE_NAME>();
        std::cerr<<" Device "<<i<<" : "<<name<<"\n";
    }

    int selectedDevice=0;
    if (deviceNumber != -1)
        selectedDevice = deviceNumber;
    std::cerr<<"Choosing device "<<selectedDevice<<"\n";
    cl::Device device=devices.at(selectedDevice);

    // Pick the kernel names up front so that an unsupported bit depth is
    // reported clearly, instead of trying to create a kernel with an empty name.
    std::string erodeKernelName;
    std::string dilateKernelName;
    switch (bits) {
    case 1:
        erodeKernelName = "erode_kernel_1";
        dilateKernelName = "dilate_kernel_1";
        break;
    case 2:
        erodeKernelName = "erode_kernel_2";
        dilateKernelName = "dilate_kernel_2";
        break;
    case 4:
        erodeKernelName = "erode_kernel_4";
        dilateKernelName = "dilate_kernel_4";
        break;
    case 8:
        erodeKernelName = "erode_kernel_8";
        dilateKernelName = "dilate_kernel_8";
        break;
    case 16:
        erodeKernelName = "erode_kernel_16";
        dilateKernelName = "dilate_kernel_16";
        break;
    case 32:
        erodeKernelName = "erode_kernel_32";
        dilateKernelName = "dilate_kernel_32";
        break;
    default:
        throw std::runtime_error("Unsupported bit depth: bits must be 1, 2, 4, 8, 16 or 32.");
    }

    cl::Context context(devices);

    std::string kernelSource=LoadSource(source.c_str());

    cl::Program::Sources sources;   // A vector of (data,length) pairs
    sources.push_back(std::make_pair(kernelSource.c_str(), kernelSource.size()+1));   // push on our single string

    cl::Program program(context, sources);
    try{
        program.build(devices);
    }catch(...){
        // Dump the build log of every device before propagating the failure.
        for(unsigned i=0;i<devices.size();i++){
            std::cerr<<"Log for device "<<devices[i].getInfo<CL_DEVICE_NAME>()<<":\n\n";
            std::cerr<<program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[i])<<"\n\n";
        }
        throw;
    }

    // Create the kernels and the queue before the raw buffer allocations so
    // that a failure here cannot leak any buffer.
    cl::Kernel erodeKernel(program, erodeKernelName.c_str());
    cl::Kernel dilateKernel(program, dilateKernelName.c_str());

    cl::CommandQueue queue(context, device);

    size_t cbBuffer= (w*bits)/2;

    // Two buffers per level ... and one for luck.
    const size_t bufferCount = 2*static_cast<size_t>(abs(levels)) + 1;
    std::vector<cl::Buffer*> gpuBuffers;
    gpuBuffers.reserve(bufferCount);   // push_back below can no longer throw and leak
    try{
        for (size_t i=0; i<bufferCount; i++) {
            gpuBuffers.push_back(new cl::Buffer(context, CL_MEM_READ_WRITE, cbBuffer));
        }
    }catch(...){
        // Release whatever was allocated before the failure.
        for (size_t i=0; i<gpuBuffers.size(); i++) {
            delete gpuBuffers[i];
        }
        throw;
    }

    cl::NDRange offset(0, 0);                 // Always start iterations at x=0, y=0
    cl::NDRange globalSize((w*bits)/32, 1);   // Global size must match the original loops
    cl::NDRange localSize=cl::NullRange;      // We don't care about local size

    return std::tr1::make_tuple(erodeKernel,dilateKernel,gpuBuffers,queue,offset,globalSize,localSize);
}