Example #1
0
void Dilation::execute() {
    Image::pointer input = getInputData<Image>();
    if(input->getDataType() != TYPE_UINT8) {
        throw Exception("Data type of image given to Dilation must be UINT8");
    }

    Image::pointer output = getOutputData<Image>();
    output->createFromImage(input);
    SceneGraph::setParentNode(output, input);
    output->fill(0);

    OpenCLDevice::pointer device = std::dynamic_pointer_cast<OpenCLDevice>(getMainDevice());
    cl::CommandQueue queue = device->getCommandQueue();
    cl::Program program = getOpenCLProgram(device);
    cl::Kernel dilateKernel(program, "dilate");

    Vector3ui size = input->getSize();

    OpenCLImageAccess::pointer access = input->getOpenCLImageAccess(ACCESS_READ, device);
    dilateKernel.setArg(0, *access->get3DImage());
    dilateKernel.setArg(2, mSize/2);

    if(!device->isWritingTo3DTexturesSupported()) {
        OpenCLBufferAccess::pointer access2 = output->getOpenCLBufferAccess(ACCESS_READ_WRITE, device);
        dilateKernel.setArg(1, *access2->get());

        queue.enqueueNDRangeKernel(
            dilateKernel,
            cl::NullRange,
            cl::NDRange(size.x(), size.y(), size.z()),
            cl::NullRange
        );
    } else {
        OpenCLImageAccess::pointer access2 = output->getOpenCLImageAccess(ACCESS_READ_WRITE, device);
        dilateKernel.setArg(1, *access2->get3DImage());

        queue.enqueueNDRangeKernel(
            dilateKernel,
            cl::NullRange,
            cl::NDRange(size.x(), size.y(), size.z()),
            cl::NullRange
        );
    }

}
Example #2
0
std::tr1::tuple<cl::Kernel,cl::Kernel,std::vector<cl::Buffer*>,cl::CommandQueue,cl::NDRange,cl::NDRange,cl::NDRange> init_cl(int levels, unsigned w, unsigned h, unsigned bits, std::string source, int deviceNumber)
{
    std::vector<cl::Platform> platforms;
    
	cl::Platform::get(&platforms);
	
    if(platforms.size()==0) throw std::runtime_error("No OpenCL platforms found.");
    
    std::cerr<<"Found "<<platforms.size()<<" platforms\n";
	for(unsigned i=0;i<platforms.size();i++){
		std::string vendor=platforms[0].getInfo<CL_PLATFORM_VENDOR>();
		std::cerr<<"  Platform "<<i<<" : "<<vendor<<"\n";
	}
    
    int selectedPlatform=0;
	if(getenv("HPCE_SELECT_PLATFORM")){
		selectedPlatform=atoi(getenv("HPCE_SELECT_PLATFORM"));
	}
	std::cerr<<"Choosing platform "<<selectedPlatform<<"\n";
	cl::Platform platform=platforms.at(selectedPlatform);
    
    std::vector<cl::Device> devices;
	platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
	if(devices.size()==0){
		throw std::runtime_error("No opencl devices found.\n");
	}
    
	std::cerr<<"Found "<<devices.size()<<" devices\n";
	for(unsigned i=0;i<devices.size();i++){
		std::string name=devices[i].getInfo<CL_DEVICE_NAME>();
		std::cerr<<"  Device "<<i<<" : "<<name<<"\n";
	}
    
    int selectedDevice=0;
    
    if (deviceNumber != -1) selectedDevice = deviceNumber;
    
	std::cerr<<"Choosing device "<<selectedDevice<<"\n";
	cl::Device device=devices.at(selectedDevice);
    
    cl::Context context(devices);
    
    std::string kernelSource=LoadSource(source.c_str());
	
	cl::Program::Sources sources;	// A vector of (data,length) pairs
	sources.push_back(std::make_pair(kernelSource.c_str(), kernelSource.size()+1));	// push on our single string
    
	cl::Program program(context, sources);
	try{
		program.build(devices);
	}catch(...){
		for(unsigned i=0;i<devices.size();i++){
			std::cerr<<"Log for device "<<devices[i].getInfo<CL_DEVICE_NAME>()<<":\n\n";
			std::cerr<<program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[i])<<"\n\n";
		}
		throw;
	}
    
    size_t cbBuffer= (w*bits)/2;
    
    std::vector<cl::Buffer*> gpuBuffers;
    
    for (int i=0; i<abs(levels); i++)
    {
        gpuBuffers.push_back(new cl::Buffer(context, CL_MEM_READ_WRITE, cbBuffer));
        gpuBuffers.push_back(new cl::Buffer(context, CL_MEM_READ_WRITE, cbBuffer));
    }
    gpuBuffers.push_back(new cl::Buffer(context, CL_MEM_READ_WRITE, cbBuffer)); // ... and one for luck.
    
    std::string erodeKernelName;
    std::string dilateKernelName;
    
    switch (bits) {
        case 1:
            erodeKernelName = "erode_kernel_1";
            dilateKernelName = "dilate_kernel_1";
            break;
        case 2:
            erodeKernelName = "erode_kernel_2";
            dilateKernelName = "dilate_kernel_2";
            break;
        case 4:
            erodeKernelName = "erode_kernel_4";
            dilateKernelName = "dilate_kernel_4";
            break;
        case 8:
            erodeKernelName = "erode_kernel_8";
            dilateKernelName = "dilate_kernel_8";
            break;
        case 16:
            erodeKernelName = "erode_kernel_16";
            dilateKernelName = "dilate_kernel_16";
            break;
        case 32:
            erodeKernelName = "erode_kernel_32";
            dilateKernelName = "dilate_kernel_32";
            break;
        default:
            break;
    }
    
    cl::Kernel erodeKernel(program, erodeKernelName.c_str());
    cl::Kernel dilateKernel(program, dilateKernelName.c_str());
    
    cl::CommandQueue queue(context, device);
    
    cl::NDRange offset(0, 0);				 // Always start iterations at x=0, y=0
    cl::NDRange globalSize((w*bits)/32, 1);  // Global size must match the original loops
    cl::NDRange localSize=cl::NullRange;	 // We don't care about local size
    
    return std::tr1::make_tuple(erodeKernel,dilateKernel,gpuBuffers,queue,offset,globalSize,localSize);
}