Esempio n. 1
0
void kernel(cl::Buffer& devOut, cl::CommandQueue& queue)
{
    static std::once_flag   compileFlag;
    static cl::Program      prog;
    static cl::Kernel       kern;

    std::call_once(compileFlag,
        [queue]() {
        prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(), fractal_ocl_kernel, true);
            kern = cl::Kernel(prog, "julia");
        });

    //auto juliaOp = cl::make_kernel<Buffer, unsigned, unsigned>(kern);

    static const NDRange local(8, 8);
    NDRange global(local[0] * divup(DIMX, local[0]),
                   local[1] * divup(DIMY, local[1]));

    kern.setArg(0, devOut);
    kern.setArg(1, DIMX);
    kern.setArg(2, DIMY);
    queue.enqueueNDRangeKernel(kern, cl::NullRange, global, local);

    //juliaOp(EnqueueArgs(queue, global, local), devOut, DIMX, DIMY);
}
Esempio n. 2
0
inline ImageBuffer DoBasicOp(cl::Kernel& kernel, const ImageBuffer& i1,
                             ImageBuffer o) {
  kernel.setArg(0, i1.mem());
  kernel.setArg(1, o.mem());
  Enqueue(kernel, o);
  return o;
}
Esempio n. 3
0
void initSimulation() {
    // source: http://stackoverflow.com/questions/26517114/how-to-compile-opencl-project-with-kernels

    try {
        std::vector<cl::Platform> platforms;
        cl::Platform::get(&platforms);

        std::vector<cl::Device> devices;
        platforms[PLATFORM_ID].getDevices(CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU, &devices);

        context = cl::Context(devices);
        queue = cl::CommandQueue(context, devices[DEVICE_ID]);

        std::ifstream sourceFile{"kernels/programs.cl"};
        std::string sourceCode(std::istreambuf_iterator<char>(sourceFile), (std::istreambuf_iterator<char>()));
        cl::Program::Sources source(1, std::make_pair(sourceCode.c_str(), sourceCode.length()));

        simulationProgram = cl::Program(context, source);
        simulationProgram.build(devices);

        visualizationBufferGPU = cl::Buffer(context, CL_MEM_WRITE_ONLY,
                                            sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
                                            nullptr, nullptr);
        randomizeField();

        stepKernel = cl::Kernel(simulationProgram, "tick");
        stepKernel.setArg(0, fieldWidth);
        stepKernel.setArg(1, fieldHeight);
        stepKernel.setArg(3, visualizationBufferGPU);
    } catch (cl::Error err) {
        std::cout << "Error: " << err.what() << "(" << err.err() << ")" << std::endl;
        exit(2);
    }
}
Esempio n. 4
0
void CoreSersicProfile::add_kernel_parameters(unsigned int index, cl::Kernel &kernel) const {
	kernel.setArg(index++, static_cast<FT>(re));
	kernel.setArg(index++, static_cast<FT>(rb));
	kernel.setArg(index++, static_cast<FT>(nser));
	kernel.setArg(index++, static_cast<FT>(a));
	kernel.setArg(index++, static_cast<FT>(b));
	kernel.setArg(index++, static_cast<FT>(_bn));
}
Esempio n. 5
0
 void initParticles(uint32_t num_particles)
 {
     m_geom = gl::Geometry::create();
     m_geom->setPrimitiveType(GL_POINTS);
     m_mesh = gl::Mesh::create(m_geom, m_pointMaterial);
     
     m_numParticles = num_particles;
     GLsizei numBytes = m_numParticles * sizeof(vec4);
     
     m_geom->vertices().resize(m_numParticles, vec3(0));
     m_geom->colors().resize(m_numParticles, vec4(1));
     m_geom->point_sizes().resize(m_numParticles, 9.f);
     m_geom->createGLBuffers();
     m_mesh->material()->setPointSize(2.f);
     scene().addObject(m_mesh);
     try
     {
         // shared position buffer for OpenGL / OpenCL
         m_positions = cl::BufferGL(m_context, CL_MEM_READ_WRITE, m_geom->vertexBuffer().id());
         m_colors = cl::BufferGL(m_context, CL_MEM_READ_WRITE, m_geom->colorBuffer().id());
         
         //create the OpenCL only arrays
         m_velocities = cl::Buffer( m_context, CL_MEM_WRITE_ONLY, numBytes );
         m_positionGen = cl::Buffer( m_context, CL_MEM_WRITE_ONLY, numBytes );
         m_velocityGen = cl::Buffer( m_context, CL_MEM_WRITE_ONLY, numBytes );
         
         vector<vec4> posGen, velGen;
         for (int i = 0; i < m_numParticles; i++)
         {
             posGen.push_back( vec4(glm::ballRand(20.0f), 1.f) );
             vec2 tmp = glm::linearRand(vec2(-100), vec2(100));
             float life = kinski::random(2.f, 5.f);
             float yVel = kinski::random<float>(5, 15);
             velGen.push_back(vec4(tmp.x, yVel, tmp.y, life));
             m_geom->point_sizes()[i] = kinski::random(5.f, 15.f);
         }
         m_geom->createGLBuffers();
         
         m_queue.enqueueWriteBuffer(m_velocities, CL_TRUE, 0, numBytes, &velGen[0]);
         m_queue.enqueueWriteBuffer(m_positionGen, CL_TRUE, 0, numBytes, &posGen[0]);
         m_queue.enqueueWriteBuffer(m_velocityGen, CL_TRUE, 0, numBytes, &velGen[0]);
         
         m_particleKernel.setArg(0, m_positions);
         m_particleKernel.setArg(1, m_colors);
         m_particleKernel.setArg(2, m_velocities);
         m_particleKernel.setArg(3, m_positionGen);
         m_particleKernel.setArg(4, m_velocityGen);
     }
     catch(cl::Error &error)
     {
         LOG_ERROR << error.what() << "(" << oclErrorString(error.err()) << ")";
     }
 }
Esempio n. 6
0
inline ImageBuffer DoBasicOp(cl::Kernel& kernel,
                             const ImageBuffer& i1, const ImageBuffer& i2,
                             const ImageBuffer& i3, const ImageBuffer& i4,
                             ImageBuffer o) {
  kernel.setArg(0, i1.mem());
  kernel.setArg(1, i2.mem());
  kernel.setArg(2, i3.mem());
  kernel.setArg(3, i4.mem());
  kernel.setArg(4, o.mem());
  Enqueue(kernel, o);
  return o;
}
Esempio n. 7
0
void MetaBallsApp::updateParticles()
{
    int random = rand();
    float time = 1.0f / 60.0f;

    mClParticleUpdate.setArg( 3, sizeof(float), &time );
    mClParticleUpdate.setArg( 4, sizeof(int32_t), &random );

    // Queue the kernel up for execution across the array
    mClCommandQueue.enqueueNDRangeKernel( mClParticleUpdate,
                                          cl::NullRange,
                                          cl::NDRange( NUM_PARTICLES ) );
}
Esempio n. 8
0
void MetaBallsApp::setupParticleKernel()
{
    auto program = ocl::createProgram( mClContext, loadAsset( "kernels/particles.cl" ), true );
    mClParticleUpdate = ocl::Kernel( program, "particle_update" );

    float maxLife = 60.0;
    float minVelSqd = 0.5 * 0.5;

    mClParticleUpdate.setArg( 0, mClParticleBuf );
    mClParticleUpdate.setArg( 1, sizeof(float), &maxLife );
    mClParticleUpdate.setArg( 2, sizeof(float), &minVelSqd );
    mClParticleUpdate.setArg( 5, sizeof(cl_int), &NUM_PARTICLES );
}
Esempio n. 9
0
void neo::randomUniform(cl::Image3D &image3D, sys::ComputeSystem &cs, cl::Kernel &randomUniform3DKernel, cl_int3 size, cl_float2 range, std::mt19937 &rng) {
	int argIndex = 0;

	std::uniform_int_distribution<int> seedDist;

	cl_uint2 seed = { seedDist(rng), seedDist(rng) };

	randomUniform3DKernel.setArg(argIndex++, image3D);
	randomUniform3DKernel.setArg(argIndex++, seed);
	randomUniform3DKernel.setArg(argIndex++, range);

	cs.getQueue().enqueueNDRangeKernel(randomUniform3DKernel, cl::NullRange, cl::NDRange(size.x, size.y, size.z));
}
Esempio n. 10
0
void updateLevelSetFunction(
        OpenCL &ocl,
        cl::Kernel &kernel,
        cl::Image3D &input,
        cl::Buffer &positions,
        int activeVoxels,
        int numberOfThreads,
        int groupSize,
        cl::Memory * phi_read,
        cl::Memory * phi_write,
        float threshold,
        float epsilon,
        float alpha
        ) {

    kernel.setArg(0, input);
    kernel.setArg(1, positions);
    kernel.setArg(2, activeVoxels);
    kernel.setArg(3, *phi_read);
    kernel.setArg(4, *phi_write);
    kernel.setArg(5, threshold);
    kernel.setArg(6, epsilon);
    kernel.setArg(7, alpha);

    ocl.queue.enqueueNDRangeKernel(
            kernel,
            cl::NullRange,
            cl::NDRange(numberOfThreads),
            cl::NDRange(groupSize)
    );
}
Esempio n. 11
0
void simulationStep() {
    try {
        // copy
        auto buffer = cl::Buffer(context, CL_MEM_READ_ONLY,
                                 sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
                                 nullptr, nullptr);
        queue.enqueueWriteBuffer(buffer, CL_TRUE, 0,
                                 sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
                                 visualizationBufferCPU, NULL, NULL);

        // enque
        stepKernel.setArg(2, buffer);
        cl::NDRange global((size_t) (fieldWidth * fieldHeight));
        queue.enqueueNDRangeKernel(stepKernel, cl::NullRange, global, cl::NullRange);

        // read back
        queue.enqueueReadBuffer(visualizationBufferGPU, CL_TRUE, 0,
                                sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
                                visualizationBufferCPU, NULL, NULL);

        // finish
        queue.finish();
    } catch (cl::Error err) {
        std::cout << "Error: " << err.what() << "(" << err.err() << ")" << std::endl;
        exit(3);
    }

    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, fieldWidth, fieldHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE,
                 visualizationBufferCPU);
}
inline void OpenCL::addkernelarg(std::size_t i, T const & arg, cl::Kernel & kernel,std::vector<cl::Buffer> &outputbuffer,cl::CommandQueue &quene) const
{

    kernel.setArg(i,arg);
//  Push back a dummy since we actually dont need to allocate anything for a scalar
    outputbuffer.push_back(cl::Buffer());
}
Esempio n. 13
0
    void updateParticles(float timeDelta)
    {
        try
        {
            vector<cl::Memory> glBuffers;
            glBuffers.push_back(m_positions);
            glBuffers.push_back(m_colors);
            
            //this will update our system by calculating new velocity and updating the positions of our particles
            //Make sure OpenGL is done using our VBOs
            glFinish();
            
            // map OpenGL buffer object for writing from OpenCL
            // this passes in the vector of VBO buffer objects (position and color)
            m_queue.enqueueAcquireGLObjects(&glBuffers);
            
            m_particleKernel.setArg(5, timeDelta); //pass in the timestep
            
            //execute the kernel
            m_queue.enqueueNDRangeKernel(m_particleKernel, cl::NullRange, cl::NDRange(m_numParticles),
                                         cl::NullRange);
            //Release the VBOs so OpenGL can play with them
            m_queue.enqueueReleaseGLObjects(&glBuffers, NULL);

            m_queue.finish();
        }
        catch(cl::Error &error)
        {
            LOG_ERROR << error.what() << "(" << oclErrorString(error.err()) << ")";
        }
    }
void RectangularBorderHandler::setarg(int arg,cl::Kernel& ker, int dimx, int dimy)
{
	if (m_bufferMap.find(std::make_pair(dimx,dimy)) == m_bufferMap.end())
		genBuffer(dimx,dimy);

	ker.setArg(arg,m_bufferMap.at(std::make_pair(dimx,dimy)) ());
}
void ParallelepipedalBorderHandler::setarg(int arg,cl::Kernel& ker, int dimx, int dimy,int dimz)
{
	if (m_bufferMap.find(tri(dimx,dimy,dimz)) == m_bufferMap.end())
		genBuffer(dimx,dimy,dimz);

	ker.setArg(arg,m_bufferMap.at(tri(dimx,dimy,dimz)) ());
}
inline void OpenCL::addkernelarg(std::size_t i, T const (& arg)[N], cl::Kernel & kernel,cl::CommandQueue &quene) const
{
    cl::Buffer buffer(this->context,CL_MEM_READ_WRITE,N*sizeof(T));
//    std::cout << "enqeue\n";
    quene.enqueueWriteBuffer(buffer,CL_FALSE,0,sizeof(T)*N,&arg);
    kernel.setArg(i,buffer);

}
inline void OpenCL::addkernelarg(std::size_t i, std::vector<T> const & arg, cl::Kernel & kernel,cl::CommandQueue &quene) const
{
    cl::Buffer buffer(this->context,CL_MEM_READ_WRITE,arg.size()*sizeof(T));
//    std::cout << "enqeue\n";
    quene.enqueueWriteBuffer(buffer,CL_FALSE,0,sizeof(T)*arg.size(),&(arg[0]));
    kernel.setArg(i,buffer);

}
Esempio n. 18
0
cl::Buffer performReduction(const cl::Buffer & in,cl::Kernel & ker,cl::CommandQueue & q,int size)
{
	if (size == 1) return in;

	int newsize = std::max(1,size/4);
	cl::Buffer tmp (CLContextLoader::getContext(),CL_MEM_READ_WRITE,sizeof(real)*newsize);

	ker.setArg(0,in());
	ker.setArg(1,tmp());
	ker.setArg(2,size);
	ker.setArg(3,4);

	q.enqueueNDRangeKernel(ker,cl::NDRange(0),cl::NDRange(newsize),
												 getBestWorkspaceDim(cl::NDRange(newsize)));

	return performReduction(tmp,ker,q,newsize);
}
Esempio n. 19
0
    void findMinSeamVert(cl::Context &ctx,
                         cl::CommandQueue &cmdQueue,
                         cl::Event &event,
                         std::vector<cl::Event> &deps,
                         cl::Buffer &energyMatrix,
                         cl::Buffer &vertMinEnergy,
                         cl::Buffer &vertMinIdx,
                         int width,
                         int height,
                         int pitch,
                         int colsRemoved) {

        cl_int errNum;
        errNum = findMinSeamVertKernel.setArg(0, energyMatrix);
        errNum |= findMinSeamVertKernel.setArg(1, vertMinEnergy);
        errNum |= findMinSeamVertKernel.setArg(2, vertMinIdx);
        errNum |= findMinSeamVertKernel.setArg(3, cl::__local(256 * sizeof(float)));
        errNum |= findMinSeamVertKernel.setArg(4, cl::__local(256 * sizeof(float)));
        errNum |= findMinSeamVertKernel.setArg(5, width);
        errNum |= findMinSeamVertKernel.setArg(6, height);
        errNum |= findMinSeamVertKernel.setArg(7, pitch);
        errNum |= findMinSeamVertKernel.setArg(8, colsRemoved);

        if (errNum != CL_SUCCESS) {
            std::cerr << "Error setting findMinSeamVert arguments." << std::endl;
            exit(-1);
        }

        // This kernel could be written to use more than one work group, but its probably not worth it.

        cl::NDRange offset = cl::NDRange(0);
        cl::NDRange localWorkSize = cl::NDRange(256);
        cl::NDRange globalWorkSize = cl::NDRange(256);

        errNum = cmdQueue.enqueueNDRangeKernel(findMinSeamVertKernel,
                                               offset,
                                               globalWorkSize,
                                               localWorkSize,
                                               &deps,
                                               &event);
        if (errNum != CL_SUCCESS) {
            std::cerr << "Error enqueuing computeSeams kernel for execution." << std::endl;
            exit(-1);
        }

        /** DEBUG **/
        // int deviceResultIdx[1];
        // float deviceResultEnergy[1];

        // mem::read(ctx, cmdQueue, deviceResultIdx, vertMinIdx);
        // mem::read(ctx, cmdQueue, deviceResultEnergy, vertMinEnergy);

        // std::cout << "deviceResultIdx = " << deviceResultIdx[0] << std::endl;
        // std::cout << "deviceResultEnergy = " << deviceResultEnergy[0] << std::endl;
    }
void helper(uint32_t* out, int osize, uint8_t* in, int isize, int w, int h, int choice)
{
	int set_size=8;
    try {
        cl::Buffer bufferIn = cl::Buffer(gContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                isize*sizeof(cl_uchar), in, NULL);
        cl::Buffer bufferOut = cl::Buffer(gContext, CL_MEM_READ_WRITE, osize*sizeof(cl_uchar4));
        cl::Buffer bufferOut2= cl::Buffer(gContext, CL_MEM_READ_WRITE, osize*sizeof(cl_uchar4));
        gNV21Kernel.setArg(2,w);
        gNV21Kernel.setArg(3,h);
        gNV21Kernel.setArg(1,bufferIn);
        gNV21Kernel.setArg(0,bufferOut);
        gQueue.enqueueNDRangeKernel(gNV21Kernel,
                cl::NullRange,
                cl::NDRange( (int)ceil((float)w/16.0f)*16,(int)ceil((float)h/16.0f)*16),
                cl::NDRange(set_size,set_size),
                NULL,
                NULL);
        if (choice==1) {
            gLaplacianK.setArg(2,w);
            gLaplacianK.setArg(3,h);
            gLaplacianK.setArg(1,bufferOut);
            gLaplacianK.setArg(0,bufferOut2);
            gQueue.enqueueNDRangeKernel(gLaplacianK,
                    cl::NullRange,
                    cl::NDRange( (int)ceil((float)w/16.0f)*16,(int)ceil((float)h/16.0f)*16),
                    cl::NDRange(set_size,set_size),
                    NULL,
                    NULL);
        }
        else if (choice>1) {
        	gNegative.setArg(2,w);
        	gNegative.setArg(3,h);
        	gNegative.setArg(1,bufferOut);
        	gNegative.setArg(0,bufferOut2);
        	gQueue.enqueueNDRangeKernel(gNegative,
        	                    cl::NullRange,
        	                    cl::NDRange( (int)ceil((float)w/16.0f)*16,(int)ceil((float)h/16.0f)*16),
        	                    cl::NDRange(set_size,set_size),
        	                    NULL,
        	                    NULL);

        }

        gQueue.enqueueReadBuffer(bufferOut2, CL_TRUE, 0, osize*sizeof(cl_uchar4), out);
    }
    catch (cl::Error e) {
        LOGI("@oclDecoder: %s %d \n",e.what(),e.err());
    }
}
Esempio n. 21
0
/// \brief Query the preferred factor of local size
/// \ingroup OpenCL
///
/// \param kern An OpenCL kernel
/// \param dev An OpenCL device
/// \param factor Multiplier factor of local size for optimzied performance
/// \param lmax Maximum of the local size
/// \param mmax Maximum of the multiplier of the factor
inline void cl_minmax_local_size (
        const ::cl::Kernel &kern, const ::cl::Device &dev,
        std::size_t &factor, std::size_t &lmax, std::size_t &mmax)
{
    try {
        kern.getWorkGroupInfo(dev,
                CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, &factor);
        kern.getWorkGroupInfo(dev,
                CL_KERNEL_WORK_GROUP_SIZE, &lmax);
        if (factor == 0 || factor > lmax) {
            factor = lmax = mmax = 0;
            return;
        }
        mmax = lmax / factor;
    } catch (const ::cl::Error &) {
        factor = lmax = mmax = 0;
    }
}
Esempio n. 22
0
void kernel(cl::Buffer& devOut, cl::CommandQueue& queue)
{
    static std::once_flag   compileFlag;
    static cl::Program      prog;
    static cl::Kernel       kern;

    std::call_once(compileFlag,
        [queue]() {
        prog = cl::Program(queue.getInfo<CL_QUEUE_CONTEXT>(), sinf_ocl_kernel, true);
            kern = cl::Kernel(prog, "sinf");
        });

    static const NDRange global(SIZE * 2);

    kern.setArg(0, devOut);
    kern.setArg(1, dx);
    kern.setArg(2, SIZE);
    queue.enqueueNDRangeKernel(kern, cl::NullRange, global);
}
Esempio n. 23
0
    /**
     * Applies a gaussian blur filter to an image using openCL
     * @param ctx An openCL context object.
     * @param cmdQueue An openCL command queue.
     * @param sampler An openCL image sampler object.
     * @param height The height of the input image.
     * @param width The width of the input image.
     * @return An Image2D object containing the resulting image data.
     */
    void blur(cl::Context &ctx,
              cl::CommandQueue &cmdQueue,
              cl::Event &blurEvent,
              cl::Buffer &inputImage,
              cl::Buffer &outputImage,
              int height,
              int width,
              int colsRemoved) {

        // Set kernel arguments
        cl_int errNum;

        errNum = blurKernel.setArg(0, inputImage);
        errNum |= blurKernel.setArg(1, outputImage);
        errNum |= blurKernel.setArg(2, width);
        errNum |= blurKernel.setArg(3, height);
        errNum |= blurKernel.setArg(4, colsRemoved);

        if (errNum != CL_SUCCESS) {
            std::cerr << "Error setting blurKernel arguments." << std::endl;
            std::cerr << errNum << std::endl;
            exit(-1);
        }

        // Determine local and global work size
        cl::NDRange offset = cl::NDRange(0, 0);
        cl::NDRange localWorkSize = cl::NDRange(16, 16);
        cl::NDRange globalWorkSize = cl::NDRange(math::roundUp(localWorkSize[0], width),
                                                 math::roundUp(localWorkSize[1], height));
        // Run blurKernel
        errNum = cmdQueue.enqueueNDRangeKernel(blurKernel,
                                               offset,
                                               globalWorkSize,
                                               localWorkSize,
                                               NULL,
                                               &blurEvent);
        if (errNum != CL_SUCCESS) {
            std::cerr << "Error enqueuing blur kernel for execution." << std::endl;
            exit(-1);
        }

    }
Esempio n. 24
0
    void run(T* buf)
    {
        cl_int err;
        cl::Buffer outbuf(
            m_context,
            CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
            N*N*sizeof(T),
            buf,
            &err);
        checkErr(err, "Buffer::Buffer()");

        err = m_kernel.setArg(0, outbuf);
        checkErr(err, "Kernel::setArg(0)");

        err = m_kernel.setArg(1, N);
        checkErr(err, "Kernel::setArg(1)");

        err = m_kernel.setArg(2, depth);
        checkErr(err, "Kernel::setArg(2)");

        err = m_kernel.setArg(3, escape2);
        checkErr(err, "Kernel::setArg(3)");

        cl::Event event;
        err = m_cmdq.enqueueNDRangeKernel(
            m_kernel,
            cl::NullRange,
            cl::NDRange(N*N),
            cl::NDRange(N, 1),
            NULL,
            &event);
        checkErr(err, "ComamndQueue::enqueueNDRangeKernel()");

        event.wait();
        err = m_cmdq.enqueueReadBuffer(
            outbuf,
            CL_TRUE,
            0,
            N*N*sizeof(T),
            buf);
        checkErr(err, "ComamndQueue::enqueueReadBuffer()");
    }
Esempio n. 25
0
    void maskUnreachable(cl::Context &ctx,
                         cl::CommandQueue &cmdQueue,
                         cl::Event &event,
                         std::vector<cl::Event> &deps,
                         cl::Buffer &energyMatrix,
                         int width,
                         int height,
                         int pitch,
                         int colsRemoved) {
        cl_int errNum;

        errNum = maskUnreachableKernel.setArg(0, energyMatrix);
        errNum |= maskUnreachableKernel.setArg(1, width);
        errNum |= maskUnreachableKernel.setArg(2, height);
        errNum |= maskUnreachableKernel.setArg(3, pitch);
        errNum |= maskUnreachableKernel.setArg(4, colsRemoved);


        if (errNum != CL_SUCCESS) {
            std::cerr << "Error setting maskUnreachable kernel arguments." << std::endl;
            exit(-1);
        }
        cl::NDRange offset = cl::NDRange(0, 0);
        cl::NDRange localWorkSize = cl::NDRange(16, 16);
        cl::NDRange globalWorkSize = cl::NDRange(math::roundUp(localWorkSize[0], width),
                                                 math::roundUp(localWorkSize[1], height));


        errNum = cmdQueue.enqueueNDRangeKernel(maskUnreachableKernel,
                                               offset,
                                               globalWorkSize,
                                               localWorkSize,
                                               &deps,
                                               &event);

        if (errNum != CL_SUCCESS) {
            std::cerr << "Error enqueueing maskUnreachable kernel." << std::endl;
            std::cerr << errNum << std::endl;
            exit(-1);
        }
    }
void OpenCL::addkernelarg(std::size_t i, std::vector<T> const & arg,
        cl::Kernel & kernel,std::vector<cl::Buffer> &outputbuffer,cl::CommandQueue &quene)const
{
    cl::Buffer buffer(this->context,CL_MEM_READ_WRITE,arg.size()*sizeof(T));
    outputbuffer.push_back(buffer);
//    std::cout << "enqeue\n";
    cl_int err = quene.enqueueWriteBuffer(buffer,CL_FALSE,0,sizeof(T)*arg.size(),&(arg[0]));
    if (err){
        std::cerr << "Error while pushing Vector. Errorcode: " << err << std::endl;
    }
    kernel.setArg(i,buffer);

}
Esempio n. 27
0
    void backtrack(cl::Context &ctx,
                   cl::CommandQueue &cmdQueue,
                   cl::Event &event,
                   std::vector<cl::Event> &deps,
                   cl::Buffer &energyMatrix,
                   cl::Buffer &vertSeamPath,
                   cl::Buffer &vertMinIdx,
                   int width,
                   int height,
                   int pitch,
                   int colsRemoved) {

        cl_int errNum;

        // Set kernel arguments
        errNum = backtrackKernel.setArg(0, energyMatrix);
        errNum |= backtrackKernel.setArg(1, vertSeamPath);
        errNum |= backtrackKernel.setArg(2, vertMinIdx);
        errNum |= backtrackKernel.setArg(3, width);
        errNum |= backtrackKernel.setArg(4, height);
        errNum |= backtrackKernel.setArg(5, pitch);
        errNum |= backtrackKernel.setArg(6, colsRemoved);

        if (errNum != CL_SUCCESS) {
            std::cerr << "Error setting backtrack kernel arguments." << std::endl;
            exit(-1);
        }

        cl::NDRange offset = cl::NDRange(0);
        cl::NDRange localWorkSize = cl::NDRange(1);
        cl::NDRange globalWorkSize = cl::NDRange(256);

        errNum = cmdQueue.enqueueNDRangeKernel(backtrackKernel,
                                               offset,
                                               globalWorkSize,
                                               localWorkSize,
                                               &deps,
                                               &event);


        if (errNum != CL_SUCCESS) {
            std::cerr << "Error enqueueing backTrack kernel for execution." << std::endl;
            exit(-1);
        }

        // /** DEBUGGING **/
        // int deviceResult[height];

        // mem::read(ctx, cmdQueue, deviceResult, vertSeamPath, height);
        // for (int i = height - 5; i < height; ++i) {
        //     std::cout << "deviceResult[" << i << "]=\t" << deviceResult[i] << std::endl;
        // }

    }
Esempio n. 28
0
int main(int argc, char *argv[])
{
	cl_int err = CL_SUCCESS;
	cl::Event evt;

	std::vector<cl::Platform> platforms;
	cl::Platform::get(&platforms);
	if (platforms.size() == 0) {
		return false;
	}
	platform_ = platforms[0];

	cl_context_properties properties[] = 
		{ CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0};
	context_ = cl::Context(CL_DEVICE_TYPE_GPU, properties, NULL, NULL, &err); 
	CHECK_CL_ERROR(err, "cl::Context");

	std::vector<cl::Device> devices = context_.getInfo<CL_CONTEXT_DEVICES>();
	if (devices.size() == 0) {
		return false;
	}
	device_ = devices[0];

	sources_.push_back(std::make_pair(source_str.c_str(), source_str.size()));
	program_ = cl::Program(context_, sources_);
	err = program_.build(devices);
	if (err != CL_SUCCESS) {
		std::string log = program_.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]);
		std::cout << "program.build() ERROR: " << log.c_str() << std::endl;
		return false;
	}

	kernel_ = cl::Kernel(program_, "hello", &err); 
	CHECK_CL_ERROR(err, "cl::Kernel");

	buf_ = cl::Buffer(context_, CL_MEM_READ_ONLY, 1024, NULL, &err);

	queue_ = cl::CommandQueue(context_, device_, 0, &err);
	CHECK_CL_ERROR(err, "cl::CommandQueue");

	kernel_.setArg(0, buf_);

	err = queue_.enqueueNDRangeKernel(kernel_, cl::NullRange, cl::NDRange(10, 10), cl::NullRange, NULL, &evt); 
	evt.wait();
	CHECK_CL_ERROR(err, "queue.enqueueNDRangeKernel()");

	return 0;
}
Esempio n. 29
0
void Buffer::addToKernel(cl::Kernel& kernel, unsigned int argIndex)
{
    if (clBuffer == NULL)
    {
        clBuffer = resourceManager->createBuffer(size);
    }
    try
    {
        kernel.setArg(argIndex, *clBuffer);
    } catch (cl::Error err)
    {
        std::cerr << "ERROR Setting Buffer kernel arg(" << argIndex << "): " << err.what() << "(" << 
            err.err() << ")" << std::endl;
        throw err;
    }
}
Esempio n. 30
0
/// \brief The preferred global and local size
/// \ingroup OpenCL
///
/// \return The difference between the preferred global size and the N
inline std::size_t cl_preferred_work_size (std::size_t N,
        const ::cl::Kernel &kern, const ::cl::Device &dev,
        std::size_t &global_size, std::size_t &local_size)
{
    cl::size_t<3> reqd_size;
    try {
        kern.getWorkGroupInfo(dev,
                CL_KERNEL_COMPILE_WORK_GROUP_SIZE, &reqd_size);
    } catch (const ::cl::Error &) {
        reqd_size[0] = 0;
    }

    if (reqd_size[0] != 0) {
        local_size = reqd_size[0];
        global_size = cl_min_global_size(N, local_size);

        return global_size - N;
    }

    std::size_t factor;
    std::size_t lmax;
    std::size_t mmax;
    cl_minmax_local_size(kern, dev, factor, lmax, mmax);
    if (lmax == 0) {
        global_size = N;
        local_size = 0;

        return global_size - N;
    }

    local_size = lmax;
    global_size = cl_min_global_size(N, local_size);
    std::size_t diff_size = global_size - N;
    for (std::size_t m = mmax; m >= 1; --m) {
        std::size_t l = m * factor;
        std::size_t g = cl_min_global_size(N, l);
        std::size_t d = g - N;
        if (d < diff_size) {
            local_size = l;
            global_size = g;
            diff_size = d;
        }
    }

    return diff_size;
}