Ejemplo n.º 1
0
 void initParticles(uint32_t num_particles)
 {
     m_geom = gl::Geometry::create();
     m_geom->setPrimitiveType(GL_POINTS);
     m_mesh = gl::Mesh::create(m_geom, m_pointMaterial);
     
     m_numParticles = num_particles;
     GLsizei numBytes = m_numParticles * sizeof(vec4);
     
     m_geom->vertices().resize(m_numParticles, vec3(0));
     m_geom->colors().resize(m_numParticles, vec4(1));
     m_geom->point_sizes().resize(m_numParticles, 9.f);
     m_geom->createGLBuffers();
     m_mesh->material()->setPointSize(2.f);
     scene().addObject(m_mesh);
     try
     {
         // shared position buffer for OpenGL / OpenCL
         m_positions = cl::BufferGL(m_context, CL_MEM_READ_WRITE, m_geom->vertexBuffer().id());
         m_colors = cl::BufferGL(m_context, CL_MEM_READ_WRITE, m_geom->colorBuffer().id());
         
         //create the OpenCL only arrays
         m_velocities = cl::Buffer( m_context, CL_MEM_WRITE_ONLY, numBytes );
         m_positionGen = cl::Buffer( m_context, CL_MEM_WRITE_ONLY, numBytes );
         m_velocityGen = cl::Buffer( m_context, CL_MEM_WRITE_ONLY, numBytes );
         
         vector<vec4> posGen, velGen;
         for (int i = 0; i < m_numParticles; i++)
         {
             posGen.push_back( vec4(glm::ballRand(20.0f), 1.f) );
             vec2 tmp = glm::linearRand(vec2(-100), vec2(100));
             float life = kinski::random(2.f, 5.f);
             float yVel = kinski::random<float>(5, 15);
             velGen.push_back(vec4(tmp.x, yVel, tmp.y, life));
             m_geom->point_sizes()[i] = kinski::random(5.f, 15.f);
         }
         m_geom->createGLBuffers();
         
         m_queue.enqueueWriteBuffer(m_velocities, CL_TRUE, 0, numBytes, &velGen[0]);
         m_queue.enqueueWriteBuffer(m_positionGen, CL_TRUE, 0, numBytes, &posGen[0]);
         m_queue.enqueueWriteBuffer(m_velocityGen, CL_TRUE, 0, numBytes, &velGen[0]);
         
         m_particleKernel.setArg(0, m_positions);
         m_particleKernel.setArg(1, m_colors);
         m_particleKernel.setArg(2, m_velocities);
         m_particleKernel.setArg(3, m_positionGen);
         m_particleKernel.setArg(4, m_velocityGen);
     }
     catch(cl::Error &error)
     {
         LOG_ERROR << error.what() << "(" << oclErrorString(error.err()) << ")";
     }
 }
Ejemplo n.º 2
0
	void copyToDevice(cl::CommandQueue &queue)
	{
		if(m_pElts==NULL)
			throw cl::Error(CL_INVALID_MEM_OBJECT, "copyToDevice - Buffer is not initialised.");
		
		queue.enqueueWriteBuffer(m_buffer, CL_TRUE, 0, m_cb, m_pElts);
	}
Ejemplo n.º 3
0
void simulationStep() {
    try {
        // copy
        auto buffer = cl::Buffer(context, CL_MEM_READ_ONLY,
                                 sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
                                 nullptr, nullptr);
        queue.enqueueWriteBuffer(buffer, CL_TRUE, 0,
                                 sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
                                 visualizationBufferCPU, NULL, NULL);

        // enque
        stepKernel.setArg(2, buffer);
        cl::NDRange global((size_t) (fieldWidth * fieldHeight));
        queue.enqueueNDRangeKernel(stepKernel, cl::NullRange, global, cl::NullRange);

        // read back
        queue.enqueueReadBuffer(visualizationBufferGPU, CL_TRUE, 0,
                                sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
                                visualizationBufferCPU, NULL, NULL);

        // finish
        queue.finish();
    } catch (cl::Error err) {
        std::cout << "Error: " << err.what() << "(" << err.err() << ")" << std::endl;
        exit(3);
    }

    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, fieldWidth, fieldHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE,
                 visualizationBufferCPU);
}
inline void OpenCL::addkernelarg(std::size_t i, T const (& arg)[N], cl::Kernel & kernel,cl::CommandQueue &quene) const
{
    cl::Buffer buffer(this->context,CL_MEM_READ_WRITE,N*sizeof(T));
//    std::cout << "enqeue\n";
    quene.enqueueWriteBuffer(buffer,CL_FALSE,0,sizeof(T)*N,&arg);
    kernel.setArg(i,buffer);

}
inline void OpenCL::addkernelarg(std::size_t i, std::vector<T> const & arg, cl::Kernel & kernel,cl::CommandQueue &quene) const
{
    cl::Buffer buffer(this->context,CL_MEM_READ_WRITE,arg.size()*sizeof(T));
//    std::cout << "enqeue\n";
    quene.enqueueWriteBuffer(buffer,CL_FALSE,0,sizeof(T)*arg.size(),&(arg[0]));
    kernel.setArg(i,buffer);

}
Ejemplo n.º 6
0
	cl::Event copyToDeviceAsync(cl::CommandQueue &queue)
	{
		if(m_pElts==NULL)
			throw cl::Error(CL_INVALID_MEM_OBJECT, "copyToDevice - Buffer is not initialised.");
		
		cl::Event complete;
		queue.enqueueWriteBuffer(m_buffer, CL_FALSE, 0, m_cb, m_pElts, NULL, &complete);
		return complete;
	}
Ejemplo n.º 7
0
 void write(const cl::CommandQueue &q, size_t offset, size_t size, const T *host,
         bool blocking = false) const
 {
     if (size)
         q.enqueueWriteBuffer(
                 buffer, blocking ? CL_TRUE : CL_FALSE,
                 sizeof(T) * offset, sizeof(T) * size, host
                 );
 }
Ejemplo n.º 8
0
void Skeleton::updateBuffer(cl::CommandQueue _queue) const
{
	bindPose->calculateOffsetTo(currentPose, bindToCurrentTransforms);

	for (auto& transform : bindToCurrentTransforms)
	{
		transform = glm::transpose(world * transform);
	}

	_queue.enqueueWriteBuffer(transformBuffer, false, 0, sizeof(glm::mat4) * bindToCurrentTransforms.size(), bindToCurrentTransforms.data());
}
Ejemplo n.º 9
0
	cl::Event copyToDeviceAsync(cl::CommandQueue &queue, const cl::Event &prior)
	{
		if(m_pElts==NULL)
			throw cl::Error(CL_INVALID_MEM_OBJECT, "copyToDevice - Buffer is not initialised.");
		
		cl::Event complete;
		std::vector<cl::Event> srcs;
		srcs.push_back(prior);
		queue.enqueueWriteBuffer(m_buffer, CL_FALSE, 0, m_cb, m_pElts, srcs, &complete);
		return complete;
	}
Ejemplo n.º 10
0
void randomizeField() {
    for (int i = 0; i < fieldHeight * fieldWidth; i++) {
        visualizationBufferCPU[i * 4 + 0] = 0;
        visualizationBufferCPU[i * 4 + 1] = 0;
        visualizationBufferCPU[i * 4 + 2] = 0;
        visualizationBufferCPU[i * 4 + 3] = (rand() % 5) * 255;
    }

    queue.enqueueWriteBuffer(visualizationBufferGPU, CL_TRUE, 0,
                             sizeof(unsigned char) * 4 * fieldWidth * fieldHeight,
                             visualizationBufferCPU, NULL, NULL);
}
void OpenCL::addkernelarg(std::size_t i, std::vector<T> const & arg,
        cl::Kernel & kernel,std::vector<cl::Buffer> &outputbuffer,cl::CommandQueue &quene)const
{
    cl::Buffer buffer(this->context,CL_MEM_READ_WRITE,arg.size()*sizeof(T));
    outputbuffer.push_back(buffer);
//    std::cout << "enqeue\n";
    cl_int err = quene.enqueueWriteBuffer(buffer,CL_FALSE,0,sizeof(T)*arg.size(),&(arg[0]));
    if (err){
        std::cerr << "Error while pushing Vector. Errorcode: " << err << std::endl;
    }
    kernel.setArg(i,buffer);

}
Ejemplo n.º 12
0
void CLArgument::copyToDevice( cl::CommandQueue &queue )
{
	assert( myBufferInitialized );

	if ( !myCopyTo )
		return;

	queue.enqueueWriteBuffer(
		myBuffer,
		CL_TRUE,
		0,
		mySize,
		myPtr
	);
}
Ejemplo n.º 13
0
/**
 * initialize tinymt status in device global memory
 * using 1 parameter for all generators.
 *@param tinymt_status device global memories
 *@param total total number of work items
 *@param local_item number of local work items
 *@param seed_array seeds for initialization
 *@param seed_size size of seed_array
 */
static void initialize_by_array(Buffer& tinymt_status,
                                int total,
                                int local_item,
                                uint64_t seed_array[],
                                int seed_size)
{
#if defined(DEBUG)
    cout << "initialize_by_array start" << endl;
#endif
    Buffer seed_array_buffer(context,
                             CL_MEM_READ_WRITE,
                             seed_size * sizeof(uint64_t));
    queue.enqueueWriteBuffer(seed_array_buffer,
                             CL_TRUE,
                             0,
                             seed_size * sizeof(uint64_t),
                             seed_array);
    Kernel init_kernel(program, "tinymt_init_array_kernel");
    init_kernel.setArg(0, tinymt_status);
    init_kernel.setArg(1, seed_array_buffer);
    init_kernel.setArg(2, seed_size);
    NDRange global(total);
    NDRange local(local_item);
    Event event;
    queue.enqueueNDRangeKernel(init_kernel,
                               NullRange,
                               global,
                               local,
                               NULL,
                               &event);
    double time = get_time(event);
    tinymt64j_t status[total];
    queue.enqueueReadBuffer(tinymt_status,
                            CL_TRUE,
                            0,
                            sizeof(tinymt64j_t) * total,
                            status);
    cout << "initializing time = " << time * 1000 << "ms" << endl;
    check_status(status, total);
#if defined(DEBUG)
    cout << "initialize_by_array end" << endl;
#endif
}
Ejemplo n.º 14
0
void MetaBallsApp::updateMarching()
{
    static const cl_int3 size{ VOLUME_WIDTH, VOLUME_HEIGHT, VOLUME_DEPTH };
    mClCommandQueue.enqueueNDRangeKernel( mKernWriteClear,
                                          cl::NullRange,
                                          cl::NDRange( VOLUME_SIZE ) );
    /* Update volumes */
    mClCommandQueue.enqueueNDRangeKernel( mKernWriteMetaballs,
                                          cl::NullRange,
                                          cl::NDRange(  VOLUME_SIZE ) );
    /* End */
    int zero = 0;
    auto kernelRange = (size.s[0]-1) * (size.s[1]-1) * (size.s[2]-1);
    mClCommandQueue.enqueueWriteBuffer( mClVertIndex, true, 0, sizeof(int), &zero );
    mClCommandQueue.enqueueNDRangeKernel( mKernConstructSurface,
                                          cl::NullRange,
                                          cl::NDRange( kernelRange ) );
    mClCommandQueue.enqueueReadBuffer( mClVertIndex,
                                       true, 0, sizeof(cl_int),
                                       &mMarchingVertsWritten );

    /* Generate Normals */
    if (mMarchingVertsWritten > 0) {
        bool smooth = true;
        if( ! smooth )
            mClCommandQueue.enqueueNDRangeKernel( mKernGenNormals,
                                                  cl::NullRange,
                                                  cl::NDRange( mMarchingVertsWritten ) );
        else
            mClCommandQueue.enqueueNDRangeKernel( mKernGenNormalsSmooth,
                                                  cl::NullRange,
                                                  cl::NDRange( mMarchingVertsWritten ) );
    }

    //if( mDebugDraw )
    mClCommandQueue.enqueueNDRangeKernel( mKernWritePointColorBack,
                                          cl::NullRange,
                                          cl::NDRange( VOLUME_SIZE ) );
}
Ejemplo n.º 15
0
int clPeak::runTransferBandwidthTest(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
{
  if(!isTransferBW)
    return 0;

  float timed, gbps;
  cl::NDRange globalSize, localSize;
  cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
  int iters = devInfo.transferBWIters;
  Timer timer;
  float *arr = NULL;

  cl_uint maxItems = devInfo.maxAllocSize / sizeof(float) / 2;
  cl_uint numItems;

  // Set an upper-limit for cpu devies
  if(devInfo.deviceType & CL_DEVICE_TYPE_CPU) {
    numItems = roundToPowOf2(maxItems, 26);
  } else {
    numItems = roundToPowOf2(maxItems);
  }

  try
  {
    arr = new float[numItems];
    cl::Buffer clBuffer = cl::Buffer(ctx, (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), (numItems * sizeof(float)));

    log->print(NEWLINE TAB TAB "Transfer bandwidth (GBPS)" NEWLINE);
    log->xmlOpenTag("transfer_bandwidth");
    log->xmlAppendAttribs("unit", "gbps");

    ///////////////////////////////////////////////////////////////////////////
    // enqueueWriteBuffer
    log->print(TAB TAB TAB "enqueueWriteBuffer         : ");

    // Dummy warm-up
    queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
    queue.finish();

    timed = 0;

    if(useEventTimer)
    {
      for(int i=0; i<iters; i++)
      {
        cl::Event timeEvent;
        queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
        queue.finish();
        timed += timeInUS(timeEvent);
      }
    } else
    {
      Timer timer;

      timer.start();
      for(int i=0; i<iters; i++)
      {
        queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
      }
      queue.finish();
      timed = timer.stopAndTime();
    }
    timed /= iters;

    gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
    log->print(gbps);   log->print(NEWLINE);
    log->xmlRecord("enqueuewritebuffer", gbps);
    ///////////////////////////////////////////////////////////////////////////
    // enqueueReadBuffer
    log->print(TAB TAB TAB "enqueueReadBuffer          : ");

    // Dummy warm-up
    queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
    queue.finish();

    timed = 0;
    if(useEventTimer)
    {
      for(int i=0; i<iters; i++)
      {
        cl::Event timeEvent;
        queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
        queue.finish();
        timed += timeInUS(timeEvent);
      }
    } else
    {
      Timer timer;

      timer.start();
      for(int i=0; i<iters; i++)
      {
        queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
      }
      queue.finish();
      timed = timer.stopAndTime();
    }
    timed /= iters;

    gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
    log->print(gbps);   log->print(NEWLINE);
    log->xmlRecord("enqueuereadbuffer", gbps);
    ///////////////////////////////////////////////////////////////////////////
    // enqueueMapBuffer
    log->print(TAB TAB TAB "enqueueMapBuffer(for read) : ");

    queue.finish();

    timed = 0;
    if(useEventTimer)
    {
      for(int i=0; i<iters; i++)
      {
        cl::Event timeEvent;
        void *mapPtr;

        mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)), NULL, &timeEvent);
        queue.finish();
        queue.enqueueUnmapMemObject(clBuffer, mapPtr);
        queue.finish();
        timed += timeInUS(timeEvent);
      }
    } else
    {
      for(int i=0; i<iters; i++)
      {
        Timer timer;
        void *mapPtr;

        timer.start();
        mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)));
        queue.finish();
        timed += timer.stopAndTime();

        queue.enqueueUnmapMemObject(clBuffer, mapPtr);
        queue.finish();
      }
    }
    timed /= iters;

    gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
    log->print(gbps);   log->print(NEWLINE);
    log->xmlRecord("enqueuemapbuffer", gbps);
    ///////////////////////////////////////////////////////////////////////////

    // memcpy from mapped ptr
    log->print(TAB TAB TAB TAB "memcpy from mapped ptr   : ");
    queue.finish();

    timed = 0;
    for(int i=0; i<iters; i++)
    {
      cl::Event timeEvent;
      void *mapPtr;

      mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)));
      queue.finish();

      timer.start();
      memcpy(arr, mapPtr, (numItems * sizeof(float)));
      timed += timer.stopAndTime();

      queue.enqueueUnmapMemObject(clBuffer, mapPtr);
      queue.finish();
    }
    timed /= iters;

    gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
    log->print(gbps);   log->print(NEWLINE);
    log->xmlRecord("memcpy_from_mapped_ptr", gbps);

    ///////////////////////////////////////////////////////////////////////////

    // enqueueUnmap
    log->print(TAB TAB TAB "enqueueUnmap(after write)  : ");

    queue.finish();

    timed = 0;
    if(useEventTimer)
    {
      for(int i=0; i<iters; i++)
      {
        cl::Event timeEvent;
        void *mapPtr;

        mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
        queue.finish();
        queue.enqueueUnmapMemObject(clBuffer, mapPtr, NULL, &timeEvent);
        queue.finish();
        timed += timeInUS(timeEvent);
      }
    } else
    {
      for(int i=0; i<iters; i++)
      {
        Timer timer;
        void *mapPtr;

        mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
        queue.finish();

        timer.start();
        queue.enqueueUnmapMemObject(clBuffer, mapPtr);
        queue.finish();
        timed += timer.stopAndTime();
      }
    }
    timed /= iters;
    gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;

    log->print(gbps);   log->print(NEWLINE);
    log->xmlRecord("enqueueunmap", gbps);
    ///////////////////////////////////////////////////////////////////////////

    // memcpy to mapped ptr
    log->print(TAB TAB TAB TAB "memcpy to mapped ptr     : ");
    queue.finish();

    timed = 0;
    for(int i=0; i<iters; i++)
    {
      cl::Event timeEvent;
      void *mapPtr;

      mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
      queue.finish();

      timer.start();
      memcpy(mapPtr, arr, (numItems * sizeof(float)));
      timed += timer.stopAndTime();

      queue.enqueueUnmapMemObject(clBuffer, mapPtr);
      queue.finish();
    }
    timed /= iters;

    gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
    log->print(gbps);   log->print(NEWLINE);
    log->xmlRecord("memcpy_to_mapped_ptr", gbps);

    ///////////////////////////////////////////////////////////////////////////
    log->xmlCloseTag();     // transfer_bandwidth

    if(arr)     delete [] arr;
  }
  catch(cl::Error error)
  {
    stringstream ss;
    ss << error.what() << " (" << error.err() << ")" NEWLINE
       << TAB TAB TAB "Tests skipped" NEWLINE;
    log->print(ss.str());

    if(arr)     delete [] arr;
    return -1;
  }

  return 0;
}
Ejemplo n.º 16
0
void PTWeekend::setup()
{
    
    /* Scene data */
    camera.lookAt(glm::vec3(-2,1,1), vec3(0, 0, -1.0f), vec3(0,1,0));
    camera.setPerspective( 45.0f, getWindowAspectRatio(), 0.01f, 100.0f );
    cameraUI = CameraUi(&camera, getWindow());
    
    
    
    glm::vec3 bottom_sky_color(1.0, 1.0, 1.0);
    glm::vec3 top_sky_color(0.5, 0.7, 1.0);
    
    CGLContextObj glContext = CGLGetCurrentContext();
    CGLShareGroupObj shareGroup = CGLGetShareGroup(glContext);
    
    GLuint imgTexName;
    
    const char* program_file_str = "../../../assets/path_tracing.cl";
    
    /* Obtain a platform */
    std::vector<cl::Platform> platforms;
    clStatus = cl::Platform::get(&platforms);
    pt_assert(clStatus, "Could not find an OpenCL platform.");
    
    /* Obtain a device and determinte max local size */
    std::vector<cl::Device> devices;
    clStatus = platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices);
    pt_assert(clStatus, "Could not find a GPU device.");
    device = devices[0];
    
    /* Create an OpenCL context for the device */
    cl_context_properties properties[] = {
        CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
        (cl_context_properties)shareGroup,
        0
    };
    
    context = cl::Context({device}, properties, NULL, NULL, &clStatus);
    pt_assert(clStatus, "Could not create a context for device.");
    
    /* Load and build a program */
    std::ifstream program_file(program_file_str);
    std::string program_str(std::istreambuf_iterator<char>(program_file), (std::istreambuf_iterator<char>()));
    cl::Program::Sources sources(1, std::make_pair(program_str.c_str(), program_str.length() + 1));
    program = cl::Program(context, sources);
    clStatus = program.build({device}, "-I ../../../assets/ -cl-denorms-are-zero");
    
    if (clStatus != CL_SUCCESS)
    {
        std::string log = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
        std::cerr << log << "\n";
        exit(EXIT_FAILURE);
    }
    
    /* Create command queue */
    cmd_queue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &clStatus);
    pt_assert(clStatus, "Could not create command queue");
    
    /* create kernel and set the kernel arguments */
    kernel = cl::Kernel(program, "path_tracing", &clStatus);
    pt_assert(clStatus, "Could not create kernel");
    
    img_width = getWindowWidth();
    img_height = getWindowHeight();
    
    true_img_width = getWindowWidth();
    true_img_height = getWindowHeight();
    
    local_size = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>(); //TODO: throws
    local_width = (size_t)pow(2, ceilf(log2f((floorf(sqrtf(local_size))))));
    local_height = local_size / local_width;
    
    img_width = ceilf((float)img_width / (float)local_width) * local_width;
    img_height = ceilf((float)img_height / (float)local_height) * local_height;
    
    
    unsigned int samples = 16;
    
    /* Create GL texture and CL wrapper */
    glGenTextures(1, &imgTexName);
    glBindTexture(GL_TEXTURE_2D, imgTexName);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, img_width, img_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0);
    glBindTexture(GL_TEXTURE_2D, 0);
    imgTex = gl::Texture2d::create(GL_TEXTURE_2D, imgTexName, img_width, img_height, true);
    glFinish();
    
    img_buffer.push_back(cl::Image2DGL(context, CL_MEM_WRITE_ONLY, GL_TEXTURE_2D, 0, imgTexName, &clStatus));
    pt_assert(clStatus, "Could not create buffer");
    
    /* Create all buffers */
    cam_buffer = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(cl_pinhole_cam), NULL, &clStatus);
    pt_assert(clStatus, "Could not create camera buffer");
    
    primitive_buffer = cl::Buffer (context, CL_MEM_READ_ONLY, MAX_PRIMITIVES * sizeof(cl_sphere), NULL, &clStatus);
    pt_assert(clStatus, "Could not create primitive buffer");
    
    material_buffer = cl::Buffer (context, CL_MEM_READ_ONLY, MAX_PRIMITIVES * sizeof(cl_material), NULL, &clStatus);
    pt_assert(clStatus, "Could not create primitive buffer");
    
    sky_buffer = cl::Buffer(context, CL_MEM_READ_ONLY, sizeof(cl_sky_material), NULL, &clStatus);
    pt_assert(clStatus, "Could not create sky buffer");
    
    /* Upload scene (static) */
    
    size_t sceneObjectCount = 5;
    cl_sphere* primitive_array = (cl_sphere*)malloc(sceneObjectCount * sizeof(cl_sphere));
    cl_material* material_array = (cl_material*)malloc(sceneObjectCount * sizeof(cl_material));
    
    primitive_array[0] = cl_make_sphere(glm::vec3(1, 0, -1), 0.5f);
    material_array[0] = cl_make_material(pt::ColorHex_to_RGBfloat<float>("0x730202"), 0, MAT_LAMBERTIAN);
    
    primitive_array[1] = cl_make_sphere(glm::vec3(-1, 0, -1), 0.5f);
    material_array[1] = cl_make_material(pt::ColorHex_to_RGBfloat<float>("0xF89000"), 0, MAT_LAMBERTIAN);
    
    primitive_array[2] = cl_make_sphere(glm::vec3(0, 0, 0), 0.5f);
    material_array[2] = cl_make_material(pt::ColorHex_to_RGBfloat<float>("0x97A663"), 0.1f, MAT_METALLIC);
    
    primitive_array[3] = cl_make_sphere(glm::vec3(0, 0, -2), 0.5f);
    material_array[3] = cl_make_material(glm::vec3(0.8f, 0.6f, 0.2f), 0.3f, MAT_METALLIC);
    
    primitive_array[4] = cl_make_sphere(glm::vec3(0,-100.5f, 1.0f), 100.0f);
    material_array[4] = cl_make_material(glm::vec3(0.5f), 0, MAT_LAMBERTIAN);
    
    clStatus = cmd_queue.enqueueWriteBuffer(primitive_buffer, CL_TRUE, 0, sceneObjectCount * sizeof(cl_sphere), primitive_array, NULL, NULL);
    pt_assert(clStatus, "Could not fill primitive buffer");
    
    clStatus = cmd_queue.enqueueWriteBuffer(material_buffer, CL_TRUE, 0, sceneObjectCount * sizeof(cl_material), material_array, NULL, NULL);
    pt_assert(clStatus, "Could not fill material buffer");
    
    pt_assert(cl_set_skycolors(bottom_sky_color, top_sky_color, sky_buffer, cmd_queue),
              "Could not fill sky buffer");
    
    clStatus = kernel.setArg(1, primitive_buffer);
    pt_assert(clStatus, "Could not set primitive buffer argument");
    
    clStatus = kernel.setArg(2, material_buffer);
    pt_assert(clStatus, "Could not set material buffer argument");
    
    clStatus = kernel.setArg(3, sky_buffer);
    pt_assert(clStatus, "Could not set sky buffer argument");
    
    clStatus = kernel.setArg(4, sceneObjectCount);
    pt_assert(clStatus, "Could not set primitive count count argument");
    
    clStatus = kernel.setArg(5, img_buffer[0]);
    pt_assert(clStatus, "Could not set img buffer argument");
    
    clStatus = kernel.setArg(6, samples);
    pt_assert(clStatus, "Could not set samples argument");
    
    clStatus = kernel.setArg(0, cam_buffer);
    pt_assert(clStatus, "Could not set camera buffer argument");
}
void watershed(int width, int height,
               cl::Buffer& src,
               cl::Buffer& labeled,
               ProgramCache& cache,
               cl::CommandQueue& queue)
{

#ifdef OPENCL_PROFILE
    watershed_descent_kernel_time = 0;
    watershed_increment_kernel_time = 0;
    watershed_minima_kernel_time = 0;
    watershed_plateau_kernel_time = 0;
    watershed_flood_kernel_time = 0;
#endif

    cl::Context context = queue.getInfo<CL_QUEUE_CONTEXT>();

    std::stringstream params_stream;
    params_stream << "-DBLOCK_SIZE=";
    params_stream << BLOCK_SIZE;

    std::string program_params = params_stream.str();

    cl::Program& program = cache.getProgram("Watershed", program_params);
    
    cl::Kernel descent_kernel(program, "descent_kernel");
    cl::Kernel increment_kernel(program, "increment_kernel");
    cl::Kernel minima_kernel(program, "minima_kernel");
    cl::Kernel plateau_kernel(program, "plateau_kernel");
    cl::Kernel flood_kernel(program, "flood_kernel");

    //setting constant memory with neigbourhood
    cl::Buffer cl_neighbourhood_x = cl::Buffer(context,CL_MEM_READ_ONLY,
                                               sizeof(neighbourhood_x));
    cl::Buffer cl_neighbourhood_y = cl::Buffer(context, CL_MEM_READ_ONLY,
                                               sizeof(neighbourhood_y));

#ifdef OPENCL_PROFILE
    cl::Event first_event;
    queue.enqueueWriteBuffer(cl_neighbourhood_x, CL_TRUE, 0,
                             sizeof(neighbourhood_x),
                             neighbourhood_x, __null, &first_event);
#else
    queue.enqueueWriteBuffer(cl_neighbourhood_x, CL_TRUE, 0,
                             sizeof(neighbourhood_x), neighbourhood_x);
#endif

    queue.enqueueWriteBuffer(cl_neighbourhood_y, CL_TRUE, 0,
                             sizeof(neighbourhood_y), neighbourhood_y);

    //const size_t block_size = 6;
    //cl::LocalSpaceArg local_mem = cl::__local(block_size * block_size * sizeof(float));
    cl::LocalSpaceArg local_mem = cl::__local(BLOCK_SIZE * BLOCK_SIZE * sizeof(float));

    //setting args for descent_kernel
    descent_kernel.setArg(0, src);
    descent_kernel.setArg(1, labeled);
    descent_kernel.setArg(2, cl_neighbourhood_x);
    descent_kernel.setArg(3, cl_neighbourhood_y);
    descent_kernel.setArg(4, local_mem);
    descent_kernel.setArg(5, width);
    descent_kernel.setArg(6, height);

    size_t global_width = (width / (BLOCK_SIZE - 2) + 1) * BLOCK_SIZE;
    size_t global_height = (height / (BLOCK_SIZE - 2) + 1) * BLOCK_SIZE;

#ifdef DEBUG_PRINT
    std::cout << "global width=" << global_width
              << " global height=" << global_height << std::endl;
#endif

    cl::NDRange global(global_width, global_height);
    cl::NDRange local(BLOCK_SIZE, BLOCK_SIZE);

    cl_int status;

#ifdef OPENCL_PROFILE
    {
        VECTOR_CLASS<cl::Event> events_vector(1);
        status = queue.enqueueNDRangeKernel(descent_kernel, cl::NullRange,
                                            global, local, __null,
                                            &events_vector[0]);

        cl::WaitForEvents(events_vector);

        cl::Event& event = events_vector[0];

        cl_ulong start = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
        cl_ulong end = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
        cl_ulong total_time = end - start;

        watershed_descent_kernel_time = total_time;
    }
#else
    status = queue.enqueueNDRangeKernel(descent_kernel, cl::NullRange,
                                        global, local);
#endif

#ifdef DEBUG_PRINT
    std::cout << "kernel execution " << status << std::endl;
#endif

   // queue.flush();
   // queue.enqueueBarrier();

    /* PREPARING INCREMENT KERNEL */

    increment_kernel.setArg(0, labeled);
    increment_kernel.setArg(1, width);
    increment_kernel.setArg(2, height);

#ifdef OPENCL_PROFILE
    {
        VECTOR_CLASS<cl::Event> events_vector(1);
        status = queue.enqueueNDRangeKernel(increment_kernel, cl::NullRange,
                                            global, local, __null,
                                            &events_vector[0]);

        cl::WaitForEvents(events_vector);

        cl::Event& event = events_vector[0];

        cl_ulong start = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
        cl_ulong end = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
        cl_ulong total_time = end - start;

        watershed_increment_kernel_time = total_time;
    }
#else
    status = queue.enqueueNDRangeKernel(increment_kernel, cl::NullRange,
                                        global, local);
#endif

//    queue.enqueueBarrier();


    /* PREPARING MINIMA KERNEL */

    int counter_tmp = 0;
    cl::Buffer counter(context, CL_MEM_READ_WRITE, sizeof(int));
    queue.enqueueWriteBuffer(counter, CL_TRUE, 0, sizeof(int), &counter_tmp);

    queue.enqueueBarrier();

    minima_kernel.setArg(0, counter);
    minima_kernel.setArg(1, labeled);
    minima_kernel.setArg(2, cl_neighbourhood_x);
    minima_kernel.setArg(3, cl_neighbourhood_y);
    minima_kernel.setArg(4, local_mem);
    minima_kernel.setArg(5, width);
    minima_kernel.setArg(6, height);

    int old_val = -1;
    int new_val = -2;
    int c = 0;


    while(old_val != new_val)
    {
        old_val = new_val;

#ifdef OPENCL_PROFILE
        {
            VECTOR_CLASS<cl::Event> events_vector(1);
            status = queue.enqueueNDRangeKernel(minima_kernel, cl::NullRange,
                                                global, local, __null,
                                                &events_vector[0]);

            cl::WaitForEvents(events_vector);
            cl::Event& event = events_vector[0];
            cl_ulong start = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
            cl_ulong end = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
            cl_ulong total_time = end - start;

            watershed_minima_kernel_time += total_time;
        }
#else
        status = queue.enqueueNDRangeKernel(minima_kernel, cl::NullRange,
                                            global, local);
#endif
        queue.enqueueReadBuffer(counter, CL_TRUE, 0, sizeof(int), &new_val);
        c++;
    }

#ifdef DEBUG_PRINT
    std::cout << "step 2: " << c << " iterations" << std::endl;
#endif

    /* PREPARING PLATEAU KERNEL */

    queue.enqueueWriteBuffer(counter, CL_TRUE, 0, sizeof(int), &counter_tmp);

    queue.enqueueBarrier();

    plateau_kernel.setArg(0, counter);
    plateau_kernel.setArg(1, src);
    plateau_kernel.setArg(2, labeled);
    plateau_kernel.setArg(3, cl_neighbourhood_x);
    plateau_kernel.setArg(4, cl_neighbourhood_y);
    plateau_kernel.setArg(5, local_mem);
    plateau_kernel.setArg(6, width);
    plateau_kernel.setArg(7, height);

    old_val = -1;
    new_val = -2;
    c = 0;

#ifdef OPENCL_PROFILE
    watershed_plateau_kernel_time = 0;
#endif

    while(old_val != new_val)
    {
        old_val = new_val;

#ifdef OPENCL_PROFILE
        {
            VECTOR_CLASS<cl::Event> events_vector(1);
            status = queue.enqueueNDRangeKernel(plateau_kernel, cl::NullRange,
                                                global, local, __null,
                                                &events_vector[0]);

            cl::WaitForEvents(events_vector);
            cl::Event& event = events_vector[0];
            cl_ulong start = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
            cl_ulong end = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
            cl_ulong total_time = end - start;

            watershed_plateau_kernel_time += total_time;
        }
#else
        status = queue.enqueueNDRangeKernel(plateau_kernel, cl::NullRange,
                                            global, local);
#endif
        queue.enqueueReadBuffer(counter, CL_TRUE, 0, sizeof(int), &new_val);
        c++;
    }

#ifdef DEBUG_PRINT
    std::cout << "step 3: " << c << " iterations" << std::endl;
#endif

    //preparing flood kernel

    queue.enqueueWriteBuffer(counter, CL_TRUE, 0, sizeof(int), &counter_tmp);
    queue.enqueueBarrier();

    flood_kernel.setArg(0, counter);
    flood_kernel.setArg(1, labeled);
    flood_kernel.setArg(2, width);
    flood_kernel.setArg(3, height);

    old_val = -1;
    new_val = -2;
    c = 0;

    int new_block_size = 16;
    local = cl::NDRange(new_block_size, new_block_size);

    int n_width = ((width - 1) / new_block_size + 2) * new_block_size;
    int n_height = ((height - 1) / new_block_size + 2) * new_block_size;

    global = cl::NDRange(n_width, n_height);

#ifdef DEBUG_PRINT
    std::cout << "flood kernel invocation params:" << std::endl;
    std::cout << "local: " << local[0] << ", " << local[1] << std::endl;
    std::cout << "global: " << global[0] << ", " << global[1] << std::endl;
#endif

#ifdef OPENCL_PROFILE
    cl::Event last_event;
#endif


#ifdef OPENCL_PROFILE
    watershed_flood_kernel_time = 0;
#endif

    while(old_val != new_val)
    {
        old_val = new_val;

#ifdef OPENCL_PROFILE
        {
            VECTOR_CLASS<cl::Event> events_vector(1);
            status = queue.enqueueNDRangeKernel(flood_kernel, cl::NullRange,
                                                global, local, __null,
                                                &events_vector[0]);

            cl::WaitForEvents(events_vector);
            cl::Event& event = events_vector[0];
            cl_ulong start = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
            cl_ulong end = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
            cl_ulong total_time = end - start;

            watershed_flood_kernel_time += total_time;
        }
#else
        status = queue.enqueueNDRangeKernel(flood_kernel, cl::NullRange,
                                            global, local);
#endif

#ifdef OPENCL_PROFILE
        queue.enqueueReadBuffer(counter, CL_TRUE, 0, sizeof(int),
                                &new_val, __null, &last_event);
#else
        queue.enqueueReadBuffer(counter, CL_TRUE, 0, sizeof(int), &new_val);
#endif
        c++;
    }

#ifdef OPENCL_PROFILE
    watershed_descent_kernel_time /= TIME_DIVISOR;
    watershed_increment_kernel_time /= TIME_DIVISOR;
    watershed_minima_kernel_time /= TIME_DIVISOR;
    watershed_plateau_kernel_time /= TIME_DIVISOR;
    watershed_flood_kernel_time /= TIME_DIVISOR;
#endif

#ifdef DEBUG_PRINT
    std::cout << "step 4: " << c << " iterations" << std::endl;
#endif

#ifdef OPENCL_PROFILE
    last_event.wait();

    cl_ulong start = first_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
    cl_ulong end = last_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
    cl_ulong total_time = end - start;

    setLastExecutionTime(total_time/TIME_DIVISOR);
#endif
}
Ejemplo n.º 18
0
void mainLoop( cl::CommandQueue& queue, cl::Context& context, cl::Kernel kernel, cl::Buffer clImgDesc, cl::Buffer clCamera ){
  cl::Event eAcquire, eRelease, eExecute;
  cl_int err;


  glFinish();
  checkGLErr( "glFinish()" );

  queue.enqueueWriteBuffer( clImgDesc, CL_TRUE, 0, 1 * sizeof(ImageDescriptor), (const void*)&imgDesc);

  err = queue.enqueueAcquireGLObjects( vSharedUnits, NULL, &eAcquire );
  checkErr(err, "CommandQueue::enqueueAcquireGLObjects()");

  eAcquire.wait();


  err = queue.enqueueNDRangeKernel( kernel, cl::NullRange, cl::NDRange(WIDTH, HEIGHT), cl::NullRange, NULL, &eExecute);

  checkErr(err, "CommandQueue::enqueueNDRangeKernel()");
  //std::cout<<"Kernel executing"<< std::endl ;
  clock_t ti = clock();
  eExecute.wait();
  clock_t tf = clock();

  queue.finish();
  err = queue.enqueueReleaseGLObjects( vSharedUnits, NULL, &eRelease );
  checkErr(err, "CommandQueue::enqueueReleaseGLObjects()");

  eRelease.wait();


  imgDesc.numSamples += SAMPLES;

  pAccumulator->glBind( GL_DRAW_FRAMEBUFFER );
  checkGLErr( "glBind GL_DRAW_FRAMEBUFFER, Accumulator " );
  pCLTarget->glBind( GL_READ_FRAMEBUFFER );
  checkGLErr( "glBind GL_READ_FRAMEBUFFER, Main Target " );
  glBlitFramebuffer( 0, 0, WIDTH, HEIGHT, 0, 0, WIDTH, HEIGHT, GL_COLOR_BUFFER_BIT, GL_NEAREST );
  checkGLErr( "glBlitFramebuffer" );

  glBindFramebuffer( GL_DRAW_FRAMEBUFFER, 0 );
  checkGLErr( "glBind GL_DRAW_FRAMEBUFFER, 0 " );
  pCLTarget->glBind( GL_READ_FRAMEBUFFER );
  checkGLErr( "glBind GL_READ_FRAMEBUFFER, something " );
  glBlitFramebuffer( 0, 0, WIDTH, HEIGHT, 0, 0, WIDTH, HEIGHT, GL_COLOR_BUFFER_BIT, GL_NEAREST );
  checkGLErr( "glBlitFramebuffer" );

  glfwPollEvents();

  pCamera->glfwHandleCursor( ((float)(tf - ti))/(CLOCKS_PER_SEC * 1.0f) );
  if( sceneChanged() ){
    //printf("scene changed..!");
    imgDesc.numSamples = 0;
    CLCamera* cam = pCamera->getCLCamera();
    queue.enqueueWriteBuffer( clCamera, CL_TRUE, 0, 1 * sizeof(CLCamera), (const void*)cam );
    delete cam;
  }

  glfwSwapBuffers( window );
  checkGLErr( "glSwapBuffers" );

  //Block for a while.
  //int i;
  //std::cin >> i;

  //float timeTaken = ( (float)(tf - ti) ) / (float)CLOCKS_PER_SEC;
  //std::cout<<"Time taken: "<< timeTaken * 1000 << "ms" << std::endl;
  //std::cout<<"Predicted FPS: "<< 1 / timeTaken << " FPS"<< std::endl;
  if( imgDesc.numSamples % 10 == 0 )
    std::cout<<"numSamples: "<<imgDesc.numSamples<<std::endl;
  //handleFrameCounter();

}
Ejemplo n.º 19
0
int main()
{
    try {
        std::vector<cl::Device> devices;

        // select platform
        cl::Platform platform = selectPlatform();

        // select device
        platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
        cl::Device device = selectDevice(devices);

        // create context
        context = cl::Context(devices);

        // create command queue
        queue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE);

        // load opencl source
        std::ifstream cl_file("inclusive_scan.cl");

        std::string cl_string{std::istreambuf_iterator<char>(cl_file),
                    std::istreambuf_iterator<char>()};

        cl::Program::Sources source(1,
                                    std::make_pair(cl_string.c_str(),
                                                   cl_string.length() + 1));

        // create programm
        program = cl::Program(context, source);

        // compile opencl source
        try {
            program.build(devices);

            size_t input_size;
            std::ifstream input_file("input.txt");
            input_file >> input_size;

            std::vector<float> input(input_size);

//            for (size_t i = 0; i < input_size; ++i) {
//                input[i] = i % 10;
//            }

            for (int i = 0; i < input_size; i++) {
                input_file >> input[i];
            }

            std::vector<float> output(input_size, 0);

            cl::Buffer dev_input (context, CL_MEM_READ_ONLY, sizeof(float) * input_size);
            queue.enqueueWriteBuffer(dev_input, CL_TRUE, 0, sizeof(float) * input_size, &input[0]);

            cl::Buffer dev_output = inclusive_scan(dev_input, input_size);

            queue.enqueueReadBuffer(dev_output, CL_TRUE, 0, sizeof(float) * input_size, &output[0]);
            queue.finish();

            cpu_check(input, output);

            std::ofstream output_file("output.txt");
            for (int i = 0; i < input_size; i++) {
                output_file << output[i] << " ";
            }

        }
        catch (cl::Error const & e) {
            std::string log_str = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
            std::cout << std::endl << e.what() << " : " << e.err() << std::endl;
            std::cout << log_str;
            return 0;
        }


    }
    catch (cl::Error const & e) {
        std::cout << "Error: " << e.what() << " #" << e.err() << std::endl;
    }

    return 0;
}
Ejemplo n.º 20
0
bool runTestType(cl::Context context, cl::CommandQueue queue)
{
  cl_uint size = (1 << 16) + 16384;

  std::vector<T> input(size);

  std::cout << "##Testing AMD radix sort for " << input.size() << " elements and type " 
	    << magnet::CL::detail::traits<T>::kernel_type();
  
  for(size_t i = 0; i < input.size(); ++i)
    input[i] = input.size() - i - 1;
  
  // create input buffer using pinned memory
  cl::Buffer bufferIn(context, CL_MEM_ALLOC_HOST_PTR |
		      CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, 
		      sizeof(T) * input.size(), &input[0]);
  
  magnet::CL::radixSortAMD<T> radixSortFunctor;
  radixSortFunctor.build(queue, context);
  radixSortFunctor(bufferIn, bufferIn);

  std::vector<T> output(size);
 
  queue.enqueueReadBuffer(bufferIn, CL_TRUE, 0, input.size() *
			  sizeof(T), &output[0]);

  bool failed = !testOutput(input, output);

  std::cout << " key(only) " << (failed ? "FAILED" : "PASSED") << ", "; 

  //Now test with some data!
  //Refresh the input array
  queue.enqueueWriteBuffer(bufferIn, CL_TRUE, 0, input.size() *
			   sizeof(T), &input[0]);

  //Write a data array
  std::vector<cl_uint> data(size);
  for(size_t i = 0; i < input.size(); ++i)
    data[i] = i;

  cl::Buffer dataIn(context, CL_MEM_ALLOC_HOST_PTR |
		    CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, 
		    sizeof(cl_uint) * data.size(), &data[0])
    ;

  radixSortFunctor(bufferIn, dataIn, bufferIn, dataIn);
  
  queue.enqueueReadBuffer(dataIn, CL_TRUE, 0, data.size() *
			  sizeof(cl_uint), &data[0]);

  bool keyfail = !testOutput(input, output);

  std::cout << " key " << (keyfail ? "FAILED" : "PASSED"); 

  bool datafail = false;
  for(size_t i = 0; i < input.size(); ++i)
    if (data[i] != input.size() - 1 - i)
      datafail = true;

  std::cout << " data " << (datafail ? "FAILED" : "PASSED") << std::endl;
  
  return failed || keyfail || datafail;
}
Ejemplo n.º 21
0
bool runTestType(cl::Context context, cl::CommandQueue queue)
{
  cl_uint size = 64 * 256;

  std::vector<T> input(size);

  for(size_t i = 0; i < input.size(); ++i)
    input[i] = input.size() - i - 1;
  
  // create input buffer using pinned memory
  cl::Buffer bufferIn(context, CL_MEM_ALLOC_HOST_PTR |
		      CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, 
		      sizeof(T) * input.size(), &input[0])
    ;
  
  magnet::CL::sort<T> sortFunctor;
  sortFunctor.build(queue, context);
  sortFunctor(bufferIn);

  std::cout << "##Testing generic sort (";
  switch(sortFunctor.getMode())
    {
    case magnet::CL::sort<T>::CPU:
      std::cout << "HeapSort";
      break;
    case magnet::CL::sort<T>::NVIDIA:
      std::cout << "radixNVIDIA";
      break;
    case magnet::CL::sort<T>::AMD:
      std::cout << "radixAMD";
      break;
    default:
      M_throw() << "Could not determine which sorting algorithm is being used";
    }

  std::cout << ") for " << input.size() << " elements and type " 
	    << magnet::CL::detail::traits<T>::kernel_type();
  

  std::vector<T> output(size);
 
  queue.enqueueReadBuffer(bufferIn, CL_TRUE, 0, input.size() *
			  sizeof(T), &output[0]);

  bool failed = !testOutput(input, output);

  std::cout << " key(only) " << (failed ? "FAILED" : "PASSED") << ", "; 

  //Now test with some data!
  //Refresh the input array
  queue.enqueueWriteBuffer(bufferIn, CL_TRUE, 0, input.size() *
			   sizeof(T), &input[0]);

  //Write a data array
  std::vector<cl_uint> data(size);
  for(size_t i = 0; i < input.size(); ++i)
    data[i] = i;

  cl::Buffer dataIn(context, CL_MEM_ALLOC_HOST_PTR |
		    CL_MEM_COPY_HOST_PTR | CL_MEM_READ_WRITE, 
		    sizeof(cl_uint) * data.size(), &data[0])
    ;

  sortFunctor(bufferIn, dataIn);
  
  queue.enqueueReadBuffer(dataIn, CL_TRUE, 0, data.size() *
			  sizeof(cl_uint), &data[0]);

  bool keyfail = false;//!testOutput(input, output);

  std::cout << " key " << (keyfail ? "FAILED" : "PASSED"); 

  bool datafail = false;
  for(size_t i = 0; i < input.size(); ++i)
    if (data[i] != input.size() - 1 - i)
      datafail = true;

  std::cout << " data " << (datafail ? "FAILED" : "PASSED") << std::endl;
  
  return failed || keyfail || datafail;
}
Ejemplo n.º 22
0
int clPeak::runTransferBandwidthTest(cl::CommandQueue &queue, cl::Program &prog, device_info_t &devInfo)
{
    if(!isTransferBW)
        return 0;

    float timed, gbps;
    cl::NDRange globalSize, localSize;
    cl::Context ctx = queue.getInfo<CL_QUEUE_CONTEXT>();
    int iters = devInfo.transferBWIters;
    Timer timer;

    cl_uint maxItems = devInfo.maxAllocSize / sizeof(float) / 2;
    cl_uint numItems;

    // Set an upper-limit for cpu devies
    if(devInfo.deviceType & CL_DEVICE_TYPE_CPU) {
        numItems = roundToPowOf2(maxItems, 26);
    } else {
        numItems = roundToPowOf2(maxItems);
    }

    float *arr = new float[numItems];

    try
    {
        cl::Buffer clBuffer = cl::Buffer(ctx, (CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), (numItems * sizeof(float)));

        cout << NEWLINE TAB TAB "Transfer bandwidth (GBPS)" << endl;
        cout << setprecision(2) << fixed;

        ///////////////////////////////////////////////////////////////////////////
        // enqueueWriteBuffer
        cout << TAB TAB TAB "enqueueWriteBuffer         : ";    cout.flush();

        // Dummy warm-up
        queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
        queue.finish();

        timed = 0;

        if(useEventTimer)
        {
            for(int i=0; i<iters; i++)
            {
                cl::Event timeEvent;
                queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
                queue.finish();
                timed += timeInUS(timeEvent);
            }
        } else
        {
            Timer timer;

            timer.start();
            for(int i=0; i<iters; i++)
            {
                queue.enqueueWriteBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
            }
            queue.finish();
            timed = timer.stopAndTime();
        }
        timed /= iters;

        gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
        cout << gbps << endl;
        ///////////////////////////////////////////////////////////////////////////
        // enqueueReadBuffer
        cout << TAB TAB TAB "enqueueReadBuffer          : ";    cout.flush();

        // Dummy warm-up
        queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
        queue.finish();

        timed = 0;
        if(useEventTimer)
        {
            for(int i=0; i<iters; i++)
            {
                cl::Event timeEvent;
                queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr, NULL, &timeEvent);
                queue.finish();
                timed += timeInUS(timeEvent);
            }
        } else
        {
            Timer timer;

            timer.start();
            for(int i=0; i<iters; i++)
            {
                queue.enqueueReadBuffer(clBuffer, CL_TRUE, 0, (numItems * sizeof(float)), arr);
            }
            queue.finish();
            timed = timer.stopAndTime();
        }
        timed /= iters;

        gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
        cout << gbps << endl;
        ///////////////////////////////////////////////////////////////////////////
        // enqueueMapBuffer
        cout << TAB TAB TAB "enqueueMapBuffer(for read) : ";    cout.flush();

        queue.finish();

        timed = 0;
        if(useEventTimer)
        {
            for(int i=0; i<iters; i++)
            {
                cl::Event timeEvent;
                void *mapPtr;

                mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)), NULL, &timeEvent);
                queue.finish();
                queue.enqueueUnmapMemObject(clBuffer, mapPtr);
                queue.finish();
                timed += timeInUS(timeEvent);
            }
        } else
        {
            for(int i=0; i<iters; i++)
            {
                Timer timer;
                void *mapPtr;

                timer.start();
                mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)));
                queue.finish();
                timed += timer.stopAndTime();

                queue.enqueueUnmapMemObject(clBuffer, mapPtr);
                queue.finish();
            }
        }
        timed /= iters;

        gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
        cout << gbps << endl;
        ///////////////////////////////////////////////////////////////////////////

        // memcpy from mapped ptr
        cout << TAB TAB TAB TAB "memcpy from mapped ptr   : ";  cout.flush();
        queue.finish();

        timed = 0;
        for(int i=0; i<iters; i++)
        {
            cl::Event timeEvent;
            void *mapPtr;

            mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_READ, 0, (numItems * sizeof(float)));
            queue.finish();

            timer.start();
            memcpy(arr, mapPtr, (numItems * sizeof(float)));
            timed += timer.stopAndTime();

            queue.enqueueUnmapMemObject(clBuffer, mapPtr);
            queue.finish();
        }
        timed /= iters;

        gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
        cout << gbps << endl;

        ///////////////////////////////////////////////////////////////////////////

        // enqueueUnmap
        cout << TAB TAB TAB "enqueueUnmap(after write)  : ";    cout.flush();

        queue.finish();

        timed = 0;
        if(useEventTimer)
        {
            for(int i=0; i<iters; i++)
            {
                cl::Event timeEvent;
                void *mapPtr;

                mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
                queue.finish();
                queue.enqueueUnmapMemObject(clBuffer, mapPtr, NULL, &timeEvent);
                queue.finish();
                timed += timeInUS(timeEvent);
            }
        } else
        {
            for(int i=0; i<iters; i++)
            {
                Timer timer;
                void *mapPtr;

                mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
                queue.finish();

                timer.start();
                queue.enqueueUnmapMemObject(clBuffer, mapPtr);
                queue.finish();
                timed += timer.stopAndTime();
            }
        }
        timed /= iters;
        gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;

        cout << gbps << endl;
        ///////////////////////////////////////////////////////////////////////////

        // memcpy to mapped ptr
        cout << TAB TAB TAB TAB "memcpy to mapped ptr     : ";  cout.flush();
        queue.finish();

        timed = 0;
        for(int i=0; i<iters; i++)
        {
            cl::Event timeEvent;
            void *mapPtr;

            mapPtr = queue.enqueueMapBuffer(clBuffer, CL_TRUE, CL_MAP_WRITE, 0, (numItems * sizeof(float)));
            queue.finish();

            timer.start();
            memcpy(mapPtr, arr, (numItems * sizeof(float)));
            timed += timer.stopAndTime();

            queue.enqueueUnmapMemObject(clBuffer, mapPtr);
            queue.finish();
        }
        timed /= iters;

        gbps = ((float)numItems * sizeof(float)) / timed / 1e3f;
        cout << gbps << endl;

        ///////////////////////////////////////////////////////////////////////////


    }
    catch(cl::Error error)
    {
        cerr << error.what() << "(" << error.err() << ")" << endl;
        cerr << TAB TAB TAB "Tests skipped" << endl;

        if(arr)     delete [] arr;
        return -1;
    }

    if(arr)     delete [] arr;
    return 0;
}