示例#1
0
//-----------------------------------------------------------------------------
// Execute tone mapping kernel
// this is all the important CL code in this example, from startup to shutdown
//-----------------------------------------------------------------------------
void ExecuteToneMappingKernel(cl_mem inputBuffer, cl_mem outputBuffer, CHDRData *pData, cl_uint imageWidth, cl_uint imageHeight)
{
    cl_int status = CL_SUCCESS;

    clug_ToneMappingPerPixel s = clugCreate_ToneMappingPerPixel(&status);
    CL_VERIFY(status, cluGetBuildErrors(s.m_program));

    // allocate the buffer
    cl_mem HDRDataBuffer = clCreateBuffer(CLU_CONTEXT, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(CHDRData), pData, &status);
    CL_VERIFY(status, "Failed to create HDR Data Buffer.\n");

    clu_enqueue_params params = CLU_DEFAULT_PARAMS;
    params.nd_range = cluNDRange2(imageHeight, imageWidth, BLOCK_DIM, BLOCK_DIM, 0, 0);

    status = clugEnqueue_ToneMappingPerPixel(s, &params, inputBuffer, outputBuffer, HDRDataBuffer, imageWidth);

    status = clReleaseMemObject(HDRDataBuffer);

    status = clReleaseKernel(s.m_kernel);
}
示例#2
0
// Main function 
// *********************************************************************
int main(int argc, char **argv)
{
    // set and log Global and Local work size dimensions
    szLocalWorkSize = 16;
    szGlobalWorkSize = (iNumElements + szLocalWorkSize - 1) / szLocalWorkSize * szLocalWorkSize;  // rounded up to the nearest multiple of the LocalWorkSize

    // Allocate and initialize host arrays 
    std::vector<float> radius;
    std::vector<float4> position;
    std::vector<float4> emission;
    std::vector<float4> color;
    std::vector<cl_short> reflType;
    std::vector<float4> output;
    cl_int numSpheres = parallelize(spheres, radius, position, emission, color, reflType);

    output.resize(szGlobalWorkSize);

    // Create the OpenCL context on a GPU device
    cl_int err = 0;
    cxGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &err);
    CL_VERIFY(err);

    // Get the list of GPU devices associated with context
    clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes);
    cdDevices = (cl_device_id*)malloc(szParmDataBytes);
    clGetContextInfo(cxGPUContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL);

    cqCommandQue = clCreateCommandQueue(cxGPUContext, cdDevices[0], 0, 0);

    // Allocate the OpenCL buffer memory objects for source and result on the device GMEM
    cl_mem radiusBuf = createBuffer(cxGPUContext, radius);
    cl_mem positionBuf = createBuffer(cxGPUContext, position);
    cl_mem emissionBuf = createBuffer(cxGPUContext, emission);
    cl_mem colorBuf = createBuffer(cxGPUContext, color);
    cl_mem reflTypeBuf = createBuffer(cxGPUContext, reflType);
    cl_mem outputBuf = clCreateBuffer(cxGPUContext, CL_MEM_WRITE_ONLY, sizeof(output[0]) * output.size(), NULL, 0);
    
    std::string source = readFile(cSourceFile);
    const char *cSource = source.c_str();

    // Create the program
    cpProgram = clCreateProgramWithSource(cxGPUContext, 1, &cSource, 0, 0);

    // Build the program
    err = clBuildProgram(cpProgram, 0, NULL, NULL, NULL, NULL);
    CL_VERIFY(err);

    // Create the kernel
    ckKernel = clCreateKernel(cpProgram, "radiance", &err);
    CL_VERIFY(err);

    // Set the Argument values
    CL_VERIFY(clSetKernelArg(ckKernel, 0, sizeof(cl_mem), (void*)&radiusBuf));
    CL_VERIFY(clSetKernelArg(ckKernel, 1, sizeof(cl_mem), (void*)&positionBuf));
    CL_VERIFY(clSetKernelArg(ckKernel, 2, sizeof(cl_mem), (void*)&emissionBuf));
    CL_VERIFY(clSetKernelArg(ckKernel, 3, sizeof(cl_mem), (void*)&colorBuf));
    CL_VERIFY(clSetKernelArg(ckKernel, 4, sizeof(cl_mem), (void*)&reflTypeBuf));
    CL_VERIFY(clSetKernelArg(ckKernel, 5, sizeof(cl_int), (void*)&numSpheres));
    CL_VERIFY(clSetKernelArg(ckKernel, 6, sizeof(cl_mem), (void*)&outputBuf));
    CL_VERIFY(clSetKernelArg(ckKernel, 7, sizeof(cl_int), (void*)&width));
    CL_VERIFY(clSetKernelArg(ckKernel, 8, sizeof(cl_int), (void*)&height));

    // --------------------------------------------------------
    // Start Core sequence... copy input data to GPU, compute, copy results back

    // Launch kernel
    CL_VERIFY(clEnqueueNDRangeKernel(cqCommandQue, ckKernel, 1, NULL, &szGlobalWorkSize, &szLocalWorkSize, 0, NULL, NULL));

    // Synchronous/blocking read of results, and check accumulated errors
    CL_VERIFY(clEnqueueReadBuffer(cqCommandQue, outputBuf, CL_TRUE, 0, sizeof(output[0]) * output.size(), &output.front(), 0, NULL, NULL));

    writePPM(output);
    writeRaw(output);

    // Cleanup and leave
    Cleanup (EXIT_SUCCESS);
}