// Example no. 1
// 0
/**
 * Binds the twelve arguments of normalize2_kernel and enqueues it as a
 * one-dimensional NDRange on commandQueue.
 *
 * The launch uses Nn / threads work-items in work-groups of 256. The queue
 * is flushed but the kernel is NOT waited on, so it may still be running
 * when this function returns.
 *
 * @return SDK_SUCCESS once the kernel has been enqueued and the queue
 *         flushed. Every OpenCL call is checked with CHECK_OPENCL_ERROR
 *         (defined outside this block; presumably it returns a failure code
 *         on error -- confirm against the macro definition).
 */
int
Lucas::normalize2_runCLKernels ()
{
  cl_int status;

  /*
   * One-dimensional launch geometry.
   * NOTE(review): `threads` is used as a divisor and must be non-zero.
   * OpenCL 1.x also requires the global size to be a multiple of the local
   * size, so Nn / threads is presumably always a multiple of 256 -- confirm
   * against the code that chooses Nn and threads.
   */
  const size_t globalThreads = Nn / threads;
  const size_t localThreads = 256;

  // Completion event of the kernel launch; it is not waited on here.
  cl::Event ndrEvt;

  // Bind the kernel arguments in positional order (0..11).
  status = normalize2_kernel.setArg (0, g_x);
  CHECK_OPENCL_ERROR (status, "cl::setArg failed. (g_x)");
  status = normalize2_kernel.setArg (1, threads);
  CHECK_OPENCL_ERROR (status, "cl::setArg failed. (threads)");
  // NOTE(review): arguments 2 and 3 both receive bigAB. Verify against the
  // kernel signature that this is intended and not a copy-paste slip.
  status = normalize2_kernel.setArg (2, bigAB);
  CHECK_OPENCL_ERROR (status, "cl::setArg failed. (bigAB)");
  status = normalize2_kernel.setArg (3, bigAB);
  CHECK_OPENCL_ERROR (status, "cl::setArg failed. (bigAB)");
  status = normalize2_kernel.setArg (4, g_carry);
  CHECK_OPENCL_ERROR (status, "cl::setArg failed. (g_carry)");
  status = normalize2_kernel.setArg (5, Nn);
  CHECK_OPENCL_ERROR (status, "cl::setArg failed. (Nn)");
  status = normalize2_kernel.setArg (6, g_inv2);
  CHECK_OPENCL_ERROR (status, "cl::setArg failed. (g_inv2)");
  status = normalize2_kernel.setArg (7, g_ttp2);
  CHECK_OPENCL_ERROR (status, "cl::setArg failed. (g_ttp2)");
  status = normalize2_kernel.setArg (8, g_ttmp2);
  CHECK_OPENCL_ERROR (status, "cl::setArg failed. (g_ttmp2)");
  status = normalize2_kernel.setArg (9, g_inv3);
  CHECK_OPENCL_ERROR (status, "cl::setArg failed. (g_inv3)");
  status = normalize2_kernel.setArg (10, g_ttp3);
  CHECK_OPENCL_ERROR (status, "cl::setArg failed. (g_ttp3)");
  status = normalize2_kernel.setArg (11, g_ttmp3);
  CHECK_OPENCL_ERROR (status, "cl::setArg failed. (g_ttmp3)");

  /*
   * Enqueue the kernel with no global offset and an empty wait list.
   */
  cl::NDRange gThreads (globalThreads);
  cl::NDRange lThreads (localThreads);

  status = commandQueue.enqueueNDRangeKernel (normalize2_kernel,
					      cl::NullRange,
					      gThreads, lThreads, 0, &ndrEvt);
  CHECK_OPENCL_ERROR (status, "CommandQueue::enqueueNDRangeKernel() failed.");

  // Submit the queued command to the device without blocking on completion.
  status = commandQueue.flush ();
  CHECK_OPENCL_ERROR (status, "cl::CommandQueue.flush failed.");

  return SDK_SUCCESS;
}
int
MatrixMulDouble::runCLKernels(void)
{
    cl_int   status;

    /*
     * Kernel runs over complete output matrix with blocks of blockSize x blockSize
     * running concurrently
     */
    size_t globalThreads[2]= {widthB / 4, heightA / 4};
    size_t localThreads[2] = {blockSize, blockSize};

    cl_int eventStatus = CL_QUEUED;

    // Set input data to matrix A and matrix B
    cl::Event inMapEvtA;
    cl::Event inMapEvtB;
    cl::Event inUnmapEvtA;
    cl::Event inUnmapEvtB;
    cl::Event outMapEvt;
    cl::Event outUnmapEvt;
    cl::Event ndrEvt;

    void* inMapPtrA = NULL;
    void* inMapPtrB = NULL;
    void* outMapPtr = NULL;

    inMapPtrA = commandQueue.enqueueMapBuffer(
                    inputBufA,
                    CL_FALSE,
                    CL_MAP_WRITE,
                    0,
                    widthA * heightA * sizeof(cl_double),
                    NULL,
                    &inMapEvtA,
                    &status);
    CHECK_OPENCL_ERROR(status,
                       "cl::CommandQueue.enqueueMapBuffer failed. (inputBufA)");

    inMapPtrB = commandQueue.enqueueMapBuffer(
                    inputBufB,
                    CL_FALSE,
                    CL_MAP_WRITE,
                    0,
                    widthB * heightB * sizeof(cl_double),
                    NULL,
                    &inMapEvtB,
                    &status);
    CHECK_OPENCL_ERROR(status,
                       "cl::CommandQueue.enqueueMapBuffer failed. (inputBufB)");

    status = commandQueue.flush();
    CHECK_OPENCL_ERROR(status, "cl::CommandQueue.flush failed.");

    eventStatus = CL_QUEUED;
    while(eventStatus != CL_COMPLETE)
    {
        status = inMapEvtA.getInfo<cl_int>(
                     CL_EVENT_COMMAND_EXECUTION_STATUS,
                     &eventStatus);
        CHECK_OPENCL_ERROR(status,
                           "cl:Event.getInfo(CL_EVENT_COMMAND_EXECUTION_STATUS) failed.");
    }

    memcpy(inMapPtrA, inputA, sizeof(cl_double) * widthA  * heightA);

    eventStatus = CL_QUEUED;
    while(eventStatus != CL_COMPLETE)
    {
        status = inMapEvtB.getInfo<cl_int>(
                     CL_EVENT_COMMAND_EXECUTION_STATUS,
                     &eventStatus);
        CHECK_OPENCL_ERROR(status,
                           "cl:Event.getInfo(CL_EVENT_COMMAND_EXECUTION_STATUS) failed.");
    }

    memcpy(inMapPtrB, inputB, sizeof(cl_double) * widthB  * heightB);

    status = commandQueue.enqueueUnmapMemObject(
                 inputBufA,
                 inMapPtrA,
                 NULL,
                 &inUnmapEvtA);
    CHECK_OPENCL_ERROR(status,
                       "cl::CommandQueue.enqueueUnmapMemObject (inputBufA).");

    status = commandQueue.enqueueUnmapMemObject(
                 inputBufB,
                 inMapPtrB,
                 NULL,
                 &inUnmapEvtB);
    CHECK_OPENCL_ERROR(status,
                       "cl::CommandQueue.enqueueUnmapMemObject (inputBufB)");

    status = commandQueue.flush();
    CHECK_OPENCL_ERROR(status, "cl::CommandQueue.flush failed.");

    eventStatus = CL_QUEUED;
    while(eventStatus != CL_COMPLETE)
    {
        status = inUnmapEvtA.getInfo<cl_int>(
                     CL_EVENT_COMMAND_EXECUTION_STATUS,
                     &eventStatus);
        CHECK_OPENCL_ERROR(status,
                           "cl:Event.getInfo(CL_EVENT_COMMAND_EXECUTION_STATUS) failed.");
    }

    eventStatus = CL_QUEUED;
    while(eventStatus != CL_COMPLETE)
    {
        status = inUnmapEvtB.getInfo<cl_int>(
                     CL_EVENT_COMMAND_EXECUTION_STATUS,
                     &eventStatus);
        CHECK_OPENCL_ERROR(status,
                           "cl:Event.getInfo(CL_EVENT_COMMAND_EXECUTION_STATUS) failed.");
    }


    // Set appropriate arguments to the kernel

    // output array as the 1st argument : stores product of inputA and inputB
    status = kernel.setArg(0, inputBufA);
    CHECK_OPENCL_ERROR(status, "cl::setArg failed. (inputBufA)");

    status = kernel.setArg(1, inputBufB);
    CHECK_OPENCL_ERROR(status, "cl::setArg failed. (inputBufB)");

    status = kernel.setArg(2, outputBuf);
    CHECK_OPENCL_ERROR(status, "cl::setArg failed. (outputBuf)");

    // widthA of the inputA matrix as 4th argument - widthA
    status = kernel.setArg(3, widthA);
    CHECK_OPENCL_ERROR(status, "cl::setArg failed. (widthA)");

    // Set local memory argument if Scratchpad is available
    if(lds)
    {
        status = kernel.setArg(
                     4,
                     (blockSize * 4) * (blockSize * 4) * sizeof(cl_double),
                     NULL);
        CHECK_OPENCL_ERROR(status, "cl::setArg failed. (local memory)");

    }
    else
    {
        status = kernel.setArg(4, sizeof(cl_int), &widthB);
        CHECK_OPENCL_ERROR(status, "cl::setArg failed. (widthB)");

    }

    /*
     * Enqueue a kernel run call.
     */

    // Each thread calculates 2 gaussian numbers
    cl::NDRange gThreads(globalThreads[0], globalThreads[1]);
    cl::NDRange lThreads(localThreads[0], localThreads[1]);

    status = commandQueue.enqueueNDRangeKernel(kernel,
             cl::NullRange,
             gThreads,
             lThreads,
             0,
             &ndrEvt);
    CHECK_OPENCL_ERROR(status, "CommandQueue::enqueueNDRangeKernel() failed.");

    status = commandQueue.flush();
    CHECK_OPENCL_ERROR(status, "cl::CommandQueue.flush failed.");

    eventStatus = CL_QUEUED;
    while(eventStatus != CL_COMPLETE)
    {
        status = ndrEvt.getInfo<cl_int>(
                     CL_EVENT_COMMAND_EXECUTION_STATUS,
                     &eventStatus);
        CHECK_OPENCL_ERROR(status,
                           "cl:Event.getInfo(CL_EVENT_COMMAND_EXECUTION_STATUS) failed.");
    }

    if(!eAppGFLOPS)
    {
        // Calculate performance
        cl_ulong startTime;
        cl_ulong endTime;
        status = ndrEvt.getProfilingInfo<cl_ulong>(
                     CL_PROFILING_COMMAND_START,
                     &startTime);
        CHECK_OPENCL_ERROR(status, "cl::Event.getProfilingInfo failed.(startTime).");

        status = ndrEvt.getProfilingInfo<cl_ulong>(
                     CL_PROFILING_COMMAND_END,
                     &endTime);
        CHECK_OPENCL_ERROR(status, "cl::Event.getProfilingInfo failed.(endTime).");

        // Print performance numbers
        double sec = 1e-9 * (endTime - startTime);
        kernelTime += sec;
    }

    outMapPtr = commandQueue.enqueueMapBuffer(
                    outputBuf,
                    CL_FALSE,
                    CL_MAP_READ,
                    0,
                    widthB * heightA * sizeof(cl_double),
                    NULL,
                    &outMapEvt,
                    &status);
    CHECK_OPENCL_ERROR(status,
                       "cl::CommandQueue.enqueueMapBuffer failed. (outputBuf).");

    status = commandQueue.flush();
    CHECK_OPENCL_ERROR(status, "cl::CommandQueue.flush failed.");

    eventStatus = CL_QUEUED;
    while(eventStatus != CL_COMPLETE)
    {
        status = outMapEvt.getInfo<cl_int>(
                     CL_EVENT_COMMAND_EXECUTION_STATUS,
                     &eventStatus);
        CHECK_OPENCL_ERROR(status,
                           "cl:Event.getInfo(CL_EVENT_COMMAND_EXECUTION_STATUS) failed.");
    }

    memcpy(output, outMapPtr, sizeof(cl_double) * widthB  * heightA);

    status = commandQueue.enqueueUnmapMemObject(
                 outputBuf,
                 outMapPtr,
                 NULL,
                 &outUnmapEvt);
    CHECK_OPENCL_ERROR(status,
                       "cl::CommandQueue.enqueueUnmapMemObject (outputBuf)");

    status = commandQueue.flush();
    CHECK_OPENCL_ERROR(status, "cl::CommandQueue.flush failed.");

    eventStatus = CL_QUEUED;
    while(eventStatus != CL_COMPLETE)
    {
        status = outUnmapEvt.getInfo<cl_int>(
                     CL_EVENT_COMMAND_EXECUTION_STATUS,
                     &eventStatus);
        CHECK_OPENCL_ERROR(status,
                           "cl:Event.getInfo(CL_EVENT_COMMAND_EXECUTION_STATUS) failed.");
    }

    return SDK_SUCCESS;
}