Exemple #1
0
/*
 * main execution routine
 * Basically it consists of three parts:
 *   - generating the inputs
 *   - running OpenCL kernel
 *   - reading results of processing
 */
int _tmain(int argc, TCHAR* argv[])
{
    cl_int err;
    ocl_args_d_t ocl;
    cl_device_type deviceType = CL_DEVICE_TYPE_GPU;

    LARGE_INTEGER perfFrequency;
    LARGE_INTEGER performanceCountNDRangeStart;
    LARGE_INTEGER performanceCountNDRangeStop;

    cl_uint arrayWidth = 1024;
    cl_uint arrayHeight = 1024;

    //initialize Open CL objects (context, queue, etc.)
    if (CL_SUCCESS != SetupOpenCL(&ocl, deviceType))
    {
        return -1;
    }

    // allocate working buffers. 
    // the buffer should be aligned with 4K page and size should fit 64-byte cached line
    cl_uint optimizedSize = ((sizeof(cl_int) * arrayWidth * arrayHeight - 1) / 64 + 1) * 64;
    cl_int* inputA = (cl_int*)_aligned_malloc(optimizedSize, 4096);
    cl_int* inputB = (cl_int*)_aligned_malloc(optimizedSize, 4096);
    cl_int* outputC = (cl_int*)_aligned_malloc(optimizedSize, 4096);
    if (NULL == inputA || NULL == inputB || NULL == outputC)
    {
        LogError("Error: _aligned_malloc failed to allocate buffers.\n");
        return -1;
    }

    //random input
    generateInput(inputA, arrayWidth, arrayHeight);
    generateInput(inputB, arrayWidth, arrayHeight);

    // Create OpenCL buffers from host memory
    // These buffers will be used later by the OpenCL kernel
    if (CL_SUCCESS != CreateBufferArguments(&ocl, inputA, inputB, outputC, arrayWidth, arrayHeight))
    {
        return -1;
    }

    // Create and build the OpenCL program
    if (CL_SUCCESS != CreateAndBuildProgram(&ocl))
    {
        return -1;
    }

    // Program consists of kernels.
    // Each kernel can be called (enqueued) from the host part of OpenCL application.
    // To call the kernel, you need to create it from existing program.
    ocl.kernel = clCreateKernel(ocl.program, "Add", &err);
    if (CL_SUCCESS != err)
    {
        LogError("Error: clCreateKernel returned %s\n", TranslateOpenCLError(err));
        return -1;
    }

    // Passing arguments into OpenCL kernel.
    if (CL_SUCCESS != SetKernelArguments(&ocl))
    {
        return -1;
    }

    // Regularly you wish to use OpenCL in your application to achieve greater performance results
    // that are hard to achieve in other ways.
    // To understand those performance benefits you may want to measure time your application spent in OpenCL kernel execution.
    // The recommended way to obtain this time is to measure interval between two moments:
    //   - just before clEnqueueNDRangeKernel is called, and
    //   - just after clFinish is called
    // clFinish is necessary to measure entire time spending in the kernel, measuring just clEnqueueNDRangeKernel is not enough,
    // because this call doesn't guarantees that kernel is finished.
    // clEnqueueNDRangeKernel is just enqueue new command in OpenCL command queue and doesn't wait until it ends.
    // clFinish waits until all commands in command queue are finished, that suits your need to measure time.
    bool queueProfilingEnable = true;
    if (queueProfilingEnable)
        QueryPerformanceCounter(&performanceCountNDRangeStart);
    // Execute (enqueue) the kernel
    if (CL_SUCCESS != ExecuteAddKernel(&ocl, arrayWidth, arrayHeight))
    {
        return -1;
    }
    if (queueProfilingEnable)
        QueryPerformanceCounter(&performanceCountNDRangeStop);

    // The last part of this function: getting processed results back.
    // use map-unmap sequence to update original memory area with output buffer.
    ReadAndVerify(&ocl, arrayWidth, arrayHeight, inputA, inputB);

    // retrieve performance counter frequency
    if (queueProfilingEnable)
    {
        QueryPerformanceFrequency(&perfFrequency);
        LogInfo("NDRange performance counter time %f ms.\n",
            1000.0f*(float)(performanceCountNDRangeStop.QuadPart - performanceCountNDRangeStart.QuadPart) / (float)perfFrequency.QuadPart);
    }

    _aligned_free(inputA);
    _aligned_free(inputB);
    _aligned_free(outputC);

#if defined(_DEBUG)
    getchar();
#endif

    return 0;
}
Exemple #2
0
/*
 * Program entry point: sets up the OpenCL objects held in the file-level `ocl`
 * state, builds the "ray_cal" kernel, then runs RenderDisplay on two threads
 * and prints the time taken by initialization, rendering and the whole run.
 *
 * Returns 0 on success and 1 if any initialization step or thread creation
 * fails, so that callers and scripts can detect the failure.
 *
 * argc / argv are not used.
 */
int main(int argc, char *argv[])
{
    struct timeval begin, end;
    gettimeofday(&begin, NULL);
    printf("OpenCL Initialization\n");

    cl_int err = CL_SUCCESS;

    struct timeval begin_init, end_init;
    gettimeofday(&begin_init, NULL);

    if (!createContext())
    {
        printf("Error: createContext\n");
        return 1;
    }

    if (!getDeviceIDs())
    {
        printf("Error: getDeviceIDs\n");
        return 1;
    }

    generateArgument();

    if (CreateAndBuildProgram() != CL_SUCCESS)
    {
        printf("Error: CreateAndBuildProgram\n");
        return 1;
    }

    if (CreateBufferArguments() != CL_SUCCESS)
    {
        printf("Error: CreateBufferArguments\n");
        return 1;
    }

    ocl.kernel = clCreateKernel(ocl.program, "ray_cal", &err);
    if (err != CL_SUCCESS)
    {
        printf("Error: clCreateKernel\n");
        return 1;
    }

    if (SetKernelArguments() != CL_SUCCESS)
    {
        printf("Error: SetKernelArguments\n");
        return 1;
    }

    gettimeofday(&end_init, NULL);
    printf("init elapsed time : %lfs\n", (double)timeval_diff(&end_init, &begin_init)/1000000);

    struct timeval begin_kernel, end_kernel;
    gettimeofday(&begin_kernel, NULL);

    srand((unsigned int)time(NULL));
    pthread_t t1, t2;

    // Only printed for information; the per-thread descriptors below use fixed values.
    int tmpvalue = WIDTH*HEIGHT/WorkAmount;

    printf("total recursive %d\n", tmpvalue);

    // Per-thread work descriptors. Each one receives its own copy of `ocl`.
    // NOTE(review): the remaining fields look like {thread id, range start,
    // range end, 256}; confirm against the PASSING_OCL definition.
    // An earlier, disabled variant split the work proportionally instead:
    //PASSING_OCL ocl_info[2] = {{ocl, 0, 0, tmpvalue*5/8, 256}, {ocl, 1, tmpvalue*5/8, tmpvalue, 256}};
    PASSING_OCL ocl_info[2] = {{ocl, 0, 0, 8, 256}, {ocl, 1, 8, 16, 256}};

    // pthread_create returns non-zero on failure and then leaves the thread id
    // unset, so a failed thread must never be joined.
    if (pthread_create(&t1, NULL, RenderDisplay, (void *)&ocl_info[0]) != 0)
    {
        printf("Error: pthread_create\n");
        return 1;
    }
    if (pthread_create(&t2, NULL, RenderDisplay, (void *)&ocl_info[1]) != 0)
    {
        printf("Error: pthread_create\n");
        // The first worker is already running on ocl_info[0]; wait for it
        // before leaving so it does not outlive the array it reads.
        pthread_join(t1, NULL);
        return 1;
    }
    pthread_join(t1, NULL);
    pthread_join(t2, NULL);

    gettimeofday(&end_kernel, NULL);
    printf("render elapsed time : %lfs\n", (double)timeval_diff(&end_kernel, &begin_kernel)/1000000);

    // A disabled C++ snippet that used to live here dumped ImagePixel to
    // "out.ppm" as a binary PPM (P6), unpacking each pixel as 0x00RRGGBB.
    // main itself does not write the rendered image anywhere.

    gettimeofday(&end, NULL);
    printf("elapsed time : %lfs\n", (double)timeval_diff(&end, &begin)/1000000);

    printf("End Success\n");

    return 0;
}