Exemplo n.º 1
0
int
main(void)
{
    const size_t OUTPUT_SIZE = 5;
    const char *input = "PING\0";
    char output[OUTPUT_SIZE];
    float a = 23456.0f;
    int b = 2000001;   

    try {
        std::vector<cl::Platform> platformList;

        // Pick platform
        cl::Platform::get(&platformList);

        // Pick first platform
        cl_context_properties cprops[] = {
            CL_CONTEXT_PLATFORM, (cl_context_properties)(platformList[0])(), 0};
        cl::Context context(CL_DEVICE_TYPE_GPU, cprops);

        // Query the set of devices attched to the context
        std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
        
        assert (devices.size() == 1);

        cl::Device device = devices.at(0);

        assert (strncmp(device.getInfo<CL_DEVICE_NAME>().c_str(), "tta", 3) == 0);

        a = poclu_bswap_cl_float (device(), a);
        b = poclu_bswap_cl_int (device(), b);

        // Create and program from source
        cl::Program::Sources sources({kernelSourceCode});
        cl::Program program(context, sources);

        // Build program
        program.build(devices);

        cl::Buffer inputBuffer = cl::Buffer(
            context, 
            CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
            strlen (input) + 1, (void *) &input[0]);

        // Create buffer for that uses the host ptr C
        cl::Buffer outputBuffer = cl::Buffer(
            context, 
            CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, 
            OUTPUT_SIZE, (void *) &output[0]);

        // Create kernel object
        cl::Kernel kernel(program, "test_kernel");

        // Set kernel args
        kernel.setArg(0, inputBuffer);
        kernel.setArg(1, outputBuffer);
        kernel.setArg(2, a);
        kernel.setArg(3, b);

        // Create command queue
        cl::CommandQueue queue(context, devices[0], CL_QUEUE_PROFILING_ENABLE);
 
        cl::Event enqEvent;

        // Do the work
        queue.enqueueNDRangeKernel(
            kernel, 
            cl::NullRange, 
            cl::NDRange(1),
            cl::NullRange,
            NULL, &enqEvent);
 
        cl::Event mapEvent;
        void *outVal = queue.enqueueMapBuffer(
            outputBuffer,
            CL_TRUE, // block 
            CL_MAP_READ,
            0, OUTPUT_SIZE, NULL, &mapEvent);
       
        char* outStr = (char*)(outVal);
        if (std::string(outStr) == "PONG") 
            std::cout << "OK\n";
        else
            std::cerr << "FAIL, received: " << outStr << "\n";

        cl::Event unmapEvent;
        // Finally release our hold on accessing the memory
        queue.enqueueUnmapMemObject(
            outputBuffer,
            (void*)(outVal),
            NULL,
            &unmapEvent);

        queue.finish();

        assert (enqEvent.getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>() == CL_COMPLETE);
        assert (mapEvent.getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>() == CL_COMPLETE);
        assert (unmapEvent.getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>() == CL_COMPLETE);


        assert (
            enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>() <=
            enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>());

        assert (
            enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>() <=
            enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>());

        assert (
            enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>() <
            enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_END>());

#if 0
        std::cerr << "exec time: " 
                  << enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_END>() -
            enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>() << std::endl;
#endif

        assert (
            mapEvent.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>() <=
            mapEvent.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>());

        assert (
            mapEvent.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>() <=
            mapEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>());


        assert (
            mapEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>() <=
            mapEvent.getProfilingInfo<CL_PROFILING_COMMAND_END>());

        assert (
            unmapEvent.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>() <=
            unmapEvent.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>());

        assert (
            unmapEvent.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>() <=
            unmapEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>());

        assert (
            unmapEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>() <=
            unmapEvent.getProfilingInfo<CL_PROFILING_COMMAND_END>());

        assert (enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_END>() <=
                mapEvent.getProfilingInfo<CL_PROFILING_COMMAND_END>());

        assert (mapEvent.getProfilingInfo<CL_PROFILING_COMMAND_END>() <=
                unmapEvent.getProfilingInfo<CL_PROFILING_COMMAND_END>());

    } 
    catch (cl::Error err) {
         std::cerr
             << "ERROR: "
             << err.what()
             << "("
             << err.err()
             << ")"
             << std::endl;

         return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}
Exemplo n.º 2
0
int
main(void)
{
    float A[BUFFER_SIZE];

    cl_int err;

    try {
        std::vector<cl::Platform> platformList;

        // Pick platform
        cl::Platform::get(&platformList);

        // Pick first platform
        cl_context_properties cprops[] = {
            CL_CONTEXT_PLATFORM, (cl_context_properties)(platformList[0])(), 0};
        cl::Context context(CL_DEVICE_TYPE_CPU|CL_DEVICE_TYPE_GPU, cprops);

        // Query the set of devices attched to the context
        std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();

        // Create and program from source
        cl::Program::Sources sources(1, std::make_pair(kernelSourceCode, 0));
        cl::Program program(context, sources);

        cl_device_id dev_id = devices.at(0)();

        int scalar = poclu_bswap_cl_int (dev_id, 4);

        for (int i = 0; i < BUFFER_SIZE; ++i)
            A[i] = poclu_bswap_cl_float(dev_id, i);

        // Build program
        program.build(devices);

        cl::Buffer aBuffer = cl::Buffer(
            context, 
            CL_MEM_COPY_HOST_PTR,
            BUFFER_SIZE * sizeof(float), 
            (void *) &A[0]);

        cl::Buffer localBuffer = cl::Buffer(
            context, 0, BUFFER_SIZE * sizeof(int), NULL);

        // Create kernel object
        cl::Kernel kernel(program, "test_kernel");

        // Set kernel args
        kernel.setArg(0, aBuffer);
        kernel.setArg(1, localBuffer);
        kernel.setArg(2, scalar);

        // Create command queue
        cl::CommandQueue queue(context, devices[0], 0);
 
        // Do the work
        queue.enqueueNDRangeKernel(
            kernel, 
            cl::NullRange, 
            cl::NDRange(WORK_ITEMS),
            cl::NullRange);
 
        // Map aBuffer to host pointer. This enforces a sync with 
        // the host backing space, remember we choose GPU device.
        float * res = (float *) queue.enqueueMapBuffer(
            aBuffer,
            CL_TRUE, // block 
            CL_MAP_READ,
            0,
            BUFFER_SIZE * sizeof(float));

        res[0] = poclu_bswap_cl_float (dev_id, res[0]);
        res[1] = poclu_bswap_cl_float (dev_id, res[1]);
        bool ok = res[0] == 8 && res[1] == 10;
        if (ok) {
            return EXIT_SUCCESS;
        } else {
            std::cout << "NOK " << res[0] << " " << res[1] << std::endl;
            std::cout << "res@" << std::hex << res << std::endl;
            return EXIT_FAILURE;
        }

        // Finally release our hold on accessing the memory
        err = queue.enqueueUnmapMemObject(
            aBuffer, (void *) res);
 
        // There is no need to perform a finish on the final unmap
        // or release any objects as this all happens implicitly with
        // the C++ Wrapper API.
    } 
    catch (cl::Error err) {
         std::cerr
             << "ERROR: "
             << err.what()
             << "("
             << err.err()
             << ")"
             << std::endl;

         return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}
Exemplo n.º 3
0
int
main(void)
{
    cl_float A[BUFFER_SIZE];
    cl_int R[WORK_ITEMS];

    for (int i = 0; i < BUFFER_SIZE; i++) {
        A[i] = i;
    }

    for (int i = 0; i < WORK_ITEMS; i++) {
        R[i] = i;
    }

    try {
        std::vector<cl::Platform> platformList;

        // Pick platform
        cl::Platform::get(&platformList);

        // Pick first platform
        cl_context_properties cprops[] = {
            CL_CONTEXT_PLATFORM, (cl_context_properties)(platformList[0])(), 0};
        cl::Context context(CL_DEVICE_TYPE_CPU | CL_DEVICE_TYPE_GPU, cprops);

        // Query the set of devices attched to the context
        std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();

        // Create and program from source
        cl::Program::Sources sources(1, std::make_pair(kernelSourceCode, 0));
        cl::Program program(context, sources);

        cl_device_id dev_id = devices.at(0)();

        poclu_bswap_cl_float_array(dev_id, A, BUFFER_SIZE);
        poclu_bswap_cl_int_array(dev_id, R, WORK_ITEMS);

        // Build program
        program.build(devices);

        // Create buffer for A and copy host contents
        cl::Buffer aBuffer = cl::Buffer(
            context, 
            CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
            BUFFER_SIZE * sizeof(float), 
            (void *) &A[0]);

        // Create buffer for that uses the host ptr C
        cl::Buffer cBuffer = cl::Buffer(
            context, 
            CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, 
            WORK_ITEMS * sizeof(int), 
            (void *) &R[0]);

        // Create kernel object
        cl::Kernel kernel(program, "test_kernel");

        // Set kernel args
        kernel.setArg(0, aBuffer);
        kernel.setArg(1, cBuffer);

        // Create command queue
        cl::CommandQueue queue(context, devices[0], 0);
 
        // Do the work
        queue.enqueueNDRangeKernel(
            kernel, 
            cl::NullRange, 
            cl::NDRange(WORK_ITEMS),
            cl::NullRange);
 

        // Map cBuffer to host pointer. This enforces a sync with 
        // the host backing space, remember we choose GPU device.
        int * output = (int *) queue.enqueueMapBuffer(
            cBuffer,
            CL_TRUE, // block 
            CL_MAP_READ,
            0,
            WORK_ITEMS * sizeof(int));

        bool ok = true;
        for (int i = 0; i < WORK_ITEMS; i++) {

            float global_sum = 0.0f;
            int j;
            float result;

            result = global_sum;
            for (j=0; j < 32; ++j) {
                float value = poclu_bswap_cl_float (dev_id, A[i+j]);
                global_sum += value;
            }
            result = result + global_sum;
            for (j=0; j < 32; ++j) {
                float value = poclu_bswap_cl_float (dev_id, A[i+j]);
                global_sum += value;
            }
            result = result + global_sum;

            if ((int)result != poclu_bswap_cl_int (dev_id, R[i])) {
                std::cout 
                    << "F(" << i << ": " << (int)result << " != " << R[i] 
                    << ") ";
                ok = false;
            }
        }
        if (ok) 
            return EXIT_SUCCESS; 
        else
            return EXIT_FAILURE;

        // Finally release our hold on accessing the memory
        queue.enqueueUnmapMemObject(
            cBuffer,
            (void *) output);
 
        // There is no need to perform a finish on the final unmap
        // or release any objects as this all happens implicitly with
        // the C++ Wrapper API.
    } 
    catch (cl::Error err) {
         std::cerr
             << "ERROR: "
             << err.what()
             << "("
             << err.err()
             << ")"
             << std::endl;

         return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}
Exemplo n.º 4
0
Arquivo: host.cpp Projeto: Drako/pocl
int
main(void)
{
    //float a = 23456.0f;
    float a = 3.f;

    // test the poclu's half conversion functions
    printf("through conversion: %.0f\n", 
           poclu_cl_half_to_float(poclu_float_to_cl_half(42.0f)));

    try {
        std::vector<cl::Platform> platformList;

        // Pick platform
        cl::Platform::get(&platformList);

        // Pick first platform
        cl_context_properties cprops[] = {
            CL_CONTEXT_PLATFORM, (cl_context_properties)(platformList[0])(), 0};
        cl::Context context(CL_DEVICE_TYPE_GPU, cprops);

        // Query the set of devices attched to the context
        std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
        
        assert (devices.size() == 1);

        cl::Device device = devices.at(0);

        assert (strncmp(device.getInfo<CL_DEVICE_NAME>().c_str(), "ttasim", 6)==0 );

        a = poclu_bswap_cl_float (device(), a);

        // Create and program from source
        cl::Program::Sources sources(1, std::make_pair(kernelSourceCode, 0));
        cl::Program program(context, sources);

        // Build program
        program.build(devices);

        // Create kernel object
        cl::Kernel kernel(program, "test_kernel");

        // Set kernel args
        kernel.setArg(0, a);

        // Create command queue
        cl::CommandQueue queue(context, devices[0], CL_QUEUE_PROFILING_ENABLE);
 
        cl::Event enqEvent;

        // Do the work
        queue.enqueueNDRangeKernel(
            kernel, 
            cl::NullRange, 
            cl::NDRange(8),
            cl::NullRange,
            NULL, &enqEvent);
	queue.finish();
 
        assert (enqEvent.getInfo<CL_EVENT_COMMAND_EXECUTION_STATUS>() == CL_COMPLETE);

        assert (
            enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_QUEUED>() <=
            enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>());

        assert (
            enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_SUBMIT>() <=
            enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>());

        assert (
            enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>() <
            enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_END>());

#if 0
        std::cerr << "exec time: " 
                  << enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_END>() -
            enqEvent.getProfilingInfo<CL_PROFILING_COMMAND_START>() << std::endl;
#endif
    } 
    catch (cl::Error err) {
         std::cerr
             << "ERROR: "
             << err.what()
             << "("
             << err.err()
             << ")"
             << std::endl;

         return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}