Example #1
0
int
main (int argc, const char **argv)
{
    OclPlatform *ocl;
    cl_int errcode;
    cl_program program;
    const char *inname;
    size_t asize;
    size_t *sizes;
    char **binaries;

    if (argc < 2) {
        printf ("Usage: dump-opencl-binary input.cl\n");
        exit (0);
    }

    inname = argv[1];

    ocl = ocl_new (0, CL_DEVICE_TYPE_GPU);
    asize = sizeof (size_t) * ocl_get_num_devices (ocl);
    sizes = malloc (asize);
    binaries = malloc (sizeof (char *) * ocl_get_num_devices (ocl));

    program = ocl_create_program_from_file (ocl, inname, NULL, &errcode);
    OCL_CHECK_ERROR (errcode);

    OCL_CHECK_ERROR (clGetProgramInfo (program, CL_PROGRAM_BINARY_SIZES, asize, sizes, NULL));
    
    for (int i = 0; i < ocl_get_num_devices (ocl); i++)
        binaries[i] = malloc (sizes[i]);

    OCL_CHECK_ERROR (clGetProgramInfo (program, CL_PROGRAM_BINARIES, 0, binaries, NULL));

    for (int i = 0; i < ocl_get_num_devices (ocl); i++) {
        char fname[256];
        FILE *fp;

        snprintf (fname, 256, "%s.%i", inname, i);
        fp = fopen (fname, "wb");
        fwrite (binaries[0], sizes[i], 1, fp);
        fclose (fp);
    }

    OCL_CHECK_ERROR (clReleaseProgram (program));

    free (sizes);
    ocl_free (ocl);
}
Example #2
0
int
main (int argc, const char **argv)
{
    OclPlatform *ocl;
    cl_program program;
    cl_device_id *devices;
    cl_command_queue *queues;
    cl_kernel kernel;
    cl_int errcode;
    int num_devices;
    GTimer *timer;

    ocl = ocl_new_from_args (argc, argv, CL_QUEUE_PROFILING_ENABLE);

    program = ocl_create_program_from_source (ocl, source, NULL, &errcode);
    OCL_CHECK_ERROR (errcode);

    kernel = clCreateKernel (program, "touch", &errcode);
    OCL_CHECK_ERROR (errcode);

    num_devices = ocl_get_num_devices (ocl);
    devices = ocl_get_devices (ocl);
    queues = ocl_get_cmd_queues (ocl);
    timer = g_timer_new ();

    for (int i = 0; i < num_devices; i++) {
        char name[256];
        cl_event event;
        size_t size = 16;
        const int NUM_RUNS = 50000;
        unsigned long total_wait = 0;
        unsigned long total_execution = 0;
        double wall_clock = 0.0;

        for (int r = 0; r < NUM_RUNS; r++) {
            unsigned long wait;
            unsigned long execution;

            g_timer_start (timer);
            OCL_CHECK_ERROR (clEnqueueNDRangeKernel (queues[i], kernel, 
                        1, NULL, &size, NULL,
                        0, NULL, &event));

            clWaitForEvents (1, &event);
            g_timer_stop (timer);

            wall_clock += g_timer_elapsed (timer, NULL);

            get_event_times (event, &wait, &execution);
            clReleaseEvent (event);

            total_wait += wait;
            total_execution += execution;
        }

        OCL_CHECK_ERROR (clGetDeviceInfo (devices[i], CL_DEVICE_NAME, 256, name, NULL));

        /* all times in nano seconds */
        printf ("%s %f %f %f\n", name,
                total_wait / ((double) NUM_RUNS),
                total_execution / ((double) NUM_RUNS),
                wall_clock / NUM_RUNS * 1000 * 1000 * 1000);
    }

    g_timer_destroy (timer);
    clReleaseKernel (kernel);
    clReleaseProgram (program);

    ocl_free (ocl);
}