int main (int argc, const char **argv) { OclPlatform *ocl; cl_int errcode; cl_program program; const char *inname; size_t asize; size_t *sizes; char **binaries; if (argc < 2) { printf ("Usage: dump-opencl-binary input.cl\n"); exit (0); } inname = argv[1]; ocl = ocl_new (0, CL_DEVICE_TYPE_GPU); asize = sizeof (size_t) * ocl_get_num_devices (ocl); sizes = malloc (asize); binaries = malloc (sizeof (char *) * ocl_get_num_devices (ocl)); program = ocl_create_program_from_file (ocl, inname, NULL, &errcode); OCL_CHECK_ERROR (errcode); OCL_CHECK_ERROR (clGetProgramInfo (program, CL_PROGRAM_BINARY_SIZES, asize, sizes, NULL)); for (int i = 0; i < ocl_get_num_devices (ocl); i++) binaries[i] = malloc (sizes[i]); OCL_CHECK_ERROR (clGetProgramInfo (program, CL_PROGRAM_BINARIES, 0, binaries, NULL)); for (int i = 0; i < ocl_get_num_devices (ocl); i++) { char fname[256]; FILE *fp; snprintf (fname, 256, "%s.%i", inname, i); fp = fopen (fname, "wb"); fwrite (binaries[0], sizes[i], 1, fp); fclose (fp); } OCL_CHECK_ERROR (clReleaseProgram (program)); free (sizes); ocl_free (ocl); }
int main (int argc, const char **argv) { OclPlatform *ocl; cl_program program; cl_device_id *devices; cl_command_queue *queues; cl_kernel kernel; cl_int errcode; int num_devices; GTimer *timer; ocl = ocl_new_from_args (argc, argv, CL_QUEUE_PROFILING_ENABLE); program = ocl_create_program_from_source (ocl, source, NULL, &errcode); OCL_CHECK_ERROR (errcode); kernel = clCreateKernel (program, "touch", &errcode); OCL_CHECK_ERROR (errcode); num_devices = ocl_get_num_devices (ocl); devices = ocl_get_devices (ocl); queues = ocl_get_cmd_queues (ocl); timer = g_timer_new (); for (int i = 0; i < num_devices; i++) { char name[256]; cl_event event; size_t size = 16; const int NUM_RUNS = 50000; unsigned long total_wait = 0; unsigned long total_execution = 0; double wall_clock = 0.0; for (int r = 0; r < NUM_RUNS; r++) { unsigned long wait; unsigned long execution; g_timer_start (timer); OCL_CHECK_ERROR (clEnqueueNDRangeKernel (queues[i], kernel, 1, NULL, &size, NULL, 0, NULL, &event)); clWaitForEvents (1, &event); g_timer_stop (timer); wall_clock += g_timer_elapsed (timer, NULL); get_event_times (event, &wait, &execution); clReleaseEvent (event); total_wait += wait; total_execution += execution; } OCL_CHECK_ERROR (clGetDeviceInfo (devices[i], CL_DEVICE_NAME, 256, name, NULL)); /* all times in nano seconds */ printf ("%s %f %f %f\n", name, total_wait / ((double) NUM_RUNS), total_execution / ((double) NUM_RUNS), wall_clock / NUM_RUNS * 1000 * 1000 * 1000); } g_timer_destroy (timer); clReleaseKernel (kernel); clReleaseProgram (program); ocl_free (ocl); }