/** * Cellular automata sample main function. * */ int main(int argc, char* argv[]) { /* Wrappers for OpenCL objects. */ CCLContext* ctx; CCLDevice* dev; CCLImage* img1; CCLImage* img2; CCLProgram* prg; CCLKernel* krnl; CCLEvent* evt1; CCLEvent* evt2; /* Other variables. */ CCLEventWaitList ewl = NULL; /* Profiler object. */ CCLProf* prof; /* Output images filename. */ char* filename; /* Selected device, may be given in command line. */ int dev_idx = -1; /* Error handling object (must be NULL). */ GError* err = NULL; /* Does selected device support images? */ cl_bool image_ok; /* Initial sim state. */ cl_uchar4* input_image; /* Simulation states. */ cl_uchar4** output_images; /* RNG seed, may be given in command line. */ unsigned int seed; /* Image file write status. */ int file_write_status; /* Image format. */ cl_image_format image_format = { CL_RGBA, CL_UNSIGNED_INT8 }; /* Thread data. */ struct thread_data td; /* Global and local worksizes. */ size_t gws[2]; size_t lws[2]; /* Threads. */ GThread* comm_thread; GThread* exec_thread; /* Check arguments. */ if (argc >= 2) { /* Check if a device was specified in the command line. */ dev_idx = atoi(argv[1]); } if (argc >= 3) { /* Check if a RNG seed was specified. */ seed = atoi(argv[2]); } else { seed = (unsigned int) time(NULL); } /* Initialize RNG. */ srand(seed); /* Create random initial state. */ input_image = (cl_uchar4*) malloc(CA_WIDTH * CA_HEIGHT * sizeof(cl_uchar4)); for (cl_uint i = 0; i < CA_WIDTH * CA_HEIGHT; ++i) { cl_uchar state = (rand() & 0x3) ? 0xFF : 0x00; input_image[i] = (cl_uchar4) {{ state, state, state, 0xFF }}; } /* Allocate space for simulation results. */ output_images = (cl_uchar4**) malloc((CA_ITERS + 1) * sizeof(cl_uchar4*)); for (cl_uint i = 0; i < CA_ITERS + 1; ++i) output_images[i] = (cl_uchar4*) malloc(CA_WIDTH * CA_HEIGHT * sizeof(cl_uchar4)); /* Create context using device selected from menu. */ ctx = ccl_context_new_from_menu_full(&dev_idx, &err); HANDLE_ERROR(err); /* Get first device in context. */ dev = ccl_context_get_device(ctx, 0, &err); HANDLE_ERROR(err); /* Ask device if it supports images. */ image_ok = ccl_device_get_info_scalar( dev, CL_DEVICE_IMAGE_SUPPORT, cl_bool, &err); HANDLE_ERROR(err); if (!image_ok) ERROR_MSG_AND_EXIT("Selected device doesn't support images."); /* Create command queues. */ queue_exec = ccl_queue_new(ctx, dev, CL_QUEUE_PROFILING_ENABLE, &err); HANDLE_ERROR(err); queue_comm = ccl_queue_new(ctx, dev, CL_QUEUE_PROFILING_ENABLE, &err); HANDLE_ERROR(err); /* Create 2D image for initial state. */ img1 = ccl_image_new(ctx, CL_MEM_READ_WRITE, &image_format, NULL, &err, "image_type", (cl_mem_object_type) CL_MEM_OBJECT_IMAGE2D, "image_width", (size_t) CA_WIDTH, "image_height", (size_t) CA_HEIGHT, NULL); HANDLE_ERROR(err); /* Create another 2D image for double buffering. */ img2 = ccl_image_new(ctx, CL_MEM_READ_WRITE, &image_format, NULL, &err, "image_type", (cl_mem_object_type) CL_MEM_OBJECT_IMAGE2D, "image_width", (size_t) CA_WIDTH, "image_height", (size_t) CA_HEIGHT, NULL); HANDLE_ERROR(err); /* Create program from kernel source and compile it. */ prg = ccl_program_new_from_source(ctx, CA_KERNEL, &err); HANDLE_ERROR(err); ccl_program_build(prg, NULL, &err); HANDLE_ERROR(err); /* Get kernel wrapper. */ krnl = ccl_program_get_kernel(prg, "ca", &err); HANDLE_ERROR(err); /* Determine nice local and global worksizes. */ ccl_kernel_suggest_worksizes(krnl, dev, 2, real_ws, gws, lws, &err); HANDLE_ERROR(err); printf("\n * Global work-size: (%d, %d)\n", (int) gws[0], (int) gws[1]); printf(" * Local work-size: (%d, %d)\n", (int) lws[0], (int) lws[1]); /* Create thread communication queues. */ comm_thread_queue = g_async_queue_new(); exec_thread_queue = g_async_queue_new(); host_thread_queue = g_async_queue_new(); /* Setup thread data. */ td.krnl = krnl; td.img1 = img1; td.img2 = img2; td.gws = gws; td.lws = lws; td.output_images = output_images; /* Create threads. */ exec_thread = g_thread_new("exec_thread", exec_func, &td); comm_thread = g_thread_new("comm_thread", comm_func, &td); /* Start profiling. */ prof = ccl_prof_new(); ccl_prof_start(prof); /* Write initial state. */ ccl_image_enqueue_write(img1, queue_comm, CL_TRUE, origin, region, 0, 0, input_image, NULL, &err); HANDLE_ERROR(err); /* Run CA_ITERS iterations of the CA. */ for (cl_uint i = 0; i < CA_ITERS; ++i) { /* Send message to comms thread. */ g_async_queue_push(comm_thread_queue, &go_msg); /* Send message to exec thread. */ g_async_queue_push(exec_thread_queue, &go_msg); /* Get event wrappers from both threads. */ evt1 = (CCLEvent*) g_async_queue_pop(host_thread_queue); evt2 = (CCLEvent*) g_async_queue_pop(host_thread_queue); /* Can't continue until this iteration is over. */ ccl_event_wait_list_add(&ewl, evt1, evt2, NULL); /* Wait for events. */ ccl_event_wait(&ewl, &err); HANDLE_ERROR(err); } /* Send message to comms thread to read last result. */ g_async_queue_push(comm_thread_queue, &go_msg); /* Send stop messages to both threads. */ g_async_queue_push(comm_thread_queue, &stop_msg); g_async_queue_push(exec_thread_queue, &stop_msg); /* Get event wrapper from comms thread. */ evt1 = (CCLEvent*) g_async_queue_pop(host_thread_queue); /* Can't continue until final read is over. */ ccl_event_wait_list_add(&ewl, evt1, NULL); ccl_event_wait(&ewl, &err); HANDLE_ERROR(err); /* Make sure both queues are finished. */ ccl_queue_finish(queue_comm, &err); HANDLE_ERROR(err); ccl_queue_finish(queue_exec, &err); HANDLE_ERROR(err); /* Stop profiling timer and add queues for analysis. */ ccl_prof_stop(prof); ccl_prof_add_queue(prof, "Comms", queue_comm); ccl_prof_add_queue(prof, "Exec", queue_exec); /* Allocate space for base filename. */ filename = (char*) malloc( (strlen(IMAGE_FILE_PREFIX ".png") + IMAGE_FILE_NUM_DIGITS + 1) * sizeof(char)); /* Write results to image files. */ for (cl_uint i = 0; i < CA_ITERS; ++i) { /* Determine next filename. */ sprintf(filename, "%s%0" G_STRINGIFY(IMAGE_FILE_NUM_DIGITS) "d.png", IMAGE_FILE_PREFIX, i); /* Save next image. */ file_write_status = stbi_write_png(filename, CA_WIDTH, CA_HEIGHT, 4, output_images[i], CA_WIDTH * sizeof(cl_uchar4)); /* Give feedback if unable to save image. */ if (!file_write_status) { ERROR_MSG_AND_EXIT("Unable to save image in file."); } } /* Process profiling info. */ ccl_prof_calc(prof, &err); HANDLE_ERROR(err); /* Print profiling info. */ ccl_prof_print_summary(prof); /* Save profiling info. */ ccl_prof_export_info_file(prof, "prof.tsv", &err); HANDLE_ERROR(err); /* Destroy threads. */ g_thread_join(exec_thread); g_thread_join(comm_thread); /* Destroy thread communication queues. */ g_async_queue_unref(comm_thread_queue); g_async_queue_unref(exec_thread_queue); g_async_queue_unref(host_thread_queue); /* Release host buffers. */ free(filename); free(input_image); for (cl_uint i = 0; i < CA_ITERS + 1; ++i) free(output_images[i]); free(output_images); /* Release wrappers. */ ccl_image_destroy(img1); ccl_image_destroy(img2); ccl_program_destroy(prg); ccl_queue_destroy(queue_comm); ccl_queue_destroy(queue_exec); ccl_context_destroy(ctx); /* Destroy profiler. */ ccl_prof_destroy(prof); /* Check all wrappers have been destroyed. */ g_assert(ccl_wrapper_memcheck()); /* Terminate. */ return 0; }
/** * Canonical example main function. * */ int main(int argc, char** argv) { /* Number of elements in buffer. */ size_t buf_n = DEF_BUF_N; /* Device selected specified in the command line. */ int dev_idx = -1; /* Program return value. */ int ret_val; /* Check if a device was specified in the command line. */ if (argc >= 2) { dev_idx = atoi(argv[1]); } /* Check if a new buffer size was specified in the command line. */ if (argc >= 3) { buf_n = atoi(argv[2]); } /* Wrappers. */ CCLContext* ctx = NULL; CCLProgram* prg = NULL; CCLDevice* dev = NULL; CCLQueue* queue = NULL; CCLKernel* krnl = NULL; CCLBuffer* a_dev; CCLBuffer* b_dev; CCLBuffer* c_dev; CCLEvent* evt_write1; CCLEvent* evt_write2; CCLEvent* evt_exec; CCLEventWaitList ewl = NULL; /* Profiler. */ CCLProf* prof; /* Global and local worksizes. */ size_t gws = 0; size_t lws = 0; /* Host buffers. */ cl_uint* a_host = NULL; cl_uint* b_host = NULL; cl_uint* c_host = NULL; cl_uint d_host; /* Error reporting object. */ CCLErr* err = NULL; /* Check results flag. */ cl_bool check_result; /* Create a context with device selected from menu. */ ctx = ccl_context_new_from_menu_full(&dev_idx, &err); HANDLE_ERROR(err); /* Get the selected device. */ dev = ccl_context_get_device(ctx, 0, &err); HANDLE_ERROR(err); /* Create a new program from kernel source. */ prg = ccl_program_new_from_source(ctx, KERNEL_SRC, &err); HANDLE_ERROR(err); /* Build program. */ ccl_program_build(prg, NULL, &err); HANDLE_ERROR(err); /* Create a command queue. */ queue = ccl_queue_new(ctx, dev, CL_QUEUE_PROFILING_ENABLE, &err); HANDLE_ERROR(err); /* Get kernel object. */ krnl = ccl_program_get_kernel(prg, KERNEL_NAME, &err); HANDLE_ERROR(err); /* Get worksizes. */ lws = ccl_kernel_suggest_worksizes(krnl, dev, 1, &buf_n, &gws, &lws, &err); HANDLE_ERROR(err); /* Show worksizes. */ printf("\n"); printf(" * Global worksize: %d\n", (int) gws); printf(" * Local worksize : %d\n", (int) lws); /* Initialize host buffers. */ a_host = (cl_uint*) malloc(sizeof(cl_uint) * buf_n); b_host = (cl_uint*) malloc(sizeof(cl_uint) * buf_n); c_host = (cl_uint*) malloc(sizeof(cl_uint) * buf_n); /* Fill host buffers. */ for (cl_uint i = 0; i < buf_n; ++i) { a_host[i] = i; b_host[i] = buf_n - i; } d_host = buf_n / 4; /* Create device buffers. */ a_dev = ccl_buffer_new(ctx, CL_MEM_READ_ONLY, buf_n * sizeof(cl_uint), NULL, &err); HANDLE_ERROR(err); b_dev = ccl_buffer_new(ctx, CL_MEM_READ_ONLY, buf_n * sizeof(cl_uint), NULL, &err); HANDLE_ERROR(err); c_dev = ccl_buffer_new(ctx, CL_MEM_WRITE_ONLY, buf_n * sizeof(cl_uint), NULL, &err); HANDLE_ERROR(err); /* Copy host data to device buffers without waiting for transfer * to terminate before continuing host program. */ evt_write1 = ccl_buffer_enqueue_write(a_dev, queue, CL_FALSE, 0, buf_n * sizeof(cl_uint), a_host, NULL, &err); HANDLE_ERROR(err); evt_write2 = ccl_buffer_enqueue_write(b_dev, queue, CL_FALSE, 0, buf_n * sizeof(cl_uint), b_host, NULL, &err); HANDLE_ERROR(err); /* Initialize event wait list and add the two transfer events. */ ccl_event_wait_list_add(&ewl, evt_write1, evt_write2, NULL); /* Execute program kernel, waiting for the two transfer events * to terminate (this will empty the event wait list). */ evt_exec = ccl_program_enqueue_kernel(prg, KERNEL_NAME, queue, 1, NULL, &gws, &lws, &ewl, &err, /* Kernel arguments. */ a_dev, b_dev, c_dev, ccl_arg_priv(d_host, cl_uint), ccl_arg_priv(buf_n, cl_uint), NULL); HANDLE_ERROR(err); /* Add the kernel termination event to the wait list. */ ccl_event_wait_list_add(&ewl, evt_exec, NULL); /* Sync. queue for events in wait list (just the execute event in * this case) to terminate before going forward... */ ccl_enqueue_barrier(queue, &ewl, &err); HANDLE_ERROR(err); /* Read back results from host waiting for transfer to terminate * before continuing host program. */ ccl_buffer_enqueue_read(c_dev, queue, CL_TRUE, 0, buf_n * sizeof(cl_uint), c_host, NULL, &err); HANDLE_ERROR(err); /* Check results are as expected (not available with OpenCL stub). */ check_result = CL_TRUE; for (cl_uint i = 0; i < buf_n; ++i) { if(c_host[i] != a_host[i] + b_host[i] + d_host) { check_result = CL_FALSE; break; } } if (check_result) { fprintf(stdout, " * Kernel execution produced the expected results.\n"); ret_val = EXIT_SUCCESS; } else { fprintf(stderr, " * Kernel execution failed to produce the expected results.\n"); ret_val = EXIT_FAILURE; } /* Perform profiling. */ prof = ccl_prof_new(); ccl_prof_add_queue(prof, "queue1", queue); ccl_prof_calc(prof, &err); HANDLE_ERROR(err); /* Show profiling info. */ ccl_prof_print_summary(prof); /* Export profiling info. */ ccl_prof_export_info_file(prof, "out.tsv", &err); HANDLE_ERROR(err); /* Destroy profiler object. */ ccl_prof_destroy(prof); /* Destroy host buffers. */ free(a_host); free(b_host); free(c_host); /* Destroy wrappers. */ ccl_buffer_destroy(a_dev); ccl_buffer_destroy(b_dev); ccl_buffer_destroy(c_dev); ccl_queue_destroy(queue); ccl_program_destroy(prg); ccl_context_destroy(ctx); /* Confirm that memory allocated by wrappers has been properly freed. */ assert(ccl_wrapper_memcheck()); /* Bye. */ return ret_val; }