/** * Cellular automata sample main function. * */ int main(int argc, char* argv[]) { /* Wrappers for OpenCL objects. */ CCLContext* ctx; CCLDevice* dev; CCLImage* img1; CCLImage* img2; CCLProgram* prg; CCLKernel* krnl; CCLEvent* evt1; CCLEvent* evt2; /* Other variables. */ CCLEventWaitList ewl = NULL; /* Profiler object. */ CCLProf* prof; /* Output images filename. */ char* filename; /* Selected device, may be given in command line. */ int dev_idx = -1; /* Error handling object (must be NULL). */ GError* err = NULL; /* Does selected device support images? */ cl_bool image_ok; /* Initial sim state. */ cl_uchar4* input_image; /* Simulation states. */ cl_uchar4** output_images; /* RNG seed, may be given in command line. */ unsigned int seed; /* Image file write status. */ int file_write_status; /* Image format. */ cl_image_format image_format = { CL_RGBA, CL_UNSIGNED_INT8 }; /* Thread data. */ struct thread_data td; /* Global and local worksizes. */ size_t gws[2]; size_t lws[2]; /* Threads. */ GThread* comm_thread; GThread* exec_thread; /* Check arguments. */ if (argc >= 2) { /* Check if a device was specified in the command line. */ dev_idx = atoi(argv[1]); } if (argc >= 3) { /* Check if a RNG seed was specified. */ seed = atoi(argv[2]); } else { seed = (unsigned int) time(NULL); } /* Initialize RNG. */ srand(seed); /* Create random initial state. */ input_image = (cl_uchar4*) malloc(CA_WIDTH * CA_HEIGHT * sizeof(cl_uchar4)); for (cl_uint i = 0; i < CA_WIDTH * CA_HEIGHT; ++i) { cl_uchar state = (rand() & 0x3) ? 0xFF : 0x00; input_image[i] = (cl_uchar4) {{ state, state, state, 0xFF }}; } /* Allocate space for simulation results. */ output_images = (cl_uchar4**) malloc((CA_ITERS + 1) * sizeof(cl_uchar4*)); for (cl_uint i = 0; i < CA_ITERS + 1; ++i) output_images[i] = (cl_uchar4*) malloc(CA_WIDTH * CA_HEIGHT * sizeof(cl_uchar4)); /* Create context using device selected from menu. */ ctx = ccl_context_new_from_menu_full(&dev_idx, &err); HANDLE_ERROR(err); /* Get first device in context. */ dev = ccl_context_get_device(ctx, 0, &err); HANDLE_ERROR(err); /* Ask device if it supports images. */ image_ok = ccl_device_get_info_scalar( dev, CL_DEVICE_IMAGE_SUPPORT, cl_bool, &err); HANDLE_ERROR(err); if (!image_ok) ERROR_MSG_AND_EXIT("Selected device doesn't support images."); /* Create command queues. */ queue_exec = ccl_queue_new(ctx, dev, CL_QUEUE_PROFILING_ENABLE, &err); HANDLE_ERROR(err); queue_comm = ccl_queue_new(ctx, dev, CL_QUEUE_PROFILING_ENABLE, &err); HANDLE_ERROR(err); /* Create 2D image for initial state. */ img1 = ccl_image_new(ctx, CL_MEM_READ_WRITE, &image_format, NULL, &err, "image_type", (cl_mem_object_type) CL_MEM_OBJECT_IMAGE2D, "image_width", (size_t) CA_WIDTH, "image_height", (size_t) CA_HEIGHT, NULL); HANDLE_ERROR(err); /* Create another 2D image for double buffering. */ img2 = ccl_image_new(ctx, CL_MEM_READ_WRITE, &image_format, NULL, &err, "image_type", (cl_mem_object_type) CL_MEM_OBJECT_IMAGE2D, "image_width", (size_t) CA_WIDTH, "image_height", (size_t) CA_HEIGHT, NULL); HANDLE_ERROR(err); /* Create program from kernel source and compile it. */ prg = ccl_program_new_from_source(ctx, CA_KERNEL, &err); HANDLE_ERROR(err); ccl_program_build(prg, NULL, &err); HANDLE_ERROR(err); /* Get kernel wrapper. */ krnl = ccl_program_get_kernel(prg, "ca", &err); HANDLE_ERROR(err); /* Determine nice local and global worksizes. */ ccl_kernel_suggest_worksizes(krnl, dev, 2, real_ws, gws, lws, &err); HANDLE_ERROR(err); printf("\n * Global work-size: (%d, %d)\n", (int) gws[0], (int) gws[1]); printf(" * Local work-size: (%d, %d)\n", (int) lws[0], (int) lws[1]); /* Create thread communication queues. */ comm_thread_queue = g_async_queue_new(); exec_thread_queue = g_async_queue_new(); host_thread_queue = g_async_queue_new(); /* Setup thread data. */ td.krnl = krnl; td.img1 = img1; td.img2 = img2; td.gws = gws; td.lws = lws; td.output_images = output_images; /* Create threads. */ exec_thread = g_thread_new("exec_thread", exec_func, &td); comm_thread = g_thread_new("comm_thread", comm_func, &td); /* Start profiling. */ prof = ccl_prof_new(); ccl_prof_start(prof); /* Write initial state. */ ccl_image_enqueue_write(img1, queue_comm, CL_TRUE, origin, region, 0, 0, input_image, NULL, &err); HANDLE_ERROR(err); /* Run CA_ITERS iterations of the CA. */ for (cl_uint i = 0; i < CA_ITERS; ++i) { /* Send message to comms thread. */ g_async_queue_push(comm_thread_queue, &go_msg); /* Send message to exec thread. */ g_async_queue_push(exec_thread_queue, &go_msg); /* Get event wrappers from both threads. */ evt1 = (CCLEvent*) g_async_queue_pop(host_thread_queue); evt2 = (CCLEvent*) g_async_queue_pop(host_thread_queue); /* Can't continue until this iteration is over. */ ccl_event_wait_list_add(&ewl, evt1, evt2, NULL); /* Wait for events. */ ccl_event_wait(&ewl, &err); HANDLE_ERROR(err); } /* Send message to comms thread to read last result. */ g_async_queue_push(comm_thread_queue, &go_msg); /* Send stop messages to both threads. */ g_async_queue_push(comm_thread_queue, &stop_msg); g_async_queue_push(exec_thread_queue, &stop_msg); /* Get event wrapper from comms thread. */ evt1 = (CCLEvent*) g_async_queue_pop(host_thread_queue); /* Can't continue until final read is over. */ ccl_event_wait_list_add(&ewl, evt1, NULL); ccl_event_wait(&ewl, &err); HANDLE_ERROR(err); /* Make sure both queues are finished. */ ccl_queue_finish(queue_comm, &err); HANDLE_ERROR(err); ccl_queue_finish(queue_exec, &err); HANDLE_ERROR(err); /* Stop profiling timer and add queues for analysis. */ ccl_prof_stop(prof); ccl_prof_add_queue(prof, "Comms", queue_comm); ccl_prof_add_queue(prof, "Exec", queue_exec); /* Allocate space for base filename. */ filename = (char*) malloc( (strlen(IMAGE_FILE_PREFIX ".png") + IMAGE_FILE_NUM_DIGITS + 1) * sizeof(char)); /* Write results to image files. */ for (cl_uint i = 0; i < CA_ITERS; ++i) { /* Determine next filename. */ sprintf(filename, "%s%0" G_STRINGIFY(IMAGE_FILE_NUM_DIGITS) "d.png", IMAGE_FILE_PREFIX, i); /* Save next image. */ file_write_status = stbi_write_png(filename, CA_WIDTH, CA_HEIGHT, 4, output_images[i], CA_WIDTH * sizeof(cl_uchar4)); /* Give feedback if unable to save image. */ if (!file_write_status) { ERROR_MSG_AND_EXIT("Unable to save image in file."); } } /* Process profiling info. */ ccl_prof_calc(prof, &err); HANDLE_ERROR(err); /* Print profiling info. */ ccl_prof_print_summary(prof); /* Save profiling info. */ ccl_prof_export_info_file(prof, "prof.tsv", &err); HANDLE_ERROR(err); /* Destroy threads. */ g_thread_join(exec_thread); g_thread_join(comm_thread); /* Destroy thread communication queues. */ g_async_queue_unref(comm_thread_queue); g_async_queue_unref(exec_thread_queue); g_async_queue_unref(host_thread_queue); /* Release host buffers. */ free(filename); free(input_image); for (cl_uint i = 0; i < CA_ITERS + 1; ++i) free(output_images[i]); free(output_images); /* Release wrappers. */ ccl_image_destroy(img1); ccl_image_destroy(img2); ccl_program_destroy(prg); ccl_queue_destroy(queue_comm); ccl_queue_destroy(queue_exec); ccl_context_destroy(ctx); /* Destroy profiler. */ ccl_prof_destroy(prof); /* Check all wrappers have been destroyed. */ g_assert(ccl_wrapper_memcheck()); /* Terminate. */ return 0; }
/** * @internal * * @brief Tests memory object migration. * */ static void migrate_test() { /* Test variables. */ CCLPlatforms * ps; CCLPlatform * p; CCLContext * ctx = NULL; CCLDevice * d = NULL; CCLBuffer * b = NULL; CCLQueue * q; size_t buf_size = sizeof(cl_char8) * CCL_TEST_BUFFER_SIZE; CCLErr * err = NULL; /* Get a context which supports OpenCL 1.2 if possible. */ ps = ccl_platforms_new(&err); g_assert_no_error(err); for (guint i = 0; i < ccl_platforms_count(ps); ++i) { p = ccl_platforms_get(ps, i); cl_uint ocl_ver = ccl_platform_get_opencl_version(p, &err); if (ocl_ver >= 120) { ctx = ccl_context_new_from_devices( ccl_platform_get_num_devices(p, NULL), ccl_platform_get_all_devices(p, NULL), &err); g_assert_no_error(err); break; } } /* If not possible to find a 1.2 or better context, finish this * test. */ if (ctx == NULL) { g_test_message("'%s' test not performed because no platform " \ "with OpenCL 1.2 support was found", CCL_STRD); ccl_platforms_destroy(ps); return; } /* Get first device in context. */ d = ccl_context_get_device(ctx, 0, &err); g_assert_no_error(err); /* Create a command queue associated with first device in * context. */ q = ccl_queue_new(ctx, d, 0, &err); g_assert_no_error(err); /* Create regular buffer. */ b = ccl_buffer_new(ctx, CL_MEM_READ_WRITE, buf_size, NULL, &err); g_assert_no_error(err); /* Assign buffer to first device in context (via the command * queue). */ ccl_memobj_enqueue_migrate((CCLMemObj **) &b, 1, q, 0, NULL, &err); g_assert_no_error(err); /* Migrate buffer to host. */ ccl_memobj_enqueue_migrate( (CCLMemObj **) &b, 1, q, CL_MIGRATE_MEM_OBJECT_HOST, NULL, &err); g_assert_no_error(err); /* Wait for queue to finish... */ ccl_queue_finish(q, &err); g_assert_no_error(err); /* Free stuff. */ ccl_buffer_destroy(b); ccl_queue_destroy(q); ccl_context_destroy(ctx); ccl_platforms_destroy(ps); /* Confirm that memory allocated by wrappers has been properly * freed. */ g_assert(ccl_wrapper_memcheck()); }
/** * Main program. * * @param argc Number of command line arguments. * @param argv Vector of command line arguments. * @return @link clo_error_codes::CLO_SUCCESS @endlink if program * terminates successfully, or another value of #clo_error_codes if an * error occurs. * */ int main(int argc, char **argv) { /* Status var aux */ int status; /* Context object for command line argument parsing. */ GOptionContext *context = NULL; /* Test data structures. */ cl_uchar* host_data = NULL; size_t bytes; cl_ulong total_time; FILE *outfile = NULL; CloType clotype_elem; /* Sorter object. */ CloSort* sorter = NULL; /* cf4ocl wrappers. */ CCLQueue* cq_exec = NULL; CCLQueue* cq_comm = NULL; CCLContext* ctx = NULL; CCLDevice* dev = NULL; /* Profiler object. */ CCLProf* prof; /* Host-based random number generator (mersenne twister) */ GRand* rng_host = NULL; /* Error management object. */ GError *err = NULL; /* Sorting benchmarks. */ cl_ulong** benchmarks = NULL; /* Parse command line options. */ context = g_option_context_new (" - " CLO_SORT_DESCRIPTION); g_option_context_add_main_entries(context, entries, NULL); g_option_context_parse(context, &argc, &argv, &err); g_if_err_goto(err, error_handler); clotype_elem = clo_type_by_name( type != NULL ? type : CLO_SORT_BENCHMARK_TYPE, &err); g_if_err_goto(err, error_handler); if (algorithm == NULL) algorithm = g_strdup(CLO_SORT_BENCHMARK_ALGORITHM); if (alg_options == NULL) alg_options = g_strdup(CLO_SORT_BENCHMARK_ALG_OPTS); /* Determine size in bytes of each element to sort. */ bytes = clo_type_sizeof(clotype_elem); /* Initialize random number generator. */ rng_host = g_rand_new_with_seed(rng_seed); /* Get the context wrapper and the chosen device. */ ctx = ccl_context_new_from_menu_full(&dev_idx, &err); g_if_err_goto(err, error_handler); dev = ccl_context_get_device(ctx, 0, &err); g_if_err_goto(err, error_handler); /* Get sorter object. */ sorter = clo_sort_new( algorithm, alg_options, ctx, &clotype_elem, NULL, NULL, NULL, compiler_opts, &err); g_if_err_goto(err, error_handler); /* Create command queues. */ cq_exec = ccl_queue_new(ctx, dev, CL_QUEUE_PROFILING_ENABLE, &err); g_if_err_goto(err, error_handler); cq_comm = ccl_queue_new(ctx, dev, 0, &err); g_if_err_goto(err, error_handler); /* Create benchmarks table. */ benchmarks = g_new(cl_ulong*, maxpo2); for (unsigned int i = 0; i < maxpo2; i++) benchmarks[i] = g_new0(cl_ulong, runs); /* Print options. */ printf("\n =========================== Selected options ============================\n\n"); printf(" Random number generator seed: %u\n", rng_seed); printf(" Maximum local worksize (0 is auto-select): %d\n", (int) lws); printf(" Type of elements to sort: %s\n", clo_type_get_name(clotype_elem)); printf(" Number of runs: %d\n", runs); printf(" Compiler Options: %s\n", compiler_opts); /* Create host buffer. */ host_data = g_slice_alloc(bytes * (1 << maxpo2)); /* Perform test. */ for (unsigned int N = 4; N <= maxpo2; N++) { unsigned int num_elems = 1 << N; gboolean sorted_ok = TRUE; for (unsigned int r = 0; r < runs; r++) { /* Initialize host buffer. */ for (unsigned int i = 0; i < num_elems; i++) { clo_bench_rand( rng_host, clotype_elem, host_data + bytes * i); } /* Perform sort. */ clo_sort_with_host_data(sorter, cq_exec, cq_comm, host_data, host_data, num_elems, lws, &err); g_if_err_goto(err, error_handler); /* Perform profiling. */ prof = ccl_prof_new(); ccl_prof_add_queue(prof, "q_exec", cq_exec); ccl_prof_calc(prof, &err); g_if_err_goto(err, error_handler); /* Save duration to benchmarks. */ benchmarks[N - 1][r] = ccl_prof_get_duration(prof); ccl_prof_destroy(prof); /* Check if sorting was well performed. */ sorted_ok = TRUE; /* Wait on host thread for data transfer queue to finish... */ ccl_queue_finish(cq_comm, &err); g_if_err_goto(err, error_handler); /* Start check. */ for (unsigned int i = 0; i < num_elems - 1; i++) { /* Perform comparison. */ if (clo_bench_compare(clotype_elem, host_data + bytes*i, host_data + bytes*(i + 1)) > 0) { sorted_ok = FALSE; break; } } } /* Print info. */ total_time = 0; for (unsigned int i = 0; i < runs; i++) total_time += benchmarks[N - 1][i]; printf(" - 2^%d: %lf Mkeys/s %s\n", N, (1e-6 * num_elems * runs) / (total_time * 1e-9), sorted_ok ? "" : "(sort did not work)"); } /* Save benchmarks to file, if filename was given as cli option. */ if (out) { outfile = fopen(out, "w"); for (unsigned int i = 0; i < maxpo2; i++) { fprintf(outfile, "%d", i); for (unsigned int j = 0; j < runs; j++) { fprintf(outfile, "\t%lu", (unsigned long)benchmarks[i][j]); } fprintf(outfile, "\n"); } fclose(outfile); } /* If we get here, everything went Ok. */ status = CLO_SUCCESS; g_assert(err == NULL); goto cleanup; error_handler: /* Handle error. */ g_assert(err != NULL); fprintf(stderr, "Error: %s\n", err->message); g_error_free(err); cleanup: /* Free sorter object. */ if (sorter) clo_sort_destroy(sorter); /* Free command line options. */ if (context) g_option_context_free(context); if (algorithm) g_free(algorithm); if (alg_options) g_free(alg_options); if (compiler_opts) g_free(compiler_opts); if (out) g_free(out); /* Free host-based random number generator. */ if (rng_host) g_rand_free(rng_host); /* Free OpenCL wrappers. */ if (cq_exec) ccl_queue_destroy(cq_exec); if (cq_comm) ccl_queue_destroy(cq_comm); if (ctx) ccl_context_destroy(ctx); /* Free host resources */ if (host_data) g_slice_free1(bytes * (1 << maxpo2), host_data); /* Free benchmarks. */ if (benchmarks) { for (unsigned int i = 0; i < maxpo2; i++) if (benchmarks[i]) g_free(benchmarks[i]); g_free(benchmarks); } /* Bye. */ return status; }