/** * Tests creation, getting info from and destruction of * profiler objects, and their relationship with context, device and * queue wrapper objects. * */ static void create_add_destroy_test() { /* Test variables. */ CCLErr* err = NULL; CCLBuffer* buf1 = NULL; CCLBuffer* buf2 = NULL; CCLProf* prof = NULL; CCLContext* ctx = NULL; CCLDevice* d = NULL; CCLQueue* cq1 = NULL; CCLQueue* cq2 = NULL; CCLEvent* evt = NULL; CCLEventWaitList ewl = NULL; size_t buf_size = 8 * sizeof(cl_short); cl_short hbuf[8] = {1, 2, 3, 4, 5, 6, 7, 8}; cl_ulong duration, eff_duration; double time_elapsed; /* Create a new profile object. */ prof = ccl_prof_new(); /* Get a context and a device. */ ctx = ccl_test_context_new(&err); g_assert_no_error(err); d = ccl_context_get_device(ctx, 0, &err); g_assert_no_error(err); /* Create two command queue wrappers. */ cq1 = ccl_queue_new(ctx, d, CL_QUEUE_PROFILING_ENABLE, &err); g_assert_no_error(err); cq2 = ccl_queue_new(ctx, d, CL_QUEUE_PROFILING_ENABLE, &err); g_assert_no_error(err); /* Create device buffers. */ buf1 = ccl_buffer_new(ctx, CL_MEM_READ_ONLY, buf_size, NULL, &err); g_assert_no_error(err); buf2 = ccl_buffer_new(ctx, CL_MEM_READ_WRITE, buf_size, NULL, &err); g_assert_no_error(err); /* Start profile object timer. */ ccl_prof_start(prof); /* Transfer data to buffer. */ evt = ccl_buffer_enqueue_write( buf1, cq1, CL_FALSE, 0, buf_size, hbuf, NULL, &err); g_assert_no_error(err); /* Transfer data from one buffer to another. */ evt = ccl_buffer_enqueue_copy(buf1, buf2, cq2, 0, 0, buf_size, ccl_ewl(&ewl, evt, NULL), &err); g_assert_no_error(err); /* Wait for copy. */ ccl_event_wait(ccl_ewl(&ewl, evt, NULL), &err); g_assert_no_error(err); /* Stop profile object timer. */ ccl_prof_stop(prof); /* Add both queues to profile object. */ ccl_prof_add_queue(prof, "A Queue", cq1); ccl_prof_add_queue(prof, "Another Queue", cq2); /* Process queues. */ ccl_prof_calc(prof, &err); g_assert_no_error(err); /* Request some profiling information. */ time_elapsed = ccl_prof_time_elapsed(prof); duration = ccl_prof_get_duration(prof); eff_duration = ccl_prof_get_eff_duration(prof); g_debug("Profiling time elapsed: %lf", time_elapsed); g_debug("Profiling duration: %d", (cl_int) duration); g_debug("Profiling eff. duration: %d", (cl_int) eff_duration); /* Destroy buffers. */ ccl_buffer_destroy(buf1); ccl_buffer_destroy(buf2); /* Unref cq1, which should not be destroyed because it is held * by the profile object. */ ccl_queue_destroy(cq1); /* Destroy the profile object, which will also destroy cq1. cq2 * will me merely unrefed and must still be explicitly destroyed. */ ccl_prof_destroy(prof); /* Destroy cq2. */ ccl_queue_destroy(cq2); /* Destroy the context. */ ccl_context_destroy(ctx); /* Confirm that memory allocated by wrappers has been properly * freed. */ g_assert(ccl_wrapper_memcheck()); }
/** * Main program. * * @param argc Number of command line arguments. * @param argv Vector of command line arguments. * @return @link clo_error_codes::CLO_SUCCESS @endlink if program * terminates successfully, or another value of #clo_error_codes if an * error occurs. * */ int main(int argc, char **argv) { /* Status var aux */ int status; /* Context object for command line argument parsing. */ GOptionContext *context = NULL; /* Test data structures. */ cl_uchar* host_data = NULL; size_t bytes; cl_ulong total_time; FILE *outfile = NULL; CloType clotype_elem; /* Sorter object. */ CloSort* sorter = NULL; /* cf4ocl wrappers. */ CCLQueue* cq_exec = NULL; CCLQueue* cq_comm = NULL; CCLContext* ctx = NULL; CCLDevice* dev = NULL; /* Profiler object. */ CCLProf* prof; /* Host-based random number generator (mersenne twister) */ GRand* rng_host = NULL; /* Error management object. */ GError *err = NULL; /* Sorting benchmarks. */ cl_ulong** benchmarks = NULL; /* Parse command line options. */ context = g_option_context_new (" - " CLO_SORT_DESCRIPTION); g_option_context_add_main_entries(context, entries, NULL); g_option_context_parse(context, &argc, &argv, &err); g_if_err_goto(err, error_handler); clotype_elem = clo_type_by_name( type != NULL ? type : CLO_SORT_BENCHMARK_TYPE, &err); g_if_err_goto(err, error_handler); if (algorithm == NULL) algorithm = g_strdup(CLO_SORT_BENCHMARK_ALGORITHM); if (alg_options == NULL) alg_options = g_strdup(CLO_SORT_BENCHMARK_ALG_OPTS); /* Determine size in bytes of each element to sort. */ bytes = clo_type_sizeof(clotype_elem); /* Initialize random number generator. */ rng_host = g_rand_new_with_seed(rng_seed); /* Get the context wrapper and the chosen device. */ ctx = ccl_context_new_from_menu_full(&dev_idx, &err); g_if_err_goto(err, error_handler); dev = ccl_context_get_device(ctx, 0, &err); g_if_err_goto(err, error_handler); /* Get sorter object. */ sorter = clo_sort_new( algorithm, alg_options, ctx, &clotype_elem, NULL, NULL, NULL, compiler_opts, &err); g_if_err_goto(err, error_handler); /* Create command queues. */ cq_exec = ccl_queue_new(ctx, dev, CL_QUEUE_PROFILING_ENABLE, &err); g_if_err_goto(err, error_handler); cq_comm = ccl_queue_new(ctx, dev, 0, &err); g_if_err_goto(err, error_handler); /* Create benchmarks table. */ benchmarks = g_new(cl_ulong*, maxpo2); for (unsigned int i = 0; i < maxpo2; i++) benchmarks[i] = g_new0(cl_ulong, runs); /* Print options. */ printf("\n =========================== Selected options ============================\n\n"); printf(" Random number generator seed: %u\n", rng_seed); printf(" Maximum local worksize (0 is auto-select): %d\n", (int) lws); printf(" Type of elements to sort: %s\n", clo_type_get_name(clotype_elem)); printf(" Number of runs: %d\n", runs); printf(" Compiler Options: %s\n", compiler_opts); /* Create host buffer. */ host_data = g_slice_alloc(bytes * (1 << maxpo2)); /* Perform test. */ for (unsigned int N = 4; N <= maxpo2; N++) { unsigned int num_elems = 1 << N; gboolean sorted_ok = TRUE; for (unsigned int r = 0; r < runs; r++) { /* Initialize host buffer. */ for (unsigned int i = 0; i < num_elems; i++) { clo_bench_rand( rng_host, clotype_elem, host_data + bytes * i); } /* Perform sort. */ clo_sort_with_host_data(sorter, cq_exec, cq_comm, host_data, host_data, num_elems, lws, &err); g_if_err_goto(err, error_handler); /* Perform profiling. */ prof = ccl_prof_new(); ccl_prof_add_queue(prof, "q_exec", cq_exec); ccl_prof_calc(prof, &err); g_if_err_goto(err, error_handler); /* Save duration to benchmarks. */ benchmarks[N - 1][r] = ccl_prof_get_duration(prof); ccl_prof_destroy(prof); /* Check if sorting was well performed. */ sorted_ok = TRUE; /* Wait on host thread for data transfer queue to finish... */ ccl_queue_finish(cq_comm, &err); g_if_err_goto(err, error_handler); /* Start check. */ for (unsigned int i = 0; i < num_elems - 1; i++) { /* Perform comparison. */ if (clo_bench_compare(clotype_elem, host_data + bytes*i, host_data + bytes*(i + 1)) > 0) { sorted_ok = FALSE; break; } } } /* Print info. */ total_time = 0; for (unsigned int i = 0; i < runs; i++) total_time += benchmarks[N - 1][i]; printf(" - 2^%d: %lf Mkeys/s %s\n", N, (1e-6 * num_elems * runs) / (total_time * 1e-9), sorted_ok ? "" : "(sort did not work)"); } /* Save benchmarks to file, if filename was given as cli option. */ if (out) { outfile = fopen(out, "w"); for (unsigned int i = 0; i < maxpo2; i++) { fprintf(outfile, "%d", i); for (unsigned int j = 0; j < runs; j++) { fprintf(outfile, "\t%lu", (unsigned long)benchmarks[i][j]); } fprintf(outfile, "\n"); } fclose(outfile); } /* If we get here, everything went Ok. */ status = CLO_SUCCESS; g_assert(err == NULL); goto cleanup; error_handler: /* Handle error. */ g_assert(err != NULL); fprintf(stderr, "Error: %s\n", err->message); g_error_free(err); cleanup: /* Free sorter object. */ if (sorter) clo_sort_destroy(sorter); /* Free command line options. */ if (context) g_option_context_free(context); if (algorithm) g_free(algorithm); if (alg_options) g_free(alg_options); if (compiler_opts) g_free(compiler_opts); if (out) g_free(out); /* Free host-based random number generator. */ if (rng_host) g_rand_free(rng_host); /* Free OpenCL wrappers. */ if (cq_exec) ccl_queue_destroy(cq_exec); if (cq_comm) ccl_queue_destroy(cq_comm); if (ctx) ccl_context_destroy(ctx); /* Free host resources */ if (host_data) g_slice_free1(bytes * (1 << maxpo2), host_data); /* Free benchmarks. */ if (benchmarks) { for (unsigned int i = 0; i < maxpo2; i++) if (benchmarks[i]) g_free(benchmarks[i]); g_free(benchmarks); } /* Bye. */ return status; }