Пример #1
0
/**
 * Tests creation, getting info from and destruction of
 * profiler objects, and their relationship with context, device and
 * queue wrapper objects.
 *
 * The test enqueues a non-blocking buffer write on one queue and a
 * buffer copy (which waits on the write) on a second queue, with the
 * profiler timer running around both. Both queues are then added to
 * the profiler, the profiling info is calculated and queried, and all
 * wrappers are destroyed. The test finishes by asserting that
 * ccl_wrapper_memcheck() reports success.
 * */
static void create_add_destroy_test(void) {

	/* Test variables. */
	CCLErr* err = NULL;
	CCLBuffer* buf1 = NULL;
	CCLBuffer* buf2 = NULL;
	CCLProf* prof = NULL;
	CCLContext* ctx = NULL;
	CCLDevice* d = NULL;
	CCLQueue* cq1 = NULL;
	CCLQueue* cq2 = NULL;
	CCLEvent* evt = NULL;
	CCLEventWaitList ewl = NULL;
	size_t buf_size = 8 * sizeof(cl_short);
	cl_short hbuf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
	cl_ulong duration, eff_duration;
	double time_elapsed;

	/* Create a new profile object. */
	prof = ccl_prof_new();

	/* Get a context and a device. */
	ctx = ccl_test_context_new(&err);
	g_assert_no_error(err);

	d = ccl_context_get_device(ctx, 0, &err);
	g_assert_no_error(err);

	/* Create two command queue wrappers, both with profiling enabled. */
	cq1 = ccl_queue_new(ctx, d, CL_QUEUE_PROFILING_ENABLE, &err);
	g_assert_no_error(err);

	cq2 = ccl_queue_new(ctx, d, CL_QUEUE_PROFILING_ENABLE, &err);
	g_assert_no_error(err);

	/* Create device buffers. */
	buf1 = ccl_buffer_new(ctx, CL_MEM_READ_ONLY, buf_size, NULL, &err);
	g_assert_no_error(err);
	buf2 = ccl_buffer_new(ctx, CL_MEM_READ_WRITE, buf_size, NULL, &err);
	g_assert_no_error(err);

	/* Start profile object timer. */
	ccl_prof_start(prof);

	/* Transfer data to buffer (non-blocking write on the first queue). */
	evt = ccl_buffer_enqueue_write(
		buf1, cq1, CL_FALSE, 0, buf_size, hbuf, NULL, &err);
	g_assert_no_error(err);

	/* Transfer data from one buffer to another on the second queue. The
	 * write event is passed in the wait list so the copy depends on it. */
	evt = ccl_buffer_enqueue_copy(buf1, buf2, cq2, 0, 0, buf_size,
		ccl_ewl(&ewl, evt, NULL), &err);
	g_assert_no_error(err);

	/* Wait for copy. */
	ccl_event_wait(ccl_ewl(&ewl, evt, NULL), &err);
	g_assert_no_error(err);

	/* Stop profile object timer. */
	ccl_prof_stop(prof);

	/* Add both queues to profile object. */
	ccl_prof_add_queue(prof, "A Queue", cq1);
	ccl_prof_add_queue(prof, "Another Queue", cq2);

	/* Process queues. */
	ccl_prof_calc(prof, &err);
	g_assert_no_error(err);

	/* Request some profiling information. */
	time_elapsed = ccl_prof_time_elapsed(prof);
	duration = ccl_prof_get_duration(prof);
	eff_duration = ccl_prof_get_eff_duration(prof);

	/* Durations are cl_ulong values: print them at full width instead of
	 * narrowing them to a signed 32-bit integer, which would truncate or
	 * go negative for large values. */
	g_debug("Profiling time elapsed: %lf", time_elapsed);
	g_debug("Profiling duration: %llu", (unsigned long long) duration);
	g_debug("Profiling eff. duration: %llu",
		(unsigned long long) eff_duration);

	/* Destroy buffers. */
	ccl_buffer_destroy(buf1);
	ccl_buffer_destroy(buf2);

	/* Unref cq1, which should not be destroyed because it is held
	 * by the profile object. */
	ccl_queue_destroy(cq1);

	/* Destroy the profile object, which will also destroy cq1. cq2
	 * will be merely unrefed and must still be explicitly destroyed. */
	ccl_prof_destroy(prof);

	/* Destroy cq2. */
	ccl_queue_destroy(cq2);

	/* Destroy the context. */
	ccl_context_destroy(ctx);

	/* Confirm that memory allocated by wrappers has been properly
	 * freed. */
	g_assert(ccl_wrapper_memcheck());

}
Пример #2
0
/**
 * Main program.
 *
 * @param argc Number of command line arguments.
 * @param argv Vector of command line arguments.
 * @return @link clo_error_codes::CLO_SUCCESS @endlink if program
 * terminates successfully, or another value of #clo_error_codes if an
 * error occurs.
 * */
int main(int argc, char **argv)
{

	/* Status var aux */
	int status;

	/* Context object for command line argument parsing. */
	GOptionContext *context = NULL;

	/* Test data structures. */
	cl_uchar* host_data = NULL;
	size_t bytes;
	cl_ulong total_time;
	FILE *outfile = NULL;
	CloType clotype_elem;

	/* Sorter object. */
	CloSort* sorter = NULL;

	/* cf4ocl wrappers. */
	CCLQueue* cq_exec = NULL;
	CCLQueue* cq_comm = NULL;
	CCLContext* ctx = NULL;
	CCLDevice* dev = NULL;

	/* Profiler object. */
	CCLProf* prof;

	/* Host-based random number generator (mersenne twister) */
	GRand* rng_host = NULL;

	/* Error management object. */
	GError *err = NULL;

	/* Sorting benchmarks. */
	cl_ulong** benchmarks = NULL;

	/* Parse command line options. */
	context = g_option_context_new (" - " CLO_SORT_DESCRIPTION);
	g_option_context_add_main_entries(context, entries, NULL);
	g_option_context_parse(context, &argc, &argv, &err);
	g_if_err_goto(err, error_handler);

	clotype_elem = clo_type_by_name(
		type != NULL ? type : CLO_SORT_BENCHMARK_TYPE, &err);
	g_if_err_goto(err, error_handler);

	if (algorithm == NULL) algorithm = g_strdup(CLO_SORT_BENCHMARK_ALGORITHM);
	if (alg_options == NULL) alg_options = g_strdup(CLO_SORT_BENCHMARK_ALG_OPTS);

	/* Determine size in bytes of each element to sort. */
	bytes = clo_type_sizeof(clotype_elem);

	/* Initialize random number generator. */
	rng_host = g_rand_new_with_seed(rng_seed);

	/* Get the context wrapper and the chosen device. */
	ctx = ccl_context_new_from_menu_full(&dev_idx, &err);
	g_if_err_goto(err, error_handler);
	dev = ccl_context_get_device(ctx, 0, &err);
	g_if_err_goto(err, error_handler);

	/* Get sorter object. */
	sorter = clo_sort_new(
		algorithm, alg_options, ctx, &clotype_elem, NULL, NULL, NULL,
		compiler_opts, &err);
	g_if_err_goto(err, error_handler);

	/* Create command queues. */
	cq_exec = ccl_queue_new(ctx, dev, CL_QUEUE_PROFILING_ENABLE, &err);
	g_if_err_goto(err, error_handler);
	cq_comm = ccl_queue_new(ctx, dev, 0, &err);
	g_if_err_goto(err, error_handler);

	/* Create benchmarks table. */
	benchmarks = g_new(cl_ulong*, maxpo2);
	for (unsigned int i = 0; i < maxpo2; i++)
		benchmarks[i] = g_new0(cl_ulong, runs);

	/* Print options. */
	printf("\n   =========================== Selected options ============================\n\n");
	printf("     Random number generator seed: %u\n", rng_seed);
	printf("     Maximum local worksize (0 is auto-select): %d\n", (int) lws);
	printf("     Type of elements to sort: %s\n", clo_type_get_name(clotype_elem));
	printf("     Number of runs: %d\n", runs);
	printf("     Compiler Options: %s\n", compiler_opts);

	/* Create host buffer. */
	host_data = g_slice_alloc(bytes * (1 << maxpo2));

	/* Perform test. */
	for (unsigned int N = 4; N <= maxpo2; N++) {

		unsigned int num_elems = 1 << N;
		gboolean sorted_ok = TRUE;

		for (unsigned int r = 0; r < runs; r++) {

			/* Initialize host buffer. */
			for (unsigned int i = 0;  i < num_elems; i++) {
				clo_bench_rand(
					rng_host, clotype_elem, host_data + bytes * i);
			}

			/* Perform sort. */
			clo_sort_with_host_data(sorter, cq_exec, cq_comm,
				host_data, host_data, num_elems, lws, &err);
			g_if_err_goto(err, error_handler);

			/* Perform profiling. */
			prof = ccl_prof_new();
			ccl_prof_add_queue(prof, "q_exec", cq_exec);
			ccl_prof_calc(prof, &err);
			g_if_err_goto(err, error_handler);

			/* Save duration to benchmarks. */
			benchmarks[N - 1][r] = ccl_prof_get_duration(prof);
			ccl_prof_destroy(prof);

			/* Check if sorting was well performed. */
			sorted_ok = TRUE;
			/* Wait on host thread for data transfer queue to finish... */
			ccl_queue_finish(cq_comm, &err);
			g_if_err_goto(err, error_handler);
			/* Start check. */
			for (unsigned int i = 0;  i < num_elems - 1; i++) {

				/* Perform comparison. */
				if (clo_bench_compare(clotype_elem, host_data + bytes*i,
						host_data + bytes*(i + 1)) > 0) {

					sorted_ok = FALSE;
					break;
				}

			}
		}

		/* Print info. */
		total_time = 0;
		for (unsigned int i = 0;  i < runs; i++)
			total_time += benchmarks[N - 1][i];
		printf("       - 2^%d: %lf Mkeys/s %s\n", N,
			(1e-6 * num_elems * runs) / (total_time * 1e-9),
			sorted_ok ? "" : "(sort did not work)");
	}

	/* Save benchmarks to file, if filename was given as cli option. */
	if (out) {
		outfile = fopen(out, "w");
		for (unsigned int i = 0; i < maxpo2; i++) {
			fprintf(outfile, "%d", i);
			for (unsigned int j = 0; j < runs; j++) {
				fprintf(outfile, "\t%lu", (unsigned long)benchmarks[i][j]);
			}
			fprintf(outfile, "\n");
		}
		fclose(outfile);
	}

	/* If we get here, everything went Ok. */
	status = CLO_SUCCESS;
	g_assert(err == NULL);
	goto cleanup;

error_handler:
	/* Handle error. */
	g_assert(err != NULL);
	fprintf(stderr, "Error: %s\n", err->message);
	g_error_free(err);

cleanup:

	/* Free sorter object. */
	if (sorter) clo_sort_destroy(sorter);

	/* Free command line options. */
	if (context) g_option_context_free(context);
	if (algorithm) g_free(algorithm);
	if (alg_options) g_free(alg_options);
	if (compiler_opts) g_free(compiler_opts);
	if (out) g_free(out);

	/* Free host-based random number generator. */
	if (rng_host) g_rand_free(rng_host);

	/* Free OpenCL wrappers. */
	if (cq_exec) ccl_queue_destroy(cq_exec);
	if (cq_comm) ccl_queue_destroy(cq_comm);
	if (ctx) ccl_context_destroy(ctx);

	/* Free host resources */
	if (host_data) g_slice_free1(bytes * (1 << maxpo2), host_data);

	/* Free benchmarks. */
	if (benchmarks) {
		for (unsigned int i = 0; i < maxpo2; i++)
			if (benchmarks[i]) g_free(benchmarks[i]);
		g_free(benchmarks);
	}

	/* Bye. */
	return status;

}