int main (int argv, char **argc) { ///////////////////////// ////// SAME IN EVERY FILE ///////////////////////// // create context and command queue cl_context __sheets_context; cl_command_queue __sheets_queue; int _i; cl_int __cl_err; create_context_on(SHEETS_PLAT_NAME, SHEETS_DEV_NAME, 0, /* choose the first (only) available device */ &__sheets_context, &__sheets_queue, 0); // compile kernels for (_i = 0; _i < NKERNELS; _i++) { compiled_kernels[_i] = kernel_from_string(__sheets_context, kernel_strings[_i], kernel_names[_i], SHEETS_KERNEL_COMPILE_OPTS); } ////// [END] size_t __SIZE_wav = atoi(argc[1]); float wav[__SIZE_wav]; const char *file_name = "mytune.wav"; int in_thrsh_cnt = 0; timestamp_type st; timestamp_type end; get_timestamp(&st); for (_i = 0; _i < __SIZE_wav; _i++) { wav[_i] = (float) rand() / RAND_MAX; if (in_thrsh(wav[_i], 0.1112, 0.7888)) in_thrsh_cnt++; } get_timestamp(&end); printf("cpu execution took %f seconds\n", timestamp_diff_in_seconds(st, end)); get_timestamp(&st); ///////////////// ////// GFUNC CALL ///////////////// /// create variables for function arguments given as literals float __PRIM_band_restrict_ARG2 = 0.1112f; float __PRIM_band_restrict_ARG3 = 0.7888f; /// return array (always arg0) cl_mem __CLMEM_band_restrict_ARG0 = clCreateBuffer(__sheets_context, CL_MEM_WRITE_ONLY, sizeof(float) * __SIZE_wav, NULL, &__cl_err); CHECK_CL_ERROR(__cl_err, "clCreateBuffer"); /// input arrays cl_mem __CLMEM_band_restrict_ARG1 = clCreateBuffer(__sheets_context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * __SIZE_wav, (void *) wav, &__cl_err); CHECK_CL_ERROR(__cl_err, "clCreateBuffer"); /// write to device memory CALL_CL_GUARDED(clEnqueueWriteBuffer, (__sheets_queue, __CLMEM_band_restrict_ARG1, CL_TRUE, /* blocking write */ 0, /* no offset */ sizeof(float) * __SIZE_wav, wav, 0, /* no wait list */ NULL, NULL) ); /// set up kernel arguments SET_4_KERNEL_ARGS(compiled_kernels[0], __CLMEM_band_restrict_ARG0, __CLMEM_band_restrict_ARG1, __PRIM_band_restrict_ARG2, __PRIM_band_restrict_ARG3); /// enqueue kernel cl_event __CLEVENT_band_restrict_CALL; CALL_CL_GUARDED(clEnqueueNDRangeKernel, (__sheets_queue, compiled_kernels[0], 1, /* 1 dimension */ 0, /* 0 offset */ &__SIZE_wav, NULL, /* let OpenCL break things up */ 0, /* no events in wait list */ NULL, /* empty wait list */ &__CLEVENT_band_restrict_CALL) ); /// allocate space for cpu return array float out[__SIZE_wav]; CALL_CL_GUARDED(clEnqueueReadBuffer, (__sheets_queue, __CLMEM_band_restrict_ARG0, CL_TRUE, /* blocking read */ 0, /* 0 offset */ sizeof(float) * __SIZE_wav, /* read whole buffer */ (void *) out, /* host pointer */ 1, /* wait for gfunc to finish */ &__CLEVENT_band_restrict_CALL, /* "" */ NULL) /* no need to wait for this call though */ ); ////// [END] GFUNC CALL get_timestamp(&end); printf("gfunc call took %f seconds\n", timestamp_diff_in_seconds(st, end)); ////// Validate call int c = 0; for (_i = 0; _i < __SIZE_wav; _i++) { if (in_thrsh(out[_i], 0.1112, 0.7888)) { c++; } else if(out[_i]) { exit(1); } } printf("\n"); assert(in_thrsh_cnt == c); ////////////// ////// CLEANUP ////////////// CALL_CL_GUARDED(clReleaseMemObject, (__CLMEM_band_restrict_ARG0)); CALL_CL_GUARDED(clReleaseMemObject, (__CLMEM_band_restrict_ARG1)); for (_i = 0; _i < NKERNELS; _i++) { CALL_CL_GUARDED(clReleaseKernel, (compiled_kernels[_i])); } CALL_CL_GUARDED(clReleaseCommandQueue, (__sheets_queue)); CALL_CL_GUARDED(clReleaseContext, (__sheets_context)); return 0; }
int main (int argc, char *argv[]) { double *a, *b, *c; if (argc != 3) { fprintf(stderr, "Usage: %s size_of_vector num_adds\n", argv[0]); abort(); } const cl_long N = (cl_long) atol(argv[1]); const int num_adds = atoi(argv[2]); cl_context ctx; cl_command_queue queue; create_context_on(CHOOSE_INTERACTIVELY, CHOOSE_INTERACTIVELY, 0, &ctx, &queue, 0); print_device_info_from_queue(queue); // -------------------------------------------------------------------------- // load kernels // -------------------------------------------------------------------------- char *knl_text = read_file("vec-add-kernel.cl"); cl_kernel knl = kernel_from_string(ctx, knl_text, "sum", NULL); free(knl_text); // -------------------------------------------------------------------------- // allocate and initialize CPU memory // -------------------------------------------------------------------------- posix_memalign((void**)&a, 32, N*sizeof(double)); if (!a) { fprintf(stderr, "alloc a"); abort(); } posix_memalign((void**)&b, 32, N*sizeof(double)); if (!b) { fprintf(stderr, "alloc b"); abort(); } posix_memalign((void**)&c, 32, N*sizeof(double)); if (!c) { fprintf(stderr, "alloc c"); abort(); } for(cl_long n = 0; n < N; ++n) { a[n] = n; b[n] = 2*n; } // -------------------------------------------------------------------------- // allocate device memory // -------------------------------------------------------------------------- cl_int status; cl_mem buf_a = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(double) * N, 0, &status); CHECK_CL_ERROR(status, "clCreateBuffer"); cl_mem buf_b = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(double) * N, 0, &status); CHECK_CL_ERROR(status, "clCreateBuffer"); cl_mem buf_c = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(double) * N, 0, &status); CHECK_CL_ERROR(status, "clCreateBuffer"); // -------------------------------------------------------------------------- // transfer to device // -------------------------------------------------------------------------- CALL_CL_SAFE(clEnqueueWriteBuffer( queue, buf_a, /*blocking*/ CL_TRUE, /*offset*/ 0, N * sizeof(double), a, 0, NULL, NULL)); CALL_CL_SAFE(clEnqueueWriteBuffer( queue, buf_b, /*blocking*/ CL_TRUE, /*offset*/ 0, N * sizeof(double), b, 0, NULL, NULL)); // -------------------------------------------------------------------------- // run code on device // -------------------------------------------------------------------------- CALL_CL_SAFE(clFinish(queue)); timestamp_type tic, toc; get_timestamp(&tic); for(int add = 0; add < num_adds; ++add) { SET_4_KERNEL_ARGS(knl, N, buf_a, buf_b, buf_c); size_t local_size[] = { 128 }; size_t global_size[] = { ((N + local_size[0] - 1)/local_size[0])* local_size[0] }; CALL_CL_SAFE(clEnqueueNDRangeKernel(queue, knl, 1, NULL, global_size, local_size, 0, NULL, NULL)); } CALL_CL_SAFE(clFinish(queue)); get_timestamp(&toc); double elapsed = timestamp_diff_in_seconds(tic,toc)/num_adds; printf("%f s\n", elapsed); printf("%f GB/s\n", 3*N*sizeof(double)/1e9/elapsed); // -------------------------------------------------------------------------- // transfer back & check // -------------------------------------------------------------------------- CALL_CL_SAFE(clEnqueueReadBuffer( queue, buf_c, /*blocking*/ CL_TRUE, /*offset*/ 0, N * sizeof(double), c, 0, NULL, NULL)); for(cl_long i = 0; i < N; ++i) if(c[i] != 3*i) { printf("BAD %ld\n", (long)i); abort(); } printf("GOOD\n"); // -------------------------------------------------------------------------- // clean up // -------------------------------------------------------------------------- CALL_CL_SAFE(clReleaseMemObject(buf_a)); CALL_CL_SAFE(clReleaseMemObject(buf_b)); CALL_CL_SAFE(clReleaseMemObject(buf_c)); CALL_CL_SAFE(clReleaseKernel(knl)); CALL_CL_SAFE(clReleaseCommandQueue(queue)); CALL_CL_SAFE(clReleaseContext(ctx)); free(a); free(b); free(c); return 0; }