int main(int argc, char **argv) { read_args(argc, argv); counters timer; start_measure(timer); // declarations Complex ioB(1.0, 1.0); ioBuffer = cl::sycl::buffer<Complex,2>(cl::sycl::range<2> {M, N}); ioABuffer = cl::sycl::buffer<Complex,2>(cl::sycl::range<2> {M, N}); ioBBuffer = cl::sycl::buffer<Complex,1>(&ioB, cl::sycl::range<1> {1}); // initialization for (size_t i = 0; i < M; ++i){ for (size_t j = 0; j < N; ++j){ float tmp = (float) (i*(j+2) + 10) / N; Complex value(tmp, tmp); cl::sycl::id<2> id = {i, j}; ioBuffer.get_access<cl::sycl::access::mode::write>()[id] = value; ioABuffer.get_access<cl::sycl::access::mode::write>()[id] = value; } } // our work coef_var2D<0, 0> c1; coef_var2D<1, 0> c2; coef_var2D<0, 1> c3; coef_var2D<-1, 0> c4; coef_var2D<0, -1> c5; auto st = c1+c2+c3+c4+c5; input_var2D<Complex, &ioABuffer, &ioBBuffer, &fdl_in, &fac> work_in; output_2D<Complex, &ioBuffer, &fdl_out> work_out; auto op_work = work_out << st << work_in; auto st_id = c1.toStencil(); input_var2D<Complex, &ioBuffer, &ioBBuffer, &fdl_in, &fac_id> copy_in; output_2D<Complex, &ioABuffer, &fdl_out> copy_out; auto op_copy = copy_out << st_id << copy_in; end_init(timer); auto begin_op = counters::clock_type::now(); // compute result with "gpu" { cl::sycl::queue myQueue; for (unsigned int i = 0; i < NB_ITER; ++i){ //op_work.doComputation(myQueue); op_work.doLocalComputation(myQueue); op_copy.doComputation(myQueue); } } auto end_op = counters::clock_type::now(); timer.stencil_time = std::chrono::duration_cast<counters::duration_type>(end_op - begin_op); // loading time is not watched end_measure(timer); return 0; }
int main(int argc, char **argv) { read_args(argc, argv); struct counters timer; start_measure(timer); // declarations float tab_var = 1.0; float *ioB = &tab_var; ioBuffer = cl::sycl::buffer<float,2>(cl::sycl::range<2> {M, N}); ioABuffer = cl::sycl::buffer<float,2>(cl::sycl::range<2> {M, N}); ioBBuffer = cl::sycl::buffer<float,1>(ioB, cl::sycl::range<1> {1}); #if DEBUG_STENCIL float *a_test = (float *) malloc(sizeof(float)*M*N); float *b_test = (float *) malloc(sizeof(float)*M*N); #endif // initialization for (size_t i = 0; i < M; ++i){ for (size_t j = 0; j < N; ++j){ float value = ((float) i*(j+2) + 10) / N; cl::sycl::id<2> id = {i, j}; ioBuffer.get_access<cl::sycl::access::mode::write, cl::sycl::access::target::host_buffer>()[id] = value; ioABuffer.get_access<cl::sycl::access::mode::write, cl::sycl::access::target::host_buffer>()[id] = value; #if DEBUG_STENCIL a_test[i*N+j] = value; b_test[i*N+j] = value; #endif } } // our work coef_var2D<0, 0> c1; coef_var2D<1, 0> c2; coef_var2D<0, 1> c3; coef_var2D<-1, 0> c4; coef_var2D<0, -1> c5; auto st = c1+c2+c3+c4+c5; input_var2D<float, &ioABuffer, &ioBBuffer, &fdl_in, &fac> work_in; output_2D<float, &ioBuffer, &fdl_out> work_out; auto op_work = work_out << st << work_in; auto st_id = c1.toStencil(); input_var2D<float, &ioBuffer, &ioBBuffer, &fdl_in, &fac_id> copy_in; output_2D<float, &ioABuffer, &fdl_out> copy_out; auto op_copy = copy_out << st_id << copy_in; end_init(timer); struct op_time time_op; begin_op(time_op); // compute result with "gpu" { cl::sycl::queue myQueue; for (unsigned int i = 0; i < NB_ITER; ++i){ //op_work.doComputation(myQueue); op_work.doLocalComputation(myQueue); op_copy.doComputation(myQueue); } } end_op(time_op, timer.stencil_time); // loading time is not watched end_measure(timer); #if DEBUG_STENCIL // get the gpu result auto C = (ioABuffer).get_access<cl::sycl::access::mode::read, cl::sycl::access::target::host_buffer>(); ute_and_are(a_test,b_test,C); free(a_test); free(b_test); #endif return 0; }