// Describes how to run a naive version of the routine (for correctness/performance comparison). // Note that a proper clBLAS or CPU BLAS comparison is not available for non-BLAS routines. static StatusCode RunReference1(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { auto buffers_host = BuffersHost<T>(); DeviceToHost(args, buffers, buffers_host, queue, BuffersIn()); const auto status = RunReference(args, buffers_host); HostToDevice(args, buffers, buffers_host, queue, BuffersOut()); return status; }
// Half-precision variant: converts the inputs to single precision, runs the
// float RunReference on them, and converts the result back to half.
//
// args          Half-precision arguments; sizes, leading dimensions, m/n,
//               offsets, layout and transpose are copied as-is, alpha is
//               converted with HalfToFloat.
// buffers_host  Half-precision host buffers; a_mat and b_mat are read, and
//               b_mat is overwritten with the converted reference result.
// Returns the status reported by the float reference run.
StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &buffers_host) {
  // Single-precision copies of the two matrices involved.
  auto a_buffer2 = HalfToFloatBuffer(buffers_host.a_mat);
  auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);

  // Unused buffer slots are filled with an empty placeholder vector.
  auto dummy = std::vector<float>(0);
  auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};

  // Mirror the arguments in single precision.
  auto args2 = Arguments<float>();
  args2.a_size = args.a_size;
  args2.b_size = args.b_size;
  args2.a_ld = args.a_ld;
  args2.b_ld = args.b_ld;
  args2.m = args.m;
  args2.n = args.n;
  args2.a_offset = args.a_offset;
  args2.b_offset = args.b_offset;
  args2.layout = args.layout;
  args2.a_transpose = args.a_transpose;
  args2.alpha = HalfToFloat(args.alpha);

  auto status = RunReference(args2, buffers2);

  // Read the result from the buffers object that was actually handed to the
  // reference routine. The local 'b_buffer2' was only used to initialise
  // 'buffers2'; converting it back would return the untouched input whenever
  // BuffersHost stores its matrices by value.
  FloatToHalfBuffer(buffers_host.b_mat, buffers2.b_mat);
  return status;
}
// Half-precision variant: widens the host matrices to float, forwards the
// (non-scalar) arguments to the float RunReference, and narrows a_mat back
// into the caller's half-precision buffers.
//
// Returns the status reported by the float reference run.
// NOTE(review): only a_mat is converted back after the run; confirm that
// a_mat is the buffer the float reference writes its result to.
StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &buffers_host) {
  // Float copies of both matrices.
  auto a_as_float = HalfToFloatBuffer(buffers_host.a_mat);
  auto b_as_float = HalfToFloatBuffer(buffers_host.b_mat);

  // Slots not used here receive an empty placeholder.
  auto placeholder = std::vector<float>(0);
  auto float_buffers = BuffersHost<float>{placeholder, placeholder,
                                          a_as_float, b_as_float,
                                          placeholder, placeholder, placeholder};

  // Field-by-field copy of the arguments the float reference needs.
  auto float_args = Arguments<float>();
  // Buffer sizes and offsets
  float_args.a_size = args.a_size;
  float_args.b_size = args.b_size;
  float_args.a_offset = args.a_offset;
  float_args.b_offset = args.b_offset;
  // Kernel mode and input dimensions
  float_args.kernel_mode = args.kernel_mode;
  float_args.channels = args.channels;
  float_args.height = args.height;
  float_args.width = args.width;
  // Kernel extent, padding, stride and dilation
  float_args.kernel_h = args.kernel_h;
  float_args.kernel_w = args.kernel_w;
  float_args.pad_h = args.pad_h;
  float_args.pad_w = args.pad_w;
  float_args.stride_h = args.stride_h;
  float_args.stride_w = args.stride_w;
  float_args.dilation_h = args.dilation_h;
  float_args.dilation_w = args.dilation_w;

  auto result = RunReference(float_args, float_buffers);

  // Narrow the float a_mat held by the buffers object back to half.
  FloatToHalfBuffer(buffers_host.a_mat, float_buffers.a_mat);
  return result;
}
// Second reference entry point: operates directly on host buffers and simply
// forwards to RunReference. The queue parameter is accepted but unused, so it
// is left unnamed.
static StatusCode RunReference2(const Arguments<T> &args, BuffersHost<T> &buffers_host, Queue&) {
  const auto reference_status = RunReference(args, buffers_host);
  return reference_status;
}