bool equal(ExecutionPolicy& exec, ForwardIt1 first1, ForwardIt1 last1, ForwardIt2 first2, ForwardIt2 last2, BinaryPredicate p) { cl::sycl::queue q(exec.get_queue()); auto size1 = sycl::helpers::distance(first1, last1); auto size2 = sycl::helpers::distance(first2, last2); if (size1 != size2) { return false; } if (size1 < 1) { return true; } auto device = q.get_device(); auto length = size1; auto ndRange = exec.calculateNdRange(size1); const auto local = ndRange.get_local_range()[0]; auto buf1 = sycl::helpers::make_const_buffer(first1, last1); auto buf2 = sycl::helpers::make_const_buffer(first2, last2); auto bufR = cl::sycl::buffer<bool, 1>(cl::sycl::range<1>(size1)); do { int passes = 0; auto f = [passes, length, ndRange, local, &buf1, &buf2, &bufR, p](cl::sycl::handler& h) mutable { auto a1 = buf1.template get_access<cl::sycl::access::mode::read>(h); auto a2 = buf2.template get_access<cl::sycl::access::mode::read>(h); auto aR = bufR.template get_access<cl::sycl::access::mode::read_write>(h); cl::sycl::accessor<bool, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local> scratch(ndRange.get_local_range(), h); h.parallel_for<typename ExecutionPolicy::kernelName>( ndRange, [a1, a2, aR, scratch, passes, local, length, p](cl::sycl::nd_item<1> id) { auto r = ReductionStrategy<bool>(local, length, id, scratch); if (passes == 0) { r.workitem_get_from(p, a1, a2); } else { r.workitem_get_from(aR); } r.combine_threads(std::logical_and<bool>{}); r.workgroup_write_to(aR); }); // end kernel }; // end command group q.submit(f); length = length / local; ndRange = cl::sycl::nd_range<1>{cl::sycl::range<1>(std::max(length, local)), ndRange.get_local_range()}; ++passes; } while (length > 1); q.wait_and_throw(); auto hr = bufR.template get_access<cl::sycl::access::mode::read>( cl::sycl::range<1>{1}, cl::sycl::id<1>{0}); return hr[0]; }