Exemple #1
0
typename std::iterator_traits<Iterator>::value_type reduce(
    ExecutionPolicy &sep, Iterator b, Iterator e, T init, BinaryOperation bop) {
  cl::sycl::queue q(sep.get_queue());
  auto vectorSize = std::distance(b, e);

  if (vectorSize < 1) {
    return init;
  }

  auto device = q.get_device();
  auto local = device.get_info<cl::sycl::info::device::max_work_group_size>();
  typedef typename std::iterator_traits<Iterator>::value_type type_;
  auto bufI = sycl::helpers::make_const_buffer(b, e);
  size_t length = vectorSize;
  size_t global = sep.calculateGlobalSize(length, local);

  do {
    auto f = [length, local, global, &bufI, bop](cl::sycl::handler &h) mutable {
      cl::sycl::nd_range<3> r{cl::sycl::range<3>{std::max(global, local), 1, 1},
                              cl::sycl::range<3>{local, 1, 1}};
      auto aI = bufI.template get_access<cl::sycl::access::mode::read_write>(h);
      cl::sycl::accessor<type_, 1, cl::sycl::access::mode::read_write,
                         cl::sycl::access::target::local>
          scratch(cl::sycl::range<1>(local), h);

      h.parallel_for<typename ExecutionPolicy::kernelName>(
          r, [aI, scratch, local, length, bop](cl::sycl::nd_item<3> id) {
            int globalid = id.get_global(0);
            int localid = id.get_local(0);

            auto r = ReductionStrategy<T>(local, length, id, scratch);
            r.workitem_get_from(aI);
            r.combine_threads(bop);
            r.workgroup_write_to(aI);
          });
    };
    q.submit(f);
    length = length / local;
  } while (length > 1);
  q.wait_and_throw();
  auto hI = bufI.template get_access<cl::sycl::access::mode::read,
                                     cl::sycl::access::target::host_buffer>();
  return hI[0] + init;
}
bool equal(ExecutionPolicy& exec, ForwardIt1 first1, ForwardIt1 last1,
           ForwardIt2 first2, ForwardIt2 last2, BinaryPredicate p) {
  cl::sycl::queue q(exec.get_queue());

  auto size1 = sycl::helpers::distance(first1, last1);
  auto size2 = sycl::helpers::distance(first2, last2);

  if (size1 != size2) {
    return false;
  }

  if (size1 < 1) {
    return true;
  }

  auto device = q.get_device();

  auto length = size1;
  auto ndRange = exec.calculateNdRange(size1);
  const auto local = ndRange.get_local_range()[0];

  auto buf1 = sycl::helpers::make_const_buffer(first1, last1);
  auto buf2 = sycl::helpers::make_const_buffer(first2, last2);
  auto bufR = cl::sycl::buffer<bool, 1>(cl::sycl::range<1>(size1));

  do {
    int passes = 0;

    auto f = [passes, length, ndRange, local, &buf1, &buf2, &bufR,
              p](cl::sycl::handler& h) mutable {
      auto a1 = buf1.template get_access<cl::sycl::access::mode::read>(h);
      auto a2 = buf2.template get_access<cl::sycl::access::mode::read>(h);
      auto aR = bufR.template get_access<cl::sycl::access::mode::read_write>(h);
      cl::sycl::accessor<bool, 1, cl::sycl::access::mode::read_write,
                         cl::sycl::access::target::local>
          scratch(ndRange.get_local_range(), h);

      h.parallel_for<typename ExecutionPolicy::kernelName>(
          ndRange, [a1, a2, aR, scratch, passes, local, length,
              p](cl::sycl::nd_item<1> id) {
            auto r =
                ReductionStrategy<bool>(local, length, id, scratch);
            if (passes == 0) {
              r.workitem_get_from(p, a1, a2);
            } else {
              r.workitem_get_from(aR);
            }
            r.combine_threads(std::logical_and<bool>{});
            r.workgroup_write_to(aR);
          });  // end kernel
    };         // end command group

    q.submit(f);
    length = length / local;
    ndRange = cl::sycl::nd_range<1>{cl::sycl::range<1>(std::max(length, local)),
                                    ndRange.get_local_range()};
    ++passes;
  } while (length > 1);
  q.wait_and_throw();
  auto hr = bufR.template get_access<cl::sycl::access::mode::read>(
      cl::sycl::range<1>{1}, cl::sycl::id<1>{0});
  return hr[0];
}