template <typename F> void GpuFFTPlanCl<F>::doExecute (const cl::CommandQueue& queue, const cl::Buffer& input, csize_t inputOffset, const cl::Buffer& output, csize_t outputOffset, bool doForward) const { if (supportNonZeroOffsets) { if (inputOffset != 0 || outputOffset != 0) { queue.enqueueCopyBuffer (input, tmp.getDataWritable (), (inputOffset * sizeof (std::complex<F>)) (), 0, (csize_t (sizeof (std::complex<F>)) * this->batchSize ()).value ()); doExecute (queue, tmp.getDataWritable (), 0, tmp.getDataWritable (), 0, doForward); queue.enqueueCopyBuffer (tmp.getData (), output, 0, (outputOffset * sizeof (std::complex<F>)) (), (csize_t (sizeof (std::complex<F>)) * this->batchSize ()).value ()); return; } } else { ASSERT_MSG (inputOffset == 0, "not implemented"); ASSERT_MSG (outputOffset == 0, "not implemented"); } if (this->size () == 0) return; if (this->size () == 1) { if (input () != output ()) queue.enqueueCopyBuffer (input, output, 0, 0, (csize_t (sizeof (std::complex<F>)) * this->batchCount ()).value ()); return; } cl::detail::errHandler (VAL(ExecuteInterleaved) (queue (), static_cast<TY(Plan)> (plan), Core::checked_cast<int> (this->batchCount ()), doForward ? VAL(Forward) : VAL(Inverse), input (), output (), 0, NULL, NULL), VAL(ExecuteInterleavedStr)); }