// Launches the kernel held by m_local_dispatch over the `size` doubles at
// `ptr` and returns the resulting scalar.
//
// Kernel arguments, in push order:
//   1. `ptr` as an untyped pointer,
//   2. `size` narrowed to int,
//   3. the address of m_result's first element.
// m_result is resized to 64 * 6 = 384 entries before its address is taken
// (presumably one slot per work-group -- confirm against the kernel source).
//
// The launch is one-dimensional with a work-group width of 256. The global
// extent is roundUp(size, 256) (presumably `size` padded to a multiple of the
// group width -- confirm against roundUp), capped at 384 * 256 work-items.
//
// Returns m_result[0] when `size` is smaller than the work-group width,
// otherwise whatever reduceTail(size) returns.
double transform(double* ptr, size_t size)
{
    size_t group_width = 256;
    size_t groups_per_unit = 64;
    size_t unit_count = 6;
    size_t max_groups = groups_per_unit * unit_count;

    // Must happen before &m_result[0] is handed to the dispatcher below.
    m_result.resize(max_groups);

    // Argument order is part of the kernel's calling contract; keep it as is.
    m_local_dispatch->clearArgs();
    FIX_ARGS_STABLE(m_local_dispatch);
    m_local_dispatch->pushPointerArg(static_cast<void*>(ptr));
    m_local_dispatch->pushIntArg(static_cast<int>(size));
    m_local_dispatch->pushPointerArg(static_cast<void*>(&m_result[0]));

    // Never launch more than max_groups work-groups, however large the input.
    size_t launch_cap = max_groups * group_width;
    size_t grid_extent[3] = { std::min(roundUp(size, group_width), launch_cap), 1, 1 };
    size_t group_extent[3] = { group_width, 1, 1 };
    m_local_dispatch->setLaunchAttributes(1, grid_extent, group_extent);
    m_local_dispatch->dispatchKernelWaitComplete();

    if (size >= group_width)
    {
        return reduceTail(size);
    }
    return m_result[0];
}
// reduce all terms in f by the leading terms of all polynomials in F // first reduce the leading term completely, then the lower terms bool reduce(BRP &f, const IntermediateBasis &F, const IntermediateBasis::const_iterator itF) { bool ret = false; ret = reduceLt(f, F, itF); if (!f.isZero() ) { if ( reduceTail(f, F, itF) ) { ret = true; } } return ret; }