static void fftmod2_r(unsigned int N, const long dims[N], unsigned long flags, const long ostrs[N], complex float* dst, const long istrs[N], const complex float* src, bool inv, double phase) { if (0 == flags) { md_zsmul2(N, dims, ostrs, dst, istrs, src, cexp(M_PI * 2.i * (inv ? -phase : phase))); return; } /* this will also currently be slow on the GPU because we do not * support strides there on the lowest level */ unsigned int i = N - 1; while (!MD_IS_SET(flags, i)) i--; #if 1 // If there is only one dimensions left and it is the innermost // which is contiguous optimize using md_zfftmod2 if ((0u == MD_CLEAR(flags, i)) && (1 == md_calc_size(i, dims)) && (CFL_SIZE == ostrs[i]) && (CFL_SIZE == istrs[i])) { md_zfftmod2(N - i, dims + i, ostrs + i, dst, istrs + i, src, inv, phase); return; } #endif long tdims[N]; md_select_dims(N, ~MD_BIT(i), tdims, dims); #pragma omp parallel for for (int j = 0; j < dims[i]; j++) fftmod2_r(N, tdims, MD_CLEAR(flags, i), ostrs, (void*)dst + j * ostrs[i], istrs, (void*)src + j * istrs[i], inv, phase + fftmod_phase(dims[i], j)); }
/*
 * Inverse variant of fftmod with explicit input and output strides.
 *
 * Usage: apply fftmod both before and after an fft, and ifftmod both
 * before and after an ifft. Note that this differs from the way
 * fftshift/ifftshift have to be used.
 *
 * The flags are passed through clear_singletons() before the
 * recursive worker is started with the inverse setting and a zero
 * initial phase.
 */
void ifftmod2(unsigned int N, const long dims[N], unsigned long flags, const long ostrs[N], complex float* dst, const long istrs[N], const complex float* src)
{
	const unsigned long active = clear_singletons(N, dims, flags);

	fftmod2_r(N, dims, active, ostrs, dst, istrs, src, true, 0.);
}