/**
 * Scale src by 1 / sqrt(S) and store the result in dst (strided version).
 *
 * S is md_calc_size() of the dimensions that md_select_dims() keeps for
 * the bitmask `flags` (presumably the number of elements along the
 * transformed dimensions, i.e. the scaling for a unitary FFT - confirm
 * against the md_* helpers).
 *
 * @param N          number of dimensions
 * @param dimensions extent of each dimension
 * @param flags      bitmask passed to md_select_dims()
 * @param ostrides   strides of dst
 * @param dst        output array
 * @param istrides   strides of src
 * @param src        input array
 */
void fftscale2(unsigned int N, const long dimensions[N], unsigned long flags, const long ostrides[N], complex float* dst, const long istrides[N], const complex float* src)
{
	long selected[N];

	md_select_dims(N, flags, selected, dimensions);

	const float root = sqrtf((float)md_calc_size(N, selected));

	// the division is carried out in double and then rounded to float
	const float factor = 1. / root;

	md_zsmul2(N, dimensions, ostrides, dst, istrides, src, factor);
}
static void fftmod2_r(unsigned int N, const long dims[N], unsigned long flags, const long ostrs[N], complex float* dst, const long istrs[N], const complex float* src, bool inv, double phase) { if (0 == flags) { md_zsmul2(N, dims, ostrs, dst, istrs, src, cexp(M_PI * 2.i * (inv ? -phase : phase))); return; } /* this will also currently be slow on the GPU because we do not * support strides there on the lowest level */ unsigned int i = N - 1; while (!MD_IS_SET(flags, i)) i--; #if 1 // If there is only one dimensions left and it is the innermost // which is contiguous optimize using md_zfftmod2 if ((0u == MD_CLEAR(flags, i)) && (1 == md_calc_size(i, dims)) && (CFL_SIZE == ostrs[i]) && (CFL_SIZE == istrs[i])) { md_zfftmod2(N - i, dims + i, ostrs + i, dst, istrs + i, src, inv, phase); return; } #endif long tdims[N]; md_select_dims(N, ~MD_BIT(i), tdims, dims); #pragma omp parallel for for (int j = 0; j < dims[i]; j++) fftmod2_r(N, tdims, MD_CLEAR(flags, i), ostrs, (void*)dst + j * ostrs[i], istrs, (void*)src + j * istrs[i], inv, phase + fftmod_phase(dims[i], j)); }
// y = 2*x - circshift(x,center_adj) - circshift(x,center) static void zfinitediff_normal(const linop_data_t* _data, complex float* optr, const complex float* iptr) { const auto data = CAST_DOWN(zfinitediff_data, _data); // Turns out that this is faster, but this requires extra memory. complex float* tmp = md_alloc_sameplace(data->D, data->dims_in, CFL_SIZE, iptr); zfinitediff_apply(_data, tmp, iptr); zfinitediff_adjoint(_data, optr, tmp); md_free(tmp); return; // FIXME: WTF? unsigned long d = data->dim_diff; long nx = data->dims_in[d]; long offset; long dims_sub[data->D]; md_copy_dims(data->D, dims_sub, data->dims_in); // optr and iptr same size regardless if do_circdiff true/false // if (data->do_circdiff) // out = 2*in; // out(..,1:(end-1),..) = out(..,1:(end-1),..) - in(..,2:end,..) // out(..,2:end,..) = out(..,2:end,..) - in(..,1:(end-1),..) // out(..,end,..) = out(..,end,..) - in(..,1,..) // out(..,1,..) = out(..,1,..) - in(..,end,..) // // else // out(..,1,..) = in(..,1,..) // out(..,end,..) = in(..,end,..) // out(..,2:(end-1),..) = 2*in(..,2:(end-1),..) // out(..,1:(end-1),..) = out(..,1:(end-1),..) - in(..,2:end,..) // out(..,2:end,..) = out(..,2:end,..) - in(..,1:(end-1),..) // if (data->do_circdiff) { md_zsmul2(data->D, data->dims_in, data->strides_in, optr, data->strides_in, iptr, 2.); dims_sub[d] = (nx - 1); offset = data->strides_in[d] / CFL_SIZE; // out(..,1:(end-1),..) = out(..,1:(end-1),..) - in(..,2:end,..) md_zsub2(data->D, dims_sub, data->strides_in, optr, data->strides_in, optr, data->strides_in, iptr + offset); // out(..,2:end,..) = out(..,2:end,..) - in(..,1:(end-1),..) md_zsub2(data->D, dims_sub, data->strides_in, optr + offset, data->strides_in, optr + offset, data->strides_in, iptr); dims_sub[d] = 1; offset = (nx - 1) * data->strides_in[d] / CFL_SIZE; // out(..,1,..) = out(..,1,..) - in(..,end,..) md_zsub2(data->D, dims_sub, data->strides_in, optr, data->strides_in, optr, data->strides_in, iptr + offset); // out(..,end,..) = out(..,end,..) 
- in(..,1,..) md_zsub2(data->D, dims_sub, data->strides_in, optr+offset, data->strides_in, optr+offset, data->strides_in, iptr); } else { dims_sub[d] = 1; offset = (nx - 1) * data->strides_in[d] / CFL_SIZE; // out(..,1,..) = in(..,1,..) md_copy2(data->D, dims_sub, data->strides_in, optr, data->strides_in, iptr, CFL_SIZE); // out(..,end,..) = in(..,end,..) md_copy2(data->D, dims_sub, data->strides_in, optr + offset, data->strides_in, iptr + offset, CFL_SIZE); dims_sub[d] = nx - 2; offset = data->strides_in[d] / CFL_SIZE; // out(..,2:(end-1),..) = 2*in(..,2:(end-1),..) md_zsmul2(data->D, dims_sub, data->strides_in, optr + offset, data->strides_in, iptr + offset, 2.); dims_sub[d] = nx - 1; offset = data->strides_in[d] / CFL_SIZE; // out(..,1:(end-1),..) = out(..,1:(end-1),..) - in(..,2:end,..) md_zsub2(data->D, dims_sub, data->strides_in, optr, data->strides_in, optr, data->strides_in, iptr + offset); // out(..,2:end,..) = out(..,2:end,..) - in(..,1:(end-1),..) md_zsub2(data->D, dims_sub, data->strides_in, optr + offset, data->strides_in, optr + offset, data->strides_in, iptr); } }