/// Runs the forward transform of the real cube `in` into the complex cube
/// `out`.
///
/// The plan is looked up in `fft_plans` by the shape of `in`. Debug builds
/// check that `size(out)` equals `fft_complex_size(in)`, that `size(in)`
/// equals `sz`, and that the transform reported success.
///
/// When MEASURE_FFT_RUNTIME is defined, the wall time spent in
/// DftiComputeForward is added to `fft_stats`.
///
/// @param in   input cube, passed to the transform as a `real*` buffer
/// @param out  output cube, passed to the transform as a `real*` buffer
void forward( cube<real>& in, cube<complex>& out )
{
    ZI_ASSERT(size(out)==fft_complex_size(in));
    ZI_ASSERT(size(in)==sz);

    fft_plan plan = fft_plans.get_forward(
        vec3i(in.shape()[0],in.shape()[1],in.shape()[2]));

#   ifdef MEASURE_FFT_RUNTIME
    zi::wall_timer wt;
#   endif

    MKL_LONG const status
        = DftiComputeForward(*plan,
                             reinterpret_cast<real*>(in.data()),
                             reinterpret_cast<real*>(out.data()));

#   ifdef MEASURE_FFT_RUNTIME
    fft_stats.add(wt.elapsed<real>());
#   endif

    // The status used to be stored and silently dropped. DFTI reports
    // success as 0 (DFTI_NO_ERROR); anything else means `out` is not valid.
    ZI_ASSERT(status==0);

    // Keeps builds where ZI_ASSERT compiles to nothing free of
    // "set but not used" warnings.
    static_cast<void>(status);
}
inline void convolve_sparse_inverse_add( cube<T> const & a, cube<T> const & b, vec3i const & s, cube<T> & r ) noexcept { if ( s == vec3i::one ) { convolve_inverse_add(a,b,r); return; } size_t ax = a.shape()[0]; size_t ay = a.shape()[1]; size_t az = a.shape()[2]; size_t bx = b.shape()[0]; size_t by = b.shape()[1]; size_t bz = b.shape()[2]; # ifndef NDEBUG size_t rbx = (bx-1) * s[0] + 1; size_t rby = (by-1) * s[1] + 1; size_t rbz = (bz-1) * s[2] + 1; size_t rx = ax + rbx - 1; size_t ry = ay + rby - 1; size_t rz = az + rbz - 1; ZI_ASSERT(r.shape()[0]==rx); ZI_ASSERT(r.shape()[1]==ry); ZI_ASSERT(r.shape()[2]==rz); # endif for ( size_t wx = 0; wx < bx; ++wx ) for ( size_t wy = 0; wy < by; ++wy ) for ( size_t wz = 0; wz < bz; ++wz ) { size_t fx = bx - 1 - wx; size_t fy = by - 1 - wy; size_t fz = bz - 1 - wz; size_t ox = fx * s[0]; size_t oy = fy * s[1]; size_t oz = fz * s[2]; for ( size_t x = 0; x < ax; ++x ) for ( size_t y = 0; y < ay; ++y ) for ( size_t z = 0; z < az; ++z ) r[x+ox][y+oy][z+oz] += a[x][y][z] * b[wx][wy][wz]; } }
static void backward( cube<complex>& in, cube<real>& out ) { ZI_ASSERT(in.shape()[0]==out.shape()[0]); ZI_ASSERT(in.shape()[1]==out.shape()[1]); ZI_ASSERT((out.shape()[2]/2+1)==in.shape()[2]); fft_plan plan = fft_plans.get_backward( vec3i(out.shape()[0],out.shape()[1],out.shape()[2])); MKL_LONG status; # ifdef MEASURE_FFT_RUNTIME zi::wall_timer wt; # endif status = DftiComputeBackward(*plan, reinterpret_cast<real*>(in.data()), reinterpret_cast<real*>(out.data())); # ifdef MEASURE_FFT_RUNTIME fft_stats.add(wt.elapsed<real>()); # endif }
inline void convolve_sparse_add( cube<T> const & a, cube<T> const & b, vec3i const & s, cube<T> & r ) noexcept { if ( s == vec3i::one ) { convolve_add(a,b,r); return; } size_t ax = a.shape()[0]; size_t ay = a.shape()[1]; size_t az = a.shape()[2]; size_t bx = b.shape()[0]; size_t by = b.shape()[1]; size_t bz = b.shape()[2]; size_t rbx = (bx-1) * s[0] + 1; size_t rby = (by-1) * s[1] + 1; size_t rbz = (bz-1) * s[2] + 1; size_t rx = ax - rbx + 1; size_t ry = ay - rby + 1; size_t rz = az - rbz + 1; ZI_ASSERT(r.shape()[0]==rx); ZI_ASSERT(r.shape()[1]==ry); ZI_ASSERT(r.shape()[2]==rz); for ( size_t x = 0; x < rx; ++x ) for ( size_t y = 0; y < ry; ++y ) for ( size_t z = 0; z < rz; ++z ) for ( size_t dx = x, wx = bx-1; dx < rbx + x; dx += s[0], --wx ) for ( size_t dy = y, wy = by-1; dy < rby + y; dy += s[1], --wy ) for ( size_t dz = z, wz = bz-1; dz < rbz + z; dz += s[2], --wz ) r[x][y][z] += a[dx][dy][dz] * b[wx][wy][wz]; }
inline void convolve_sparse_flipped_add( cube<T> const & a, cube<T> const & b, vec3i const & s, cube<T> & r ) noexcept { if ( s == vec3i::one ) { convolve_flipped_add(a,b,r); return; } size_t ax = a.shape()[0]; size_t ay = a.shape()[1]; size_t az = a.shape()[2]; size_t bx = b.shape()[0]; size_t by = b.shape()[1]; size_t bz = b.shape()[2]; size_t rx = (ax - bx) / s[0] + 1; size_t ry = (ay - by) / s[1] + 1; size_t rz = (az - bz) / s[2] + 1; ZI_ASSERT(r.shape()[0]==rx); ZI_ASSERT(r.shape()[1]==ry); ZI_ASSERT(r.shape()[2]==rz); for ( size_t qx = 0, x = 0; qx < rx; ++qx, x += s[0] ) for ( size_t qy = 0, y = 0; qy < ry; ++qy, y += s[1] ) for ( size_t qz = 0, z = 0; qz < rz; ++qz, z += s[2] ) for ( size_t dx = 0; dx < bx; ++dx ) for ( size_t dy = 0; dy < by; ++dy ) for ( size_t dz = 0; dz < bz; ++dz ) r[qx][qy][qz] += a[ax-1-x-dx][ay-1-y-dy][az-1-z-dz] * b[bx-1-dx][by-1-dy][bz-1-dz]; }