static void wavelet_dims_r(unsigned int N, unsigned int n, unsigned int flags, long odims[2 * N], const long dims[N], const long flen) { if (MD_IS_SET(flags, n)) { odims[0 + n] = bandsize(dims[n], flen); odims[N + n] = 2; } if (n > 0) wavelet_dims_r(N, n - 1, flags, odims, dims, flen); }
void fwt1(unsigned int N, unsigned int d, const long dims[N], const long ostr[N], complex float* low, complex float* hgh, const long istr[N], const complex float* in, const long flen, const float filter[2][2][flen]) { debug_printf(DP_DEBUG4, "fwt1: %d/%d\n", d, N); debug_print_dims(DP_DEBUG4, N, dims); assert(dims[d] >= 2); long odims[N]; md_copy_dims(N, odims, dims); odims[d] = bandsize(dims[d], flen); debug_print_dims(DP_DEBUG4, N, odims); long o = d + 1; long u = N - o; // 0 1 2 3 4 5 6|7 // --d-- * --u--|N // ---o--- assert(d == md_calc_blockdim(d, dims + 0, istr + 0, CFL_SIZE)); assert(u == md_calc_blockdim(u, dims + o, istr + o, CFL_SIZE * md_calc_size(o, dims))); assert(d == md_calc_blockdim(d, odims + 0, ostr + 0, CFL_SIZE)); assert(u == md_calc_blockdim(u, odims + o, ostr + o, CFL_SIZE * md_calc_size(o, odims))); // merge dims long wdims[3] = { md_calc_size(d, dims), dims[d], md_calc_size(u, dims + o) }; long wistr[3] = { CFL_SIZE, istr[d], CFL_SIZE * md_calc_size(o, dims) }; long wostr[3] = { CFL_SIZE, ostr[d], CFL_SIZE * md_calc_size(o, odims) }; #ifdef USE_CUDA if (cuda_ondevice(in)) { assert(cuda_ondevice(low)); assert(cuda_ondevice(hgh)); float* flow = md_gpu_move(1, MD_DIMS(flen), filter[0][0], FL_SIZE); float* fhgh = md_gpu_move(1, MD_DIMS(flen), filter[0][1], FL_SIZE); wl3_cuda_down3(wdims, wostr, low, wistr, in, flen, flow); wl3_cuda_down3(wdims, wostr, hgh, wistr, in, flen, fhgh); md_free(flow); md_free(fhgh); return; } #endif // no clear needed wavelet_down3(wdims, wostr, low, wistr, in, flen, filter[0][0]); wavelet_down3(wdims, wostr, hgh, wistr, in, flen, filter[0][1]); }
void iwt1(unsigned int N, unsigned int d, const long dims[N], const long ostr[N], complex float* out, const long istr[N], const complex float* low, const complex float* hgh, const long flen, const float filter[2][2][flen]) { debug_printf(DP_DEBUG4, "ifwt1: %d/%d\n", d, N); debug_print_dims(DP_DEBUG4, N, dims); assert(dims[d] >= 2); long idims[N]; md_copy_dims(N, idims, dims); idims[d] = bandsize(dims[d], flen); debug_print_dims(DP_DEBUG4, N, idims); long o = d + 1; long u = N - o; // 0 1 2 3 4 5 6|7 // --d-- * --u--|N // ---o--- assert(d == md_calc_blockdim(d, dims + 0, ostr + 0, CFL_SIZE)); assert(u == md_calc_blockdim(u, dims + o, ostr + o, CFL_SIZE * md_calc_size(o, dims))); assert(d == md_calc_blockdim(d, idims + 0, istr + 0, CFL_SIZE)); assert(u == md_calc_blockdim(u, idims + o, istr + o, CFL_SIZE * md_calc_size(o, idims))); long wdims[3] = { md_calc_size(d, dims), dims[d], md_calc_size(u, dims + o) }; long wistr[3] = { CFL_SIZE, istr[d], CFL_SIZE * md_calc_size(o, idims) }; long wostr[3] = { CFL_SIZE, ostr[d], CFL_SIZE * md_calc_size(o, dims) }; md_clear(3, wdims, out, CFL_SIZE); // we cannot clear because we merge outputs #ifdef USE_CUDA if (cuda_ondevice(out)) { assert(cuda_ondevice(low)); assert(cuda_ondevice(hgh)); float* flow = md_gpu_move(1, MD_DIMS(flen), filter[1][0], FL_SIZE); float* fhgh = md_gpu_move(1, MD_DIMS(flen), filter[1][1], FL_SIZE); wl3_cuda_up3(wdims, wostr, out, wistr, low, flen, flow); wl3_cuda_up3(wdims, wostr, out, wistr, hgh, flen, fhgh); md_free(flow); md_free(fhgh); return; } #endif wavelet_up3(wdims, wostr, out, wistr, low, flen, filter[1][0]); wavelet_up3(wdims, wostr, out, wistr, hgh, flen, filter[1][1]); }
static void wavelet_down3(const long dims[3], const long out_str[3], complex float* out, const long in_str[3], const complex float* in, unsigned int flen, const float filter[flen]) { #pragma omp parallel for collapse(3) for (unsigned int i = 0; i < dims[2]; i++) for (unsigned int j = 0; j < bandsize(dims[1], flen); j++) for (unsigned int k = 0; k < dims[0]; k++) { *access(out_str, out, i, j, k) = 0.; for (unsigned int l = 0; l < flen; l++) { int n = coord(j, dims[1], flen, l); *access(out_str, out, i, j, k) += *(caccess(in_str, in, i, n, k)) * filter[flen - l - 1]; } } }
static void wavelet_up3(const long dims[3], const long out_str[3], complex float* out, const long in_str[3], const complex float* in, unsigned int flen, const float filter[flen]) { // md_clear2(3, dims, out_str, out, CFL_SIZE); #pragma omp parallel for collapse(3) for (unsigned int i = 0; i < dims[2]; i++) for (unsigned int j = 0; j < dims[1]; j++) for (unsigned int k = 0; k < dims[0]; k++) { // *access(out_str, out, i, j, k) = 0.; for (unsigned int l = ((j + flen / 2 - 0) - (flen - 1)) % 2; l < flen; l += 2) { int n = ((j + flen / 2 - 0) - (flen - 1) + l) / 2; if ((0 <= n) && ((unsigned int)n < bandsize(dims[1], flen))) *access(out_str, out, i, j, k) += *caccess(in_str, in, i, n, k) * filter[flen - l - 1]; } } }