/*
 * One level of the forward wavelet transform along dimension d.
 *
 * The N-dimensional problem is collapsed into a 3D view
 * { dims below d, dims[d], dims above d }, which requires the
 * dimensions on either side of d to be contiguous in memory
 * (checked by the md_calc_blockdim asserts below).
 *
 * N      number of dimensions
 * d      dimension to transform along
 * dims   input dimensions
 * ostr   strides for the output bands
 * low    output low-pass band (size bandsize(dims[d], flen) along d)
 * hgh    output high-pass band (same size as low)
 * istr   strides for the input
 * in     input array
 * flen   filter length
 * filter filter bank; filter[0][0]/filter[0][1] are the analysis
 *        low-/high-pass filters (filter[1][*] are used by iwt1)
 */
void fwt1(unsigned int N, unsigned int d, const long dims[N], const long ostr[N], complex float* low, complex float* hgh, const long istr[N], const complex float* in, const long flen, const float filter[2][2][flen])
{
	debug_printf(DP_DEBUG4, "fwt1: %d/%d\n", d, N);
	debug_print_dims(DP_DEBUG4, N, dims);

	assert(dims[d] >= 2);

	// output band dims: same as input except along d, which shrinks to the band size
	long odims[N];
	md_copy_dims(N, odims, dims);
	odims[d] = bandsize(dims[d], flen);

	debug_print_dims(DP_DEBUG4, N, odims);

	long o = d + 1;
	long u = N - o;

	// 0 1 2 3 4 5 6|7
	// --d-- * --u--|N
	// ---o---

	// dims below d and above d must each be one contiguous block so we can merge them
	assert(d == md_calc_blockdim(d, dims + 0, istr + 0, CFL_SIZE));
	assert(u == md_calc_blockdim(u, dims + o, istr + o, CFL_SIZE * md_calc_size(o, dims)));

	assert(d == md_calc_blockdim(d, odims + 0, ostr + 0, CFL_SIZE));
	assert(u == md_calc_blockdim(u, odims + o, ostr + o, CFL_SIZE * md_calc_size(o, odims)));

	// merge dims into a 3D view: { below-d, d itself, above-d }
	long wdims[3] = { md_calc_size(d, dims), dims[d], md_calc_size(u, dims + o) };
	long wistr[3] = { CFL_SIZE, istr[d], CFL_SIZE * md_calc_size(o, dims) };
	long wostr[3] = { CFL_SIZE, ostr[d], CFL_SIZE * md_calc_size(o, odims) };

#ifdef USE_CUDA
	if (cuda_ondevice(in)) {

		assert(cuda_ondevice(low));
		assert(cuda_ondevice(hgh));

		// copy the analysis filter taps to the device for the CUDA kernels
		float* flow = md_gpu_move(1, MD_DIMS(flen), filter[0][0], FL_SIZE);
		float* fhgh = md_gpu_move(1, MD_DIMS(flen), filter[0][1], FL_SIZE);

		wl3_cuda_down3(wdims, wostr, low, wistr, in, flen, flow);
		wl3_cuda_down3(wdims, wostr, hgh, wistr, in, flen, fhgh);

		md_free(flow);
		md_free(fhgh);

		return;
	}
#endif

	// no clear needed
	wavelet_down3(wdims, wostr, low, wistr, in, flen, filter[0][0]);
	wavelet_down3(wdims, wostr, hgh, wistr, in, flen, filter[0][1]);
}
/*
 * One level of the inverse wavelet transform along dimension d.
 *
 * Mirror of fwt1: the low- and high-pass bands (sized bandsize(dims[d], flen)
 * along d) are merged back into out, again via a collapsed 3D view that
 * requires contiguity on either side of d.
 *
 * N      number of dimensions
 * d      dimension to transform along
 * dims   output dimensions
 * ostr   strides for the output
 * out    reconstructed output
 * istr   strides for the input bands
 * low    input low-pass band
 * hgh    input high-pass band
 * flen   filter length
 * filter filter bank; filter[1][0]/filter[1][1] are the synthesis
 *        low-/high-pass filters
 */
void iwt1(unsigned int N, unsigned int d, const long dims[N], const long ostr[N], complex float* out, const long istr[N], const complex float* low, const complex float* hgh, const long flen, const float filter[2][2][flen])
{
	debug_printf(DP_DEBUG4, "ifwt1: %d/%d\n", d, N);
	debug_print_dims(DP_DEBUG4, N, dims);

	assert(dims[d] >= 2);

	// input band dims: same as output except along d (band size)
	long idims[N];
	md_copy_dims(N, idims, dims);
	idims[d] = bandsize(dims[d], flen);

	debug_print_dims(DP_DEBUG4, N, idims);

	long o = d + 1;
	long u = N - o;

	// 0 1 2 3 4 5 6|7
	// --d-- * --u--|N
	// ---o---

	// dims below d and above d must each be one contiguous block so we can merge them
	assert(d == md_calc_blockdim(d, dims + 0, ostr + 0, CFL_SIZE));
	assert(u == md_calc_blockdim(u, dims + o, ostr + o, CFL_SIZE * md_calc_size(o, dims)));

	assert(d == md_calc_blockdim(d, idims + 0, istr + 0, CFL_SIZE));
	assert(u == md_calc_blockdim(u, idims + o, istr + o, CFL_SIZE * md_calc_size(o, idims)));

	// merge dims into a 3D view: { below-d, d itself, above-d }
	long wdims[3] = { md_calc_size(d, dims), dims[d], md_calc_size(u, dims + o) };
	long wistr[3] = { CFL_SIZE, istr[d], CFL_SIZE * md_calc_size(o, idims) };
	long wostr[3] = { CFL_SIZE, ostr[d], CFL_SIZE * md_calc_size(o, dims) };

	// clear out once up front: both band contributions below are merged
	// (accumulated) into the same output, so it must start from zero
	// NOTE(review): the original comment here ("we cannot clear") contradicted
	// the call it annotated; the clear is intentional and happens exactly once.
	md_clear(3, wdims, out, CFL_SIZE);	// we clear once, then merge both band outputs

#ifdef USE_CUDA
	if (cuda_ondevice(out)) {

		assert(cuda_ondevice(low));
		assert(cuda_ondevice(hgh));

		// copy the synthesis filter taps to the device for the CUDA kernels
		float* flow = md_gpu_move(1, MD_DIMS(flen), filter[1][0], FL_SIZE);
		float* fhgh = md_gpu_move(1, MD_DIMS(flen), filter[1][1], FL_SIZE);

		wl3_cuda_up3(wdims, wostr, out, wistr, low, flen, flow);
		wl3_cuda_up3(wdims, wostr, out, wistr, hgh, flen, fhgh);

		md_free(flow);
		md_free(fhgh);

		return;
	}
#endif

	wavelet_up3(wdims, wostr, out, wistr, low, flen, filter[1][0]);
	wavelet_up3(wdims, wostr, out, wistr, hgh, flen, filter[1][1]);
}
/*
 * Normal operator A^H A for the matrix linop.
 *
 * If a precomputed Gram matrix is available it is applied directly
 * (with a lazily-created device copy on the CUDA path); otherwise the
 * normal operator is realized as forward followed by adjoint.
 */
static void linop_matrix_apply_normal(const linop_data_t* _data, complex float* dst, const complex float* src)
{
	struct operator_matrix_s* data = CAST_DOWN(operator_matrix_s, _data);

	if (NULL == data->mat_gram) {

		// no Gram matrix precomputed: dst = A^H (A src)
		complex float* work = md_alloc_sameplace(data->N, data->out_dims, CFL_SIZE, src);

		linop_matrix_apply(_data, work, src);
		linop_matrix_apply_adjoint(_data, dst, work);

		md_free(work);
		return;
	}

	const complex float* gram = data->mat_gram;

#ifdef USE_CUDA
	if (cuda_ondevice(src)) {

		// lazily cache a device copy of the Gram matrix
		if (NULL == data->mat_gram_gpu)
			data->mat_gram_gpu = md_gpu_move(2 * data->N, data->grm_dims, data->mat_gram, CFL_SIZE);

		gram = data->mat_gram_gpu;
	}
#endif

	md_ztenmul(2 * data->N, data->gout_dims, dst, data->gin_dims, src, data->grm_dims, gram);
}
/*
 * Return the sampling pattern, as a device pointer when gpu is set.
 *
 * The GPU copy is created on first use and cached in the data struct
 * (const is cast away for this lazy-init write only).
 */
static const complex float* get_pat(const struct sampling_data_s* data, bool gpu)
{
	if (!gpu)
		return data->pattern;

	struct sampling_data_s* wdata = (struct sampling_data_s*)data;

	if (NULL == wdata->gpu_pattern)
		wdata->gpu_pattern = md_gpu_move(DIMS, data->pat_dims, data->pattern, CFL_SIZE);

	return wdata->gpu_pattern;
}
/*
 * Return the fmac tensor, as a device pointer when gpu is set.
 *
 * The GPU copy is created on first use and cached in the data struct
 * (const is cast away for this lazy-init write only).
 */
static const complex float* get_tensor(const struct fmac_data* data, bool gpu)
{
	if (!gpu)
		return data->tensor;

	struct fmac_data* wdata = (struct fmac_data*)data;

	if (NULL == wdata->gpu_tensor)
		wdata->gpu_tensor = md_gpu_move(data->N, data->tdims, data->tensor, CFL_SIZE);

	return wdata->gpu_tensor;
}
/*
 * Adjoint of the (complex) diagonal operator: pointwise multiply src
 * by the diagonal — conjugated for the complex case, plain for rmul.
 *
 * On the CUDA path a device copy of the diagonal is created lazily and
 * cached in the data struct.
 */
static void cdiag_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src)
{
	const struct cdiag_s* data = CAST_DOWN(cdiag_s, _data);

	const complex float* diag = data->diag;

#ifdef USE_CUDA
	if (cuda_ondevice(src)) {

		struct cdiag_s* wdata = (struct cdiag_s*)data;	// cast away const for lazy-init write

		if (NULL == wdata->gpu_diag)
			wdata->gpu_diag = md_gpu_move(data->N, data->dims, data->diag, CFL_SIZE);

		diag = wdata->gpu_diag;
	}
#endif

	// real diagonal multiplies without conjugation; complex uses the conjugate
	if (data->rmul)
		md_zrmul2(data->N, data->dims, data->strs, dst, data->strs, src, data->dstrs, diag);
	else
		md_zmulc2(data->N, data->dims, data->strs, dst, data->strs, src, data->dstrs, diag);
}
/*
 * Adjoint of the matrix linop: dst = A^H src via a conjugated tensor
 * multiplication.
 *
 * On the CUDA path a device copy of the matrix is created lazily and
 * cached in the data struct.
 */
static void linop_matrix_apply_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src)
{
	struct operator_matrix_s* data = CAST_DOWN(operator_matrix_s, _data);

	const complex float* matrix = data->mat;

#ifdef USE_CUDA
	if (cuda_ondevice(src)) {

		// lazily cache a device copy of the matrix
		if (NULL == data->mat_gpu)
			data->mat_gpu = md_gpu_move(data->N, data->mat_dims, data->mat, CFL_SIZE);

		matrix = data->mat_gpu;
	}
#endif

	md_ztenmulc(data->N, data->in_dims, dst, data->out_dims, src, data->mat_dims, matrix);
}
struct noir_data* noir_init(const long dims[DIMS], const complex float* mask, const complex float* psf, bool rvc, bool use_gpu) { #ifdef USE_CUDA md_alloc_fun_t my_alloc = use_gpu ? md_alloc_gpu : md_alloc; #else assert(!use_gpu); md_alloc_fun_t my_alloc = md_alloc; #endif PTR_ALLOC(struct noir_data, data); data->rvc = rvc; md_copy_dims(DIMS, data->dims, dims); md_select_dims(DIMS, FFT_FLAGS|COIL_FLAG|CSHIFT_FLAG, data->sign_dims, dims); md_calc_strides(DIMS, data->sign_strs, data->sign_dims, CFL_SIZE); md_select_dims(DIMS, FFT_FLAGS|COIL_FLAG|MAPS_FLAG, data->coil_dims, dims); md_calc_strides(DIMS, data->coil_strs, data->coil_dims, CFL_SIZE); md_select_dims(DIMS, FFT_FLAGS|MAPS_FLAG|CSHIFT_FLAG, data->imgs_dims, dims); md_calc_strides(DIMS, data->imgs_strs, data->imgs_dims, CFL_SIZE); md_select_dims(DIMS, FFT_FLAGS|COIL_FLAG, data->data_dims, dims); md_calc_strides(DIMS, data->data_strs, data->data_dims, CFL_SIZE); md_select_dims(DIMS, FFT_FLAGS, data->mask_dims, dims); md_calc_strides(DIMS, data->mask_strs, data->mask_dims, CFL_SIZE); md_select_dims(DIMS, FFT_FLAGS, data->wght_dims, dims); md_calc_strides(DIMS, data->wght_strs, data->wght_dims, CFL_SIZE); md_select_dims(DIMS, FFT_FLAGS|CSHIFT_FLAG, data->ptrn_dims, dims); md_calc_strides(DIMS, data->ptrn_strs, data->ptrn_dims, CFL_SIZE); complex float* weights = md_alloc(DIMS, data->wght_dims, CFL_SIZE); noir_calc_weights(dims, weights); fftmod(DIMS, data->wght_dims, FFT_FLAGS, weights, weights); fftscale(DIMS, data->wght_dims, FFT_FLAGS, weights, weights); data->weights = weights; #ifdef USE_CUDA if (use_gpu) { data->weights = md_gpu_move(DIMS, data->wght_dims, weights, CFL_SIZE); } #endif complex float* ptr = my_alloc(DIMS, data->ptrn_dims, CFL_SIZE); md_copy(DIMS, data->ptrn_dims, ptr, psf, CFL_SIZE); fftmod(DIMS, data->ptrn_dims, FFT_FLAGS, ptr, ptr); data->pattern = ptr; complex float* msk = my_alloc(DIMS, data->mask_dims, CFL_SIZE); if (NULL == mask) { assert(!use_gpu); md_zfill(DIMS, data->mask_dims, msk, 
1.); } else { md_copy(DIMS, data->mask_dims, msk, mask, CFL_SIZE); } // fftmod(DIMS, data->mask_dims, 7, msk, msk); fftscale(DIMS, data->mask_dims, FFT_FLAGS, msk, msk); data->mask = msk; data->sens = my_alloc(DIMS, data->coil_dims, CFL_SIZE); data->xn = my_alloc(DIMS, data->imgs_dims, CFL_SIZE); data->tmp = my_alloc(DIMS, data->sign_dims, CFL_SIZE); return data; }