static void linop_matrix_apply_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct operator_matrix_s* data = CAST_DOWN(operator_matrix_s, _data); unsigned int N = data->mat_iovec->N; //debug_printf(DP_DEBUG1, "compute adjoint\n"); md_clear2(N, data->domain_iovec->dims, data->domain_iovec->strs, dst, CFL_SIZE); // FIXME check all the cases where computation can be done with blas if (cgemm_forward_standard(data)) { long L = md_calc_size(data->T_dim, data->domain_iovec->dims); blas_cgemm('N', 'N', L, data->K, data->T, 1., L, (const complex float (*)[])src, data->T, (const complex float (*)[])data->mat_conj, 0., L, (complex float (*)[])dst); } else { md_zfmacc2(N, data->max_dims, data->domain_iovec->strs, dst, data->codomain_iovec->strs, src, data->mat_iovec->strs, data->mat); } }
/**
 * Compute Strang's circulant preconditioner.
 *
 * Strang's preconditioner is simply the cropped psf in the image domain.
 *
 * The psf is transformed with ifftuc, combined with linphase through
 * md_zfmacc2 into a freshly cleared buffer, transformed back with fftuc,
 * and finally replaced by its magnitude plus a small constant (1e-3).
 *
 * @param N number of dimensions used for the final fftuc/md_zabs/md_zsadd
 *          calls; allocations and the psf transform use N + 1
 * @param pre_dims dimensions of the preconditioner
 * @param pre_strs strides of the preconditioner
 * @param psf_dims dimensions of the psf
 * @param psf_strs strides of the psf (also used for linphase)
 * @param psf point spread function data
 * @param linphase linear phase data
 * @return buffer obtained from md_alloc; it is not freed here, so the
 *         caller presumably releases it with md_free
 */
static complex float* compute_precond(unsigned int N, const long* pre_dims, const long* pre_strs, const long* psf_dims, const long* psf_strs, const complex float* psf, const complex float* linphase)
{
	int ND = N + 1;
	unsigned long fft_flags = FFT_FLAGS;

	complex float* precond = md_alloc(ND, pre_dims, CFL_SIZE);
	complex float* psf_img = md_alloc(ND, psf_dims, CFL_SIZE);

	// psf -> image domain
	ifftuc(ND, psf_dims, fft_flags, psf_img, psf);

	// compensate for the linear phase to obtain the cropped psf
	md_clear(ND, pre_dims, precond, CFL_SIZE);
	md_zfmacc2(ND, psf_dims,
			pre_strs, precond,
			psf_strs, psf_img,
			psf_strs, linphase);

	md_free(psf_img);

	// back to the Fourier domain
	fftuc(N, pre_dims, fft_flags, precond, precond);

	// magnitude plus a small offset
	md_zabs(N, pre_dims, precond, precond);
	md_zsadd(N, pre_dims, precond, precond, 1e-3);

	return precond;
}
static void maps_apply_adjoint(const void* _data, complex float* dst, const complex float* src) { const struct maps_data* data = _data; // dst = sum( conj(sens) .* tmp ) md_clear(DIMS, data->img_dims, dst, CFL_SIZE); md_zfmacc2(DIMS, data->max_dims, data->strs_img, dst, data->strs_ksp, src, data->strs_mps, data->sens); }
static void sense_adjoint(const void* _data, complex float* imgs, const complex float* out) { const struct sense_data* data = _data; md_zmulc2(DIMS, data->data_dims, data->data_strs, data->tmp, data->data_strs, out, data->mask_strs, data->pattern); ifftc(DIMS, data->data_dims, FFT_FLAGS, data->tmp, data->tmp); fftscale(DIMS, data->data_dims, FFT_FLAGS, data->tmp, data->tmp); md_clear(DIMS, data->imgs_dims, imgs, CFL_SIZE); md_zfmacc2(DIMS, data->sens_dims, data->imgs_strs, imgs, data->data_strs, data->tmp, data->sens_strs, data->sens); }
static void toeplitz_mult(const struct nufft_data* data, complex float* dst, const complex float* src) { unsigned int ND = data->N + 3; md_zmul2(ND, data->cml_dims, data->cml_strs, data->grid, data->cim_strs, src, data->lph_strs, data->linphase); linop_forward(data->fft_op, ND, data->cml_dims, data->grid, ND, data->cml_dims, data->grid); md_zmul2(ND, data->cml_dims, data->cml_strs, data->grid, data->cml_strs, data->grid, data->psf_strs, data->psf); linop_adjoint(data->fft_op, ND, data->cml_dims, data->grid, ND, data->cml_dims, data->grid); md_clear(ND, data->cim_dims, dst, CFL_SIZE); md_zfmacc2(ND, data->cml_dims, data->cim_strs, dst, data->cml_strs, data->grid, data->lph_strs, data->linphase); }
void noir_adj(struct noir_data* data, complex float* dst, const complex float* src) { long split = md_calc_size(DIMS, data->imgs_dims); md_zmulc2(DIMS, data->sign_dims, data->sign_strs, data->tmp, data->data_strs, src, data->ptrn_strs, data->pattern); ifft(DIMS, data->sign_dims, FFT_FLAGS, data->tmp, data->tmp); // we should move it to the end, but fft scaling is applied so this would be need to moved into data->xn or weights maybe? md_zmulc2(DIMS, data->sign_dims, data->sign_strs, data->tmp, data->sign_strs, data->tmp, data->mask_strs, data->mask); md_clear(DIMS, data->coil_dims, dst + split, CFL_SIZE); md_zfmacc2(DIMS, data->sign_dims, data->coil_strs, dst + split, data->sign_strs, data->tmp, data->imgs_strs, data->xn); noir_back_coils(data, dst + split, dst + split); md_clear(DIMS, data->imgs_dims, dst, CFL_SIZE); md_zfmacc2(DIMS, data->sign_dims, data->imgs_strs, dst, data->sign_strs, data->tmp, data->coil_strs, data->sens); if (data->rvc) md_zreal(DIMS, data->imgs_dims, dst, dst); }
static void fmac_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src) { auto data = CAST_DOWN(fmac_data, _data); #ifdef USE_CUDA const complex float* tensor = get_tensor(data, cuda_ondevice(src)); #else const complex float* tensor = data->tensor; #endif md_clear2(data->N, data->idims, data->istrs, dst, CFL_SIZE); md_zfmacc2(data->N, data->dims, data->istrs, dst, data->ostrs, src, data->tstrs, tensor); }
// Adjoint: from kspace to image static void nufft_apply_adjoint(const void* _data, complex float* dst, const complex float* src) { const struct nufft_data* data = _data; unsigned int ND = data->N + 3; complex float* gridX = md_alloc(data->N, data->cm2_dims, CFL_SIZE); md_clear(data->N, data->cm2_dims, gridX, CFL_SIZE); complex float* wdat = NULL; if (NULL != data->weights) { wdat = md_alloc(data->N, data->ksp_dims, CFL_SIZE); md_zmulc2(data->N, data->ksp_dims, data->ksp_strs, wdat, data->ksp_strs, src, data->wgh_strs, data->weights); src = wdat; } grid2(2., data->width, data->beta, ND, data->trj_dims, data->traj, data->cm2_dims, gridX, data->ksp_dims, src); md_free(wdat); long factors[data->N]; for (unsigned int i = 0; i < data->N; i++) factors[i] = ((data->img_dims[i] > 1) && (i < 3)) ? 2 : 1; md_decompose(data->N, factors, data->cml_dims, data->grid, data->cm2_dims, gridX, CFL_SIZE); md_free(gridX); md_zmulc2(ND, data->cml_dims, data->cml_strs, data->grid, data->cml_strs, data->grid, data->img_strs, data->fftmod); linop_adjoint(data->fft_op, ND, data->cml_dims, data->grid, ND, data->cml_dims, data->grid); md_clear(ND, data->cim_dims, dst, CFL_SIZE); md_zfmacc2(ND, data->cml_dims, data->cim_strs, dst, data->cml_strs, data->grid, data->lph_strs, data->linphase); if (data->conf.toeplitz) md_zmul2(ND, data->cim_dims, data->cim_strs, dst, data->cim_strs, dst, data->img_strs, data->roll); }
static bool test_md_zfmacc2_flags(unsigned int D, const long idims[D], unsigned int flags, const complex float* in1, const complex float* in2, const complex float* out_ref) { long odims[D]; md_select_dims(D, ~flags, odims, idims); complex float* out = md_calloc(D, odims, CFL_SIZE); long istr[D]; long ostr[D]; md_calc_strides(D, istr, idims, CFL_SIZE); md_calc_strides(D, ostr, odims, CFL_SIZE); md_zfmacc2(D, idims, ostr, out, istr, in1, istr, in2); float err = md_znrmse(D, odims, out_ref, out); md_free(out); UT_ASSERT(err < UT_TOL); return true; }
/**
 * Compute the Gram matrix, A^H A.
 *
 * The result is written to a buffer allocated inside this function and
 * handed back through @p gram. The temporaries used along the way are
 * freed before returning.
 *
 * @param N number of dimensions
 * @param T_dim dimension corresponding to the rows of A
 * @param T number of rows of A (codomain)
 * @param K_dim dimension corresponding to the columns of A
 * @param K number of columns of A (domain)
 * @param gram receives the result buffer (allocated by this function)
 * @param matrix_dims dimensions of A
 * @param matrix matrix data
 * @return iovec_s describing the Gram matrix dimensions (created with
 *         iovec_create)
 */
const struct iovec_s* compute_gram_matrix(unsigned int N, unsigned int T_dim, unsigned int T, unsigned int K_dim, unsigned int K, complex float** gram, const long matrix_dims[N], const complex float* matrix)
{
	// FIXME this can certainly be simplified...
	// Just be careful to consider the case where the data passed to the
	// operator is a subset of a bigger array

	// One extra dimension (index N) is used for the temporaries:
	// B_dims = [T K 1] or [K T 1]
	// C_dims = [T 1 K] or [1 T K]
	// A_dims = [1 K K] or [K 1 K]
	// after: gram_dims = [1 K1 K2] --> [K2 K1 1] or [K1 1 K2] --> [K1 K2 1]

	long A_dims[N + 1];
	md_singleton_dims(N + 1, A_dims);
	A_dims[K_dim] = K;
	A_dims[N] = K;

	long B_dims[N + 1];
	md_singleton_dims(N + 1, B_dims);
	B_dims[T_dim] = T;
	B_dims[K_dim] = K;

	long C_dims[N + 1];
	md_singleton_dims(N + 1, C_dims);
	C_dims[T_dim] = T;
	C_dims[N] = K;

	long max_dims[N + 1];
	md_singleton_dims(N + 1, max_dims);
	max_dims[T_dim] = T;
	max_dims[K_dim] = K;
	max_dims[N] = K;

	long fake_gram_dims[N + 1];
	md_singleton_dims(N + 1, fake_gram_dims);
	fake_gram_dims[T_dim] = K;
	fake_gram_dims[K_dim] = K;

	long A_str[N + 1];
	long B_str[N + 1];
	long C_str[N + 1];

	md_calc_strides(N + 1, A_str, A_dims, CFL_SIZE);
	md_calc_strides(N + 1, B_str, B_dims, CFL_SIZE);
	md_calc_strides(N + 1, C_str, C_dims, CFL_SIZE);

	// temporaries are allocated via md_alloc_sameplace relative to matrix
	complex float* bufA = md_alloc_sameplace(N + 1, A_dims, CFL_SIZE, matrix);
	complex float* bufB = md_alloc_sameplace(N + 1, B_dims, CFL_SIZE, matrix);
	complex float* bufC = md_alloc_sameplace(N + 1, C_dims, CFL_SIZE, matrix);

	// B is a copy of the input; C is B with dimensions K_dim and N swapped
	md_copy(N, matrix_dims, bufB, matrix, CFL_SIZE);
	md_transpose(N + 1, K_dim, N, C_dims, bufC, B_dims, bufB, CFL_SIZE);

	// accumulate B and C into the zeroed A over max_dims
	md_clear(N + 1, A_dims, bufA, CFL_SIZE);
	md_zfmacc2(N + 1, max_dims,
			A_str, bufA,
			B_str, bufB,
			C_str, bufC);

	// move the extra dimension back into T_dim to get the final layout
	*gram = md_alloc_sameplace(N, fake_gram_dims, CFL_SIZE, matrix);
	md_transpose(N + 1, T_dim, N, fake_gram_dims, *gram, A_dims, bufA, CFL_SIZE);

	const struct iovec_s* gram_iov = iovec_create(N, fake_gram_dims, CFL_SIZE);

	md_free(bufA);
	md_free(bufB);
	md_free(bufC);

	return gram_iov;
}