/*
 * Implements finite difference operator (order 1 for now)
 * using circular shift: diff(x) = x - circshift(x)
 *
 * optr = [iptr(1); diff(iptr)]
 *
 * The difference is applied in turn along every dimension whose bit is
 * set in flags; the result of one dimension is the input of the next.
 *
 * @param D number of dimensions
 * @param dims extent of every dimension
 * @param flags bitmask selecting the dimensions to difference along
 * @param snip Keeps first entry if snip = false; clear first entry if snip = true
 * @param tmp scratch buffer, always accessed with istrs
 * @param ostrs strides used for optr
 * @param optr output
 * @param istrs strides used for iptr and tmp
 * @param iptr input
 */
static void md_zfinitediff_core2(unsigned int D, const long dims[D], unsigned int flags, bool snip, complex float* tmp, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr)
{
	// tmp holds the running input for the current dimension
	md_copy2(D, dims, istrs, tmp, istrs, iptr, sizeof(complex float));

	long zdims[D];
	long center[D];

	md_select_dims(D, ~0, zdims, dims);
	memset(center, 0, D * sizeof(long));

	for (unsigned int i = 0; i < D; i++) {

		if (!MD_IS_SET(flags, i))
			continue;

		center[i] = 1; // order

		// optr = circshift(tmp) along dimension i
		md_circ_shift2(D, dims, center, ostrs, optr, istrs, tmp, sizeof(complex float));

		// zdims selects the first slice along dimension i
		zdims[i] = 1;

		if (!snip) // zero out first dimension before subtracting
			md_clear2(D, zdims, ostrs, optr, sizeof(complex float));

		md_zsub2(D, dims, ostrs, optr, istrs, tmp, ostrs, optr);

		// Feed the result into the next dimension. tmp is read with istrs
		// everywhere else, so it has to be written with istrs here as well;
		// writing it with ostrs corrupts it whenever the two layouts differ.
		md_copy2(D, dims, istrs, tmp, ostrs, optr, sizeof(complex float));

		if (snip) // zero out first dimension after subtracting
			md_clear2(D, zdims, ostrs, optr, sizeof(complex float));

		// restore the helper arrays for the next dimension
		center[i] = 0;
		zdims[i] = dims[i];
	}
}
static void linop_matrix_apply_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct operator_matrix_s* data = CAST_DOWN(operator_matrix_s, _data); unsigned int N = data->mat_iovec->N; //debug_printf(DP_DEBUG1, "compute adjoint\n"); md_clear2(N, data->domain_iovec->dims, data->domain_iovec->strs, dst, CFL_SIZE); // FIXME check all the cases where computation can be done with blas if (cgemm_forward_standard(data)) { long L = md_calc_size(data->T_dim, data->domain_iovec->dims); blas_cgemm('N', 'N', L, data->K, data->T, 1., L, (const complex float (*)[])src, data->T, (const complex float (*)[])data->mat_conj, 0., L, (complex float (*)[])dst); } else { md_zfmacc2(N, data->max_dims, data->domain_iovec->strs, dst, data->codomain_iovec->strs, src, data->mat_iovec->strs, data->mat); } }
static void fmac_apply(const linop_data_t* _data, complex float* dst, const complex float* src) { auto data = CAST_DOWN(fmac_data, _data); #ifdef USE_CUDA const complex float* tensor = get_tensor(data, cuda_ondevice(src)); #else const complex float* tensor = data->tensor; #endif md_clear2(data->N, data->odims, data->ostrs, dst, CFL_SIZE); md_zfmac2(data->N, data->dims, data->ostrs, dst, data->istrs, src, data->tstrs, tensor); }
static void linop_matrix_apply(const void* _data, complex float* dst, const complex float* src) { const struct operator_matrix_s* data = _data; long N = data->mat_iovec->N; //debug_printf(DP_DEBUG1, "compute forward\n"); md_clear2(N, data->codomain_iovec->dims, data->codomain_iovec->strs, dst, CFL_SIZE); // FIXME check all the cases where computation can be done with blas if ( cgemm_forward_standard(data) ) { long L = md_calc_size(data->T_dim, data->domain_iovec->dims); cgemm_sameplace('N', 'T', L, data->T, data->K, &(complex float){1.}, (const complex float (*) [])src, L, (const complex float (*) [])data->mat, data->T, &(complex float){0.}, (complex float (*) [])dst, L);
/*
 * Implements cumulative sum operator (order 1 for now)
 * using circular shift: cumsum(x) = x + circshift(x,1) + circshift(x,2) + ...
 * where the entries wrapped around by each shift are cleared before adding.
 *
 * optr = cumsum(iptr)
 *
 * The sum is applied in turn along every dimension whose bit is set in
 * flags; the result of one dimension is the input of the next.
 *
 * @param D number of dimensions
 * @param dims extent of every dimension
 * @param flags bitmask selecting the dimensions to accumulate along
 * @param tmp scratch buffer holding the current input, accessed with istrs
 * @param tmp2 scratch buffer holding the shifted copy, accessed with istrs
 * @param ostrs strides used for optr
 * @param optr output
 * @param istrs strides used for iptr, tmp and tmp2
 * @param iptr input
 */
static void md_zcumsum_core2(unsigned int D, const long dims[D], unsigned int flags, complex float* tmp, complex float* tmp2, const long ostrs[D], complex float* optr, const long istrs[D], const complex float* iptr)
{
	// out = in
	md_copy2(D, dims, ostrs, optr, istrs, iptr, sizeof(complex float));
	md_copy2(D, dims, istrs, tmp, istrs, iptr, sizeof(complex float));

	long zdims[D];
	long center[D];

	md_select_dims(D, ~0, zdims, dims);
	memset(center, 0, D * sizeof(long));

	for (unsigned int i = 0; i < D; i++) {

		if (!MD_IS_SET(flags, i))
			continue;

		// long counter: it is compared against and stored into long values
		for (long d = 1; d < dims[i]; d++) {

			// tmp2 = circshift(tmp, d) along dimension i
			center[i] = d;
			md_circ_shift2(D, dims, center, istrs, tmp2, istrs, tmp, sizeof(complex float));

			// tmp2(1:d,:) = 0, i.e. drop the wrapped-around entries
			zdims[i] = d;
			md_clear2(D, zdims, istrs, tmp2, sizeof(complex float));

			// out = out + tmp2
			md_zadd2(D, dims, ostrs, optr, istrs, tmp2, ostrs, optr);
		}

		// Feed the result into the next dimension. tmp is read with istrs
		// by md_circ_shift2, so it has to be written with istrs here as
		// well; writing it with ostrs corrupts it whenever the layouts differ.
		md_copy2(D, dims, istrs, tmp, ostrs, optr, sizeof(complex float));

		// restore the helper arrays for the next dimension
		center[i] = 0;
		zdims[i] = dims[i];
	}
}
static void zfinitediff_adjoint(const linop_data_t* _data, complex float* optr, const complex float* iptr) { const auto data = CAST_DOWN(zfinitediff_data, _data); // if (docircshift) // out(..,2:end,..) = in(..,2:end,..) - in(..,1:(end-1),..) // out(..,1,..) = in(..,1,..) - in(..,end,..) // else // out(..,1,..) = in(..,1,..) // out(..,2:(end-1),..) = in(..,2:end,..) - in(..,1:(end-1),..) // out(..,end,..) = -in(..,end,..); unsigned int d = data->dim_diff; long nx = data->dims_adj[d]; long off_in, off_adj; long dims_sub[data->D]; md_copy_dims(data->D, dims_sub, data->dims_adj); if (data->do_circdiff) { // out(..,2:end,..) = in(..,2:end,..) - in(..,1:(end-1),..) dims_sub[d] = nx - 1; off_adj = data->strides_adj[d] / CFL_SIZE; off_in = data->strides_in[d] / CFL_SIZE; md_zsub2(data->D, dims_sub, data->strides_in, optr + off_in, data->strides_in, iptr + off_adj, data->strides_adj, iptr); // out(..,1,..) = in(..,1,..) - in(..,end,..) dims_sub[d] = 1; off_adj = (nx - 1) * data->strides_adj[d] / CFL_SIZE; off_in = (nx - 1) * data->strides_in[d] / CFL_SIZE; md_zsub2(data->D, dims_sub, data->strides_in, optr, data->strides_adj, iptr, data->strides_adj, iptr + off_adj); } else { // out(..,end,..) = 0 //md_clear2(data->D, data->dims_in, data->strides_in, optr, CFL_SIZE); dims_sub[d] = 1; off_in = nx * data->strides_in[d] / CFL_SIZE; md_clear2(data->D, dims_sub, data->strides_in, optr + off_in, CFL_SIZE); // out(..,1:end-1,:) = in_adj(..,1:end,:) md_copy2(data->D, data->dims_adj, data->strides_in, optr, data->strides_adj, iptr, CFL_SIZE); // out(..,2:end,:) -= in_adj(..,1:end,:) off_in = data->strides_in[d] / CFL_SIZE; md_zsub2(data->D, data->dims_adj, data->strides_in, optr + off_in, data->strides_in, optr + off_in, data->strides_adj, iptr); /* // out(..,1,..) = in_adj(..,1,..) dims_sub[d] = 1; md_copy2(data->D, dims_sub, data->strides_in, optr, data->strides_adj, iptr, CFL_SIZE); // out(..,2:(end-1),..) = in(..,2:end,..) - in(..,1:(end-1),..) 
dims_sub[d] = nx - 1; off_adj = data->strides_adj[d]/CFL_SIZE; off_in = data->strides_in[d]/CFL_SIZE; md_zsub2(data->D, dims_sub, data->strides_in, optr+off_in, data->strides_adj, iptr+off_adj, data->strides_adj, iptr); // out(..,end,..) = -in(..,end,..); dims_sub[d] = 1; off_adj = (nx - 1) * data->strides_adj[d]/CFL_SIZE; off_in = nx * data->strides_in[d]/CFL_SIZE; // !!!This one operation is really really slow!!! md_zsmul2(data->D, dims_sub, data->strides_in, optr+off_in, data->strides_adj, iptr+off_adj, -1.); */ } }