static void linop_matrix_apply(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct operator_matrix_s* data = CAST_DOWN(operator_matrix_s, _data); long N = data->mat_iovec->N; //debug_printf(DP_DEBUG1, "compute forward\n"); md_clear2(N, data->codomain_iovec->dims, data->codomain_iovec->strs, dst, CFL_SIZE); // FIXME check all the cases where computation can be done with blas if (cgemm_forward_standard(data)) { long L = md_calc_size(data->T_dim, data->domain_iovec->dims); blas_cgemm('N', 'T', L, data->T, data->K, 1., L, (const complex float (*)[])src, data->T, (const complex float (*)[])data->mat, 0., L, (complex float (*)[])dst); } else { md_zfmac2(N, data->max_dims, data->codomain_iovec->strs, dst, data->domain_iovec->strs, src, data->mat_iovec->strs, data->mat); } }
/**
 * Apply the sensitivity maps to an image.
 *
 * Clears dst over ksp_dims, then accumulates the strided product of src and
 * the stored sensitivities into it via md_zfmac2 over max_dims.
 *
 * @param _data pointer to a struct maps_data
 * @param dst output array (addressed with strs_ksp)
 * @param src input array (addressed with strs_img)
 */
static void maps_apply(const void* _data, complex float* dst, const complex float* src)
{
	const struct maps_data* maps = _data;

	// md_zfmac2 accumulates, so start from a zeroed output
	md_clear(DIMS, maps->ksp_dims, dst, CFL_SIZE);

	md_zfmac2(DIMS, maps->max_dims,
			maps->strs_ksp, dst,
			maps->strs_img, src,
			maps->strs_mps, maps->sens);
}
void noir_fun(struct noir_data* data, complex float* dst, const complex float* src) { long split = md_calc_size(DIMS, data->imgs_dims); md_copy(DIMS, data->imgs_dims, data->xn, src, CFL_SIZE); noir_forw_coils(data, data->sens, src + split); md_clear(DIMS, data->sign_dims, data->tmp, CFL_SIZE); md_zfmac2(DIMS, data->sign_dims, data->sign_strs, data->tmp, data->imgs_strs, src, data->coil_strs, data->sens); // could be moved to the benning, but see comment below md_zmul2(DIMS, data->sign_dims, data->sign_strs, data->tmp, data->sign_strs, data->tmp, data->mask_strs, data->mask); fft(DIMS, data->sign_dims, FFT_FLAGS, data->tmp, data->tmp); md_clear(DIMS, data->data_dims, dst, CFL_SIZE); md_zfmac2(DIMS, data->sign_dims, data->data_strs, dst, data->sign_strs, data->tmp, data->ptrn_strs, data->pattern); }
static void sense_forward(const void* _data, complex float* out, const complex float* imgs) { const struct sense_data* data = _data; md_clear(DIMS, data->data_dims, out, CFL_SIZE); md_zfmac2(DIMS, data->sens_dims, data->data_strs, out, data->sens_strs, data->sens, data->imgs_strs, imgs); fftc(DIMS, data->data_dims, FFT_FLAGS, out, out); fftscale(DIMS, data->data_dims, FFT_FLAGS, out, out); md_zmul2(DIMS, data->data_dims, data->data_strs, out, data->data_strs, out, data->mask_strs, data->pattern); }
static void fmac_apply(const linop_data_t* _data, complex float* dst, const complex float* src) { auto data = CAST_DOWN(fmac_data, _data); #ifdef USE_CUDA const complex float* tensor = get_tensor(data, cuda_ondevice(src)); #else const complex float* tensor = data->tensor; #endif md_clear2(data->N, data->odims, data->ostrs, dst, CFL_SIZE); md_zfmac2(data->N, data->dims, data->ostrs, dst, data->istrs, src, data->tstrs, tensor); }
/**
 * Combine two k-space data sets according to a sampling pattern.
 *
 * Assuming the usual md_* semantics, the result is
 *
 *     dst = kspace2 - pattern * kspace2 + pattern * kspace1
 *
 * The pattern has the dims of the data with the coil dimension removed.
 * The result is built in a temporary buffer and copied to dst at the end.
 *
 * @param dims dimensions of dst, kspace1 and kspace2 (dims[MAPS_DIM] must be 1)
 * @param dst output array
 * @param pattern sampling pattern (dims without COIL_FLAG)
 * @param kspace1 data multiplied by the pattern and added
 * @param kspace2 data from which its pattern-weighted copy is subtracted
 */
void data_consistency(const long dims[DIMS], complex float* dst, const complex float* pattern, const complex float* kspace1, const complex float* kspace2)
{
	assert(1 == dims[MAPS_DIM]);

	long full_strs[DIMS];
	md_calc_strides(DIMS, full_strs, dims, CFL_SIZE);

	// the pattern carries no coil dimension
	long pat_dims[DIMS];
	long pat_strs[DIMS];
	md_select_dims(DIMS, ~COIL_FLAG, pat_dims, dims);
	md_calc_strides(DIMS, pat_strs, pat_dims, CFL_SIZE);

	complex float* acc = md_alloc_sameplace(DIMS, dims, CFL_SIZE, dst);

	// acc = pattern * kspace2
	md_zmul2(DIMS, dims, full_strs, acc, full_strs, kspace2, pat_strs, pattern);

	// acc = kspace2 - acc
	md_zsub(DIMS, dims, acc, kspace2, acc);

	// acc += pattern * kspace1
	md_zfmac2(DIMS, dims, full_strs, acc, full_strs, kspace1, pat_strs, pattern);

	md_copy(DIMS, dims, dst, acc, CFL_SIZE);
	md_free(acc);
}
static double bench_generic_matrix_multiply(long dims[DIMS]) { long dimsX[DIMS]; long dimsY[DIMS]; long dimsZ[DIMS]; md_select_dims(DIMS, 2 * 3 + 17, dimsX, dims); // 1 110 1 md_select_dims(DIMS, 2 * 6 + 17, dimsY, dims); // 1 011 1 md_select_dims(DIMS, 2 * 5 + 17, dimsZ, dims); // 1 101 1 long strsX[DIMS]; long strsY[DIMS]; long strsZ[DIMS]; md_calc_strides(DIMS, strsX, dimsX, CFL_SIZE); md_calc_strides(DIMS, strsY, dimsY, CFL_SIZE); md_calc_strides(DIMS, strsZ, dimsZ, CFL_SIZE); complex float* x = md_alloc(DIMS, dimsX, CFL_SIZE); complex float* y = md_alloc(DIMS, dimsY, CFL_SIZE); complex float* z = md_alloc(DIMS, dimsZ, CFL_SIZE); md_gaussian_rand(DIMS, dimsX, x); md_gaussian_rand(DIMS, dimsY, y); md_clear(DIMS, dimsZ, z, CFL_SIZE); double tic = timestamp(); md_zfmac2(DIMS, dims, strsZ, z, strsX, x, strsY, y); double toc = timestamp(); md_free(x); md_free(y); md_free(z); return toc - tic; }
static void linop_matrix_apply_normal(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct operator_matrix_s* data = CAST_DOWN(operator_matrix_s, _data); unsigned int N = data->mat_iovec->N; // FIXME check all the cases where computation can be done with blas //debug_printf(DP_DEBUG1, "compute normal\n"); if (cgemm_forward_standard(data)) { long max_dims_gram[N]; md_copy_dims(N, max_dims_gram, data->domain_iovec->dims); max_dims_gram[data->T_dim] = data->K; long tmp_dims[N]; long tmp_str[N]; md_copy_dims(N, tmp_dims, max_dims_gram); tmp_dims[data->K_dim] = 1; md_calc_strides(N, tmp_str, tmp_dims, CFL_SIZE); complex float* tmp = md_alloc_sameplace(N, data->domain_iovec->dims, CFL_SIZE, dst); md_clear(N, data->domain_iovec->dims, tmp, CFL_SIZE); md_zfmac2(N, max_dims_gram, tmp_str, tmp, data->domain_iovec->strs, src, data->mat_gram_iovec->strs, data->mat_gram); md_transpose(N, data->T_dim, data->K_dim, data->domain_iovec->dims, dst, tmp_dims, tmp, CFL_SIZE); md_free(tmp); } else { long L = md_calc_size(data->T_dim, data->domain_iovec->dims); blas_cgemm('N', 'T', L, data->K, data->K, 1., L, (const complex float (*)[])src, data->K, (const complex float (*)[])data->mat_gram, 0., L, (complex float (*)[])dst); } }
static bool test_md_zfmac2_flags(unsigned int D, const long idims[D], unsigned int flags, const complex float* in1, const complex float* in2, const complex float* out_ref) { long odims[D]; md_select_dims(D, ~flags, odims, idims); complex float* out = md_calloc(D, odims, CFL_SIZE); long istr[D]; long ostr[D]; md_calc_strides(D, istr, idims, CFL_SIZE); md_calc_strides(D, ostr, odims, CFL_SIZE); md_zfmac2(D, idims, ostr, out, istr, in1, istr, in2); float err = md_znrmse(D, odims, out_ref, out); md_free(out); UT_ASSERT(err < UT_TOL); return true; }
/** * Efficiently chain two matrix linops by multiplying the actual matrices together. * Stores a copy of the new matrix. * Returns: C = B A * * @param a first matrix (applied to input) * @param b second matrix (applied to output of first matrix) */ struct linop_s* linop_matrix_chain(const struct linop_s* a, const struct linop_s* b) { const struct operator_matrix_s* a_data = linop_get_data(a); const struct operator_matrix_s* b_data = linop_get_data(b); // check compatibility assert(linop_codomain(a)->N == linop_domain(b)->N); assert(md_calc_size(linop_codomain(a)->N, linop_codomain(a)->dims) == md_calc_size(linop_domain(b)->N, linop_domain(b)->dims)); assert(a_data->K_dim != b_data->T_dim); // FIXME error for now -- need to deal with this specially. assert((a_data->T_dim == b_data->K_dim) && (a_data->T == b_data->K)); unsigned int N = linop_domain(a)->N; long max_dims[N]; md_singleton_dims(N, max_dims); max_dims[a_data->T_dim] = a_data->T; max_dims[a_data->K_dim] = a_data->K; max_dims[b_data->T_dim] = b_data->T; long matrix_dims[N]; long matrix_strs[N]; md_select_dims(N, ~MD_BIT(a_data->T_dim), matrix_dims, max_dims); md_calc_strides(N, matrix_strs, matrix_dims, CFL_SIZE); complex float* matrix = md_alloc_sameplace(N, matrix_dims, CFL_SIZE, a_data->mat); md_clear(N, matrix_dims, matrix, CFL_SIZE); md_zfmac2(N, max_dims, matrix_strs, matrix, a_data->mat_iovec->strs, a_data->mat, b_data->mat_iovec->strs, b_data->mat); struct linop_s* c = linop_matrix_create(N, linop_codomain(b)->dims, linop_domain(a)->dims, matrix_dims, matrix); md_free(matrix); return c; }