static void linop_matrix_apply_adjoint(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct operator_matrix_s* data = CAST_DOWN(operator_matrix_s, _data); unsigned int N = data->mat_iovec->N; //debug_printf(DP_DEBUG1, "compute adjoint\n"); md_clear2(N, data->domain_iovec->dims, data->domain_iovec->strs, dst, CFL_SIZE); // FIXME check all the cases where computation can be done with blas if (cgemm_forward_standard(data)) { long L = md_calc_size(data->T_dim, data->domain_iovec->dims); blas_cgemm('N', 'N', L, data->K, data->T, 1., L, (const complex float (*)[])src, data->T, (const complex float (*)[])data->mat_conj, 0., L, (complex float (*)[])dst); } else { md_zfmacc2(N, data->max_dims, data->domain_iovec->strs, dst, data->codomain_iovec->strs, src, data->mat_iovec->strs, data->mat); } }
static void linop_matrix_apply_normal(const linop_data_t* _data, complex float* dst, const complex float* src) { const struct operator_matrix_s* data = CAST_DOWN(operator_matrix_s, _data); unsigned int N = data->mat_iovec->N; // FIXME check all the cases where computation can be done with blas //debug_printf(DP_DEBUG1, "compute normal\n"); if (cgemm_forward_standard(data)) { long max_dims_gram[N]; md_copy_dims(N, max_dims_gram, data->domain_iovec->dims); max_dims_gram[data->T_dim] = data->K; long tmp_dims[N]; long tmp_str[N]; md_copy_dims(N, tmp_dims, max_dims_gram); tmp_dims[data->K_dim] = 1; md_calc_strides(N, tmp_str, tmp_dims, CFL_SIZE); complex float* tmp = md_alloc_sameplace(N, data->domain_iovec->dims, CFL_SIZE, dst); md_clear(N, data->domain_iovec->dims, tmp, CFL_SIZE); md_zfmac2(N, max_dims_gram, tmp_str, tmp, data->domain_iovec->strs, src, data->mat_gram_iovec->strs, data->mat_gram); md_transpose(N, data->T_dim, data->K_dim, data->domain_iovec->dims, dst, tmp_dims, tmp, CFL_SIZE); md_free(tmp); } else { long L = md_calc_size(data->T_dim, data->domain_iovec->dims); blas_cgemm('N', 'T', L, data->K, data->K, 1., L, (const complex float (*)[])src, data->K, (const complex float (*)[])data->mat_gram, 0., L, (complex float (*)[])dst); } }
/**
 * Dense complex matrix product forwarded to blas_cgemm with no transposition,
 * alpha = 1 and beta = 0.
 *
 * NOTE(review): assuming blas_cgemm follows the usual BLAS gemm convention,
 * this computes C = A * B with column-major storage -- confirm against the
 * blas_cgemm declaration.
 *
 * The function name is parenthesised so that a function-like macro of the same
 * name (if one is defined) is not expanded at this definition.
 *
 * @param M first extent passed to blas_cgemm; also used as leading dimension of A and C
 * @param N second extent passed to blas_cgemm
 * @param K third extent passed to blas_cgemm; also used as leading dimension of B
 * @param C output array, declared as [N][M]
 * @param A first input array, declared as [K][M]
 * @param B second input array, declared as [N][K]
 */
void (blas_matrix_multiply)(long M, long N, long K, complex float C[N][M], const complex float A[K][M], const complex float B[N][K])
{
	// leading dimensions equal the innermost extent of each array
	const long lda = M;
	const long ldb = K;
	const long ldc = M;

	blas_cgemm(CblasNoTrans, CblasNoTrans, M, N, K,
			1., lda, A,
			ldb, B,
			0., ldc, C);
}