void bi::marginalise(const ExpGaussianPdf<V1, M1>& p1, const ExpGaussianPdf<V2,M2>& p2, const M3 C, const ExpGaussianPdf<V4, M4>& q2, ExpGaussianPdf<V5,M5>& p3) { /* pre-conditions */ BI_ASSERT(q2.size() == p2.size()); BI_ASSERT(p3.size() == p1.size()); BI_ASSERT(C.size1() == p1.size() && C.size2() == p2.size()); typename sim_temp_vector<V1>::type z2(p2.size()); typename sim_temp_matrix<M1>::type K(p1.size(), p2.size()); typename sim_temp_matrix<M1>::type A1(p2.size(), p2.size()); typename sim_temp_matrix<M1>::type A2(p2.size(), p2.size()); /** * Compute gain matrix: * * \f[\mathcal{K} = C_{\mathbf{x}_1,\mathbf{x}_2}\Sigma_2^{-1}\,.\f] */ symm(1.0, p2.prec(), C, 0.0, K, 'R', 'U'); /** * Then result is given by \f$\mathcal{N}(\boldsymbol{\mu}', * \Sigma')\f$, where: * * \f[\boldsymbol{\mu}' = \boldsymbol{\mu}_1 + * \mathcal{K}(\boldsymbol{\mu}_3 - \boldsymbol{\mu}_2)\,,\f] */ z2 = q2.mean(); axpy(-1.0, p2.mean(), z2); p3.mean() = p1.mean(); gemv(1.0, K, z2, 1.0, p3.mean()); /** * and: * * \f{eqnarray*} * \Sigma' &=& \Sigma_1 + \mathcal{K}(\Sigma_3 - * \Sigma_2)\mathcal{K}^T \\ * &=& \Sigma_1 + \mathcal{K}\Sigma_3\mathcal{K}^T - * \mathcal{K}\Sigma_2\mathcal{K}^T\,. * \f} */ p3.cov() = p1.cov(); A1 = K; trmm(1.0, q2.std(), A1, 'R', 'U', 'T'); syrk(1.0, A1, 1.0, p3.cov(), 'U'); A2 = K; trmm(1.0, p2.std(), A2, 'R', 'U', 'T'); syrk(-1.0, A2, 1.0, p3.cov(), 'U'); /* make sure correct log-variables set */ p3.setLogs(p2.getLogs()); p3.init(); // redo precalculations }
// In-place triangular matrix product B <- A*B, forwarded to the raw CBLAS-style
// trmm overload. A must be square and its order must equal the row count of B.
// The template flags `upper` and `unit` select which triangle of A is used and
// whether its diagonal is treated as unit.
void trmm(
	matrix_expression<MatA> const& A,
	matrix_expression<MatB>& B,
	boost::mpl::true_
){
	SIZE_CHECK(A().size1() == A().size2());
	SIZE_CHECK(A().size2() == B().size1());

	std::size_t const order = A().size1();
	std::size_t const columns = B().size2();

	CBLAS_ORDER const orderA = static_cast<CBLAS_ORDER>(storage_order<typename MatA::orientation>::value);
	CBLAS_ORDER const orderB = static_cast<CBLAS_ORDER>(storage_order<typename MatB::orientation>::value);

	// When A and B are stored in different orders, the call is issued in B's
	// order: A is then read as its transpose, so the transpose flag is set and
	// the referenced triangle is flipped to compensate.
	bool const mixedOrder = (orderA != orderB);
	CBLAS_TRANSPOSE const transA = mixedOrder ? CblasTrans : CblasNoTrans;
	CBLAS_UPLO const triangle = (upper != mixedOrder) ? CblasUpper : CblasLower;
	CBLAS_DIAG const diagonal = unit ? CblasUnit : CblasNonUnit;

	trmm(orderB, CblasLeft, triangle, transA, diagonal,
		static_cast<int>(order), static_cast<int>(columns),
		traits::storage(A),
		traits::leading_dimension(A),
		traits::storage(B),
		traits::leading_dimension(B)
	);
}
void trmm(enum AMPBLAS_ORDER order, enum AMPBLAS_SIDE side, enum AMPBLAS_UPLO uplo, enum AMPBLAS_TRANSPOSE transa, enum AMPBLAS_DIAG diag, int m, int n, value_type alpha, const value_type* a, int lda, value_type* b, int ldb) { // recursive order adjustment if (order == AmpblasRowMajor) { trmm(AmpblasColMajor, side == AmpblasLeft ? AmpblasRight : AmpblasLeft, uplo == AmpblasUpper ? AmpblasLower : AmpblasUpper, transa, diag, m, n, alpha, a, lda, b, ldb); return; } // quick return if (m == 0 && n == 0) return; // derived parameters int k = (side == AmpblasLeft ? m : n); // argument check if (m < 0) argument_error("trmm", 6); if (n < 0) argument_error("trmm", 7); if (a == nullptr) argument_error("trmm", 9); if (lda < k) argument_error("trmm", 10); if (b == nullptr) argument_error("trmm", 11); if (ldb < m) argument_error("trmm", 12); // create views auto a_mat = make_matrix_view(k, k, a, lda); auto b_mat = make_matrix_view(m, n, b, ldb); auto b_mat_const = make_matrix_view(m, n, const_cast<const value_type*>(b), ldb); // fill with zeros if alpha is zero if (alpha == value_type()) { ampblas::_detail::fill(get_current_accelerator_view(), b_mat.extent, value_type(), b_mat); return; } // workspace concurrency::array<value_type,2> c(n,m); concurrency::array_view<value_type,2> c_mat(c); c_mat.discard_data(); // forward to tuning routine ampblas::trmm(get_current_accelerator_view(), cast(side), cast(uplo), cast(transa), cast(diag), m, n, alpha, a_mat, b_mat_const, c_mat); // copy workspace to answer copy(c_mat, b_mat); }
// Value-returning form of trmm: creates a fresh T using a's allocator, lets
// the pointer-taking trmm overload (declared elsewhere) fill it, and returns
// it by value.
T Linalg<T, H>::trmm(
    const T &a,
    const value_type &alpha,
    Side side,
    Uplo uplo,
    Diag diag)
{
    T result(a.allocator());
    trmm(a, &result, alpha, side, uplo, diag);
    return result;
}
// Mutable-output form of trmm: forwards to the overload that takes b as
// `const T&`, then strips the const from that overload's result so the caller
// gets back a mutable reference.
T& Linalg<T, H>::trmm(
    const T &a,
    T &b,
    const value_type &alpha,
    Side side,
    Uplo uplo,
    Diag diag)
{
    // Viewing b as const selects the const-reference overload rather than
    // recursing into this one.
    const T &b_as_const = b;
    const T &product = trmm(a, b_as_const, alpha, side, uplo, diag);
    return const_cast< T& >(product);
}