//! multiplication of two diagmat() expressions;
//! only the overlapping part of the two main diagonals contributes to the result
inline
Mat< typename promote_type<typename T1::elem_type, typename T2::elem_type>::result >
operator*
  (const Op<T1, op_diagmat>& X, const Op<T2, op_diagmat>& Y)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT1;
  typedef typename T2::elem_type eT2;
  
  typedef typename promote_type<eT1,eT2>::result out_eT;
  
  promote_type<eT1,eT2>::check();
  
  const diagmat_proxy<T1> PA(X.m);
  const diagmat_proxy<T2> PB(Y.m);
  
  arma_debug_assert_mul_size(PA.n_rows, PA.n_cols, PB.n_rows, PB.n_cols, "matrix multiplication");
  
  Mat<out_eT> result(PA.n_rows, PB.n_cols, fill::zeros);
  
  // length of each stored diagonal (operands may be non-square)
  const uword len_A = (std::min)(PA.n_rows, PA.n_cols);
  const uword len_B = (std::min)(PB.n_rows, PB.n_cols);
  
  const uword n_diag = (std::min)(len_A, len_B);
  
  for(uword k=0; k < n_diag; ++k)
    {
    // each element is promoted to the common element type before multiplication
    result.at(k,k) = upgrade_val<eT1,eT2>::apply( PA[k] ) * upgrade_val<eT1,eT2>::apply( PB[k] );
    }
  
  return result;
  }
//! multiplication of two diagmat() expressions (legacy u32/n_elem variant);
//! both diagonals are treated as having n_elem entries, so the result is square
arma_inline
Mat< typename promote_type<typename T1::elem_type, typename T2::elem_type>::result >
operator*
  (const Op<T1, op_diagmat>& X, const Op<T2, op_diagmat>& Y)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT1;
  typedef typename T2::elem_type eT2;
  
  typedef typename promote_type<eT1,eT2>::result out_eT;
  
  promote_type<eT1,eT2>::check();
  
  const diagmat_proxy<T1> PA(X.m);
  const diagmat_proxy<T2> PB(Y.m);
  
  arma_debug_assert_mul_size(PA.n_elem, PA.n_elem, PB.n_elem, PB.n_elem, "matrix multiply");
  
  const u32 n_diag = PA.n_elem;
  
  Mat<out_eT> result(n_diag, n_diag);
  result.zeros();
  
  for(u32 k=0; k < n_diag; ++k)
    {
    // elementwise product of the two diagonals, promoted to the common type
    result.at(k,k) = upgrade_val<eT1,eT2>::apply( PA[k] ) * upgrade_val<eT1,eT2>::apply( PB[k] );
    }
  
  return result;
  }
//! out = X*Y, where X and Y have different element types;
//! the result uses the promoted (common) element type
inline
void
glue_times::apply_mixed(Mat<typename promote_type<eT1,eT2>::result>& out, const Mat<eT1>& X, const Mat<eT2>& Y)
  {
  arma_extra_debug_sigprint();
  
  typedef typename promote_type<eT1,eT2>::result out_eT;
  
  arma_debug_assert_mul_size(X, Y, "matrix multiply");
  
  // size the output first, then delegate the actual work to the mixed-type gemm
  out.set_size(X.n_rows, Y.n_cols);
  
  gemm_mixed<>::apply(out, X, Y);
  }
// Compute trace(A*B) with both operands accessed through a Proxy, without
// forming the full product: only the diagonal elements of A*B are computed
// and summed.
arma_hot
inline
typename T1::elem_type
trace_mul_proxy(const T1& XA, const T2& XB)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT;
  
  const Proxy<T1> PA(XA);
  const Proxy<T2> PB(XB);
  
  // if the second operand is stored as a plain Mat, delegate to the variant
  // that reads B via direct column memory pointers instead of proxy element access
  if(is_Mat<typename Proxy<T2>::stored_type>::value == true)
    {
    return trace_mul_unwrap(PA.Q, PB.Q);
    }
  
  arma_debug_assert_mul_size(PA.get_n_rows(), PA.get_n_cols(), PB.get_n_rows(), PB.get_n_cols(), "matrix multiply");
  
  // the product A*B must be square for trace() to make sense
  arma_debug_check( (PA.get_n_rows() != PB.get_n_cols()), "trace(): matrix must be square sized" );
  
  const uword N1 = PA.get_n_rows();  // equivalent to PB.get_n_cols(), due to square size requirements
  const uword N2 = PA.get_n_cols();  // equivalent to PB.get_n_rows(), due to matrix multiplication requirements
  
  eT val = eT(0);
  
  for(uword i=0; i<N1; ++i)
    {
    // diagonal element (i,i) of A*B is the dot product of row i of A with
    // column i of B; the loop is 2-way unrolled into two independent accumulators
    eT acc1 = eT(0);
    eT acc2 = eT(0);
    
    uword j,k;
    
    for(j=0, k=1; k < N2; j+=2, k+=2)
      {
      const eT tmp_j = PB.at(j,i);
      const eT tmp_k = PB.at(k,i);
      
      acc1 += PA.at(i,j) * tmp_j;
      acc2 += PA.at(i,k) * tmp_k;
      }
    
    // pick up the trailing element when N2 is odd
    if(j < N2)
      {
      acc1 += PA.at(i,j) * PB.at(j,i);
      }
    
    val += (acc1 + acc2);
    }
  
  return val;
  }
// Compute trace(A*B) where B is unwrapped to a plain Mat, so its columns can be
// read via raw memory pointers; only the diagonal of A*B is computed and summed.
arma_hot
inline
typename T1::elem_type
trace_mul_unwrap(const T1& XA, const T2& XB)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT;
  
  const Proxy<T1> PA(XA);
  
  const unwrap<T2> tmpB(XB);
  const Mat<eT>& B = tmpB.M;
  
  arma_debug_assert_mul_size(PA.get_n_rows(), PA.get_n_cols(), B.n_rows, B.n_cols, "matrix multiply");
  
  // the product A*B must be square for trace() to make sense
  arma_debug_check( (PA.get_n_rows() != B.n_cols), "trace(): matrix must be square sized" );
  
  const uword N1 = PA.get_n_rows();  // equivalent to B.n_cols, due to square size requirements
  const uword N2 = PA.get_n_cols();  // equivalent to B.n_rows, due to matrix multiplication requirements
  
  eT val = eT(0);
  
  for(uword i=0; i<N1; ++i)
    {
    // direct pointer to column i of B (contiguous storage)
    const eT* B_colmem = B.colptr(i);
    
    // dot product of row i of A with column i of B,
    // 2-way unrolled into two independent accumulators
    eT acc1 = eT(0);
    eT acc2 = eT(0);
    
    uword j,k;
    
    for(j=0, k=1; k < N2; j+=2, k+=2)
      {
      const eT tmp_j = B_colmem[j];
      const eT tmp_k = B_colmem[k];
      
      acc1 += PA.at(i,j) * tmp_j;
      acc2 += PA.at(i,k) * tmp_k;
      }
    
    // pick up the trailing element when N2 is odd
    if(j < N2)
      {
      acc1 += PA.at(i,j) * B_colmem[j];
      }
    
    val += (acc1 + acc2);
    }
  
  return val;
  }
// out = out * B, done in-place (one row at a time) when possible,
// otherwise falling back to an explicit copy of 'out'.
inline
void
glue_times::apply_inplace(Mat<typename T1::elem_type>& out, const T1& X)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT;
  
  const unwrap_check<T1> tmp(X, out);
  const Mat<eT>& B = tmp.M;
  
  arma_debug_assert_mul_size(out, B, "matrix multiply");
  
  // the multiplication size check above requires out.n_cols == B.n_rows;
  // combined with out.n_cols == B.n_cols below, B must be square, so the
  // result has the same size as 'out' and can be computed row by row in place
  if(out.n_cols == B.n_cols)
    {
    // NOTE: this podarray deliberately shadows the unwrap_check object of the
    // same name; the reference 'B' was bound before the shadowing and stays valid
    podarray<eT> tmp(out.n_cols);
    
    eT* tmp_rowdata = tmp.memptr();
    
    for(u32 out_row=0; out_row < out.n_rows; ++out_row)
      {
      // buffer the current row of 'out', as it is overwritten below
      for(u32 out_col=0; out_col < out.n_cols; ++out_col)
        {
        tmp_rowdata[out_col] = out.at(out_row,out_col);
        }
      
      // dot product of the buffered row with each column of B
      for(u32 B_col=0; B_col < B.n_cols; ++B_col)
        {
        const eT* B_coldata = B.colptr(B_col);
        
        eT val = eT(0);
        for(u32 i=0; i < B.n_rows; ++i)
          {
          val += tmp_rowdata[i] * B_coldata[i];
          }
        
        out.at(out_row,B_col) = val;
        }
      }
    }
  else
    {
    // result size differs from 'out': copy 'out' and do a standard multiply
    const Mat<eT> tmp(out);
    
    glue_times::apply(out, tmp, B, eT(1), false, false, false);
    }
  }
//! multiplication of a dense matrix/expression by a sparse matrix/expression;
//! enabled only when both operands share the same element type
inline
typename
enable_if2
  <
  (is_arma_type<T1>::value && is_arma_sparse_type<T2>::value && is_same_type<typename T1::elem_type, typename T2::elem_type>::value),
  Mat<typename T1::elem_type>
  >::result
operator*
  (
  const T1& x,
  const T2& y
  )
  {
  arma_extra_debug_sigprint();
  
  const Proxy<T1>   dense_proxy(x);
  const SpProxy<T2> sparse_proxy(y);
  
  arma_debug_assert_mul_size(dense_proxy.get_n_rows(), dense_proxy.get_n_cols(), sparse_proxy.get_n_rows(), sparse_proxy.get_n_cols(), "matrix multiplication");
  
  Mat<typename T1::elem_type> result(dense_proxy.get_n_rows(), sparse_proxy.get_n_cols());
  result.zeros();
  
  if( (dense_proxy.get_n_elem() > 0) && (sparse_proxy.get_n_nonzero() > 0) )
    {
    const uword result_n_rows = result.n_rows;
    
    typename SpProxy<T2>::const_iterator_type nz_it  = sparse_proxy.begin();
    typename SpProxy<T2>::const_iterator_type nz_end = sparse_proxy.end();
    
    // each nonzero y(j,k) contributes x(:,j) * y(j,k) to column k of the result
    for(; nz_it != nz_end; ++nz_it)
      {
      const uword j = nz_it.row();
      const uword k = nz_it.col();
      
      for(uword row = 0; row < result_n_rows; ++row)
        {
        result.at(row, k) += dense_proxy.at(row, j) * (*nz_it);
        }
      }
    }
  
  return result;
  }
inline void glue_mixed_times::apply(Mat<typename eT_promoter<T1,T2>::eT>& out, const mtGlue<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_times>& X) { arma_extra_debug_sigprint(); typedef typename T1::elem_type eT1; typedef typename T2::elem_type eT2; // TODO: extend the unwrap_check framework to handle mixed matrix types const unwrap<T1> tmp1(X.A); const unwrap<T2> tmp2(X.B); const Mat<eT1>& A = tmp1.M; const Mat<eT2>& B = tmp2.M; const bool A_is_alias = ( ((void *)&out) == ((void *)&A) ); const bool B_is_alias = ( ((void *)&out) == ((void *)&B) ); const Mat<eT1>* AA_ptr = A_is_alias ? new Mat<eT1>(A) : 0; const Mat<eT2>* BB_ptr = B_is_alias ? new Mat<eT2>(B) : 0; const Mat<eT1>& AA = A_is_alias ? *AA_ptr : A; const Mat<eT2>& BB = B_is_alias ? *BB_ptr : B; arma_debug_assert_mul_size(AA, BB, "matrix multiplication"); out.set_size(AA.n_rows, BB.n_cols); gemm_mixed<>::apply(out, AA, BB); if(A_is_alias == true) { delete AA_ptr; } if(B_is_alias == true) { delete BB_ptr; } }
inline void glue_mixed_times::apply(Mat<typename eT_promoter<T1,T2>::eT>& out, const mtGlue<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_times>& X) { arma_extra_debug_sigprint(); typedef typename T1::elem_type eT1; typedef typename T2::elem_type eT2; const unwrap_check_mixed<T1> tmp1(X.A, out); const unwrap_check_mixed<T2> tmp2(X.B, out); const Mat<eT1>& A = tmp1.M; const Mat<eT2>& B = tmp2.M; arma_debug_assert_mul_size(A, B, "matrix multiplication"); out.set_size(A.n_rows, B.n_cols); gemm_mixed<>::apply(out, A, B); }
//! trace(A*B): computed without forming the full product, by summing
//! the diagonal elements of A*B (dot products of rows of A with columns of B)
inline
arma_warn_unused
typename T1::elem_type
trace(const Glue<T1, T2, glue_times>& X)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT;
  
  const unwrap<T1> UA(X.A);
  const unwrap<T2> UB(X.B);
  
  const Mat<eT>& A = UA.M;
  const Mat<eT>& B = UB.M;
  
  arma_debug_assert_mul_size(A, B, "matrix multiply");
  
  arma_debug_check( (A.n_rows != B.n_cols), "trace(): matrix must be square sized" );
  
  const uword n_rows = A.n_rows;
  const uword n_cols = A.n_cols;
  
  eT val = eT(0);
  
  for(uword i=0; i < n_rows; ++i)
    {
    // direct pointer to column i of B (contiguous storage)
    const eT* B_col = B.colptr(i);
    
    eT acc = eT(0);
    
    for(uword j=0; j < n_cols; ++j)
      {
      acc += A.at(i,j) * B_col[j];
      }
    
    val += acc;
    }
  
  return val;
  }
// out = A*B, where at least one operand is wrapped in diagmat();
// strip_diagmat detects which operand(s) carry the wrapping, so the diagonal
// structure can be exploited instead of doing a full matrix multiply.
arma_hot
inline
void
glue_times_diag::apply(Mat<typename T1::elem_type>& out, const Glue<T1, T2, glue_times_diag>& X)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT;
  
  const strip_diagmat<T1> S1(X.A);
  const strip_diagmat<T2> S2(X.B);
  
  typedef typename strip_diagmat<T1>::stored_type T1_stripped;
  typedef typename strip_diagmat<T2>::stored_type T2_stripped;
  
  if( (S1.do_diagmat == true) && (S2.do_diagmat == false) )
    {
    // diagmat(A) * B: scale row 'row' of B by the diagonal element A[row];
    // the _check proxies/unwrappers guard against 'out' aliasing an operand
    const diagmat_proxy_check<T1_stripped> A(S1.M, out);
    
    const unwrap_check<T2> tmp(X.B, out);
    const Mat<eT>& B = tmp.M;
    
    arma_debug_assert_mul_size(A.n_elem, A.n_elem, B.n_rows, B.n_cols, "matrix multiply");
    
    out.set_size(A.n_elem, B.n_cols);
    
    for(u32 col=0; col<B.n_cols; ++col)
      {
      eT* out_coldata = out.colptr(col);
      const eT* B_coldata = B.colptr(col);
      
      for(u32 row=0; row<B.n_rows; ++row)
        {
        out_coldata[row] = A[row] * B_coldata[row];
        }
      }
    }
  else
  if( (S1.do_diagmat == false) && (S2.do_diagmat == true) )
    {
    // A * diagmat(B): scale column 'col' of A by the diagonal element B[col]
    const unwrap_check<T1> tmp(X.A, out);
    const Mat<eT>& A = tmp.M;
    
    const diagmat_proxy_check<T2_stripped> B(S2.M, out);
    
    arma_debug_assert_mul_size(A.n_rows, A.n_cols, B.n_elem, B.n_elem, "matrix multiply");
    
    out.set_size(A.n_rows, B.n_elem);
    
    for(u32 col=0; col<A.n_cols; ++col)
      {
      const eT val = B[col];
      
      eT* out_coldata = out.colptr(col);
      const eT* A_coldata = A.colptr(col);
      
      for(u32 row=0; row<A.n_rows; ++row)
        {
        out_coldata[row] = A_coldata[row] * val;
        }
      }
    }
  else
  if( (S1.do_diagmat == true) && (S2.do_diagmat == true) )
    {
    // diagmat(A) * diagmat(B): the result is diagonal,
    // holding the elementwise products of the two diagonals
    const diagmat_proxy_check<T1_stripped> A(S1.M, out);
    const diagmat_proxy_check<T2_stripped> B(S2.M, out);
    
    arma_debug_assert_mul_size(A.n_elem, A.n_elem, B.n_elem, B.n_elem, "matrix multiply");
    
    out.zeros(A.n_elem, A.n_elem);
    
    for(u32 i=0; i<A.n_elem; ++i)
      {
      out.at(i,i) = A[i] * B[i];
      }
    }
  }
// out = alpha * op(A) * op(B), where op() is optional transposition selected
// by do_trans_A / do_trans_B, and alpha is applied only when use_alpha is true.
// Vector-shaped operands are routed to gemv, everything else to gemm.
// NOTE(review): based on usage here, the gemv<...> template arguments appear to
// select (transpose, use_alpha, use_beta) and the gemm<...> arguments
// (trans_A, trans_B, use_alpha, use_beta) -- confirm against their definitions.
arma_hot
inline
void
glue_times::apply
  (
  Mat<eT>& out,
  const Mat<eT>& A,
  const Mat<eT>& B,
  const eT alpha,
  const bool do_trans_A,
  const bool do_trans_B,
  const bool use_alpha
  )
  {
  arma_extra_debug_sigprint();
  
  arma_debug_assert_mul_size(A, B, do_trans_A, do_trans_B, "matrix multiply");
  
  // result dimensions depend on which operands are transposed
  const u32 final_n_rows = (do_trans_A == false) ? A.n_rows : A.n_cols;
  const u32 final_n_cols = (do_trans_B == false) ? B.n_cols : B.n_rows;
  
  out.set_size(final_n_rows, final_n_cols);
  
  // TODO: thoroughly test all combinations
  
  // the eight branches below enumerate every (do_trans_A, do_trans_B, use_alpha)
  // combination; within each branch, an effectively-vector operand (single row
  // or single column, after accounting for transposition) is dispatched to gemv
  
  if( (do_trans_A == false) && (do_trans_B == false) && (use_alpha == false) )
    {
    if(A.n_rows == 1)      { gemv<true, false, false>::apply(out.memptr(), B, A.memptr()); }
    else if(B.n_cols == 1) { gemv<false, false, false>::apply(out.memptr(), A, B.memptr()); }
    else                   { gemm<false, false, false, false>::apply(out, A, B); }
    }
  else
  if( (do_trans_A == false) && (do_trans_B == false) && (use_alpha == true) )
    {
    if(A.n_rows == 1)      { gemv<true, true, false>::apply(out.memptr(), B, A.memptr(), alpha); }
    else if(B.n_cols == 1) { gemv<false, true, false>::apply(out.memptr(), A, B.memptr(), alpha); }
    else                   { gemm<false, false, true, false>::apply(out, A, B, alpha); }
    }
  else
  if( (do_trans_A == true) && (do_trans_B == false) && (use_alpha == false) )
    {
    if(A.n_cols == 1)      { gemv<true, false, false>::apply(out.memptr(), B, A.memptr()); }
    else if(B.n_cols == 1) { gemv<true, false, false>::apply(out.memptr(), A, B.memptr()); }
    else                   { gemm<true, false, false, false>::apply(out, A, B); }
    }
  else
  if( (do_trans_A == true) && (do_trans_B == false) && (use_alpha == true) )
    {
    if(A.n_cols == 1)      { gemv<true, true, false>::apply(out.memptr(), B, A.memptr(), alpha); }
    else if(B.n_cols == 1) { gemv<true, true, false>::apply(out.memptr(), A, B.memptr(), alpha); }
    else                   { gemm<true, false, true, false>::apply(out, A, B, alpha); }
    }
  else
  if( (do_trans_A == false) && (do_trans_B == true) && (use_alpha == false) )
    {
    if(A.n_rows == 1)      { gemv<false, false, false>::apply(out.memptr(), B, A.memptr()); }
    else if(B.n_rows == 1) { gemv<false, false, false>::apply(out.memptr(), A, B.memptr()); }
    else                   { gemm<false, true, false, false>::apply(out, A, B); }
    }
  else
  if( (do_trans_A == false) && (do_trans_B == true) && (use_alpha == true) )
    {
    if(A.n_rows == 1)      { gemv<false, true, false>::apply(out.memptr(), B, A.memptr(), alpha); }
    else if(B.n_rows == 1) { gemv<false, true, false>::apply(out.memptr(), A, B.memptr(), alpha); }
    else                   { gemm<false, true, true, false>::apply(out, A, B, alpha); }
    }
  else
  if( (do_trans_A == true) && (do_trans_B == true) && (use_alpha == false) )
    {
    if(A.n_cols == 1)      { gemv<false, false, false>::apply(out.memptr(), B, A.memptr()); }
    else if(B.n_rows == 1) { gemv<true, false, false>::apply(out.memptr(), A, B.memptr()); }
    else                   { gemm<true, true, false, false>::apply(out, A, B); }
    }
  else
  if( (do_trans_A == true) && (do_trans_B == true) && (use_alpha == true) )
    {
    if(A.n_cols == 1)      { gemv<false, true, false>::apply(out.memptr(), B, A.memptr(), alpha); }
    else if(B.n_rows == 1) { gemv<true, true, false>::apply(out.memptr(), A, B.memptr(), alpha); }
    else                   { gemm<true, true, true, false>::apply(out, A, B, alpha); }
    }
  }
// out += A*B when sign > 0, or out -= A*B when sign < 0, with optional
// transposition and scalar factors absorbed from the sub-expressions.
arma_hot
inline
void
glue_times::apply_inplace_plus(Mat<typename T1::elem_type>& out, const Glue<T1, T2, glue_times>& X, const s32 sign)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT;
  
  // partial_unwrap_check extracts transposition flags and scalar multipliers
  // from the sub-expressions, and guards against aliasing with 'out'
  const partial_unwrap_check<T1> tmp1(X.A, out);
  const partial_unwrap_check<T2> tmp2(X.B, out);
  
  const Mat<eT>& A = tmp1.M;
  const Mat<eT>& B = tmp2.M;
  
  // fold the extracted scalars and the subtraction sign into a single alpha
  const eT alpha = tmp1.val * tmp2.val * ( (sign > s32(0)) ? eT(1) : eT(-1) );
  
  const bool do_trans_A = tmp1.do_trans;
  const bool do_trans_B = tmp2.do_trans;
  // alpha must be applied if either operand carried a scalar, or when subtracting
  const bool use_alpha = tmp1.do_times | tmp2.do_times | (sign < s32(0));
  
  arma_debug_assert_mul_size(A, B, do_trans_A, do_trans_B, "matrix multiply");
  
  const u32 result_n_rows = (do_trans_A == false) ? A.n_rows : A.n_cols;
  const u32 result_n_cols = (do_trans_B == false) ? B.n_cols : B.n_rows;
  
  // the product size must match the existing size of 'out',
  // as the product is accumulated into 'out' in-place
  arma_assert_same_size(out.n_rows, out.n_cols, result_n_rows, result_n_cols, "matrix addition");
  
  // dispatch over all (do_trans_A, do_trans_B, use_alpha) combinations;
  // every call below passes beta = eT(1), i.e. the product is added to 'out'
  // rather than overwriting it
  if( (do_trans_A == false) && (do_trans_B == false) && (use_alpha == false) )
    {
    if(A.n_rows == 1)      { gemv<true, false, true>::apply(out.memptr(), B, A.memptr(), alpha, eT(1)); }
    else if(B.n_cols == 1) { gemv<false, false, true>::apply(out.memptr(), A, B.memptr(), alpha, eT(1)); }
    else                   { gemm<false, false, false, true>::apply(out, A, B, alpha, eT(1)); }
    }
  else
  if( (do_trans_A == false) && (do_trans_B == false) && (use_alpha == true) )
    {
    if(A.n_rows == 1)      { gemv<true, true, true>::apply(out.memptr(), B, A.memptr(), alpha, eT(1)); }
    else if(B.n_cols == 1) { gemv<false, true, true>::apply(out.memptr(), A, B.memptr(), alpha, eT(1)); }
    else                   { gemm<false, false, true, true>::apply(out, A, B, alpha, eT(1)); }
    }
  else
  if( (do_trans_A == true) && (do_trans_B == false) && (use_alpha == false) )
    {
    if(A.n_cols == 1)      { gemv<true, false, true>::apply(out.memptr(), B, A.memptr(), alpha, eT(1)); }
    else if(B.n_cols == 1) { gemv<true, false, true>::apply(out.memptr(), A, B.memptr(), alpha, eT(1)); }
    else                   { gemm<true, false, false, true>::apply(out, A, B, alpha, eT(1)); }
    }
  else
  if( (do_trans_A == true) && (do_trans_B == false) && (use_alpha == true) )
    {
    if(A.n_cols == 1)      { gemv<true, true, true>::apply(out.memptr(), B, A.memptr(), alpha, eT(1)); }
    else if(B.n_cols == 1) { gemv<true, true, true>::apply(out.memptr(), A, B.memptr(), alpha, eT(1)); }
    else                   { gemm<true, false, true, true>::apply(out, A, B, alpha, eT(1)); }
    }
  else
  if( (do_trans_A == false) && (do_trans_B == true) && (use_alpha == false) )
    {
    if(A.n_rows == 1)      { gemv<false, false, true>::apply(out.memptr(), B, A.memptr(), alpha, eT(1)); }
    else if(B.n_rows == 1) { gemv<false, false, true>::apply(out.memptr(), A, B.memptr(), alpha, eT(1)); }
    else                   { gemm<false, true, false, true>::apply(out, A, B, alpha, eT(1)); }
    }
  else
  if( (do_trans_A == false) && (do_trans_B == true) && (use_alpha == true) )
    {
    if(A.n_rows == 1)      { gemv<false, true, true>::apply(out.memptr(), B, A.memptr(), alpha, eT(1)); }
    else if(B.n_rows == 1) { gemv<false, true, true>::apply(out.memptr(), A, B.memptr(), alpha, eT(1)); }
    else                   { gemm<false, true, true, true>::apply(out, A, B, alpha, eT(1)); }
    }
  else
  if( (do_trans_A == true) && (do_trans_B == true) && (use_alpha == false) )
    {
    if(A.n_cols == 1)      { gemv<false, false, true>::apply(out.memptr(), B, A.memptr(), alpha, eT(1)); }
    else if(B.n_rows == 1) { gemv<true, false, true>::apply(out.memptr(), A, B.memptr(), alpha, eT(1)); }
    else                   { gemm<true, true, false, true>::apply(out, A, B, alpha, eT(1)); }
    }
  else
  if( (do_trans_A == true) && (do_trans_B == true) && (use_alpha == true) )
    {
    if(A.n_cols == 1)      { gemv<false, true, true>::apply(out.memptr(), B, A.memptr(), alpha, eT(1)); }
    else if(B.n_rows == 1) { gemv<true, true, true>::apply(out.memptr(), A, B.memptr(), alpha, eT(1)); }
    else                   { gemm<true, true, true, true>::apply(out, A, B, alpha, eT(1)); }
    }
  }
// c = pa * pb (sparse * sparse), assuming 'c' does not alias either operand.
// Two passes: "SYMBMM" estimates an upper bound on the column structure of c,
// then "NUMBMM" computes the actual values and compacts the storage.
// Rows touched in the current column are tracked as an intrusive singly-linked
// list threaded through 'index' (sentinel value x_n_rows = unused;
// list terminator = x_n_rows + 1), with 'last_ind' as the list head.
arma_hot
inline
void
spglue_times::apply_noalias(SpMat<eT>& c, const SpProxy<T1>& pa, const SpProxy<T2>& pb)
  {
  arma_extra_debug_sigprint();
  
  const uword x_n_rows = pa.get_n_rows();
  const uword x_n_cols = pa.get_n_cols();
  const uword y_n_rows = pb.get_n_rows();
  const uword y_n_cols = pb.get_n_cols();
  
  arma_debug_assert_mul_size(x_n_rows, x_n_cols, y_n_rows, y_n_cols, "matrix multiplication");
  
  // First we must determine the structure of the new matrix (column pointers).
  // This follows the algorithm described in 'Sparse Matrix Multiplication
  // Package (SMMP)' (R.E. Bank and C.C. Douglas, 2001). Their description of
  // "SYMBMM" does not include anything about memory allocation. In addition it
  // does not consider that there may be elements which space may be allocated
  // for but which evaluate to zero anyway. So we have to modify the algorithm
  // to work that way. For the "SYMBMM" implementation we will not determine
  // the row indices but instead just the column pointers.
  
  //SpMat<typename T1::elem_type> c(x_n_rows, y_n_cols);
  // Initializes col_ptrs to 0.
  c.zeros(x_n_rows, y_n_cols);
  
  //if( (pa.get_n_elem() == 0) || (pb.get_n_elem() == 0) )
  if( (pa.get_n_nonzero() == 0) || (pb.get_n_nonzero() == 0) )
    {
    return;
    }
  
  // Auxiliary storage which denotes when items have been found.
  podarray<uword> index(x_n_rows);
  index.fill(x_n_rows); // Fill with invalid links.
  
  typename SpProxy<T2>::const_iterator_type y_it  = pb.begin();
  typename SpProxy<T2>::const_iterator_type y_end = pb.end();
  
  // SYMBMM: calculate column pointers for resultant matrix to obtain a good
  // upper bound on the number of nonzero elements.
  uword cur_col_length = 0;
  uword last_ind = x_n_rows + 1;
  do
    {
    const uword y_it_row = y_it.row();
    
    // Look through the column that this point (*y_it) could affect.
    typename SpProxy<T1>::const_iterator_type x_it = pa.begin_col(y_it_row);
    
    while(x_it.col() == y_it_row)
      {
      // A point at x(i, j) and y(j, k) implies a point at c(i, k).
      // Count each result row at most once per column, using the linked list.
      if(index[x_it.row()] == x_n_rows)
        {
        index[x_it.row()] = last_ind;
        last_ind = x_it.row();
        ++cur_col_length;
        }
      
      ++x_it;
      }
    
    const uword old_col = y_it.col();
    ++y_it;
    
    // See if column incremented.
    if(old_col != y_it.col())
      {
      // Set column pointer (this is not a cumulative count; that is done later).
      access::rw(c.col_ptrs[old_col + 1]) = cur_col_length;
      cur_col_length = 0;
      
      // Return index markers to zero. Use last_ind for traversal.
      while(last_ind != x_n_rows + 1)
        {
        const uword tmp = index[last_ind];
        index[last_ind] = x_n_rows;
        last_ind = tmp;
        }
      }
    }
  while(y_it != y_end);
  
  // Accumulate column pointers.
  for(uword i = 0; i < c.n_cols; ++i)
    {
    access::rw(c.col_ptrs[i + 1]) += c.col_ptrs[i];
    }
  
  // Now that we know a decent bound on the number of nonzero elements, allocate
  // the memory and fill it.
  c.mem_resize(c.col_ptrs[c.n_cols]);
  
  // Now the implementation of the NUMBMM algorithm.
  uword cur_pos = 0; // Current position in c matrix.
  
  podarray<eT> sums(x_n_rows); // Partial sums.
  sums.zeros();
  
  // setting the size of 'sorted_indices' to x_n_rows is a better-than-nothing guess;
  // the correct minimum size is determined later
  podarray<uword> sorted_indices(x_n_rows);
  
  // last_ind is already set to x_n_rows, and cur_col_length is already set to 0.
  // We will loop through all columns as necessary.
  uword cur_col = 0;
  while(cur_col < c.n_cols)
    {
    // Skip to next column with elements in it.
    while((cur_col < c.n_cols) && (c.col_ptrs[cur_col] == c.col_ptrs[cur_col + 1]))
      {
      // Update current column pointer to actual number of nonzero elements up
      // to this point.
      access::rw(c.col_ptrs[cur_col]) = cur_pos;
      ++cur_col;
      }
    
    if(cur_col == c.n_cols)
      {
      break;
      }
    
    // Update current column pointer.
    access::rw(c.col_ptrs[cur_col]) = cur_pos;
    
    // Check all elements in this column.
    typename SpProxy<T2>::const_iterator_type y_col_it = pb.begin_col(cur_col);
    while(y_col_it.col() == cur_col)
      {
      // Check all elements in the column of the other matrix corresponding to
      // the row of this column.
      typename SpProxy<T1>::const_iterator_type x_col_it = pa.begin_col(y_col_it.row());
      
      const eT y_value = (*y_col_it);
      
      while(x_col_it.col() == y_col_it.row())
        {
        // A point at x(i, j) and y(j, k) implies a point at c(i, k).
        // Add to partial sum.
        const eT x_value = (*x_col_it);
        sums[x_col_it.row()] += (x_value * y_value);
        
        // Add point if it hasn't already been marked.
        if(index[x_col_it.row()] == x_n_rows)
          {
          index[x_col_it.row()] = last_ind;
          last_ind = x_col_it.row();
          }
        
        ++x_col_it;
        }
      
      ++y_col_it;
      }
    
    // Now sort the indices that were used in this column.
    //podarray<uword> sorted_indices(c.col_ptrs[cur_col + 1] - c.col_ptrs[cur_col]);
    sorted_indices.set_min_size(c.col_ptrs[cur_col + 1] - c.col_ptrs[cur_col]);
    // .set_min_size() can only enlarge the array to the specified size,
    // hence if we request a smaller size than already allocated,
    // no new memory allocation is done
    
    // Walk the linked list of touched rows, collecting genuinely nonzero entries
    // and resetting the markers back to the sentinel along the way.
    uword cur_index = 0;
    while(last_ind != x_n_rows + 1)
      {
      const uword tmp = last_ind;
      
      // Check that it wasn't a "fake" nonzero element.
      if(sums[tmp] != eT(0))
        {
        // Assign to next open position.
        sorted_indices[cur_index] = tmp;
        ++cur_index;
        }
      
      last_ind = index[tmp];
      index[tmp] = x_n_rows;
      }
    
    // Now sort the indices.
    if (cur_index != 0)
      {
      op_sort::direct_sort_ascending(sorted_indices.memptr(), cur_index);
      
      // Emit the sorted row indices and values, clearing 'sums' for reuse.
      for(uword k = 0; k < cur_index; ++k)
        {
        const uword row = sorted_indices[k];
        access::rw(c.row_indices[cur_pos]) = row;
        access::rw(c.values[cur_pos]) = sums[row];
        sums[row] = eT(0);
        ++cur_pos;
        }
      }
    
    // Move to next column.
    ++cur_col;
    }
  
  // Update last column pointer and resize to actual memory size.
  access::rw(c.col_ptrs[c.n_cols]) = cur_pos;
  c.mem_resize(cur_pos);
  }