// Product of two diagmat() expressions.
// The element types of both operands are promoted to a common type.  The
// result has A.n_rows x B.n_cols elements, is zero-filled, and only its
// leading diagonal entries are written.
inline
Mat< typename promote_type<typename T1::elem_type, typename T2::elem_type>::result >
operator*
(const Op<T1, op_diagmat>& X, const Op<T2, op_diagmat>& Y)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type lhs_eT;
  typedef typename T2::elem_type rhs_eT;
  
  typedef typename promote_type<lhs_eT,rhs_eT>::result result_eT;
  
  promote_type<lhs_eT,rhs_eT>::check();
  
  const diagmat_proxy<T1> lhs(X.m);
  const diagmat_proxy<T2> rhs(Y.m);
  
  arma_debug_assert_mul_size(lhs.n_rows, lhs.n_cols, rhs.n_rows, rhs.n_cols, "matrix multiplication");
  
  Mat<result_eT> out(lhs.n_rows, rhs.n_cols, fill::zeros);
  
  // number of diagonal entries each operand can supply
  const uword lhs_diag_len = (std::min)(lhs.n_rows, lhs.n_cols);
  const uword rhs_diag_len = (std::min)(rhs.n_rows, rhs.n_cols);
  
  // only the overlapping part of both diagonals contributes to the product
  const uword n_diag = (std::min)(lhs_diag_len, rhs_diag_len);
  
  uword d = 0;
  
  while(d < n_diag)
    {
    out.at(d,d) = upgrade_val<lhs_eT,rhs_eT>::apply( lhs[d] ) * upgrade_val<lhs_eT,rhs_eT>::apply( rhs[d] );
    
    ++d;
    }
  
  return out;
  }
예제 #2
0
// Product of two diagmat() expressions (variant based on n_elem / u32).
// Both operands are treated as square diagonal matrices with n_elem entries
// on the diagonal; the result is an N x N matrix, zeroed and then given the
// element-wise product of the two diagonals.
arma_inline
Mat< typename promote_type<typename T1::elem_type, typename T2::elem_type>::result >
operator*
(const Op<T1, op_diagmat>& X, const Op<T2, op_diagmat>& Y)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type lhs_eT;
  typedef typename T2::elem_type rhs_eT;
  
  typedef typename promote_type<lhs_eT,rhs_eT>::result result_eT;
  
  promote_type<lhs_eT,rhs_eT>::check();
  
  const diagmat_proxy<T1> lhs(X.m);
  const diagmat_proxy<T2> rhs(Y.m);
  
  arma_debug_assert_mul_size(lhs.n_elem, lhs.n_elem, rhs.n_elem, rhs.n_elem, "matrix multiply");
  
  const u32 n_diag = lhs.n_elem;
  
  Mat<result_eT> out(n_diag, n_diag);
  
  out.zeros();
  
  for(u32 d=0; d != n_diag; ++d)
    {
    out.at(d,d) = upgrade_val<lhs_eT,rhs_eT>::apply( lhs[d] ) * upgrade_val<lhs_eT,rhs_eT>::apply( rhs[d] );
    }
  
  return out;
  }
예제 #3
0
// Multiply two dense matrices whose element types differ.
//
// out : receives the product; resized to X.n_rows x Y.n_cols
// X   : left operand
// Y   : right operand
//
// The operand sizes are checked via arma_debug_assert_mul_size().
// Note that 'out' is resized before X and Y are read by gemm_mixed, and no
// alias check is performed here; callers are presumably expected to pass an
// 'out' that is distinct from both operands.
inline
void
glue_times::apply_mixed(Mat<typename promote_type<eT1,eT2>::result>& out, const Mat<eT1>& X, const Mat<eT2>& Y)
  {
  arma_extra_debug_sigprint();
  
  arma_debug_assert_mul_size(X,Y, "matrix multiply");
  
  out.set_size(X.n_rows,Y.n_cols);
  gemm_mixed<>::apply(out, X, Y);
  }
예제 #4
0
// Evaluate trace(XA * XB) without forming the product, reading both operands
// element by element through Proxy objects.
// If the second operand's proxy stores a plain Mat, the work is forwarded to
// trace_mul_unwrap(), which walks the columns of B via raw pointers.
arma_hot
inline
typename T1::elem_type
trace_mul_proxy(const T1& XA, const T2& XB)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT;
  
  const Proxy<T1> PA(XA);
  const Proxy<T2> PB(XB);
  
  if(is_Mat<typename Proxy<T2>::stored_type>::value)
    {
    return trace_mul_unwrap(PA.Q, PB.Q);
    }
  
  const uword n_diag  = PA.get_n_rows();   // rows of A; must equal the columns of B (square product)
  const uword n_inner = PA.get_n_cols();   // columns of A; must equal the rows of B (valid product)
  
  arma_debug_assert_mul_size(n_diag, n_inner, PB.get_n_rows(), PB.get_n_cols(), "matrix multiply");
  
  arma_debug_check( (n_diag != PB.get_n_cols()), "trace(): matrix must be square sized" );
  
  eT total = eT(0);
  
  for(uword i=0; i<n_diag; ++i)
    {
    // two independent accumulators: even inner indices and odd inner indices
    eT even_acc = eT(0);
    eT odd_acc  = eT(0);
    
    uword j = 0;
    
    while( (j+1) < n_inner )
      {
      const uword k = j+1;
      
      const eT B_j = PB.at(j,i);
      const eT B_k = PB.at(k,i);
      
      even_acc += PA.at(i,j) * B_j;
      odd_acc  += PA.at(i,k) * B_k;
      
      j += 2;
      }
    
    // leftover element when the inner dimension is odd
    if(j < n_inner)
      {
      even_acc += PA.at(i,j) * PB.at(j,i);
      }
    
    total += (even_acc + odd_acc);
    }
  
  return total;
  }
예제 #5
0
// Evaluate trace(XA * XB) without forming the product.
// The first operand is read through a Proxy; the second is unwrapped into a
// Mat so that each of its columns can be traversed through a raw pointer.
arma_hot
inline
typename T1::elem_type
trace_mul_unwrap(const T1& XA, const T2& XB)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT;
  
  const Proxy<T1>  PA(XA);
  const unwrap<T2> B_holder(XB);
  
  const Mat<eT>& B = B_holder.M;
  
  const uword n_diag  = PA.get_n_rows();   // rows of A; must equal B.n_cols (square product)
  const uword n_inner = PA.get_n_cols();   // columns of A; must equal B.n_rows (valid product)
  
  arma_debug_assert_mul_size(n_diag, n_inner, B.n_rows, B.n_cols, "matrix multiply");
  
  arma_debug_check( (n_diag != B.n_cols), "trace(): matrix must be square sized" );
  
  eT total = eT(0);
  
  for(uword i=0; i<n_diag; ++i)
    {
    const eT* B_col = B.colptr(i);
    
    // two independent accumulators: even inner indices and odd inner indices
    eT even_acc = eT(0);
    eT odd_acc  = eT(0);
    
    uword j = 0;
    
    while( (j+1) < n_inner )
      {
      const uword k = j+1;
      
      const eT B_j = B_col[j];
      const eT B_k = B_col[k];
      
      even_acc += PA.at(i,j) * B_j;
      odd_acc  += PA.at(i,k) * B_k;
      
      j += 2;
      }
    
    // leftover element when the inner dimension is odd
    if(j < n_inner)
      {
      even_acc += PA.at(i,j) * B_col[j];
      }
    
    total += (even_acc + odd_acc);
    }
  
  return total;
  }
예제 #6
0
// In-place multiplication: out = out * X.
// X is unwrapped with an alias check against 'out'.  When the product has the
// same number of columns as 'out', each row of 'out' is copied to a scratch
// buffer and then overwritten with its product; otherwise 'out' is copied and
// the general glue_times::apply() is used.
inline
void
glue_times::apply_inplace(Mat<typename T1::elem_type>& out, const T1& X)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT;
  
  const unwrap_check<T1> rhs_holder(X, out);
  const Mat<eT>& B = rhs_holder.M;
  
  arma_debug_assert_mul_size(out, B, "matrix multiply");
  
  if(out.n_cols != B.n_cols)
    {
    // the size of 'out' changes, so work from a copy of the left operand
    const Mat<eT> lhs_copy(out);
    glue_times::apply(out, lhs_copy, B, eT(1), false, false, false);
    return;
    }
  
  // same output size: process one row of 'out' at a time
  podarray<eT> row_buffer(out.n_cols);
  eT* saved_row = row_buffer.memptr();
  
  for(u32 r=0; r < out.n_rows; ++r)
    {
    // keep the original row, as it is overwritten below
    for(u32 c=0; c < out.n_cols; ++c)
      {
      saved_row[c] = out.at(r,c);
      }
    
    for(u32 c=0; c < B.n_cols; ++c)
      {
      const eT* B_col = B.colptr(c);
      
      eT dot = eT(0);
      
      for(u32 k=0; k < B.n_rows; ++k)
        {
        dot += saved_row[k] * B_col[k];
        }
      
      out.at(r,c) = dot;
      }
    }
  }
예제 #7
0
// Dense-expression times sparse-expression product (same element type),
// producing a dense matrix.
// The result starts out as zeros; each stored element y(r,c) of the sparse
// operand then adds x(:,r) * y(r,c) to column c of the result.
inline
typename
enable_if2
  <
  (is_arma_type<T1>::value && is_arma_sparse_type<T2>::value && is_same_type<typename T1::elem_type, typename T2::elem_type>::value),
  Mat<typename T1::elem_type>
  >::result
operator*
  (
  const T1& x,
  const T2& y
  )
  {
  arma_extra_debug_sigprint();
  
  typedef typename SpProxy<T2>::const_iterator_type sp_iterator;
  
  const   Proxy<T1> pa(x);
  const SpProxy<T2> pb(y);
  
  arma_debug_assert_mul_size(pa.get_n_rows(), pa.get_n_cols(), pb.get_n_rows(), pb.get_n_cols(), "matrix multiplication");
  
  Mat<typename T1::elem_type> result(pa.get_n_rows(), pb.get_n_cols());
  result.zeros();
  
  // nothing to accumulate when either operand contributes no elements
  if( (pa.get_n_elem() == 0) || (pb.get_n_nonzero() == 0) )
    {
    return result;
    }
  
  const uword n_result_rows = result.n_rows;
  
  const sp_iterator y_end = pb.end();
  
  for(sp_iterator y_it = pb.begin(); y_it != y_end; ++y_it)
    {
    for(uword r = 0; r < n_result_rows; ++r)
      {
      result.at(r, y_it.col()) += pa.at(r, y_it.row()) * (*y_it);
      }
    }
  
  return result;
  }
예제 #8
0
// Evaluate a mixed-element-type matrix product: out = X.A * X.B.
//
// out : destination matrix; resized to (rows of A) x (columns of B)
// X   : delayed-evaluation glue holding the two operands
//
// Both operands are unwrapped into matrices.  If an operand turns out to be
// the very same object as 'out' (address comparison), it is copied first,
// because 'out' is resized before the multiplication reads its inputs.
//
// The copies live in automatic storage rather than being allocated with
// new/delete, so they are released on every exit path, including when the
// size check or the resize throws.  An operand that is not aliased only costs
// a default-constructed (empty) matrix.
inline
void
glue_mixed_times::apply(Mat<typename eT_promoter<T1,T2>::eT>& out, const mtGlue<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_times>& X)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT1;
  typedef typename T2::elem_type eT2;
  
  // TODO: extend the unwrap_check framework to handle mixed matrix types
  
  const unwrap<T1> tmp1(X.A);
  const unwrap<T2> tmp2(X.B);
  
  const Mat<eT1>& A = tmp1.M;
  const Mat<eT2>& B = tmp2.M;
  
  // the matrix types may differ, hence the comparison of untyped addresses
  const bool A_is_alias = ( static_cast<const void*>(&out) == static_cast<const void*>(&A) );
  const bool B_is_alias = ( static_cast<const void*>(&out) == static_cast<const void*>(&B) );
  
  Mat<eT1> A_copy;
  Mat<eT2> B_copy;
  
  if(A_is_alias == true)  { A_copy = A; }
  if(B_is_alias == true)  { B_copy = B; }
  
  const Mat<eT1>& AA = A_is_alias ? A_copy : A;
  const Mat<eT2>& BB = B_is_alias ? B_copy : B;
  
  arma_debug_assert_mul_size(AA, BB, "matrix multiplication");
  
  out.set_size(AA.n_rows, BB.n_cols);
  
  gemm_mixed<>::apply(out, AA, BB);
  }
예제 #9
0
// Evaluate a mixed-element-type matrix product: out = X.A * X.B.
// Each operand is unwrapped through unwrap_check_mixed, which is given 'out'
// so that it can deal with an operand that aliases the destination.
inline
void
glue_mixed_times::apply(Mat<typename eT_promoter<T1,T2>::eT>& out, const mtGlue<typename eT_promoter<T1,T2>::eT, T1, T2, glue_mixed_times>& X)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type lhs_eT;
  typedef typename T2::elem_type rhs_eT;
  
  const unwrap_check_mixed<T1> lhs_holder(X.A, out);
  const unwrap_check_mixed<T2> rhs_holder(X.B, out);
  
  const Mat<lhs_eT>& lhs = lhs_holder.M;
  const Mat<rhs_eT>& rhs = rhs_holder.M;
  
  arma_debug_assert_mul_size(lhs, rhs, "matrix multiplication");
  
  out.set_size(lhs.n_rows, rhs.n_cols);
  
  gemm_mixed<>::apply(out, lhs, rhs);
  }
예제 #10
0
// trace(A * B) for a delayed matrix product, computed without forming the
// product: for every diagonal position i, row i of A is combined with
// column i of B.
inline
arma_warn_unused
typename T1::elem_type
trace(const Glue<T1, T2, glue_times>& X)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT;
  
  const unwrap<T1> lhs_holder(X.A);
  const unwrap<T2> rhs_holder(X.B);
  
  const Mat<eT>& A = lhs_holder.M;
  const Mat<eT>& B = rhs_holder.M;
  
  arma_debug_assert_mul_size(A, B, "matrix multiply");
  
  arma_debug_check( (A.n_rows != B.n_cols), "trace(): matrix must be square sized" );
  
  const uword n_diag  = A.n_rows;
  const uword n_inner = A.n_cols;
  
  eT total = eT(0);
  
  for(uword i=0; i != n_diag; ++i)
    {
    const eT* B_col = B.colptr(i);
    
    eT dot = eT(0);
    
    uword j = 0;
    
    while(j < n_inner)
      {
      dot += A.at(i,j) * B_col[j];
      ++j;
      }
    
    total += dot;
    }
  
  return total;
  }
예제 #11
0
// Matrix product in which at least one operand is a diagmat() expression.
// Three cases are handled:
//   diagmat(A) * B          : row r of B is scaled by A[r]
//   A          * diagmat(B) : column c of A is scaled by B[c]
//   diagmat(A) * diagmat(B) : zero matrix with A[i]*B[i] on the diagonal
// Nothing is done when neither operand is flagged as a diagmat.
arma_hot
inline
void
glue_times_diag::apply(Mat<typename T1::elem_type>& out, const Glue<T1, T2, glue_times_diag>& X)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT;
  
  typedef typename strip_diagmat<T1>::stored_type lhs_stripped_type;
  typedef typename strip_diagmat<T2>::stored_type rhs_stripped_type;
  
  const strip_diagmat<T1> S1(X.A);
  const strip_diagmat<T2> S2(X.B);
  
  const bool lhs_is_diag = (S1.do_diagmat == true);
  const bool rhs_is_diag = (S2.do_diagmat == true);
  
  if(lhs_is_diag && !rhs_is_diag)
    {
    const diagmat_proxy_check<lhs_stripped_type> diag(S1.M, out);
    
    const unwrap_check<T2> dense_holder(X.B, out);
    const Mat<eT>& dense = dense_holder.M;
    
    arma_debug_assert_mul_size(diag.n_elem, diag.n_elem, dense.n_rows, dense.n_cols, "matrix multiply");
    
    out.set_size(diag.n_elem, dense.n_cols);
    
    for(u32 c=0; c < dense.n_cols; ++c)
      {
            eT* out_col   = out.colptr(c);
      const eT* dense_col = dense.colptr(c);
      
      for(u32 r=0; r < dense.n_rows; ++r)
        {
        out_col[r] = diag[r] * dense_col[r];
        }
      }
    }
  else
  if(!lhs_is_diag && rhs_is_diag)
    {
    const unwrap_check<T1> dense_holder(X.A, out);
    const Mat<eT>& dense = dense_holder.M;
    
    const diagmat_proxy_check<rhs_stripped_type> diag(S2.M, out);
    
    arma_debug_assert_mul_size(dense.n_rows, dense.n_cols, diag.n_elem, diag.n_elem, "matrix multiply");
    
    out.set_size(dense.n_rows, diag.n_elem);
    
    for(u32 c=0; c < dense.n_cols; ++c)
      {
      const eT scale = diag[c];
      
            eT* out_col   = out.colptr(c);
      const eT* dense_col = dense.colptr(c);
      
      for(u32 r=0; r < dense.n_rows; ++r)
        {
        out_col[r] = dense_col[r] * scale;
        }
      }
    }
  else
  if(lhs_is_diag && rhs_is_diag)
    {
    const diagmat_proxy_check<lhs_stripped_type> lhs_diag(S1.M, out);
    const diagmat_proxy_check<rhs_stripped_type> rhs_diag(S2.M, out);
    
    arma_debug_assert_mul_size(lhs_diag.n_elem, lhs_diag.n_elem, rhs_diag.n_elem, rhs_diag.n_elem, "matrix multiply");
    
    out.zeros(lhs_diag.n_elem, lhs_diag.n_elem);
    
    for(u32 d=0; d < lhs_diag.n_elem; ++d)
      {
      out.at(d,d) = lhs_diag[d] * rhs_diag[d];
      }
    }
  }
예제 #12
0
// Compute out = [alpha *] op(A) * op(B), where op() is an optional transpose
// selected by do_trans_A / do_trans_B, and the scaling by alpha is applied
// only when use_alpha is true.
//
// 'out' is resized before A and B are read, so it must not alias either of
// them.  For each transpose combination, products in which one side reduces
// to a vector are routed to gemv (with the operands swapped when the vector
// is on the left); everything else goes to gemm.  The run-time flags are
// mapped to the corresponding compile-time template flags.
arma_hot
inline
void
glue_times::apply
  (
        Mat<eT>& out,
  const Mat<eT>& A,
  const Mat<eT>& B,
  const eT       alpha,
  const bool     do_trans_A,
  const bool     do_trans_B,
  const bool     use_alpha
  )
  {
  arma_extra_debug_sigprint();
  
  arma_debug_assert_mul_size(A, B, do_trans_A, do_trans_B, "matrix multiply");
  
  const u32 final_n_rows = do_trans_A ? A.n_cols : A.n_rows;
  const u32 final_n_cols = do_trans_B ? B.n_rows : B.n_cols;
  
  out.set_size(final_n_rows, final_n_cols);
  
  // TODO: thoroughly test all combinations
  
  if( !do_trans_A && !do_trans_B )
    {
    // out = A * B
    if(A.n_rows == 1)
      {
      if(use_alpha)  { gemv<true,  true,  false>::apply(out.memptr(), B, A.memptr(), alpha); }
      else           { gemv<true,  false, false>::apply(out.memptr(), B, A.memptr());        }
      }
    else
    if(B.n_cols == 1)
      {
      if(use_alpha)  { gemv<false, true,  false>::apply(out.memptr(), A, B.memptr(), alpha); }
      else           { gemv<false, false, false>::apply(out.memptr(), A, B.memptr());        }
      }
    else
      {
      if(use_alpha)  { gemm<false, false, true,  false>::apply(out, A, B, alpha); }
      else           { gemm<false, false, false, false>::apply(out, A, B);        }
      }
    }
  else
  if( do_trans_A && !do_trans_B )
    {
    // out = trans(A) * B
    if(A.n_cols == 1)
      {
      if(use_alpha)  { gemv<true,  true,  false>::apply(out.memptr(), B, A.memptr(), alpha); }
      else           { gemv<true,  false, false>::apply(out.memptr(), B, A.memptr());        }
      }
    else
    if(B.n_cols == 1)
      {
      if(use_alpha)  { gemv<true,  true,  false>::apply(out.memptr(), A, B.memptr(), alpha); }
      else           { gemv<true,  false, false>::apply(out.memptr(), A, B.memptr());        }
      }
    else
      {
      if(use_alpha)  { gemm<true,  false, true,  false>::apply(out, A, B, alpha); }
      else           { gemm<true,  false, false, false>::apply(out, A, B);        }
      }
    }
  else
  if( !do_trans_A && do_trans_B )
    {
    // out = A * trans(B)
    if(A.n_rows == 1)
      {
      if(use_alpha)  { gemv<false, true,  false>::apply(out.memptr(), B, A.memptr(), alpha); }
      else           { gemv<false, false, false>::apply(out.memptr(), B, A.memptr());        }
      }
    else
    if(B.n_rows == 1)
      {
      if(use_alpha)  { gemv<false, true,  false>::apply(out.memptr(), A, B.memptr(), alpha); }
      else           { gemv<false, false, false>::apply(out.memptr(), A, B.memptr());        }
      }
    else
      {
      if(use_alpha)  { gemm<false, true,  true,  false>::apply(out, A, B, alpha); }
      else           { gemm<false, true,  false, false>::apply(out, A, B);        }
      }
    }
  else
    {
    // out = trans(A) * trans(B)
    if(A.n_cols == 1)
      {
      if(use_alpha)  { gemv<false, true,  false>::apply(out.memptr(), B, A.memptr(), alpha); }
      else           { gemv<false, false, false>::apply(out.memptr(), B, A.memptr());        }
      }
    else
    if(B.n_rows == 1)
      {
      if(use_alpha)  { gemv<true,  true,  false>::apply(out.memptr(), A, B.memptr(), alpha); }
      else           { gemv<true,  false, false>::apply(out.memptr(), A, B.memptr());        }
      }
    else
      {
      if(use_alpha)  { gemm<true,  true,  true,  false>::apply(out, A, B, alpha); }
      else           { gemm<true,  true,  false, false>::apply(out, A, B);        }
      }
    }
  }
예제 #13
0
// Accumulate a matrix product into an existing matrix:
//   out += (A-expression * B-expression)   when sign >  0
//   out -= (A-expression * B-expression)   otherwise
//
// Each operand is partially unwrapped (with an alias check against 'out')
// into a matrix, a transpose flag and a scalar multiplier.  The multipliers
// and the sign are folded into a single alpha.  'out' must already have the
// size of the product.  Every gemv/gemm call receives alpha and a second
// scalar equal to eT(1), with the last template flag set to true.
arma_hot
inline
void
glue_times::apply_inplace_plus(Mat<typename T1::elem_type>& out, const Glue<T1, T2, glue_times>& X, const s32 sign)
  {
  arma_extra_debug_sigprint();
  
  typedef typename T1::elem_type eT;
  
  const partial_unwrap_check<T1> lhs_holder(X.A, out);
  const partial_unwrap_check<T2> rhs_holder(X.B, out);
  
  const Mat<eT>& A = lhs_holder.M;
  const Mat<eT>& B = rhs_holder.M;
  
  const eT sign_factor = (sign > s32(0)) ? eT(1) : eT(-1);
  const eT alpha       = lhs_holder.val * rhs_holder.val * sign_factor;
  
  const bool do_trans_A = lhs_holder.do_trans;
  const bool do_trans_B = rhs_holder.do_trans;
  const bool use_alpha  = lhs_holder.do_times | rhs_holder.do_times | (sign < s32(0));
  
  arma_debug_assert_mul_size(A, B, do_trans_A, do_trans_B, "matrix multiply");
  
  const u32 result_n_rows = do_trans_A ? A.n_cols : A.n_rows;
  const u32 result_n_cols = do_trans_B ? B.n_rows : B.n_cols;
  
  arma_assert_same_size(out.n_rows, out.n_cols, result_n_rows, result_n_cols, "matrix addition");
  
  if( !do_trans_A && !do_trans_B )
    {
    // out += alpha * A * B
    if(A.n_rows == 1)
      {
      if(use_alpha)  { gemv<true,  true,  true>::apply(out.memptr(), B, A.memptr(), alpha, eT(1)); }
      else           { gemv<true,  false, true>::apply(out.memptr(), B, A.memptr(), alpha, eT(1)); }
      }
    else
    if(B.n_cols == 1)
      {
      if(use_alpha)  { gemv<false, true,  true>::apply(out.memptr(), A, B.memptr(), alpha, eT(1)); }
      else           { gemv<false, false, true>::apply(out.memptr(), A, B.memptr(), alpha, eT(1)); }
      }
    else
      {
      if(use_alpha)  { gemm<false, false, true,  true>::apply(out, A, B, alpha, eT(1)); }
      else           { gemm<false, false, false, true>::apply(out, A, B, alpha, eT(1)); }
      }
    }
  else
  if( do_trans_A && !do_trans_B )
    {
    // out += alpha * trans(A) * B
    if(A.n_cols == 1)
      {
      if(use_alpha)  { gemv<true,  true,  true>::apply(out.memptr(), B, A.memptr(), alpha, eT(1)); }
      else           { gemv<true,  false, true>::apply(out.memptr(), B, A.memptr(), alpha, eT(1)); }
      }
    else
    if(B.n_cols == 1)
      {
      if(use_alpha)  { gemv<true,  true,  true>::apply(out.memptr(), A, B.memptr(), alpha, eT(1)); }
      else           { gemv<true,  false, true>::apply(out.memptr(), A, B.memptr(), alpha, eT(1)); }
      }
    else
      {
      if(use_alpha)  { gemm<true,  false, true,  true>::apply(out, A, B, alpha, eT(1)); }
      else           { gemm<true,  false, false, true>::apply(out, A, B, alpha, eT(1)); }
      }
    }
  else
  if( !do_trans_A && do_trans_B )
    {
    // out += alpha * A * trans(B)
    if(A.n_rows == 1)
      {
      if(use_alpha)  { gemv<false, true,  true>::apply(out.memptr(), B, A.memptr(), alpha, eT(1)); }
      else           { gemv<false, false, true>::apply(out.memptr(), B, A.memptr(), alpha, eT(1)); }
      }
    else
    if(B.n_rows == 1)
      {
      if(use_alpha)  { gemv<false, true,  true>::apply(out.memptr(), A, B.memptr(), alpha, eT(1)); }
      else           { gemv<false, false, true>::apply(out.memptr(), A, B.memptr(), alpha, eT(1)); }
      }
    else
      {
      if(use_alpha)  { gemm<false, true,  true,  true>::apply(out, A, B, alpha, eT(1)); }
      else           { gemm<false, true,  false, true>::apply(out, A, B, alpha, eT(1)); }
      }
    }
  else
    {
    // out += alpha * trans(A) * trans(B)
    if(A.n_cols == 1)
      {
      if(use_alpha)  { gemv<false, true,  true>::apply(out.memptr(), B, A.memptr(), alpha, eT(1)); }
      else           { gemv<false, false, true>::apply(out.memptr(), B, A.memptr(), alpha, eT(1)); }
      }
    else
    if(B.n_rows == 1)
      {
      if(use_alpha)  { gemv<true,  true,  true>::apply(out.memptr(), A, B.memptr(), alpha, eT(1)); }
      else           { gemv<true,  false, true>::apply(out.memptr(), A, B.memptr(), alpha, eT(1)); }
      }
    else
      {
      if(use_alpha)  { gemm<true,  true,  true,  true>::apply(out, A, B, alpha, eT(1)); }
      else           { gemm<true,  true,  false, true>::apply(out, A, B, alpha, eT(1)); }
      }
    }
  }
예제 #14
0
// Sparse matrix product c = pa * pb, for the case where 'c' does not alias
// either operand.
//
// c  : destination sparse matrix; zeroed and resized to
//      (rows of pa) x (columns of pb) before anything else is done
// pa : proxy for the left-hand sparse operand
// pb : proxy for the right-hand sparse operand
//
// Two passes are made over the operands:
//   1. a symbolic pass ("SYMBMM") that counts, per result column, how many
//      distinct rows can receive a value, giving an upper bound on the number
//      of nonzeros;
//   2. a numeric pass ("NUMBMM") that computes the values, discards entries
//      whose sum is exactly zero, and stores each column with sorted row
//      indices.
//
// Both passes record the rows touched in the current column as a linked list
// threaded through 'index': index[r] == x_n_rows means "row r not in the
// list", any other value is the next row in the list, and the value
// x_n_rows + 1 terminates the list.  'last_ind' is the head of the list.
arma_hot
inline
void
spglue_times::apply_noalias(SpMat<eT>& c, const SpProxy<T1>& pa, const SpProxy<T2>& pb)
  {
  arma_extra_debug_sigprint();
  
  const uword x_n_rows = pa.get_n_rows();
  const uword x_n_cols = pa.get_n_cols();
  const uword y_n_rows = pb.get_n_rows();
  const uword y_n_cols = pb.get_n_cols();

  arma_debug_assert_mul_size(x_n_rows, x_n_cols, y_n_rows, y_n_cols, "matrix multiplication");

  // First we must determine the structure of the new matrix (column pointers).
  // This follows the algorithm described in 'Sparse Matrix Multiplication
  // Package (SMMP)' (R.E. Bank and C.C. Douglas, 2001).  Their description of
  // "SYMBMM" does not include anything about memory allocation.  In addition it
  // does not consider that there may be elements which space may be allocated
  // for but which evaluate to zero anyway.  So we have to modify the algorithm
  // to work that way.  For the "SYMBMM" implementation we will not determine
  // the row indices but instead just the column pointers.
  
  //SpMat<typename T1::elem_type> c(x_n_rows, y_n_cols); // Initializes col_ptrs to 0.
  c.zeros(x_n_rows, y_n_cols);
  
  // If either operand has no stored elements, the product is the all-zero
  // matrix that was just set up.
  //if( (pa.get_n_elem() == 0) || (pb.get_n_elem() == 0) )
  if( (pa.get_n_nonzero() == 0) || (pb.get_n_nonzero() == 0) )
    {
    return;
    }
  
  // Auxiliary storage which denotes when items have been found.
  podarray<uword> index(x_n_rows);
  index.fill(x_n_rows); // Fill with invalid links.
  
  typename SpProxy<T2>::const_iterator_type y_it  = pb.begin();
  typename SpProxy<T2>::const_iterator_type y_end = pb.end();

  // SYMBMM: calculate column pointers for resultant matrix to obtain a good
  // upper bound on the number of nonzero elements.
  uword cur_col_length = 0;
  uword last_ind = x_n_rows + 1; // list terminator: no rows recorded yet
  do
    {
    const uword y_it_row = y_it.row();
    
    // Look through the column that this point (*y_it) could affect.
    typename SpProxy<T1>::const_iterator_type x_it = pa.begin_col(y_it_row);
    
    while(x_it.col() == y_it_row)
      {
      // A point at x(i, j) and y(j, k) implies a point at c(i, k).
      // Only count a row the first time it is seen in this result column.
      if(index[x_it.row()] == x_n_rows)
        {
        index[x_it.row()] = last_ind;
        last_ind = x_it.row();
        ++cur_col_length;
        }

      ++x_it;
      }

    const uword old_col = y_it.col();
    ++y_it;

    // See if column incremented.
    if(old_col != y_it.col())
      {
      // Set column pointer (this is not a cumulative count; that is done later).
      access::rw(c.col_ptrs[old_col + 1]) = cur_col_length;
      cur_col_length = 0;

      // Reset the visited rows back to the "invalid link" marker (x_n_rows),
      // walking the list via last_ind until the terminator is reached.
      while(last_ind != x_n_rows + 1)
        {
        const uword tmp = index[last_ind];
        index[last_ind] = x_n_rows;
        last_ind = tmp;
        }
      }
    }
  while(y_it != y_end);

  // Accumulate column pointers.
  for(uword i = 0; i < c.n_cols; ++i)
    {
    access::rw(c.col_ptrs[i + 1]) += c.col_ptrs[i];
    }

  // Now that we know a decent bound on the number of nonzero elements, allocate
  // the memory and fill it.
  c.mem_resize(c.col_ptrs[c.n_cols]);

  // Now the implementation of the NUMBMM algorithm.
  uword cur_pos = 0; // Current position in c matrix.
  podarray<eT> sums(x_n_rows); // Partial sums.
  sums.zeros();
  
  // setting the size of 'sorted_indices' to x_n_rows is a better-than-nothing guess;
  // the correct minimum size is determined later
  podarray<uword> sorted_indices(x_n_rows);
  
  // At this point last_ind is expected to be back at the list terminator
  // (x_n_rows + 1) and cur_col_length at 0.  This relies on the last iteration
  // of the SYMBMM loop having seen a column change.
  // NOTE(review): confirm that the end iterator reports a different column.
  // We will loop through all columns as necessary.
  uword cur_col = 0;
  while(cur_col < c.n_cols)
    {
    // Skip to next column with elements in it.
    while((cur_col < c.n_cols) && (c.col_ptrs[cur_col] == c.col_ptrs[cur_col + 1]))
      {
      // Update current column pointer to actual number of nonzero elements up
      // to this point.
      access::rw(c.col_ptrs[cur_col]) = cur_pos;
      ++cur_col;
      }

    if(cur_col == c.n_cols)
      {
      break;
      }

    // Update current column pointer.
    access::rw(c.col_ptrs[cur_col]) = cur_pos;

    // Check all elements in this column.
    typename SpProxy<T2>::const_iterator_type y_col_it = pb.begin_col(cur_col);
    
    while(y_col_it.col() == cur_col)
      {
      // Check all elements in the column of the other matrix corresponding to
      // the row of this column.
      typename SpProxy<T1>::const_iterator_type x_col_it = pa.begin_col(y_col_it.row());

      const eT y_value = (*y_col_it);

      while(x_col_it.col() == y_col_it.row())
        {
        // A point at x(i, j) and y(j, k) implies a point at c(i, k).
        // Add to partial sum.
        const eT x_value = (*x_col_it);
        sums[x_col_it.row()] += (x_value * y_value);

        // Add point if it hasn't already been marked.
        if(index[x_col_it.row()] == x_n_rows)
          {
          index[x_col_it.row()] = last_ind;
          last_ind = x_col_it.row();
          }

        ++x_col_it;
        }

      ++y_col_it;
      }

    // Now sort the indices that were used in this column.
    //podarray<uword> sorted_indices(c.col_ptrs[cur_col + 1] - c.col_ptrs[cur_col]);
    sorted_indices.set_min_size(c.col_ptrs[cur_col + 1] - c.col_ptrs[cur_col]);
    
    // .set_min_size() can only enlarge the array to the specified size,
    // hence if we request a smaller size than already allocated,
    // no new memory allocation is done
    
    
    // Walk the list of touched rows: collect the rows with a nonzero sum and
    // reset every visited link to the "invalid link" marker as we go.
    uword cur_index = 0;
    while(last_ind != x_n_rows + 1)
      {
      const uword tmp = last_ind;

      // Check that it wasn't a "fake" nonzero element.
      if(sums[tmp] != eT(0))
        {
        // Assign to next open position.
        sorted_indices[cur_index] = tmp;
        ++cur_index;
        }

      last_ind = index[tmp];
      index[tmp] = x_n_rows;
      }

    // Now sort the indices.
    if (cur_index != 0)
      {
      op_sort::direct_sort_ascending(sorted_indices.memptr(), cur_index);

      // Emit this column's entries in ascending row order; each partial sum
      // is cleared after use so that 'sums' is all zeros for the next column.
      for(uword k = 0; k < cur_index; ++k)
        {
        const uword row = sorted_indices[k];
        access::rw(c.row_indices[cur_pos]) = row;
        access::rw(c.values[cur_pos]) = sums[row];
        sums[row] = eT(0);
        ++cur_pos;
        }
      }

    // Move to next column.
    ++cur_col;
    }

  // Update last column pointer and resize to actual memory size.
  access::rw(c.col_ptrs[c.n_cols]) = cur_pos;
  c.mem_resize(cur_pos);
  }