void DenseMatrix<T>::_svd_lapack (DenseVector<T>& sigma, DenseMatrix<T>& U, DenseMatrix<T>& VT)
{
  // Computes the singular values and the "thin" singular vector
  // factors via _svd_helper() and copies them into sigma, U and VT.
  // All three output arguments are resized here.
  //
  // The calling sequence for dgesvd is:
  // DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO )


  //  JOBU    (input) CHARACTER*1
  //          Specifies options for computing all or part of the matrix U:
  //          = 'A':  all M columns of U are returned in array U:
  //          = 'S':  the first min(m,n) columns of U (the left singular
  //                  vectors) are returned in the array U;
  //          = 'O':  the first min(m,n) columns of U (the left singular
  //                  vectors) are overwritten on the array A;
  //          = 'N':  no columns of U (no left singular vectors) are
  //                  computed.
  char JOBU = 'S';

  //  JOBVT   (input) CHARACTER*1
  //          Specifies options for computing all or part of the matrix
  //          V**T:
  //          = 'A':  all N rows of V**T are returned in the array VT;
  //          = 'S':  the first min(m,n) rows of V**T (the right singular
  //                  vectors) are returned in the array VT;
  //          = 'O':  the first min(m,n) rows of V**T (the right singular
  //                  vectors) are overwritten on the array A;
  //          = 'N':  no rows of V**T (no right singular vectors) are
  //                  computed.
  char JOBVT = 'S';

  std::vector<T> sigma_val;
  std::vector<T> U_val;
  std::vector<T> VT_val;

  _svd_helper(JOBU, JOBVT, sigma_val, U_val, VT_val);

  // Load the singular values into sigma
  sigma.resize(sigma_val.size());
  for(unsigned int i=0; i<sigma_val.size(); i++)
    sigma(i) = sigma_val[i];

  // NOTE(review): M is taken from n() and N from m(); presumably this
  // mirrors the dimensions handed to LAPACK by _svd_helper() -- confirm
  // against that routine.
  int M = this->n();
  int N = this->m();
  int min_MN = (M < N) ? M : N;

  // Unpack U (M x min_MN) from column-major storage.  The stride
  // between consecutive columns is the number of *rows*, U.m().  The
  // previous code used U.n(), which is only the same value when M <= N
  // and therefore picked the wrong entries for M > N.
  // Assumes _svd_helper() stores U_val with leading dimension M (the
  // minimum LAPACK allows for JOBU='S') -- TODO confirm.
  U.resize(M,min_MN);
  for(unsigned int i=0; i<U.m(); i++)
    for(unsigned int j=0; j<U.n(); j++)
    {
      unsigned int index = i + j*U.m();  // Column major storage
      U(i,j) = U_val[index];
    }

  // Unpack VT (min_MN x N) from column-major storage.  The column
  // stride VT.m() equals min_MN, the same value the previous code
  // obtained through U.n().
  VT.resize(min_MN,N);
  for(unsigned int i=0; i<VT.m(); i++)
    for(unsigned int j=0; j<VT.n(); j++)
    {
      unsigned int index = i + j*VT.m(); // Column major storage
      VT(i,j) = VT_val[index];
    }

}
Esempio n. 2
0
void DenseMatrix<T>::right_multiply_transpose (const DenseMatrix<T>& B)
{
  // Replaces *this with (*this) * B^T.  The BLAS path is taken when
  // use_blas_lapack is set; otherwise the product is formed by hand.
  if (this->use_blas_lapack)
    {
      this->_multiply_blas(B, RIGHT_MULTIPLY_TRANSPOSE);
      return;
    }

  // Keep the original entries around: *this is overwritten below.
  DenseMatrix<T> lhs(*this);

  if (this == &B)
    {
      // Special case B*(B^T): the result is symmetric, so only the
      // lower triangle is accumulated and then mirrored.
      // If B is mxn, the result is a square matrix of size m x m.
      const unsigned int n_rows = lhs.m();
      const unsigned int n_cols = lhs.n();

      // resize() also zeroes every entry, which the += below relies on.
      this->resize(n_rows,n_rows);

      for (unsigned int row=0; row<n_rows; ++row)
        for (unsigned int col=0; col<=row; ++col)
          {
            // Inner product of rows 'row' and 'col' of the original matrix.
            for (unsigned int k=0; k<n_cols; ++k)
              (*this)(row,col) += lhs(row,k)*lhs(col,k);

            // Mirror the finished entry into the upper triangle.
            if (col != row)
              (*this)(col,row) = (*this)(row,col);
          }

      return;
    }

  // General case: result is (rows of *this) x (rows of B).
  this->resize (lhs.m(), B.m());

  libmesh_assert_equal_to (lhs.n(), B.n());
  libmesh_assert_equal_to (this->m(), lhs.m());
  libmesh_assert_equal_to (this->n(), B.m());

  const unsigned int out_rows  = lhs.m();
  const unsigned int inner_dim = lhs.n();
  const unsigned int out_cols  = this->n();

  // Loop so that a zero entry of B^T lets us skip a whole column
  // update; there is a decent chance (at least for constraint
  // matrices) that B.transpose(k,col) is zero.
  for (unsigned int col=0; col<out_cols; ++col)
    for (unsigned int k=0; k<inner_dim; ++k)
      {
        const T bt_entry = B.transpose(k,col);

        if (bt_entry != 0.)
          for (unsigned int row=0; row<out_rows; ++row)
            (*this)(row,col) += lhs(row,k)*bt_entry;
      }
}
Esempio n. 3
0
void SparseMatrix<T>::add_block_matrix (const DenseMatrix<T> & dm,
                                        const std::vector<numeric_index_type> & brows,
                                        const std::vector<numeric_index_type> & bcols)
{
  // Expands the block row/column indices into scalar indices and
  // forwards the dense matrix to add_matrix().
  libmesh_assert_equal_to (dm.m() / brows.size(), dm.n() / bcols.size());

  // Number of scalar rows (and columns) per block.
  const numeric_index_type blocksize = cast_int<numeric_index_type>
    (dm.m() / brows.size());

  libmesh_assert_equal_to (dm.m()%blocksize, 0);
  libmesh_assert_equal_to (dm.n()%blocksize, 0);

  std::vector<numeric_index_type> rows;
  std::vector<numeric_index_type> cols;
  rows.reserve(blocksize*brows.size());
  cols.reserve(blocksize*bcols.size());

  // Block b covers scalar indices [b*blocksize, (b+1)*blocksize).
  for (std::vector<numeric_index_type>::const_iterator it = brows.begin();
       it != brows.end(); ++it)
    {
      const numeric_index_type first = (*it)*blocksize;

      for (unsigned int offset=0; offset<blocksize; ++offset)
        rows.push_back(first + offset);
    }

  for (std::vector<numeric_index_type>::const_iterator it = bcols.begin();
       it != bcols.end(); ++it)
    {
      const numeric_index_type first = (*it)*blocksize;

      for (unsigned int offset=0; offset<blocksize; ++offset)
        cols.push_back(first + offset);
    }

  this->add_matrix (dm, rows, cols);
}
Esempio n. 4
0
void
dataStore(std::ostream & stream, DenseMatrix<Real> & v, void * /*context*/)
{
  unsigned int m = v.m();
  unsigned int n = v.n();
  stream.write((char *) &m, sizeof(m));
  stream.write((char *) &n, sizeof(n));
  for (unsigned int i = 0; i < v.m(); i++)
    for (unsigned int j = 0; j < v.n(); j++)
    {
      Real r = v(i, j);
      stream.write((char *) &r, sizeof(r));
    }
}
Esempio n. 5
0
void PetscMatrix<T>::add_block_matrix(const DenseMatrix<T>& dm,
                                      const std::vector<numeric_index_type>& brows,
                                      const std::vector<numeric_index_type>& bcols)
{
  // Adds the dense matrix dm into the PETSc matrix using block
  // indices: one entry of brows/bcols addresses a whole block.
  libmesh_assert (this->initialized());

  const numeric_index_type n_brows   = brows.size();
  const numeric_index_type n_bcols   = bcols.size();
  const numeric_index_type n_rows    = dm.m();
  const numeric_index_type n_cols    = dm.n();

  // Scalar rows per block; the asserts check that the same size also
  // fits the columns and divides both dimensions evenly.
  const numeric_index_type blocksize = n_rows / n_brows;

  libmesh_assert_equal_to (n_cols / n_bcols, blocksize);
  libmesh_assert_equal_to (blocksize*n_brows, n_rows);
  libmesh_assert_equal_to (blocksize*n_bcols, n_cols);

  PetscErrorCode ierr=0;

#ifndef NDEBUG
  // Debug builds only: the matrix must have been set up with the same
  // block size that dm implies.
  PetscInt petsc_blocksize;
  ierr = MatGetBlockSize(_mat, &petsc_blocksize);
  LIBMESH_CHKERRABORT(ierr);
  libmesh_assert_equal_to (blocksize, static_cast<numeric_index_type>(petsc_blocksize));
#endif

  // These casts are required for PETSc <= 2.1.5
  PetscInt    * const block_row_ids = (PetscInt*) &brows[0];
  PetscInt    * const block_col_ids = (PetscInt*) &bcols[0];
  PetscScalar * const dense_values  = (PetscScalar*) &dm.get_values()[0];

  ierr = MatSetValuesBlocked(_mat,
                             n_brows, block_row_ids,
                             n_bcols, block_col_ids,
                             dense_values,
                             ADD_VALUES);
  LIBMESH_CHKERRABORT(ierr);
}
Esempio n. 6
0
void DenseMatrix<T>::get_transpose (DenseMatrix<T>& dest) const
{
  // dest becomes n() x m() and receives dest(r,c) = (*this)(c,r).
  const unsigned int dest_rows = this->n();
  const unsigned int dest_cols = this->m();

  dest.resize(dest_rows, dest_cols);

  for (unsigned int r=0; r<dest_rows; ++r)
    for (unsigned int c=0; c<dest_cols; ++c)
      dest(r,c) = (*this)(c,r);
}
Esempio n. 7
0
void LaspackMatrix<T>::add_matrix(const DenseMatrix<T>& dm,
                                  const std::vector<numeric_index_type>& rows,
                                  const std::vector<numeric_index_type>& cols)

{
  // Adds dm(i,j) to the entry at (rows[i], cols[j]), one add() call
  // per entry.
  libmesh_assert (this->initialized());
  libmesh_assert_equal_to (dm.m(), rows.size());
  libmesh_assert_equal_to (dm.n(), cols.size());

  const numeric_index_type n_rows = rows.size();
  const numeric_index_type n_cols = cols.size();

  for (numeric_index_type r=0; r<n_rows; ++r)
    {
      const numeric_index_type target_row = rows[r];

      for (numeric_index_type c=0; c<n_cols; ++c)
        this->add(target_row, cols[c], dm(r,c));
    }
}
void EpetraMatrix<T>::add_matrix(const DenseMatrix<T>& dm,
                                 const std::vector<unsigned int>& rows,
                                 const std::vector<unsigned int>& cols)
{
  // Hands the whole dense block to SumIntoGlobalValues() in one call.
  libmesh_assert (this->initialized());

  const unsigned int m = dm.m();
  const unsigned int n = dm.n();

  libmesh_assert (rows.size() == m);
  libmesh_assert (cols.size() == n);

  // The index vectors are passed through as int arrays.
  int * const row_ids = (int *)&rows[0];
  int * const col_ids = (int *)&cols[0];

  _mat->SumIntoGlobalValues(m, row_ids, n, col_ids, &dm.get_values()[0]);
}
Esempio n. 9
0
void EpetraMatrix<T>::add_matrix(const DenseMatrix<T>& dm,
                                 const std::vector<numeric_index_type>& rows,
                                 const std::vector<numeric_index_type>& cols)
{
  // Hands the whole dense block to SumIntoGlobalValues() in one call.
  libmesh_assert (this->initialized());

  const numeric_index_type m = dm.m();
  const numeric_index_type n = dm.n();

  libmesh_assert_equal_to (rows.size(), m);
  libmesh_assert_equal_to (cols.size(), n);

  // The index vectors are passed through as int arrays.
  int * const row_ids = (int *)&rows[0];
  int * const col_ids = (int *)&cols[0];

  _mat->SumIntoGlobalValues(m, row_ids, n, col_ids, &dm.get_values()[0]);
}
Esempio n. 10
0
void
dataLoad(std::istream & stream, DenseMatrix<Real> & v, void * /*context*/)
{
  unsigned int nr = 0, nc = 0;
  stream.read((char *) &nr, sizeof(nr));
  stream.read((char *) &nc, sizeof(nc));
  v.resize(nr,nc);
  for (unsigned int i = 0; i < v.m(); i++)
    for (unsigned int j = 0; j < v.n(); j++)
    {
      Real r = 0;
      stream.read((char *) &r, sizeof(r));
      v(i, j) = r;
    }
}
Esempio n. 11
0
void EigenSparseMatrix<T>::add_matrix(const DenseMatrix<T> & dm,
                                      const std::vector<numeric_index_type> & rows,
                                      const std::vector<numeric_index_type> & cols)

{
  // Adds dm(i,j) to the entry at (rows[i], cols[j]), one add() call
  // per entry.
  libmesh_assert (this->initialized());

  unsigned int n_rows = cast_int<unsigned int>(rows.size());
  unsigned int n_cols = cast_int<unsigned int>(cols.size());
  libmesh_assert_equal_to (dm.m(), n_rows);
  libmesh_assert_equal_to (dm.n(), n_cols);

  for (unsigned int r=0; r<n_rows; ++r)
    {
      const numeric_index_type target_row = rows[r];

      for (unsigned int c=0; c<n_cols; ++c)
        this->add(target_row, cols[c], dm(r,c));
    }
}
Esempio n. 12
0
void
Assembly::addJacobianBlock(SparseMatrix<Number> & jacobian, DenseMatrix<Number> & jac_block, const std::vector<dof_id_type> & idof_indices, const std::vector<dof_id_type> & jdof_indices, Real scaling_factor)
{
  // Nothing to do when either index list or either matrix dimension is empty.
  if (idof_indices.empty() || jdof_indices.empty() || !jac_block.n() || !jac_block.m())
    return;

  // Constrain using copies of the index lists, so the caller's vectors
  // are left untouched; jac_block itself is passed by reference.
  std::vector<dof_id_type> row_dofs(idof_indices);
  std::vector<dof_id_type> col_dofs(jdof_indices);
  _dof_map.constrain_element_matrix(jac_block, row_dofs, col_dofs, false);

  // Unit scaling: add the block directly, no temporary needed.
  if (scaling_factor == 1.0)
  {
    jacobian.add_matrix(jac_block, row_dofs, col_dofs);
    return;
  }

  // Otherwise scale a copy held in _tmp_Ke and add that instead.
  _tmp_Ke = jac_block;
  _tmp_Ke *= scaling_factor;
  jacobian.add_matrix(_tmp_Ke, row_dofs, col_dofs);
}
Esempio n. 13
0
void PetscMatrix<T>::add_matrix(const DenseMatrix<T>& dm,
                                const std::vector<numeric_index_type>& rows,
                                const std::vector<numeric_index_type>& cols)
{
  // Adds the dense matrix dm into the PETSc matrix at the given
  // row/column indices with a single MatSetValues() call.
  libmesh_assert (this->initialized());

  const numeric_index_type n_rows = dm.m();
  const numeric_index_type n_cols = dm.n();

  libmesh_assert_equal_to (rows.size(), n_rows);
  libmesh_assert_equal_to (cols.size(), n_cols);

  // These casts are required for PETSc <= 2.1.5
  PetscInt    * const row_ids      = (PetscInt*) &rows[0];
  PetscInt    * const col_ids      = (PetscInt*) &cols[0];
  PetscScalar * const dense_values = (PetscScalar*) &dm.get_values()[0];

  PetscErrorCode ierr = MatSetValues(_mat,
                                     n_rows, row_ids,
                                     n_cols, col_ids,
                                     dense_values,
                                     ADD_VALUES);
  LIBMESH_CHKERRABORT(ierr);
}
Esempio n. 14
0
void
Assembly::cacheJacobianBlock(DenseMatrix<Number> & jac_block, std::vector<dof_id_type> & idof_indices, std::vector<dof_id_type> & jdof_indices, Real scaling_factor)
{
  const bool has_entries =
    !idof_indices.empty() && !jdof_indices.empty() && jac_block.n() && jac_block.m();

  if (has_entries)
  {
    // Constrain using copies of the index lists so the caller's
    // vectors are left untouched.
    std::vector<dof_id_type> row_dofs(idof_indices);
    std::vector<dof_id_type> col_dofs(jdof_indices);
    _dof_map.constrain_element_matrix(jac_block, row_dofs, col_dofs, false);

    // With a non-unit factor the values come from a scaled copy in
    // _tmp_Ke; otherwise they are read straight from jac_block.
    const bool use_scaled_copy = (scaling_factor != 1.0);
    if (use_scaled_copy)
    {
      _tmp_Ke = jac_block;
      _tmp_Ke *= scaling_factor;
    }

    // Append one (value, row, col) triple per entry, row by row.
    for (unsigned int r = 0; r < row_dofs.size(); ++r)
      for (unsigned int c = 0; c < col_dofs.size(); ++c)
      {
        _cached_jacobian_values.push_back(use_scaled_copy ? _tmp_Ke(r, c) : jac_block(r, c));
        _cached_jacobian_rows.push_back(row_dofs[r]);
        _cached_jacobian_cols.push_back(col_dofs[c]);
      }
  }

  // The block is always cleared, whether or not anything was cached.
  jac_block.zero();
}
Esempio n. 15
0
  // This function is called by testEVD for different matrices.  It
  // computes the eigenvalues and the left/right eigenvectors of A,
  // checks that each eigenpair satisfies its defining relation, and
  // then compares the computed eigenvalues to the known real and
  // imaginary parts for the matrix in question.  Those known values
  // are passed in by non-const value, since this routine will sort
  // them in-place.
  //
  // A                 the (square) matrix under test; the decomposition
  //                   is run on a copy, A itself is only read.
  // true_lambda_real  expected real parts of the eigenvalues.
  // true_lambda_imag  expected imaginary parts of the eigenvalues.
  void testEVD_helper(DenseMatrix<Real> & A,
                      std::vector<Real> true_lambda_real,
                      std::vector<Real> true_lambda_imag)
  {
    // Note: see bottom of this file, we only do this test if PETSc is
    // available, but this function currently only exists if we're
    // using real numbers.
#ifdef LIBMESH_USE_REAL_NUMBERS
    // Let's compute the eigenvalues on a copy of A, so that we can
    // use the original to check the computation.
    DenseMatrix<Real> A_copy = A;

    DenseVector<Real> lambda_real, lambda_imag;
    DenseMatrix<Real> VR; // right eigenvectors
    DenseMatrix<Real> VL; // left eigenvectors
    A_copy.evd_left_and_right(lambda_real, lambda_imag, VL, VR);

    // The matrix is square and of size N x N.
    const unsigned N = A.m();

    // Verify the left eigenvectors.
    // Test that the left eigenpairs are self-consistent by checking
    // u_j**H * A = lambda_j * u_j**H
    // Note that we have to handle real and complex eigenvalues
    // differently, since complex eigenvectors share their storage.
    for (unsigned eigenval=0; eigenval<N; ++eigenval)
      {
        // Real eigenvalue: the imaginary part is (numerically) zero.
        if (std::abs(lambda_imag(eigenval)) < TOLERANCE*TOLERANCE)
          {
            // remove print libMesh::out << "Checking eigenvalue: " << eigenval << std::endl;
            DenseVector<Real> lhs(N), rhs(N);
            for (unsigned i=0; i<N; ++i)
              {
                rhs(i) = lambda_real(eigenval) * VL(i, eigenval);
                for (unsigned j=0; j<N; ++j)
                  lhs(i) += A(j, i) * VL(j, eigenval); // Note: A(j,i), i.e. A^T is applied
              }

            // Subtract and assert that the norm of the difference is
            // below some tolerance.
            lhs -= rhs;
            CPPUNIT_ASSERT_DOUBLES_EQUAL(/*expected=*/0., /*actual=*/lhs.l2_norm(), std::sqrt(TOLERANCE)*TOLERANCE);
          }
        else
          {
            // This is a complex eigenvalue, so:
            // a.) It occurs in a complex-conjugate pair
            // b.) the real part of the eigenvector is stored in VL(:,eigenval)
            // c.) the imag part of the eigenvector is stored in VL(:,eigenval+1)
            //
            // Equating the real and imaginary parts of
            // x**H * A = lambda * x**H leads to two sets of relations
            // that must hold:
            // 1.) A^T x_r =  lambda_r*x_r + lambda_i*x_i
            // 2.) A^T x_i = -lambda_i*x_r + lambda_r*x_i
            // which we can verify.
            //
            // NOTE(review): column eigenval+1 is read without a bounds
            // check; this relies on the pair never starting in the
            // last column.

            // 1.)
            DenseVector<Real> lhs(N), rhs(N);
            for (unsigned i=0; i<N; ++i)
              {
                rhs(i) = lambda_real(eigenval) * VL(i, eigenval) + lambda_imag(eigenval) * VL(i, eigenval+1);
                for (unsigned j=0; j<N; ++j)
                  lhs(i) += A(j, i) * VL(j, eigenval); // Note: A(j,i), i.e. A^T is applied
              }

            lhs -= rhs;
            CPPUNIT_ASSERT_DOUBLES_EQUAL(/*expected=*/0., /*actual=*/lhs.l2_norm(), std::sqrt(TOLERANCE)*TOLERANCE);

            // libMesh::out << "lhs=" << std::endl;
            // lhs.print_scientific(libMesh::out, /*precision=*/15);
            //
            // libMesh::out << "rhs=" << std::endl;
            // rhs.print_scientific(libMesh::out, /*precision=*/15);

            // 2.)
            lhs.zero();
            rhs.zero();
            for (unsigned i=0; i<N; ++i)
              {
                rhs(i) = -lambda_imag(eigenval) * VL(i, eigenval) + lambda_real(eigenval) * VL(i, eigenval+1);
                for (unsigned j=0; j<N; ++j)
                  lhs(i) += A(j, i) * VL(j, eigenval+1); // Note: A(j,i), i.e. A^T is applied
              }

            lhs -= rhs;
            CPPUNIT_ASSERT_DOUBLES_EQUAL(/*expected=*/0., /*actual=*/lhs.l2_norm(), std::sqrt(TOLERANCE)*TOLERANCE);

            // libMesh::out << "lhs=" << std::endl;
            // lhs.print_scientific(libMesh::out, /*precision=*/15);
            //
            // libMesh::out << "rhs=" << std::endl;
            // rhs.print_scientific(libMesh::out, /*precision=*/15);

            // We'll skip the second member of the complex conjugate
            // pair.  If the first one worked, the second one should
            // as well...
            eigenval += 1;
          }
      }

    // Verify the right eigenvectors.
    // Test that the right eigenpairs are self-consistent by checking
    // A * v_j - lambda_j * v_j
    // Note that we have to handle real and complex eigenvalues
    // differently, since complex eigenvectors share their storage.
    for (unsigned eigenval=0; eigenval<N; ++eigenval)
      {
        // Real eigenvalue: the imaginary part is (numerically) zero.
        if (std::abs(lambda_imag(eigenval)) < TOLERANCE*TOLERANCE)
          {
            // remove print libMesh::out << "Checking eigenvalue: " << eigenval << std::endl;
            DenseVector<Real> lhs(N), rhs(N);
            for (unsigned i=0; i<N; ++i)
              {
                rhs(i) = lambda_real(eigenval) * VR(i, eigenval);
                for (unsigned j=0; j<N; ++j)
                  lhs(i) += A(i, j) * VR(j, eigenval);
              }

            // The residual A*v - lambda*v must vanish up to tolerance.
            lhs -= rhs;
            CPPUNIT_ASSERT_DOUBLES_EQUAL(/*expected=*/0., /*actual=*/lhs.l2_norm(), std::sqrt(TOLERANCE)*TOLERANCE);
          }
        else
          {
            // This is a complex eigenvalue, so:
            // a.) It occurs in a complex-conjugate pair
            // b.) the real part of the eigenvector is stored in VR(:,eigenval)
            // c.) the imag part of the eigenvector is stored in VR(:,eigenval+1)
            //
            // Equating the real and imaginary parts of Ax=lambda*x leads to two sets
            // of relations that must hold:
            // 1.) Ax_r = lambda_r*x_r - lambda_i*x_i
            // 2.) Ax_i = lambda_i*x_r + lambda_r*x_i
            // which we can verify.

            // 1.)
            DenseVector<Real> lhs(N), rhs(N);
            for (unsigned i=0; i<N; ++i)
              {
                rhs(i) = lambda_real(eigenval) * VR(i, eigenval) - lambda_imag(eigenval) * VR(i, eigenval+1);
                for (unsigned j=0; j<N; ++j)
                  lhs(i) += A(i, j) * VR(j, eigenval);
              }

            lhs -= rhs;
            CPPUNIT_ASSERT_DOUBLES_EQUAL(/*expected=*/0., /*actual=*/lhs.l2_norm(), std::sqrt(TOLERANCE)*TOLERANCE);

            // 2.)
            lhs.zero();
            rhs.zero();
            for (unsigned i=0; i<N; ++i)
              {
                rhs(i) = lambda_imag(eigenval) * VR(i, eigenval) + lambda_real(eigenval) * VR(i, eigenval+1);
                for (unsigned j=0; j<N; ++j)
                  lhs(i) += A(i, j) * VR(j, eigenval+1);
              }

            lhs -= rhs;
            CPPUNIT_ASSERT_DOUBLES_EQUAL(/*expected=*/0., /*actual=*/lhs.l2_norm(), std::sqrt(TOLERANCE)*TOLERANCE);

            // We'll skip the second member of the complex conjugate
            // pair.  If the first one worked, the second one should
            // as well...
            eigenval += 1;
          }
      }

    // Sort the results from Lapack *individually*, i.e. real and
    // imaginary parts are sorted independently of each other.
    std::sort(lambda_real.get_values().begin(), lambda_real.get_values().end());
    std::sort(lambda_imag.get_values().begin(), lambda_imag.get_values().end());

    // Sort the true eigenvalues *individually*.
    std::sort(true_lambda_real.begin(), true_lambda_real.end());
    std::sort(true_lambda_imag.begin(), true_lambda_imag.end());

    // Compare the individually-sorted values.
    // NOTE(review): the expected vectors are indexed up to
    // lambda_real.size() without a size check, so callers must pass at
    // least that many expected values.
    for (unsigned i=0; i<lambda_real.size(); ++i)
      {
        // Note: I initially verified the results with TOLERANCE**2,
        // but that turned out to be just a bit too tight for some of
        // the test problems.  I'm not sure what controls the accuracy
        // of the eigenvalue computation in LAPACK, there is no way to
        // set a tolerance in the LAPACKgeev_ interface.
        CPPUNIT_ASSERT_DOUBLES_EQUAL(/*expected=*/true_lambda_real[i], /*actual=*/lambda_real(i), std::sqrt(TOLERANCE)*TOLERANCE);
        CPPUNIT_ASSERT_DOUBLES_EQUAL(/*expected=*/true_lambda_imag[i], /*actual=*/lambda_imag(i), std::sqrt(TOLERANCE)*TOLERANCE);
      }
#endif
  }
Esempio n. 16
0
void DenseMatrix<T>::_multiply_blas(const DenseMatrixBase<T>& other,
                                    _BLAS_Multiply_Flag flag)
{
  // Multiplies *this by 'other' through BLASgemm_ and stores the
  // product back into *this.  'flag' selects which of the four
  // products is formed:
  //   LEFT_MULTIPLY             other   * this
  //   RIGHT_MULTIPLY            this    * other
  //   LEFT_MULTIPLY_TRANSPOSE   other^T * this
  //   RIGHT_MULTIPLY_TRANSPOSE  this    * other^T
  // 'other' must actually be a DenseMatrix<T>.  libmesh_error() is
  // called for an unknown flag or for incompatible dimensions.

  int result_size = 0;

  // For each case, determine the size of the final result and whether
  // the inner dimensions match.  Every case ends in its own 'break':
  // falling through into the next case after a failed check would let
  // an incompatible pair be accepted by a *different* flag's check
  // (with that flag's result_size) and reach BLAS with inconsistent
  // dimensions.
  bool dims_compatible = false;

  switch (flag)
    {
    case LEFT_MULTIPLY:
      {
        result_size     = other.m() * this->n();
        dims_compatible = (other.n() == this->m());
        break;
      }
    case RIGHT_MULTIPLY:
      {
        result_size     = other.n() * this->m();
        dims_compatible = (other.m() == this->n());
        break;
      }
    case LEFT_MULTIPLY_TRANSPOSE:
      {
        result_size     = other.n() * this->n();
        dims_compatible = (other.m() == this->m());
        break;
      }
    case RIGHT_MULTIPLY_TRANSPOSE:
      {
        result_size     = other.m() * this->m();
        dims_compatible = (other.n() == this->n());
        break;
      }
    default:
      {
        // Unknown flag: dims_compatible stays false and we error out below.
        break;
      }
    }

  if (!dims_compatible)
    {
      libMesh::out << "Unknown flag selected or matrices are ";
      libMesh::out << "incompatible for multiplication." << std::endl;
      libmesh_error();
    }

  // For this to work, the passed arg. must actually be a DenseMatrix<T>
  const DenseMatrix<T>* const_that = libmesh_cast_ptr< const DenseMatrix<T>* >(&other);

  // Also, although 'that' is logically const in this BLAS routine,
  // the PETSc BLAS interface does not specify that any of the inputs are
  // const.  To use it, I must cast away const-ness.
  DenseMatrix<T>* that = const_cast< DenseMatrix<T>* > (const_that);

  // Initialize A, B pointers for LEFT_MULTIPLY* cases
  DenseMatrix<T>
    *A = this,
    *B = that;

  // For RIGHT_MULTIPLY* cases, swap the meaning of A and B.
  // Here is a full table of combinations we can pass to BLASgemm, and what the answer is when finished:
  // pass A B   -> (Fortran) -> A^T B^T -> (C++) -> (A^T B^T)^T -> (identity) -> B A   "lt multiply"
  // pass B A   -> (Fortran) -> B^T A^T -> (C++) -> (B^T A^T)^T -> (identity) -> A B   "rt multiply"
  // pass A B^T -> (Fortran) -> A^T B   -> (C++) -> (A^T B)^T   -> (identity) -> B^T A "lt multiply t"
  // pass B^T A -> (Fortran) -> B A^T   -> (C++) -> (B A^T)^T   -> (identity) -> A B^T "rt multiply t"
  if (flag==RIGHT_MULTIPLY || flag==RIGHT_MULTIPLY_TRANSPOSE)
    std::swap(A,B);

  // transa, transb values to pass to blas
  char
    transa[] = "n",
    transb[] = "n";

  // Integer values to pass to BLAS:
  //
  // M
  // In Fortran, the number of rows of op(A),
  // In the BLAS documentation, typically known as 'M'.
  //
  // In C/C++, we set:
  // M = n_cols(A) if (transa='n')
  //     n_rows(A) if (transa='t')
  int M = static_cast<int>( A->n() );

  // N
  // In Fortran, the number of cols of op(B), and also the number of cols of C.
  // In the BLAS documentation, typically known as 'N'.
  //
  // In C/C++, we set:
  // N = n_rows(B) if (transb='n')
  //     n_cols(B) if (transb='t')
  int N = static_cast<int>( B->m() );

  // K
  // In Fortran, the number of cols of op(A), and also
  // the number of rows of op(B). In the BLAS documentation,
  // typically known as 'K'.
  //
  // In C/C++, we set:
  // K = n_rows(A) if (transa='n')
  //     n_cols(A) if (transa='t')
  int K = static_cast<int>( A->m() );

  // LDA (leading dimension of A). In our cases,
  // LDA is always the number of columns of A.
  int LDA = static_cast<int>( A->n() );

  // LDB (leading dimension of B).  In our cases,
  // LDB is always the number of columns of B.
  int LDB = static_cast<int>( B->n() );

  if (flag == LEFT_MULTIPLY_TRANSPOSE)
    {
      transb[0] = 't';
      N = static_cast<int>( B->n() );
    }

  else if (flag == RIGHT_MULTIPLY_TRANSPOSE)
    {
      transa[0] = 't';
      std::swap(M,K);
    }

  // LDC (leading dimension of C).  LDC is the
  // number of columns in the solution matrix.
  int LDC = M;

  // Scalar values to pass to BLAS
  //
  // scalar multiplying the whole product AB
  T alpha = 1.;

  // scalar multiplying C, which is the original matrix.
  T beta  = 0.;

  // Storage for the result
  std::vector<T> result (result_size);

  // Finally ready to call the BLAS
  BLASgemm_(transa, transb, &M, &N, &K, &alpha, &(A->_val[0]), &LDA, &(B->_val[0]), &LDB, &beta, &result[0], &LDC);

  // Update the relevant dimension for this matrix.
  switch (flag)
    {
    case LEFT_MULTIPLY:            { this->_m = other.m(); break; }
    case RIGHT_MULTIPLY:           { this->_n = other.n(); break; }
    case LEFT_MULTIPLY_TRANSPOSE:  { this->_m = other.n(); break; }
    case RIGHT_MULTIPLY_TRANSPOSE: { this->_n = other.m(); break; }
    default:
      {
        // Not reachable after the validation above; kept as a safeguard.
        libMesh::out << "Unknown flag selected." << std::endl;
        libmesh_error();
      }
    }

  // Swap my data vector with the result
  this->_val.swap(result);
}