void DenseMatrixBase::submatrixAssign(const DenseMatrixBase& mat, unsigned int startRow, unsigned int startCol, unsigned int numRows, unsigned int numCols) { cadet_assert(numRows == mat.rows()); cadet_assert(numCols == mat.columns()); cadet_assert(_rows > startRow); cadet_assert(_cols > startCol); cadet_assert(_rows >= startRow + numRows); cadet_assert(_cols >= startCol + numCols); double* const ptrDest = _data + startRow * stride() + startCol; double const* const ptrSrc = mat.data(); for (unsigned int i = 0; i < numRows; ++i) { for (unsigned int j = 0; j < numCols; ++j) { ptrDest[i * stride() + j] = ptrSrc[i * mat.stride() + j]; } } }
/**
 * Multiplies this matrix from the right by \p M3 and stores the
 * product in this matrix.  Uses the BLAS-backed routine when
 * \p use_blas_lapack is set, and the generic multiply() otherwise.
 */
void DenseMatrix<T>::right_multiply (const DenseMatrixBase<T>& M3)
{
  // BLAS path: delegate the whole operation and we are done.
  if (this->use_blas_lapack)
    {
      this->_multiply_blas(M3, RIGHT_MULTIPLY);
      return;
    }

  // Generic path: (*this) <- (*this) * M3
  // Where:
  //   old (*this) = (m x p), kept in a copy because *this is resized,
  //   M3          = (p x n),
  //   new (*this) = (m x n)
  const DenseMatrix<T> lhs_copy(*this);

  // Resize *this so that the result can fit.
  // multiply() accumulates into its first argument, so this relies on
  // resize() leaving the entries zeroed -- NOTE(review): confirm.
  this->resize (lhs_copy.m(), M3.n());

  // Call the multiply function in the base class
  this->multiply(*this, lhs_copy, M3);
}
/**
 * Multiplies this matrix from the left by \p M2 and stores the
 * product in this matrix.  Uses the BLAS-backed routine when
 * \p use_blas_lapack is set, and the generic multiply() otherwise.
 */
void DenseMatrix<T>::left_multiply (const DenseMatrixBase<T>& M2)
{
  // BLAS path: delegate the whole operation and we are done.
  if (this->use_blas_lapack)
    {
      this->_multiply_blas(M2, LEFT_MULTIPLY);
      return;
    }

  // Generic path: (*this) <- M2 * (*this)
  // Where:
  //   M2          = (m x p),
  //   old (*this) = (p x n), kept in a copy because *this is resized,
  //   new (*this) = (m x n)
  const DenseMatrix<T> rhs_copy(*this);

  // Resize *this so that the result can fit.
  // multiply() accumulates into its first argument, so this relies on
  // resize() leaving the entries zeroed -- NOTE(review): confirm.
  this->resize (M2.m(), rhs_copy.n());

  // Call the multiply function in the base class
  this->multiply(*this, M2, rhs_copy);
}
/**
 * Computes one of four matrix products with BLAS gemm and stores the
 * result in this matrix, selected by \p flag:
 *   LEFT_MULTIPLY:            (*this) <- other   * (*this)
 *   RIGHT_MULTIPLY:           (*this) <- (*this) * other
 *   LEFT_MULTIPLY_TRANSPOSE:  (*this) <- other^T * (*this)
 *   RIGHT_MULTIPLY_TRANSPOSE: (*this) <- (*this) * other^T
 *
 * \p other must actually be a DenseMatrix<T> (it is down-cast with
 * libmesh_cast_ptr).  If \p flag is unknown or the inner dimensions do
 * not match for the requested operation, an error message is printed
 * and libmesh_error() is invoked.
 */
void DenseMatrix<T>::_multiply_blas(const DenseMatrixBase<T>& other,
                                    _BLAS_Multiply_Flag flag)
{
  int result_size = 0;

  // For each case, determine the size of the final result and record
  // whether the inner dimensions match.  Every case ends in an
  // unconditional break: a failed check must NOT fall through into the
  // next case, where an unrelated check could succeed and let us call
  // BLAS with incompatible matrices and a wrong result_size.
  bool compatible = false;

  switch (flag)
    {
    case LEFT_MULTIPLY:
      {
        result_size = other.m() * this->n();
        compatible  = (other.n() == this->m());
        break;
      }
    case RIGHT_MULTIPLY:
      {
        result_size = other.n() * this->m();
        compatible  = (other.m() == this->n());
        break;
      }
    case LEFT_MULTIPLY_TRANSPOSE:
      {
        result_size = other.n() * this->n();
        compatible  = (other.m() == this->m());
        break;
      }
    case RIGHT_MULTIPLY_TRANSPOSE:
      {
        result_size = other.m() * this->m();
        compatible  = (other.n() == this->n());
        break;
      }
    default:
      {
        // Unknown flag: leave 'compatible' false so we error out below.
        break;
      }
    }

  if (!compatible)
    {
      libMesh::out << "Unknown flag selected or matrices are ";
      libMesh::out << "incompatible for multiplication." << std::endl;
      libmesh_error();
    }

  // For this to work, the passed arg. must actually be a DenseMatrix<T>
  const DenseMatrix<T>* const_that = libmesh_cast_ptr< const DenseMatrix<T>* >(&other);

  // Also, although 'that' is logically const in this BLAS routine,
  // the PETSc BLAS interface does not specify that any of the inputs are
  // const.  To use it, I must cast away const-ness.
  DenseMatrix<T>* that = const_cast< DenseMatrix<T>* > (const_that);

  // Initialize A, B pointers for LEFT_MULTIPLY* cases
  DenseMatrix<T> *A = this, *B = that;

  // For RIGHT_MULTIPLY* cases, swap the meaning of A and B.
  // Here is a full table of combinations we can pass to BLASgemm, and what the answer is when finished:
  // pass A B   -> (Fortran) -> A^T B^T -> (C++) -> (A^T B^T)^T -> (identity) -> B A   "lt multiply"
  // pass B A   -> (Fortran) -> B^T A^T -> (C++) -> (B^T A^T)^T -> (identity) -> A B   "rt multiply"
  // pass A B^T -> (Fortran) -> A^T B   -> (C++) -> (A^T B)^T   -> (identity) -> B^T A "lt multiply t"
  // pass B^T A -> (Fortran) -> B A^T   -> (C++) -> (B A^T)^T   -> (identity) -> A B^T "rt multiply t"
  if (flag==RIGHT_MULTIPLY || flag==RIGHT_MULTIPLY_TRANSPOSE)
    std::swap(A,B);

  // transa, transb values to pass to blas
  char
    transa[] = "n",
    transb[] = "n";

  // Integer values to pass to BLAS:
  //
  // M
  // In Fortran, the number of rows of op(A),
  // In the BLAS documentation, typically known as 'M'.
  //
  // In C/C++, we set:
  // M = n_cols(A) if (transa='n')
  //     n_rows(A) if (transa='t')
  int M = static_cast<int>( A->n() );

  // N
  // In Fortran, the number of cols of op(B), and also the number of cols of C.
  // In the BLAS documentation, typically known as 'N'.
  //
  // In C/C++, we set:
  // N = n_rows(B) if (transb='n')
  //     n_cols(B) if (transb='t')
  int N = static_cast<int>( B->m() );

  // K
  // In Fortran, the number of cols of op(A), and also
  // the number of rows of op(B). In the BLAS documentation,
  // typically known as 'K'.
  //
  // In C/C++, we set:
  // K = n_rows(A) if (transa='n')
  //     n_cols(A) if (transa='t')
  int K = static_cast<int>( A->m() );

  // LDA (leading dimension of A). In our cases,
  // LDA is always the number of columns of A.
  int LDA = static_cast<int>( A->n() );

  // LDB (leading dimension of B).  In our cases,
  // LDB is always the number of columns of B.
  int LDB = static_cast<int>( B->n() );

  if (flag == LEFT_MULTIPLY_TRANSPOSE)
    {
      transb[0] = 't';
      N = static_cast<int>( B->n() );
    }
  else if (flag == RIGHT_MULTIPLY_TRANSPOSE)
    {
      transa[0] = 't';
      std::swap(M,K);
    }

  // LDC (leading dimension of C).  LDC is the
  // number of columns in the solution matrix.
  int LDC = M;

  // Scalar values to pass to BLAS
  //
  // scalar multiplying the whole product AB
  T alpha = 1.;

  // scalar multiplying C, which is the original matrix.
  T beta  = 0.;

  // Storage for the result
  std::vector<T> result (result_size);

  // Finally ready to call the BLAS
  BLASgemm_(transa, transb, &M, &N, &K, &alpha, &(A->_val[0]), &LDA, &(B->_val[0]), &LDB, &beta, &result[0], &LDC);

  // Update the relevant dimension for this matrix.
  switch (flag)
    {
    case LEFT_MULTIPLY:
      {
        this->_m = other.m();
        break;
      }
    case RIGHT_MULTIPLY:
      {
        this->_n = other.n();
        break;
      }
    case LEFT_MULTIPLY_TRANSPOSE:
      {
        this->_m = other.n();
        break;
      }
    case RIGHT_MULTIPLY_TRANSPOSE:
      {
        this->_n = other.m();
        break;
      }
    default:
      {
        libMesh::out << "Unknown flag selected." << std::endl;
        libmesh_error();
      }
    }

  // Swap my data vector with the result
  this->_val.swap(result);
}
/**
 * Accumulates the product M2 * M3 into \p M1, i.e. performs
 * M1(i,j) += M2(i,k) * M3(k,j) for all i, j, k.  \p M1 is not cleared
 * first, so its existing entries are part of the result.
 *
 * Dimension requirements (checked by assertion):
 *   M1 = (m x n), M2 = (m x p), M3 = (p x n)
 */
void DenseMatrixBase<T>::multiply (DenseMatrixBase<T> & M1,
                                   const DenseMatrixBase<T> & M2,
                                   const DenseMatrixBase<T> & M3)
{
  // Assertions to make sure we have been
  // passed matrices of the correct dimension.
  libmesh_assert_equal_to (M1.m(), M2.m());
  libmesh_assert_equal_to (M1.n(), M3.n());
  libmesh_assert_equal_to (M2.n(), M3.m());

  const unsigned int n_rows  = M2.m();
  const unsigned int n_inner = M2.n();
  const unsigned int n_cols  = M1.n();

  // Loop over the entries of M3 on the outside so that zero entries
  // can be skipped wholesale: there is a decent chance (at least for
  // constraint matrices) that M3(k,j) = 0. when right-multiplying.
  for (unsigned int k=0; k<n_inner; k++)
    {
      for (unsigned int j=0; j<n_cols; j++)
        {
          const bool contributes = (M3.el(k,j) != 0.);
          if (!contributes)
            continue;

          // Add column k of M2, scaled by M3(k,j), to column j of M1.
          for (unsigned int i=0; i<n_rows; i++)
            M1.el(i,j) += M2.el(i,k) * M3.el(k,j);
        }
    }
}