template <typename T>
void DenseMatrix<T>::_matvec_blas(T alpha, T beta,
                                  DenseVector<T>& dest,
                                  const DenseVector<T>& arg,
                                  bool trans) const
{
  // Ensure that dest and arg sizes are compatible
  if (!trans)
    {
      // dest  ~ A     * arg
      // (mx1)   (mxn) * (nx1)
      if ((dest.size() != this->m()) || (arg.size() != this->n()))
        {
          libMesh::out << "Improper input argument sizes!" << std::endl;
          libmesh_error();
        }
    }
  else // trans == true
    {
      // Ensure that dest and arg are proper size
      // dest  ~ A^T   * arg
      // (nx1)   (nxm) * (mx1)
      if ((dest.size() != this->n()) || (arg.size() != this->m()))
        {
          libMesh::out << "Improper input argument sizes!" << std::endl;
          libmesh_error();
        }
    }

  // Calling sequence for dgemv:
  //
  // dgemv(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)

  // TRANS - CHARACTER*1, 't' for transpose, 'n' for non-transpose multiply
  // We store everything in row-major order, so pass the transpose flag for
  // non-transposed matvecs and the 'n' flag for transposed matvecs.
  char TRANS[] = "t";
  if (trans)
    TRANS[0] = 'n';

  // M - INTEGER.
  //     On entry, M specifies the number of rows of the matrix A.
  // In C/C++, pass the number of *cols* of A
  int M = this->n();

  // N - INTEGER.
  //     On entry, N specifies the number of columns of the matrix A.
  // In C/C++, pass the number of *rows* of A
  int N = this->m();

  // ALPHA - DOUBLE PRECISION.
  // The scalar constant passed to this function

  // A - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
  //     Before entry, the leading m by n part of the array A must
  //     contain the matrix of coefficients.
  // The matrix, *this.  Note that _matvec_blas is called from
  // a const function, vector_mult(), and so we have made this function const
  // as well.  Since BLAS knows nothing about const, we have to cast it away
  // now.
  DenseMatrix<T>& a_ref = const_cast< DenseMatrix<T>& > ( *this );
  std::vector<T>& a = a_ref.get_values();

  // LDA - INTEGER.
  //       On entry, LDA specifies the first dimension of A as declared
  //       in the calling (sub) program.  LDA must be at least
  //       max( 1, m ).
  int LDA = M;

  // X - DOUBLE PRECISION array of DIMENSION at least
  //     ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
  //     and at least
  //     ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
  //     Before entry, the incremented array X must contain the
  //     vector x.
  // Here, we must cast away the const-ness of "arg" since BLAS knows
  // nothing about const.
  DenseVector<T>& x_ref = const_cast< DenseVector<T>& > ( arg );
  std::vector<T>& x = x_ref.get_values();

  // INCX - INTEGER.
  //        On entry, INCX specifies the increment for the elements of
  //        X.  INCX must not be zero.
  int INCX = 1;

  // BETA - DOUBLE PRECISION.
  //        On entry, BETA specifies the scalar beta.  When BETA is
  //        supplied as zero then Y need not be set on input.
  // The second scalar constant passed to this function

  // Y - DOUBLE PRECISION array of DIMENSION at least
  //     ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
  //     and at least
  //     ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
  //     Before entry with BETA non-zero, the incremented array Y
  //     must contain the vector y.  On exit, Y is overwritten by the
  //     updated vector y.
  // The input/output vector "dest"
  std::vector<T>& y = dest.get_values();

  // INCY - INTEGER.
  //        On entry, INCY specifies the increment for the elements of
  //        Y.  INCY must not be zero.
  int INCY = 1;

  // Finally, ready to call the BLAS function
  BLASgemv_(TRANS, &M, &N, &alpha, &(a[0]), &LDA, &(x[0]), &INCX, &beta, &(y[0]), &INCY);
}
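// Illustrative sketch (not part of libMesh) of the row-major/column-major
// trick used above: because dgemv sees our row-major values as the transpose,
// requesting the 't' product reproduces A*x.  The helper name, the 2x3 sample
// matrix, and the assumption that vector_mult() forwards to
// _matvec_blas(1, 0, dest, arg) with BLAS enabled are illustrative only;
// vector_mult() itself is the const caller named in the comments above.
template <typename T>
void example_matvec_usage ()
{
  // A = [1 2 3; 4 5 6], x = (1, 1, 1)
  DenseMatrix<T> A(2,3);
  A(0,0) = 1; A(0,1) = 2; A(0,2) = 3;
  A(1,0) = 4; A(1,1) = 5; A(1,2) = 6;

  DenseVector<T> x(3), y(2);
  x(0) = 1; x(1) = 1; x(2) = 1;

  // Computes y = A*x = (6, 15); dispatches to the BLAS path when available.
  A.vector_mult(y, x);
}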
template <typename T>
void DenseMatrix<T>::_evd_lapack (DenseVector<T> & lambda_real,
                                  DenseVector<T> & lambda_imag)
{
  // The calling sequence for dgeevx is:
  // DGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI, VL, LDVL, VR,
  //         LDVR, ILO, IHI, SCALE, ABNRM, RCONDE, RCONDV, WORK, LWORK, IWORK, INFO )

  // BALANC (input) CHARACTER*1
  //        Indicates how the input matrix should be diagonally scaled
  //        and/or permuted to improve the conditioning of its
  //        eigenvalues.
  //        = 'N': Do not diagonally scale or permute;
  char BALANC = 'N';

  // JOBVL  (input) CHARACTER*1
  //        = 'N': left eigenvectors of A are not computed;
  //        = 'V': left eigenvectors of A are computed.
  char JOBVL = 'N';

  // JOBVR  (input) CHARACTER*1
  //        = 'N': right eigenvectors of A are not computed;
  //        = 'V': right eigenvectors of A are computed.
  char JOBVR = 'N';

  // SENSE  (input) CHARACTER*1
  //        Determines which reciprocal condition numbers are computed.
  //        = 'N': None are computed;
  //        = 'E': Computed for eigenvalues only;
  //        = 'V': Computed for right eigenvectors only;
  //        = 'B': Computed for eigenvalues and right eigenvectors.
  char SENSE = 'N';

  // N      (input) int *
  //        The number of rows/cols of the matrix A.  N >= 0.
  libmesh_assert( this->m() == this->n() );
  int N = this->m();

  // A      (input/output) DOUBLE PRECISION array, dimension (LDA,N)
  //        On entry, the N-by-N matrix A.
  //        On exit, A has been overwritten.
  // Here, we pass &(_val[0]).

  // LDA    (input) int *
  //        The leading dimension of the array A.  LDA >= max(1,N).
  int LDA = N;

  // WR     (output) DOUBLE PRECISION array, dimension (N)
  // WI     (output) DOUBLE PRECISION array, dimension (N)
  //        WR and WI contain the real and imaginary parts,
  //        respectively, of the computed eigenvalues.  Complex
  //        conjugate pairs of eigenvalues appear consecutively
  //        with the eigenvalue having the positive imaginary part
  //        first.
  lambda_real.resize(N);
  lambda_imag.resize(N);

  // VL     (output) DOUBLE PRECISION array, dimension (LDVL,N)
  //        If JOBVL = 'V', the left eigenvectors u(j) are stored one
  //        after another in the columns of VL, in the same order
  //        as their eigenvalues.
  //        If JOBVL = 'N', VL is not referenced.
  //        If the j-th eigenvalue is real, then u(j) = VL(:,j),
  //        the j-th column of VL.
  //        If the j-th and (j+1)-st eigenvalues form a complex
  //        conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and
  //        u(j+1) = VL(:,j) - i*VL(:,j+1).
  // Just set to NULL here.

  // LDVL   (input) INTEGER
  //        The leading dimension of the array VL.  LDVL >= 1; if
  //        JOBVL = 'V', LDVL >= N.
  int LDVL = 1;

  // VR     (output) DOUBLE PRECISION array, dimension (LDVR,N)
  //        If JOBVR = 'V', the right eigenvectors v(j) are stored one
  //        after another in the columns of VR, in the same order
  //        as their eigenvalues.
  //        If JOBVR = 'N', VR is not referenced.
  //        If the j-th eigenvalue is real, then v(j) = VR(:,j),
  //        the j-th column of VR.
  //        If the j-th and (j+1)-st eigenvalues form a complex
  //        conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and
  //        v(j+1) = VR(:,j) - i*VR(:,j+1).
  // Just set to NULL here.

  // LDVR   (input) INTEGER
  //        The leading dimension of the array VR.  LDVR >= 1; if
  //        JOBVR = 'V', LDVR >= N.
  int LDVR = 1;

  // Outputs (unused)
  int ILO = 0;
  int IHI = 0;
  std::vector<T> SCALE(N);
  T ABNRM;
  std::vector<T> RCONDE(N);
  std::vector<T> RCONDV(N);

  // WORK   (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
  //        On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
  //
  // LWORK  (input) INTEGER
  //        The dimension of the array WORK.  With SENSE = 'N' and no
  //        eigenvectors requested, LWORK >= max(1,2*N); 3*N is comfortably
  //        large enough.
  int LWORK = 3*N;
  std::vector<T> WORK( LWORK );

  // IWORK  (workspace) INTEGER array, dimension (2*N-2)
  //        If SENSE = 'N' or 'E', not referenced.
  // Just set to NULL

  // INFO   (output) INTEGER
  //        = 0:  successful exit
  //        < 0:  if INFO = -i, the i-th argument had an illegal value.
  //        > 0:  if INFO = i, the QR algorithm failed to compute all the
  //              eigenvalues, and no eigenvectors or condition numbers
  //              have been computed; elements 1:ILO-1 and i+1:N of WR
  //              and WI contain eigenvalues which have converged.
  int INFO = 0;

  // Get references to raw data
  std::vector<T> & lambda_real_val = lambda_real.get_values();
  std::vector<T> & lambda_imag_val = lambda_imag.get_values();

  // Ready to call the actual eigenvalue routine through SLEPc's interface
  LAPACKgeevx_( &BALANC, &JOBVL, &JOBVR, &SENSE, &N, &(_val[0]), &LDA,
                &lambda_real_val[0], &lambda_imag_val[0],
                libmesh_nullptr, &LDVL, libmesh_nullptr, &LDVR,
                &ILO, &IHI, &SCALE[0], &ABNRM, &RCONDE[0], &RCONDV[0],
                &WORK[0], &LWORK, libmesh_nullptr, &INFO );

  // Check return value for errors
  if (INFO != 0)
    libmesh_error_msg("INFO=" << INFO << ", Error during Lapack eigenvalue calculation!");
}
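// A quick way to sanity-check the WR/WI convention documented above.  This
// sketch is illustrative only: the public evd() wrapper name and the sample
// matrix are assumptions, not taken from this file, which only defines the
// private _evd_lapack backend.
template <typename T>
void example_evd_usage ()
{
  // [0 -1; 1 0] has the complex conjugate eigenvalue pair +i, -i.
  DenseMatrix<T> A(2,2);
  A(0,0) = 0; A(0,1) = -1;
  A(1,0) = 1; A(1,1) =  0;

  DenseVector<T> lambda_real, lambda_imag;
  A.evd(lambda_real, lambda_imag);

  // Per the WR/WI documentation, the conjugate pair appears consecutively
  // with the positive imaginary part first:
  //   lambda_real = (0, 0),  lambda_imag = (1, -1)
}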
template <typename T>
void DenseMatrix<T>::_lu_back_substitute_lapack (const DenseVector<T>& b,
                                                 DenseVector<T>& x)
{
  // The calling sequence for getrs is:
  // dgetrs(TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO)

  // trans (input) char*
  //       'n' for no transpose, 't' for transpose
  // Since our data is stored in row-major order, Lapack sees the transpose of
  // the matrix we factored, so we request the transposed solve to recover
  // the solution of A*x = b.
  char TRANS[] = "t";

  // N (input) int*
  //   The order of the matrix A.  N >= 0.
  int N = this->m();

  // NRHS (input) int*
  //      The number of right hand sides, i.e., the number of columns
  //      of the matrix B.  NRHS >= 0.
  int NRHS = 1;

  // A (input) DOUBLE PRECISION array, dimension (LDA,N)
  //   The factors L and U from the factorization A = P*L*U
  //   as computed by dgetrf.
  // Here, we pass &(_val[0])

  // LDA (input) int*
  //     The leading dimension of the array A.  LDA >= max(1,N).
  int LDA = N;

  // ipiv (input) int array, dimension (N)
  //      The pivot indices from DGETRF; for 1<=i<=N, row i of the
  //      matrix was interchanged with row IPIV(i).
  // Here, we pass &(_pivots[0]) which was computed in _lu_decompose_lapack

  // B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
  //   On entry, the right hand side matrix B.
  //   On exit, the solution matrix X.
  // Here, we pass a copy of the rhs vector's data array in x, so that the
  // passed right-hand side b is unmodified.  I don't see a way around this
  // copy if we want to maintain an unmodified rhs in LibMesh.
  x = b;
  std::vector<T>& x_vec = x.get_values();

  // We can avoid the copy if we don't care about overwriting the RHS: just
  // pass b to the Lapack routine and then swap with x before exiting
  // std::vector<T>& x_vec = b.get_values();

  // LDB (input) int*
  //     The leading dimension of the array B.  LDB >= max(1,N).
  int LDB = N;

  // INFO (output) int*
  //      = 0:  successful exit
  //      < 0:  if INFO = -i, the i-th argument had an illegal value
  int INFO = 0;

  // Finally, ready to call the Lapack getrs function
  LAPACKgetrs_(TRANS, &N, &NRHS, &(_val[0]), &LDA, &(_pivots[0]), &(x_vec[0]), &LDB, &INFO);

  // Check return value for errors
  if (INFO != 0)
    {
      libMesh::out << "INFO=" << INFO << ", Error during Lapack LU solve!" << std::endl;
      libmesh_error();
    }

  // Don't do this if you already made a copy of b above.
  // Swap b and x.  The solution will then be in x, and whatever was originally
  // in x, maybe garbage, maybe nothing, will be in b.
  // FIXME: Rewrite the LU and Cholesky solves to just take one input, and overwrite
  // the input.  This *should* make user code simpler, as they don't have to create
  // an extra vector just to pass it in to the solve function!
  // b.swap(x);
}
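// Illustrative sketch of the solve path this back-substitution supports.  The
// lu_solve() wrapper name and the 2x2 system are assumptions made for
// demonstration; with Lapack support enabled the factorization is expected to
// be done by _lu_decompose_lapack (getrf) and the solve by the routine above.
template <typename T>
void example_lu_solve_usage ()
{
  // Solve A*x = b for A = [2 1; 1 3], b = (3, 4); the exact solution is (1, 1).
  DenseMatrix<T> A(2,2);
  A(0,0) = 2; A(0,1) = 1;
  A(1,0) = 1; A(1,1) = 3;

  DenseVector<T> b(2), x;
  b(0) = 3; b(1) = 4;

  // b is left unmodified, per the copy made inside _lu_back_substitute_lapack.
  A.lu_solve(b, x);
}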
template <typename T>
void DenseMatrix<T>::_svd_solve_lapack(const DenseVector<T> & rhs,
                                       DenseVector<T> & x,
                                       Real rcond) const
{
  // Since Lapack is expecting column-major storage, we first need to
  // make a transposed copy of *this, then pass it to the gelss
  // routine instead of the original.  This extra copy is kind of a
  // bummer, it might be better if we could use the full SVD to
  // compute the least-squares solution instead...  Note that it isn't
  // completely terrible either, since A_trans gets overwritten by
  // Lapack, and we usually would end up making a copy of A outside
  // the function call anyway.
  DenseMatrix<T> A_trans;
  this->get_transpose(A_trans);

  // M is INTEGER
  //   The number of rows of the input matrix.  M >= 0.
  // This is actually the number of *columns* of A_trans.
  int M = A_trans.n();

  // N is INTEGER
  //   The number of columns of the matrix A.  N >= 0.
  // This is actually the number of *rows* of A_trans.
  int N = A_trans.m();

  // We'll use the min and max of (M,N) several times below.
  int max_MN = std::max(M,N);
  int min_MN = std::min(M,N);

  // NRHS is INTEGER
  //   The number of right hand sides, i.e., the number of columns
  //   of the matrices B and X.  NRHS >= 0.
  // This could later be generalized to solve for multiple right-hand
  // sides...
  int NRHS = 1;

  // A is DOUBLE PRECISION array, dimension (LDA,N)
  //   On entry, the M-by-N matrix A.
  //   On exit, the first min(m,n) rows of A are overwritten with
  //   its right singular vectors, stored rowwise.
  //
  // The data vector that will be passed to Lapack.
  std::vector<T> & A_trans_vals = A_trans.get_values();

  // LDA is INTEGER
  //   The leading dimension of the array A.  LDA >= max(1,M).
  int LDA = M;

  // B is DOUBLE PRECISION array, dimension (LDB,NRHS)
  //   On entry, the M-by-NRHS right hand side matrix B.
  //   On exit, B is overwritten by the N-by-NRHS solution
  //   matrix X.  If m >= n and RANK = n, the residual
  //   sum-of-squares for the solution in the i-th column is given
  //   by the sum of squares of elements n+1:m in that column.
  //
  // Since we don't want the user's rhs vector to be overwritten by
  // the solution, we copy the rhs values into the solution vector "x"
  // now.  x needs to be long enough to hold either the (Nx1) solution
  // vector or the (Mx1) rhs, so size it to the max of those.
  x.resize(max_MN);
  for (unsigned i=0; i<rhs.size(); ++i)
    x(i) = rhs(i);

  // Make the syntax below simpler by grabbing a reference to this array.
  std::vector<T> & B = x.get_values();

  // LDB is INTEGER
  //   The leading dimension of the array B.  LDB >= max(1,max(M,N)).
  int LDB = x.size();

  // S is DOUBLE PRECISION array, dimension (min(M,N))
  //   The singular values of A in decreasing order.
  //   The condition number of A in the 2-norm = S(1)/S(min(m,n)).
  std::vector<T> S(min_MN);

  // RCOND is DOUBLE PRECISION
  //   RCOND is used to determine the effective rank of A.
  //   Singular values S(i) <= RCOND*S(1) are treated as zero.
  //   If RCOND < 0, machine precision is used instead.
  Real RCOND = rcond;

  // RANK is INTEGER
  //   The effective rank of A, i.e., the number of singular values
  //   which are greater than RCOND*S(1).
  int RANK = 0;

  // LWORK is INTEGER
  //   The dimension of the array WORK.  LWORK >= 1, and also:
  //   LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
  //   For good performance, LWORK should generally be larger.
  //
  //   If LWORK = -1, then a workspace query is assumed; the routine
  //   only calculates the optimal size of the WORK array, returns
  //   this value as the first entry of the WORK array, and no error
  //   message related to LWORK is issued by XERBLA.
  //
  // The factor of 1.5 is arbitrary and is used to satisfy the "should
  // generally be larger" clause.
  int LWORK = 1.5 * (3*min_MN + std::max(2*min_MN, std::max(max_MN, NRHS)));

  // WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
  //   On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
  std::vector<T> WORK(LWORK);

  // INFO is INTEGER
  //   = 0:  successful exit
  //   < 0:  if INFO = -i, the i-th argument had an illegal value.
  //   > 0:  the algorithm for computing the SVD failed to converge;
  //         if INFO = i, i off-diagonal elements of an intermediate
  //         bidiagonal form did not converge to zero.
  int INFO = 0;

  // LAPACKgelss_(const PetscBLASInt *, // M
  //              const PetscBLASInt *, // N
  //              const PetscBLASInt *, // NRHS
  //              PetscScalar *,        // A
  //              const PetscBLASInt *, // LDA
  //              PetscScalar *,        // B
  //              const PetscBLASInt *, // LDB
  //              PetscReal *,          // S(out) = singular values of A in decreasing order
  //              const PetscReal *,    // RCOND = tolerance for singular values
  //              PetscBLASInt *,       // RANK(out) = number of "non-zero" singular values
  //              PetscScalar *,        // WORK
  //              const PetscBLASInt *, // LWORK
  //              PetscBLASInt *);      // INFO
  LAPACKgelss_(&M, &N, &NRHS, &A_trans_vals[0], &LDA, &B[0], &LDB, &S[0],
               &RCOND, &RANK, &WORK[0], &LWORK, &INFO);

  // Check for errors in the Lapack call
  if (INFO < 0)
    libmesh_error_msg("Error, argument " << -INFO << " to LAPACKgelss_ had an illegal value.");
  if (INFO > 0)
    libmesh_error_msg("The algorithm for computing the SVD failed to converge!");

  // Debugging: print singular values and information about condition number:
  // libMesh::err << "RCOND=" << RCOND << std::endl;
  // libMesh::err << "Singular values: " << std::endl;
  // for (unsigned i=0; i<S.size(); ++i)
  //   libMesh::err << S[i] << std::endl;
  // libMesh::err << "The condition number of A is approximately: " << S[0]/S.back() << std::endl;

  // Lapack has already written the solution into B, but it will be
  // the wrong size for non-square problems, so we need to resize it
  // correctly.  The size of the solution vector should be the number
  // of columns of the original A matrix.  Unfortunately, resizing a
  // DenseVector currently also zeros it out (unlike a std::vector) so
  // we'll resize the underlying storage directly (the size is not
  // stored independently elsewhere).
  x.get_values().resize(this->n());
}
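// Illustrative sketch of the least-squares use case handled above.  The
// svd_solve() wrapper name, its rcond parameter, and the 3x2 system are
// assumptions for demonstration only; the backend resizes x to n() entries as
// described in the final comment above.
template <typename T>
void example_svd_solve_usage ()
{
  // Overdetermined system: A = [1 0; 1 1; 1 2], b = (1, 2, 3).
  // The least-squares solution (here an exact fit) is x = (1, 1).
  DenseMatrix<T> A(3,2);
  A(0,0) = 1; A(0,1) = 0;
  A(1,0) = 1; A(1,1) = 1;
  A(2,0) = 1; A(2,1) = 2;

  DenseVector<T> b(3), x;
  b(0) = 1; b(1) = 2; b(2) = 3;

  // A negative rcond asks gelss to use machine precision as the rank tolerance.
  A.svd_solve(b, x, /*rcond=*/-1.);   // on exit, x.size() == 2
}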