/* Note: if layout==101 (row major), then this code is known to only work when * nmat == VLEN. To check for accuracy otherwise, transpose everything */ LIBXSMM_INLINE void compact_strsm_ ( unsigned int *layout, char *side, char *uplo, char *transa, char *diag, unsigned int *m, unsigned int *n, float *alpha, float *A, unsigned int *lda, float *B, unsigned int *ldb, unsigned int *nmat, unsigned int *VLEN ) { int i, j, num, asize; float *Ap, *Bp, Atemp[BUFSIZE], Btemp[BUFSIZE]; if ( (*side == 'L') || (*side == 'l') ) asize = *m; else asize = *n; for ( i = 0, num = 0 ; i < (int)(*nmat) ; i+= *VLEN, num++ ) { for ( j = 0 ; j < (int)*VLEN ; j++ ) { /* Unpack the data, call a reference DTRSM, repack the data */ Ap = &A[j+num*(*lda)*asize*(*VLEN)]; Bp = &B[j+num*(*ldb)*(*n)*(*VLEN)]; scopy_to_temp ( *layout, Ap, *lda, asize, asize, Atemp, *VLEN ); scopy_to_temp ( *layout, Bp, *ldb, *m, *n, Btemp, *VLEN ); strsm_ ( side, uplo, transa, diag, m, n, alpha, Atemp, &asize, Btemp, m); scopy_from_temp ( *layout, Bp, *ldb, *m, *n, Btemp, *VLEN ); } } }
void STARPU_STRSM (const char *side, const char *uplo, const char *transa, const char *diag, const int m, const int n, const float alpha, const float *A, const int lda, float *B, const int ldb) { strsm_(side, uplo, transa, diag, &m, &n, &alpha, A, &lda, B, &ldb); }
int f2c_strsm(char* side, char* uplo, char* trans, char* diag, integer* M, integer* N, real* alpha, real* A, integer* lda, real* B, integer* ldb) { strsm_(side, uplo, trans, diag, M, N, alpha, A, lda, B, ldb); return 0; }
// float void TriangularSolve(char *side, char *uplo, char *transa, char *diag, int *m, int *n, float *alpha, float *A, int *lda, float *B, int *ldb) { #ifndef RELEASE CallStackEntry entry("TriangularSolve"); if (m <= 0) throw std::logic_error("Invalid matrix height for triangular solve"); if (n <= 0) throw std::logic_error("Invalid matrix width for triangular solve"); #endif assert(m > 0 && n > 0); strsm_(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); }
GURLS_EXPORT void trsm(const CBLAS_SIDE Side, const CBLAS_UPLO Uplo, const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb) { char side = BlasUtils::charValue(Side); char uplo = BlasUtils::charValue(Uplo); char transA = BlasUtils::charValue(TransA); char diag = BlasUtils::charValue(Diag); strsm_(&side, &uplo, &transA, &diag, const_cast<int*>(&M), const_cast<int*>(&N), const_cast<float*>(&alpha), const_cast<float*>(A), const_cast<int*>(&lda), const_cast<float*>(B), const_cast<int*>(&ldb)); }
/* Subroutine */ int sgetri_(integer *n, real *a, integer *lda, integer *ipiv, real *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ integer i__, j, jb, nb, jj, jp, nn, iws, nbmin; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer *), strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); integer ldwork, lwkopt; logical lquery; extern /* Subroutine */ int strtri_(char *, char *, integer *, real *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SGETRI computes the inverse of a matrix using the LU factorization */ /* computed by SGETRF. */ /* This method inverts U and then computes inv(A) by solving the system */ /* inv(A)*L = inv(U) for inv(A). */ /* Arguments */ /* ========= */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) REAL array, dimension (LDA,N) */ /* On entry, the factors L and U from the factorization */ /* A = P*L*U as computed by SGETRF. */ /* On exit, if INFO = 0, the inverse of the original matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices from SGETRF; for 1<=i<=N, row i of the */ /* matrix was interchanged with row IPIV(i). */ /* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO=0, then WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. LWORK >= max(1,N). */ /* For optimal performance LWORK >= N*NB, where NB is */ /* the optimal blocksize returned by ILAENV. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is */ /* singular and its inverse could not be computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; --work; /* Function Body */ *info = 0; nb = ilaenv_(&c__1, "SGETRI", " ", n, &c_n1, &c_n1, &c_n1); lwkopt = *n * nb; work[1] = (real) lwkopt; lquery = *lwork == -1; if (*n < 0) { *info = -1; } else if (*lda < max(1,*n)) { *info = -3; } else if (*lwork < max(1,*n) && ! lquery) { *info = -6; } if (*info != 0) { i__1 = -(*info); xerbla_("SGETRI", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Form inv(U). If INFO > 0 from STRTRI, then U is singular, */ /* and the inverse is not computed. */ strtri_("Upper", "Non-unit", n, &a[a_offset], lda, info); if (*info > 0) { return 0; } nbmin = 2; ldwork = *n; if (nb > 1 && nb < *n) { /* Computing MAX */ i__1 = ldwork * nb; iws = max(i__1,1); if (*lwork < iws) { nb = *lwork / ldwork; /* Computing MAX */ i__1 = 2, i__2 = ilaenv_(&c__2, "SGETRI", " ", n, &c_n1, &c_n1, & c_n1); nbmin = max(i__1,i__2); } } else { iws = *n; } /* Solve the equation inv(A)*L = inv(U) for inv(A). */ if (nb < nbmin || nb >= *n) { /* Use unblocked code. */ for (j = *n; j >= 1; --j) { /* Copy current column of L to WORK and replace with zeros. */ i__1 = *n; for (i__ = j + 1; i__ <= i__1; ++i__) { work[i__] = a[i__ + j * a_dim1]; a[i__ + j * a_dim1] = 0.f; /* L10: */ } /* Compute current column of inv(A). */ if (j < *n) { i__1 = *n - j; sgemv_("No transpose", n, &i__1, &c_b20, &a[(j + 1) * a_dim1 + 1], lda, &work[j + 1], &c__1, &c_b22, &a[j * a_dim1 + 1], &c__1); } /* L20: */ } } else { /* Use blocked code. */ nn = (*n - 1) / nb * nb + 1; i__1 = -nb; for (j = nn; i__1 < 0 ? j >= 1 : j <= 1; j += i__1) { /* Computing MIN */ i__2 = nb, i__3 = *n - j + 1; jb = min(i__2,i__3); /* Copy current block column of L to WORK and replace with */ /* zeros. */ i__2 = j + jb - 1; for (jj = j; jj <= i__2; ++jj) { i__3 = *n; for (i__ = jj + 1; i__ <= i__3; ++i__) { work[i__ + (jj - j) * ldwork] = a[i__ + jj * a_dim1]; a[i__ + jj * a_dim1] = 0.f; /* L30: */ } /* L40: */ } /* Compute current block column of inv(A). */ if (j + jb <= *n) { i__2 = *n - j - jb + 1; sgemm_("No transpose", "No transpose", n, &jb, &i__2, &c_b20, &a[(j + jb) * a_dim1 + 1], lda, &work[j + jb], & ldwork, &c_b22, &a[j * a_dim1 + 1], lda); } strsm_("Right", "Lower", "No transpose", "Unit", n, &jb, &c_b22, & work[j], &ldwork, &a[j * a_dim1 + 1], lda); /* L50: */ } } /* Apply column interchanges. */ for (j = *n - 1; j >= 1; --j) { jp = ipiv[j]; if (jp != j) { sswap_(n, &a[j * a_dim1 + 1], &c__1, &a[jp * a_dim1 + 1], &c__1); } /* L60: */ } work[1] = (real) iws; return 0; /* End of SGETRI */ } /* sgetri_ */
/* Subroutine */ int ssygv_(integer *itype, char *jobz, char *uplo, integer * n, real *a, integer *lda, real *b, integer *ldb, real *w, real *work, integer *lwork, integer *info) { /* -- LAPACK driver routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= SSYGV computes all the eigenvalues, and optionally, the eigenvectors of a real generalized symmetric-definite eigenproblem, of the form A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and B are assumed to be symmetric and B is also positive definite. Arguments ========= ITYPE (input) INTEGER Specifies the problem type to be solved: = 1: A*x = (lambda)*B*x = 2: A*B*x = (lambda)*x = 3: B*A*x = (lambda)*x JOBZ (input) CHARACTER*1 = 'N': Compute eigenvalues only; = 'V': Compute eigenvalues and eigenvectors. UPLO (input) CHARACTER*1 = 'U': Upper triangles of A and B are stored; = 'L': Lower triangles of A and B are stored. N (input) INTEGER The order of the matrices A and B. N >= 0. A (input/output) REAL array, dimension (LDA, N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A. On exit, if JOBZ = 'V', then if INFO = 0, A contains the matrix Z of eigenvectors. The eigenvectors are normalized as follows: if ITYPE = 1 or 2, Z**T*B*Z = I; if ITYPE = 3, Z**T*inv(B)*Z = I. If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') or the lower triangle (if UPLO='L') of A, including the diagonal, is destroyed. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input/output) REAL array, dimension (LDB, N) On entry, the symmetric matrix B. If UPLO = 'U', the leading N-by-N upper triangular part of B contains the upper triangular part of the matrix B. If UPLO = 'L', the leading N-by-N lower triangular part of B contains the lower triangular part of the matrix B. On exit, if INFO <= N, the part of B containing the matrix is overwritten by the triangular factor U or L from the Cholesky factorization B = U**T*U or B = L*L**T. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). W (output) REAL array, dimension (N) If INFO = 0, the eigenvalues in ascending order. WORK (workspace/output) REAL array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The length of the array WORK. LWORK >= max(1,3*N-1). For optimal efficiency, LWORK >= (NB+2)*N, where NB is the blocksize for SSYTRD returned by ILAENV. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: SPOTRF or SSYEV returned an error code: <= N: if INFO = i, SSYEV failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero; > N: if INFO = N + i, for 1 <= i <= N, then the leading minor of order i of B is not positive definite. The factorization of B could not be completed and no eigenvalues or eigenvectors were computed. ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* Table of constant values */ static real c_b11 = 1.f; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; /* Local variables */ static integer neig; extern logical lsame_(char *, char *); static char trans[1]; static logical upper; extern /* Subroutine */ int strmm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ); static logical wantz; extern /* Subroutine */ int strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), ssyev_(char *, char *, integer *, real *, integer *, real *, real *, integer *, integer *), xerbla_(char *, integer *), spotrf_(char *, integer *, real *, integer *, integer *), ssygst_( integer *, char *, integer *, real *, integer *, real *, integer * , integer *); #define W(I) w[(I)-1] #define WORK(I) work[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] wantz = lsame_(jobz, "V"); upper = lsame_(uplo, "U"); *info = 0; if (*itype < 0 || *itype > 3) { *info = -1; } else if (! (wantz || lsame_(jobz, "N"))) { *info = -2; } else if (! (upper || lsame_(uplo, "L"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldb < max(1,*n)) { *info = -8; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = *n * 3 - 1; if (*lwork < max(i__1,i__2)) { *info = -11; } } if (*info != 0) { i__1 = -(*info); xerbla_("SSYGV ", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Form a Cholesky factorization of B. */ spotrf_(uplo, n, &B(1,1), ldb, info); if (*info != 0) { *info = *n + *info; return 0; } /* Transform problem to standard eigenvalue problem and solve. */ ssygst_(itype, uplo, n, &A(1,1), lda, &B(1,1), ldb, info); ssyev_(jobz, uplo, n, &A(1,1), lda, &W(1), &WORK(1), lwork, info); if (wantz) { /* Backtransform eigenvectors to the original problem. */ neig = *n; if (*info > 0) { neig = *info - 1; } if (*itype == 1 || *itype == 2) { /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ if (upper) { *(unsigned char *)trans = 'N'; } else { *(unsigned char *)trans = 'T'; } strsm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b11, &B(1,1), ldb, &A(1,1), lda); } else if (*itype == 3) { /* For B*A*x=(lambda)*x; backtransform eigenvectors: x = L*y or U'*y */ if (upper) { *(unsigned char *)trans = 'T'; } else { *(unsigned char *)trans = 'N'; } strmm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b11, &B(1,1), ldb, &A(1,1), lda); } } return 0; /* End of SSYGV */ } /* ssygv_ */
/* Subroutine */ int sgetrf_(integer *m, integer *n, real *a, integer *lda, integer *ipiv, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; /* Local variables */ integer i__, j, jb, nb, iinfo; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *), sgetf2_(integer *, integer *, real *, integer *, integer *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SGETRF computes an LU factorization of a general M-by-N matrix A */ /* using partial pivoting with row interchanges. */ /* The factorization has the form */ /* A = P * L * U */ /* where P is a permutation matrix, L is lower triangular with unit */ /* diagonal elements (lower trapezoidal if m > n), and U is upper */ /* triangular (upper trapezoidal if m < n). */ /* This is the right-looking Level 3 BLAS version of the algorithm. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) REAL array, dimension (LDA,N) */ /* On entry, the M-by-N matrix to be factored. */ /* On exit, the factors L and U from the factorization */ /* A = P*L*U; the unit diagonal elements of L are not stored. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* IPIV (output) INTEGER array, dimension (min(M,N)) */ /* The pivot indices; for 1 <= i <= min(M,N), row i of the */ /* matrix was interchanged with row IPIV(i). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, and division by zero will occur if it is used */ /* to solve a system of equations. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); xerbla_("SGETRF", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Determine the block size for this environment. */ nb = ilaenv_(&c__1, "SGETRF", " ", m, n, &c_n1, &c_n1); if (nb <= 1 || nb >= min(*m,*n)) { /* Use unblocked code. */ sgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info); } else { /* Use blocked code. */ i__1 = min(*m,*n); i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Computing MIN */ i__3 = min(*m,*n) - j + 1; jb = min(i__3,nb); /* Factor diagonal and subdiagonal blocks and test for exact */ /* singularity. */ i__3 = *m - j + 1; sgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo); /* Adjust INFO and the pivot indices. */ if (*info == 0 && iinfo > 0) { *info = iinfo + j - 1; } /* Computing MIN */ i__4 = *m, i__5 = j + jb - 1; i__3 = min(i__4,i__5); for (i__ = j; i__ <= i__3; ++i__) { ipiv[i__] = j - 1 + ipiv[i__]; /* L10: */ } /* Apply interchanges to columns 1:J-1. */ i__3 = j - 1; i__4 = j + jb - 1; slaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1); if (j + jb <= *n) { /* Apply interchanges to columns J+JB:N. */ i__3 = *n - j - jb + 1; i__4 = j + jb - 1; slaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, & ipiv[1], &c__1); /* Compute block row of U. */ i__3 = *n - j - jb + 1; strsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & c_b16, &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda); if (j + jb <= *m) { /* Update trailing submatrix. */ i__3 = *m - j - jb + 1; i__4 = *n - j - jb + 1; sgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, &c_b19, &a[j + jb + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, &c_b16, &a[j + jb + (j + jb) * a_dim1], lda); } } /* L20: */ } } return 0; /* End of SGETRF */ } /* sgetrf_ */
/* Subroutine */ int spotrs_(char *uplo, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ extern logical lsame_(char *, char *); static logical upper; extern /* Subroutine */ int strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), xerbla_(char *, integer *); /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= SPOTRS solves a system of linear equations A*X = B with a symmetric positive definite matrix A using the Cholesky factorization A = U**T*U or A = L*L**T computed by SPOTRF. Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. A (input) REAL array, dimension (LDA,N) The triangular factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T, as computed by SPOTRF. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input/output) REAL array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, the solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); xerbla_("SPOTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } if (upper) { /* Solve A*X = B where A = U'*U. Solve U'*X = B, overwriting B with X. */ strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b164, &a[ a_offset], lda, &b[b_offset], ldb); /* Solve U*X = B, overwriting B with X. */ strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b164, &a[a_offset], lda, &b[b_offset], ldb); } else { /* Solve A*X = B where A = L*L'. Solve L*X = B, overwriting B with X. */ strsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b164, &a[a_offset], lda, &b[b_offset], ldb); /* Solve L'*X = B, overwriting B with X. */ strsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b164, &a[ a_offset], lda, &b[b_offset], ldb); } return 0; /* End of SPOTRS */ } /* spotrs_ */
void strsm(char side, char uplo, char transa, char diag, int m, int n, float alpha, float *a, int lda, float *b, int ldb) { strsm_(&side, &uplo, &transa, &diag, &m, &n, &alpha, a, &lda, b, &ldb); }
/* Subroutine */ int sgetrs_(char *trans, integer *n, integer *nrhs, real *a, integer *lda, integer *ipiv, real *b, integer *ldb, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ extern logical lsame_(char *, char *); extern /* Subroutine */ int strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), xerbla_(char *, integer *); logical notran; extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SGETRS solves a system of linear equations */ /* A * X = B or A' * X = B */ /* with a general N-by-N matrix A using the LU factorization computed */ /* by SGETRF. */ /* Arguments */ /* ========= */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A'* X = B (Transpose) */ /* = 'C': A'* X = B (Conjugate transpose = Transpose) */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* A (input) REAL array, dimension (LDA,N) */ /* The factors L and U from the factorization A = P*L*U */ /* as computed by SGETRF. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices from SGETRF; for 1<=i<=N, row i of the */ /* matrix was interchanged with row IPIV(i). */ /* B (input/output) REAL array, dimension (LDB,NRHS) */ /* On entry, the right hand side matrix B. */ /* On exit, the solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; notran = lsame_(trans, "N"); if (! notran && ! lsame_(trans, "T") && ! lsame_( trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -8; } if (*info != 0) { i__1 = -(*info); xerbla_("SGETRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { return 0; } if (notran) { /* Solve A * X = B. */ /* Apply row interchanges to the right hand sides. */ slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1); /* Solve L*X = B, overwriting B with X. */ strsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b12, &a[ a_offset], lda, &b[b_offset], ldb); /* Solve U*X = B, overwriting B with X. */ strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b12, & a[a_offset], lda, &b[b_offset], ldb); } else { /* Solve A' * X = B. */ /* Solve U'*X = B, overwriting B with X. */ strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b12, &a[ a_offset], lda, &b[b_offset], ldb); /* Solve L'*X = B, overwriting B with X. */ strsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b12, &a[ a_offset], lda, &b[b_offset], ldb); /* Apply row interchanges to the solution vectors. */ slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1); } return 0; /* End of SGETRS */ } /* sgetrs_ */
/* Subroutine */ int stfsm_(char *transr, char *side, char *uplo, char *trans, char *diag, integer *m, integer *n, real *alpha, real *a, real *b, integer *ldb) { /* System generated locals */ integer b_dim1, b_offset, i__1, i__2; /* Local variables */ integer i__, j, k, m1, m2, n1, n2, info; logical normaltransr, lside; logical lower; logical misodd, nisodd, notrans; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* Purpose */ /* ======= */ /* Level 3 BLAS like routine for A in RFP Format. */ /* STFSM solves the matrix equation */ /* op( A )*X = alpha*B or X*op( A ) = alpha*B */ /* where alpha is a scalar, X and B are m by n matrices, A is a unit, or */ /* non-unit, upper or lower triangular matrix and op( A ) is one of */ /* op( A ) = A or op( A ) = A'. */ /* A is in Rectangular Full Packed (RFP) Format. */ /* The matrix X is overwritten on B. */ /* Arguments */ /* ========== */ /* TRANSR - (input) CHARACTER */ /* = 'N': The Normal Form of RFP A is stored; */ /* = 'T': The Transpose Form of RFP A is stored. */ /* SIDE - (input) CHARACTER */ /* On entry, SIDE specifies whether op( A ) appears on the left */ /* or right of X as follows: */ /* SIDE = 'L' or 'l' op( A )*X = alpha*B. */ /* SIDE = 'R' or 'r' X*op( A ) = alpha*B. */ /* Unchanged on exit. */ /* UPLO - (input) CHARACTER */ /* On entry, UPLO specifies whether the RFP matrix A came from */ /* an upper or lower triangular matrix as follows: */ /* UPLO = 'U' or 'u' RFP A came from an upper triangular matrix */ /* UPLO = 'L' or 'l' RFP A came from a lower triangular matrix */ /* Unchanged on exit. */ /* TRANS - (input) CHARACTER */ /* On entry, TRANS specifies the form of op( A ) to be used */ /* in the matrix multiplication as follows: */ /* TRANS = 'N' or 'n' op( A ) = A. */ /* TRANS = 'T' or 't' op( A ) = A'. */ /* Unchanged on exit. */ /* DIAG - (input) CHARACTER */ /* On entry, DIAG specifies whether or not RFP A is unit */ /* triangular as follows: */ /* DIAG = 'U' or 'u' A is assumed to be unit triangular. */ /* DIAG = 'N' or 'n' A is not assumed to be unit */ /* triangular. */ /* Unchanged on exit. */ /* M - (input) INTEGER. */ /* On entry, M specifies the number of rows of B. M must be at */ /* least zero. */ /* Unchanged on exit. */ /* N - (input) INTEGER. */ /* On entry, N specifies the number of columns of B. N must be */ /* at least zero. */ /* Unchanged on exit. */ /* ALPHA - (input) REAL. */ /* On entry, ALPHA specifies the scalar alpha. When alpha is */ /* zero then A is not referenced and B need not be set before */ /* entry. */ /* Unchanged on exit. */ /* A - (input) REAL array, dimension (NT); */ /* NT = N*(N+1)/2. On entry, the matrix A in RFP Format. */ /* RFP Format is described by TRANSR, UPLO and N as follows: */ /* If TRANSR='N' then RFP A is (0:N,0:K-1) when N is even; */ /* K=N/2. RFP A is (0:N-1,0:K) when N is odd; K=N/2. If */ /* TRANSR = 'T' then RFP is the transpose of RFP A as */ /* defined when TRANSR = 'N'. The contents of RFP A are defined */ /* by UPLO as follows: If UPLO = 'U' the RFP A contains the NT */ /* elements of upper packed A either in normal or */ /* transpose Format. If UPLO = 'L' the RFP A contains */ /* the NT elements of lower packed A either in normal or */ /* transpose Format. The LDA of RFP A is (N+1)/2 when */ /* TRANSR = 'T'. When TRANSR is 'N' the LDA is N+1 when N is */ /* even and is N when is odd. */ /* See the Note below for more details. Unchanged on exit. */ /* B - (input/ouptut) REAL array, DIMENSION (LDB,N) */ /* Before entry, the leading m by n part of the array B must */ /* contain the right-hand side matrix B, and on exit is */ /* overwritten by the solution matrix X. */ /* LDB - (input) INTEGER. */ /* On entry, LDB specifies the first dimension of B as declared */ /* in the calling (sub) program. LDB must be at least */ /* max( 1, m ). */ /* Unchanged on exit. */ /* Further Details */ /* =============== */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* even. We give an example where N = 6. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 05 00 */ /* 11 12 13 14 15 10 11 */ /* 22 23 24 25 20 21 22 */ /* 33 34 35 30 31 32 33 */ /* 44 45 40 41 42 43 44 */ /* 55 50 51 52 53 54 55 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(4:6,0:2) consists of */ /* the transpose of the first three columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:2,0:2) consists of */ /* the transpose of the last three columns of AP lower. */ /* This covers the case N even and TRANSR = 'N'. */ /* RFP A RFP A */ /* 03 04 05 33 43 53 */ /* 13 14 15 00 44 54 */ /* 23 24 25 10 11 55 */ /* 33 34 35 20 21 22 */ /* 00 44 45 30 31 32 */ /* 01 11 55 40 41 42 */ /* 02 12 22 50 51 52 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 03 13 23 33 00 01 02 33 00 10 20 30 40 50 */ /* 04 14 24 34 44 11 12 43 44 11 21 31 41 51 */ /* 05 15 25 35 45 55 22 53 54 55 22 32 42 52 */ /* We first consider Rectangular Full Packed (RFP) Format when N is */ /* odd. We give an example where N = 5. */ /* AP is Upper AP is Lower */ /* 00 01 02 03 04 00 */ /* 11 12 13 14 10 11 */ /* 22 23 24 20 21 22 */ /* 33 34 30 31 32 33 */ /* 44 40 41 42 43 44 */ /* Let TRANSR = 'N'. RFP holds AP as follows: */ /* For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */ /* three columns of AP upper. The lower triangle A(3:4,0:1) consists of */ /* the transpose of the first two columns of AP upper. */ /* For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */ /* three columns of AP lower. The upper triangle A(0:1,1:2) consists of */ /* the transpose of the last two columns of AP lower. */ /* This covers the case N odd and TRANSR = 'N'. */ /* RFP A RFP A */ /* 02 03 04 00 33 43 */ /* 12 13 14 10 11 44 */ /* 22 23 24 20 21 22 */ /* 00 33 34 30 31 32 */ /* 01 11 44 40 41 42 */ /* Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */ /* transpose of RFP A above. One therefore gets: */ /* RFP A RFP A */ /* 02 12 22 00 01 00 10 20 30 40 50 */ /* 03 13 23 33 11 33 11 21 31 41 51 */ /* 04 14 24 34 44 43 44 22 32 42 52 */ /* Reference */ /* ========= */ /* ===================================================================== */ /* Test the input parameters. */ /* Parameter adjustments */ b_dim1 = *ldb - 1 - 0 + 1; b_offset = 0 + b_dim1 * 0; b -= b_offset; /* Function Body */ info = 0; normaltransr = lsame_(transr, "N"); lside = lsame_(side, "L"); lower = lsame_(uplo, "L"); notrans = lsame_(trans, "N"); if (! normaltransr && ! lsame_(transr, "T")) { info = -1; } else if (! lside && ! lsame_(side, "R")) { info = -2; } else if (! lower && ! lsame_(uplo, "U")) { info = -3; } else if (! notrans && ! lsame_(trans, "T")) { info = -4; } else if (! lsame_(diag, "N") && ! lsame_(diag, "U")) { info = -5; } else if (*m < 0) { info = -6; } else if (*n < 0) { info = -7; } else if (*ldb < max(1,*m)) { info = -11; } if (info != 0) { i__1 = -info; xerbla_("STFSM ", &i__1); return 0; } /* Quick return when ( (N.EQ.0).OR.(M.EQ.0) ) */ if (*m == 0 || *n == 0) { return 0; } /* Quick return when ALPHA.EQ.(0D+0) */ if (*alpha == 0.f) { i__1 = *n - 1; for (j = 0; j <= i__1; ++j) { i__2 = *m - 1; for (i__ = 0; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = 0.f; } } return 0; } if (lside) { /* SIDE = 'L' */ /* A is M-by-M. */ /* If M is odd, set NISODD = .TRUE., and M1 and M2. */ /* If M is even, NISODD = .FALSE., and M. */ if (*m % 2 == 0) { misodd = FALSE_; k = *m / 2; } else { misodd = TRUE_; if (lower) { m2 = *m / 2; m1 = *m - m2; } else { m1 = *m / 2; m2 = *m - m1; } } if (misodd) { /* SIDE = 'L' and N is odd */ if (normaltransr) { /* SIDE = 'L', N is odd, and TRANSR = 'N' */ if (lower) { /* SIDE ='L', N is odd, TRANSR = 'N', and UPLO = 'L' */ if (notrans) { /* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'L', and */ /* TRANS = 'N' */ if (*m == 1) { strsm_("L", "L", "N", diag, &m1, n, alpha, a, m, & b[b_offset], ldb); } else { strsm_("L", "L", "N", diag, &m1, n, alpha, a, m, & b[b_offset], ldb); sgemm_("N", "N", &m2, n, &m1, &c_b23, &a[m1], m, & b[b_offset], ldb, alpha, &b[m1], ldb); strsm_("L", "U", "T", diag, &m2, n, &c_b27, &a[*m] , m, &b[m1], ldb); } } else { /* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'L', and */ /* TRANS = 'T' */ if (*m == 1) { strsm_("L", "L", "T", diag, &m1, n, alpha, a, m, & b[b_offset], ldb); } else { strsm_("L", "U", "N", diag, &m2, n, alpha, &a[*m], m, &b[m1], ldb); sgemm_("T", "N", &m1, n, &m2, &c_b23, &a[m1], m, & b[m1], ldb, alpha, &b[b_offset], ldb); strsm_("L", "L", "T", diag, &m1, n, &c_b27, a, m, &b[b_offset], ldb); } } } else { /* SIDE ='L', N is odd, TRANSR = 'N', and UPLO = 'U' */ if (! notrans) { /* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'U', and */ /* TRANS = 'N' */ strsm_("L", "L", "N", diag, &m1, n, alpha, &a[m2], m, &b[b_offset], ldb); sgemm_("T", "N", &m2, n, &m1, &c_b23, a, m, &b[ b_offset], ldb, alpha, &b[m1], ldb); strsm_("L", "U", "T", diag, &m2, n, &c_b27, &a[m1], m, &b[m1], ldb); } else { /* SIDE ='L', N is odd, TRANSR = 'N', UPLO = 'U', and */ /* TRANS = 'T' */ strsm_("L", "U", "N", diag, &m2, n, alpha, &a[m1], m, &b[m1], ldb); sgemm_("N", "N", &m1, n, &m2, &c_b23, a, m, &b[m1], ldb, alpha, &b[b_offset], ldb); strsm_("L", "L", "T", diag, &m1, n, &c_b27, &a[m2], m, &b[b_offset], ldb); } } } else { /* SIDE = 'L', N is odd, and TRANSR = 'T' */ if (lower) { /* SIDE ='L', N is odd, TRANSR = 'T', and UPLO = 'L' */ if (notrans) { /* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'L', and */ /* TRANS = 'N' */ if (*m == 1) { strsm_("L", "U", "T", diag, &m1, n, alpha, a, &m1, &b[b_offset], ldb); } else { strsm_("L", "U", "T", diag, &m1, n, alpha, a, &m1, &b[b_offset], ldb); sgemm_("T", "N", &m2, n, &m1, &c_b23, &a[m1 * m1], &m1, &b[b_offset], ldb, alpha, &b[m1], ldb); strsm_("L", "L", "N", diag, &m2, n, &c_b27, &a[1], &m1, &b[m1], ldb); } } else { /* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'L', and */ /* TRANS = 'T' */ if (*m == 1) { strsm_("L", "U", "N", diag, &m1, n, alpha, a, &m1, &b[b_offset], ldb); } else { strsm_("L", "L", "T", diag, &m2, n, alpha, &a[1], &m1, &b[m1], ldb); sgemm_("N", "N", &m1, n, &m2, &c_b23, &a[m1 * m1], &m1, &b[m1], ldb, alpha, &b[b_offset], ldb); strsm_("L", "U", "N", diag, &m1, n, &c_b27, a, & m1, &b[b_offset], ldb); } } } else { /* SIDE ='L', N is odd, TRANSR = 'T', and UPLO = 'U' */ if (! notrans) { /* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'U', and */ /* TRANS = 'N' */ strsm_("L", "U", "T", diag, &m1, n, alpha, &a[m2 * m2] , &m2, &b[b_offset], ldb); sgemm_("N", "N", &m2, n, &m1, &c_b23, a, &m2, &b[ b_offset], ldb, alpha, &b[m1], ldb); strsm_("L", "L", "N", diag, &m2, n, &c_b27, &a[m1 * m2], &m2, &b[m1], ldb); } else { /* SIDE ='L', N is odd, TRANSR = 'T', UPLO = 'U', and */ /* TRANS = 'T' */ strsm_("L", "L", "T", diag, &m2, n, alpha, &a[m1 * m2] , &m2, &b[m1], ldb); sgemm_("T", "N", &m1, n, &m2, &c_b23, a, &m2, &b[m1], ldb, alpha, &b[b_offset], ldb); strsm_("L", "U", "N", diag, &m1, n, &c_b27, &a[m2 * m2], &m2, &b[b_offset], ldb); } } } } else { /* SIDE = 'L' and N is even */ if (normaltransr) { /* SIDE = 'L', N is even, and TRANSR = 'N' */ if (lower) { /* SIDE ='L', N is even, TRANSR = 'N', and UPLO = 'L' */ if (notrans) { /* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'L', */ /* and TRANS = 'N' */ i__1 = *m + 1; strsm_("L", "L", "N", diag, &k, n, alpha, &a[1], & i__1, &b[b_offset], ldb); i__1 = *m + 1; sgemm_("N", "N", &k, n, &k, &c_b23, &a[k + 1], &i__1, &b[b_offset], ldb, alpha, &b[k], ldb); i__1 = *m + 1; strsm_("L", "U", "T", diag, &k, n, &c_b27, a, &i__1, & b[k], ldb); } else { /* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'L', */ /* and TRANS = 'T' */ i__1 = *m + 1; strsm_("L", "U", "N", diag, &k, n, alpha, a, &i__1, & b[k], ldb); i__1 = *m + 1; sgemm_("T", "N", &k, n, &k, &c_b23, &a[k + 1], &i__1, &b[k], ldb, alpha, &b[b_offset], ldb); i__1 = *m + 1; strsm_("L", "L", "T", diag, &k, n, &c_b27, &a[1], & i__1, &b[b_offset], ldb); } } else { /* SIDE ='L', N is even, TRANSR = 'N', and UPLO = 'U' */ if (! notrans) { /* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'U', */ /* and TRANS = 'N' */ i__1 = *m + 1; strsm_("L", "L", "N", diag, &k, n, alpha, &a[k + 1], & i__1, &b[b_offset], ldb); i__1 = *m + 1; sgemm_("T", "N", &k, n, &k, &c_b23, a, &i__1, &b[ b_offset], ldb, alpha, &b[k], ldb); i__1 = *m + 1; strsm_("L", "U", "T", diag, &k, n, &c_b27, &a[k], & i__1, &b[k], ldb); } else { /* SIDE ='L', N is even, TRANSR = 'N', UPLO = 'U', */ /* and TRANS = 'T' */ i__1 = *m + 1; strsm_("L", "U", "N", diag, &k, n, alpha, &a[k], & i__1, &b[k], ldb); i__1 = *m + 1; sgemm_("N", "N", &k, n, &k, &c_b23, a, &i__1, &b[k], ldb, alpha, &b[b_offset], ldb); i__1 = *m + 1; strsm_("L", "L", "T", diag, &k, n, &c_b27, &a[k + 1], &i__1, &b[b_offset], ldb); } } } else { /* SIDE = 'L', N is even, and TRANSR = 'T' */ if (lower) { /* SIDE ='L', N is even, TRANSR = 'T', and UPLO = 'L' */ if (notrans) { /* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'L', */ /* and TRANS = 'N' */ strsm_("L", "U", "T", diag, &k, n, alpha, &a[k], &k, & b[b_offset], ldb); sgemm_("T", "N", &k, n, &k, &c_b23, &a[k * (k + 1)], & k, &b[b_offset], ldb, alpha, &b[k], ldb); strsm_("L", "L", "N", diag, &k, n, &c_b27, a, &k, &b[ k], ldb); } else { /* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'L', */ /* and TRANS = 'T' */ strsm_("L", "L", "T", diag, &k, n, alpha, a, &k, &b[k] , ldb); sgemm_("N", "N", &k, n, &k, &c_b23, &a[k * (k + 1)], & k, &b[k], ldb, alpha, &b[b_offset], ldb); strsm_("L", "U", "N", diag, &k, n, &c_b27, &a[k], &k, &b[b_offset], ldb); } } else { /* SIDE ='L', N is even, TRANSR = 'T', and UPLO = 'U' */ if (! notrans) { /* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'U', */ /* and TRANS = 'N' */ strsm_("L", "U", "T", diag, &k, n, alpha, &a[k * (k + 1)], &k, &b[b_offset], ldb); sgemm_("N", "N", &k, n, &k, &c_b23, a, &k, &b[ b_offset], ldb, alpha, &b[k], ldb); strsm_("L", "L", "N", diag, &k, n, &c_b27, &a[k * k], &k, &b[k], ldb); } else { /* SIDE ='L', N is even, TRANSR = 'T', UPLO = 'U', */ /* and TRANS = 'T' */ strsm_("L", "L", "T", diag, &k, n, alpha, &a[k * k], & k, &b[k], ldb); sgemm_("T", "N", &k, n, &k, &c_b23, a, &k, &b[k], ldb, alpha, &b[b_offset], ldb); strsm_("L", "U", "N", diag, &k, n, &c_b27, &a[k * (k + 1)], &k, &b[b_offset], ldb); } } } } } else { /* SIDE = 'R' */ /* A is N-by-N. */ /* If N is odd, set NISODD = .TRUE., and N1 and N2. */ /* If N is even, NISODD = .FALSE., and K. */ if (*n % 2 == 0) { nisodd = FALSE_; k = *n / 2; } else { nisodd = TRUE_; if (lower) { n2 = *n / 2; n1 = *n - n2; } else { n1 = *n / 2; n2 = *n - n1; } } if (nisodd) { /* SIDE = 'R' and N is odd */ if (normaltransr) { /* SIDE = 'R', N is odd, and TRANSR = 'N' */ if (lower) { /* SIDE ='R', N is odd, TRANSR = 'N', and UPLO = 'L' */ if (notrans) { /* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'L', and */ /* TRANS = 'N' */ strsm_("R", "U", "T", diag, m, &n2, alpha, &a[*n], n, &b[n1 * b_dim1], ldb); sgemm_("N", "N", m, &n1, &n2, &c_b23, &b[n1 * b_dim1], ldb, &a[n1], n, alpha, b, ldb); strsm_("R", "L", "N", diag, m, &n1, &c_b27, a, n, b, ldb); } else { /* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'L', and */ /* TRANS = 'T' */ strsm_("R", "L", "T", diag, m, &n1, alpha, a, n, b, ldb); sgemm_("N", "T", m, &n2, &n1, &c_b23, b, ldb, &a[n1], n, alpha, &b[n1 * b_dim1], ldb); strsm_("R", "U", "N", diag, m, &n2, &c_b27, &a[*n], n, &b[n1 * b_dim1], ldb); } } else { /* SIDE ='R', N is odd, TRANSR = 'N', and UPLO = 'U' */ if (notrans) { /* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'U', and */ /* TRANS = 'N' */ strsm_("R", "L", "T", diag, m, &n1, alpha, &a[n2], n, b, ldb); sgemm_("N", "N", m, &n2, &n1, &c_b23, b, ldb, a, n, alpha, &b[n1 * b_dim1], ldb); strsm_("R", "U", "N", diag, m, &n2, &c_b27, &a[n1], n, &b[n1 * b_dim1], ldb); } else { /* SIDE ='R', N is odd, TRANSR = 'N', UPLO = 'U', and */ /* TRANS = 'T' */ strsm_("R", "U", "T", diag, m, &n2, alpha, &a[n1], n, &b[n1 * b_dim1], ldb); sgemm_("N", "T", m, &n1, &n2, &c_b23, &b[n1 * b_dim1], ldb, a, n, alpha, b, ldb); strsm_("R", "L", "N", diag, m, &n1, &c_b27, &a[n2], n, b, ldb); } } } else { /* SIDE = 'R', N is odd, and TRANSR = 'T' */ if (lower) { /* SIDE ='R', N is odd, TRANSR = 'T', and UPLO = 'L' */ if (notrans) { /* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'L', and */ /* TRANS = 'N' */ strsm_("R", "L", "N", diag, m, &n2, alpha, &a[1], &n1, &b[n1 * b_dim1], ldb); sgemm_("N", "T", m, &n1, &n2, &c_b23, &b[n1 * b_dim1], ldb, &a[n1 * n1], &n1, alpha, b, ldb); strsm_("R", "U", "T", diag, m, &n1, &c_b27, a, &n1, b, ldb); } else { /* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'L', and */ /* TRANS = 'T' */ strsm_("R", "U", "N", diag, m, &n1, alpha, a, &n1, b, ldb); sgemm_("N", "N", m, &n2, &n1, &c_b23, b, ldb, &a[n1 * n1], &n1, alpha, &b[n1 * b_dim1], ldb); strsm_("R", "L", "T", diag, m, &n2, &c_b27, &a[1], & n1, &b[n1 * b_dim1], ldb); } } else { /* SIDE ='R', N is odd, TRANSR = 'T', and UPLO = 'U' */ if (notrans) { /* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'U', and */ /* TRANS = 'N' */ strsm_("R", "U", "N", diag, m, &n1, alpha, &a[n2 * n2] , &n2, b, ldb); sgemm_("N", "T", m, &n2, &n1, &c_b23, b, ldb, a, &n2, alpha, &b[n1 * b_dim1], ldb); strsm_("R", "L", "T", diag, m, &n2, &c_b27, &a[n1 * n2], &n2, &b[n1 * b_dim1], ldb); } else { /* SIDE ='R', N is odd, TRANSR = 'T', UPLO = 'U', and */ /* TRANS = 'T' */ strsm_("R", "L", "N", diag, m, &n2, alpha, &a[n1 * n2] , &n2, &b[n1 * b_dim1], ldb); sgemm_("N", "N", m, &n1, &n2, &c_b23, &b[n1 * b_dim1], ldb, a, &n2, alpha, b, ldb); strsm_("R", "U", "T", diag, m, &n1, &c_b27, &a[n2 * n2], &n2, b, ldb); } } } } else { /* SIDE = 'R' and N is even */ if (normaltransr) { /* SIDE = 'R', N is even, and TRANSR = 'N' */ if (lower) { /* SIDE ='R', N is even, TRANSR = 'N', and UPLO = 'L' */ if (notrans) { /* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'L', */ /* and TRANS = 'N' */ i__1 = *n + 1; strsm_("R", "U", "T", diag, m, &k, alpha, a, &i__1, & b[k * b_dim1], ldb); i__1 = *n + 1; sgemm_("N", "N", m, &k, &k, &c_b23, &b[k * b_dim1], ldb, &a[k + 1], &i__1, alpha, b, ldb); i__1 = *n + 1; strsm_("R", "L", "N", diag, m, &k, &c_b27, &a[1], & i__1, b, ldb); } else { /* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'L', */ /* and TRANS = 'T' */ i__1 = *n + 1; strsm_("R", "L", "T", diag, m, &k, alpha, &a[1], & i__1, b, ldb); i__1 = *n + 1; sgemm_("N", "T", m, &k, &k, &c_b23, b, ldb, &a[k + 1], &i__1, alpha, &b[k * b_dim1], ldb); i__1 = *n + 1; strsm_("R", "U", "N", diag, m, &k, &c_b27, a, &i__1, & b[k * b_dim1], ldb); } } else { /* SIDE ='R', N is even, TRANSR = 'N', and UPLO = 'U' */ if (notrans) { /* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'U', */ /* and TRANS = 'N' */ i__1 = *n + 1; strsm_("R", "L", "T", diag, m, &k, alpha, &a[k + 1], & i__1, b, ldb); i__1 = *n + 1; sgemm_("N", "N", m, &k, &k, &c_b23, b, ldb, a, &i__1, alpha, &b[k * b_dim1], ldb); i__1 = *n + 1; strsm_("R", "U", "N", diag, m, &k, &c_b27, &a[k], & i__1, &b[k * b_dim1], ldb); } else { /* SIDE ='R', N is even, TRANSR = 'N', UPLO = 'U', */ /* and TRANS = 'T' */ i__1 = *n + 1; strsm_("R", "U", "T", diag, m, &k, alpha, &a[k], & i__1, &b[k * b_dim1], ldb); i__1 = *n + 1; sgemm_("N", "T", m, &k, &k, &c_b23, &b[k * b_dim1], ldb, a, &i__1, alpha, b, ldb); i__1 = *n + 1; strsm_("R", "L", "N", diag, m, &k, &c_b27, &a[k + 1], &i__1, b, ldb); } } } else { /* SIDE = 'R', N is even, and TRANSR = 'T' */ if (lower) { /* SIDE ='R', N is even, TRANSR = 'T', and UPLO = 'L' */ if (notrans) { /* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'L', */ /* and TRANS = 'N' */ strsm_("R", "L", "N", diag, m, &k, alpha, a, &k, &b[k * b_dim1], ldb); sgemm_("N", "T", m, &k, &k, &c_b23, &b[k * b_dim1], ldb, &a[(k + 1) * k], &k, alpha, b, ldb); strsm_("R", "U", "T", diag, m, &k, &c_b27, &a[k], &k, b, ldb); } else { /* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'L', */ /* and TRANS = 'T' */ strsm_("R", "U", "N", diag, m, &k, alpha, &a[k], &k, b, ldb); sgemm_("N", "N", m, &k, &k, &c_b23, b, ldb, &a[(k + 1) * k], &k, alpha, &b[k * b_dim1], ldb); strsm_("R", "L", "T", diag, m, &k, &c_b27, a, &k, &b[ k * b_dim1], ldb); } } else { /* SIDE ='R', N is even, TRANSR = 'T', and UPLO = 'U' */ if (notrans) { /* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'U', */ /* and TRANS = 'N' */ strsm_("R", "U", "N", diag, m, &k, alpha, &a[(k + 1) * k], &k, b, ldb); sgemm_("N", "T", m, &k, &k, &c_b23, b, ldb, a, &k, alpha, &b[k * b_dim1], ldb); strsm_("R", "L", "T", diag, m, &k, &c_b27, &a[k * k], &k, &b[k * b_dim1], ldb); } else { /* SIDE ='R', N is even, TRANSR = 'T', UPLO = 'U', */ /* and TRANS = 'T' */ strsm_("R", "L", "N", diag, m, &k, alpha, &a[k * k], & k, &b[k * b_dim1], ldb); sgemm_("N", "N", m, &k, &k, &c_b23, &b[k * b_dim1], ldb, a, &k, alpha, b, ldb); strsm_("R", "U", "T", diag, m, &k, &c_b27, &a[(k + 1) * k], &k, b, ldb); } } } } } return 0; /* End of STFSM */ } /* stfsm_ */
/* Subroutine */ int sgeqrs_(integer *m, integer *n, integer *nrhs, real *a, integer *lda, real *tau, real *b, integer *ldb, real *work, integer * lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ extern /* Subroutine */ int strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), xerbla_(char *, integer *), sormqr_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *, integer *); /* -- LAPACK routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* Solve the least squares problem */ /* min || A*X - B || */ /* using the QR factorization */ /* A = Q*R */ /* computed by SGEQRF. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. M >= N >= 0. */ /* NRHS (input) INTEGER */ /* The number of columns of B. NRHS >= 0. */ /* A (input) REAL array, dimension (LDA,N) */ /* Details of the QR factorization of the original matrix A as */ /* returned by SGEQRF. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= M. */ /* TAU (input) REAL array, dimension (N) */ /* Details of the orthogonal matrix Q. */ /* B (input/output) REAL array, dimension (LDB,NRHS) */ /* On entry, the m-by-nrhs right hand side matrix B. */ /* On exit, the n-by-nrhs solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= M. */ /* WORK (workspace) REAL array, dimension (LWORK) */ /* LWORK (input) INTEGER */ /* The length of the array WORK. LWORK must be at least NRHS, */ /* and should be at least NRHS*NB, where NB is the block size */ /* for this environment. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input arguments. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0 || *n > *m) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else if (*ldb < max(1,*m)) { *info = -8; } else if (*lwork < 1 || *lwork < *nrhs && *m > 0 && *n > 0) { *info = -10; } if (*info != 0) { i__1 = -(*info); xerbla_("SGEQRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0 || *m == 0) { return 0; } /* B := Q' * B */ sormqr_("Left", "Transpose", m, nrhs, n, &a[a_offset], lda, &tau[1], &b[ b_offset], ldb, &work[1], lwork, info); /* Solve R*X = B(1:n,:) */ strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b9, &a[ a_offset], lda, &b[b_offset], ldb); return 0; /* End of SGEQRS */ } /* sgeqrs_ */
/* Subroutine */ int sgbtrf_(integer *m, integer *n, integer *kl, integer *ku, real *ab, integer *ldab, integer *ipiv, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6; real r__1; /* Local variables */ integer i__, j, i2, i3, j2, j3, k2, jb, nb, ii, jj, jm, ip, jp, km, ju, kv, nw; extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, integer *, real *, integer *, real *, integer *); real temp; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); real work13[4160] /* was [65][64] */, work31[4160] /* was [65][ 64] */; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer * ), strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *), sgbtf2_(integer *, integer *, integer *, integer *, real *, integer *, integer *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *), isamax_(integer *, real *, integer *); extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SGBTRF computes an LU factorization of a real m-by-n band matrix A */ /* using partial pivoting with row interchanges. */ /* This is the blocked version of the algorithm, calling Level 3 BLAS. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0. */ /* AB (input/output) REAL array, dimension (LDAB,N) */ /* On entry, the matrix A in band storage, in rows KL+1 to */ /* 2*KL+KU+1; rows 1 to KL of the array need not be set. */ /* The j-th column of A is stored in the j-th column of the */ /* array AB as follows: */ /* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl) */ /* On exit, details of the factorization: U is stored as an */ /* upper triangular band matrix with KL+KU superdiagonals in */ /* rows 1 to KL+KU+1, and the multipliers used during the */ /* factorization are stored in rows KL+KU+2 to 2*KL+KU+1. */ /* See below for further details. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= 2*KL+KU+1. */ /* IPIV (output) INTEGER array, dimension (min(M,N)) */ /* The pivot indices; for 1 <= i <= min(M,N), row i of the */ /* matrix was interchanged with row IPIV(i). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, and division by zero will occur if it is used */ /* to solve a system of equations. */ /* Further Details */ /* =============== */ /* The band storage scheme is illustrated by the following example, when */ /* M = N = 6, KL = 2, KU = 1: */ /* On entry: On exit: */ /* * * * + + + * * * u14 u25 u36 */ /* * * + + + + * * u13 u24 u35 u46 */ /* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56 */ /* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66 */ /* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 * */ /* a31 a42 a53 a64 * * m31 m42 m53 m64 * * */ /* Array elements marked * are not used by the routine; elements marked */ /* + need not be set on entry, but are required by the routine to store */ /* elements of U because of fill-in resulting from the row interchanges. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* KV is the number of superdiagonals in the factor U, allowing for */ /* fill-in */ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --ipiv; /* Function Body */ kv = *ku + *kl; /* Test the input parameters. */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*ldab < *kl + kv + 1) { *info = -6; } if (*info != 0) { i__1 = -(*info); xerbla_("SGBTRF", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Determine the block size for this environment */ nb = ilaenv_(&c__1, "SGBTRF", " ", m, n, kl, ku); /* The block size must not exceed the limit set by the size of the */ /* local arrays WORK13 and WORK31. */ nb = min(nb,64); if (nb <= 1 || nb > *kl) { /* Use unblocked code */ sgbtf2_(m, n, kl, ku, &ab[ab_offset], ldab, &ipiv[1], info); } else { /* Use blocked code */ /* Zero the superdiagonal elements of the work array WORK13 */ i__1 = nb; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { work13[i__ + j * 65 - 66] = 0.f; /* L10: */ } /* L20: */ } /* Zero the subdiagonal elements of the work array WORK31 */ i__1 = nb; for (j = 1; j <= i__1; ++j) { i__2 = nb; for (i__ = j + 1; i__ <= i__2; ++i__) { work31[i__ + j * 65 - 66] = 0.f; /* L30: */ } /* L40: */ } /* Gaussian elimination with partial pivoting */ /* Set fill-in elements in columns KU+2 to KV to zero */ i__1 = min(kv,*n); for (j = *ku + 2; j <= i__1; ++j) { i__2 = *kl; for (i__ = kv - j + 2; i__ <= i__2; ++i__) { ab[i__ + j * ab_dim1] = 0.f; /* L50: */ } /* L60: */ } /* JU is the index of the last column affected by the current */ /* stage of the factorization */ ju = 1; i__1 = min(*m,*n); i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Computing MIN */ i__3 = nb, i__4 = min(*m,*n) - j + 1; jb = min(i__3,i__4); /* The active part of the matrix is partitioned */ /* A11 A12 A13 */ /* A21 A22 A23 */ /* A31 A32 A33 */ /* Here A11, A21 and A31 denote the current block of JB columns */ /* which is about to be factorized. The number of rows in the */ /* partitioning are JB, I2, I3 respectively, and the numbers */ /* of columns are JB, J2, J3. The superdiagonal elements of A13 */ /* and the subdiagonal elements of A31 lie outside the band. */ /* Computing MIN */ i__3 = *kl - jb, i__4 = *m - j - jb + 1; i2 = min(i__3,i__4); /* Computing MIN */ i__3 = jb, i__4 = *m - j - *kl + 1; i3 = min(i__3,i__4); /* J2 and J3 are computed after JU has been updated. */ /* Factorize the current block of JB columns */ i__3 = j + jb - 1; for (jj = j; jj <= i__3; ++jj) { /* Set fill-in elements in column JJ+KV to zero */ if (jj + kv <= *n) { i__4 = *kl; for (i__ = 1; i__ <= i__4; ++i__) { ab[i__ + (jj + kv) * ab_dim1] = 0.f; /* L70: */ } } /* Find pivot and test for singularity. KM is the number of */ /* subdiagonal elements in the current column. */ /* Computing MIN */ i__4 = *kl, i__5 = *m - jj; km = min(i__4,i__5); i__4 = km + 1; jp = isamax_(&i__4, &ab[kv + 1 + jj * ab_dim1], &c__1); ipiv[jj] = jp + jj - j; if (ab[kv + jp + jj * ab_dim1] != 0.f) { /* Computing MAX */ /* Computing MIN */ i__6 = jj + *ku + jp - 1; i__4 = ju, i__5 = min(i__6,*n); ju = max(i__4,i__5); if (jp != 1) { /* Apply interchange to columns J to J+JB-1 */ if (jp + jj - 1 < j + *kl) { i__4 = *ldab - 1; i__5 = *ldab - 1; sswap_(&jb, &ab[kv + 1 + jj - j + j * ab_dim1], & i__4, &ab[kv + jp + jj - j + j * ab_dim1], &i__5); } else { /* The interchange affects columns J to JJ-1 of A31 */ /* which are stored in the work array WORK31 */ i__4 = jj - j; i__5 = *ldab - 1; sswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], &i__5, &work31[jp + jj - j - *kl - 1], & c__65); i__4 = j + jb - jj; i__5 = *ldab - 1; i__6 = *ldab - 1; sswap_(&i__4, &ab[kv + 1 + jj * ab_dim1], &i__5, & ab[kv + jp + jj * ab_dim1], &i__6); } } /* Compute multipliers */ r__1 = 1.f / ab[kv + 1 + jj * ab_dim1]; sscal_(&km, &r__1, &ab[kv + 2 + jj * ab_dim1], &c__1); /* Update trailing submatrix within the band and within */ /* the current block. JM is the index of the last column */ /* which needs to be updated. */ /* Computing MIN */ i__4 = ju, i__5 = j + jb - 1; jm = min(i__4,i__5); if (jm > jj) { i__4 = jm - jj; i__5 = *ldab - 1; i__6 = *ldab - 1; sger_(&km, &i__4, &c_b18, &ab[kv + 2 + jj * ab_dim1], &c__1, &ab[kv + (jj + 1) * ab_dim1], &i__5, & ab[kv + 1 + (jj + 1) * ab_dim1], &i__6); } } else { /* If pivot is zero, set INFO to the index of the pivot */ /* unless a zero pivot has already been found. */ if (*info == 0) { *info = jj; } } /* Copy current column of A31 into the work array WORK31 */ /* Computing MIN */ i__4 = jj - j + 1; nw = min(i__4,i3); if (nw > 0) { scopy_(&nw, &ab[kv + *kl + 1 - jj + j + jj * ab_dim1], & c__1, &work31[(jj - j + 1) * 65 - 65], &c__1); } /* L80: */ } if (j + jb <= *n) { /* Apply the row interchanges to the other blocks. */ /* Computing MIN */ i__3 = ju - j + 1; j2 = min(i__3,kv) - jb; /* Computing MAX */ i__3 = 0, i__4 = ju - j - kv + 1; j3 = max(i__3,i__4); /* Use SLASWP to apply the row interchanges to A12, A22, and */ /* A32. */ i__3 = *ldab - 1; slaswp_(&j2, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, & c__1, &jb, &ipiv[j], &c__1); /* Adjust the pivot indices. */ i__3 = j + jb - 1; for (i__ = j; i__ <= i__3; ++i__) { ipiv[i__] = ipiv[i__] + j - 1; /* L90: */ } /* Apply the row interchanges to A13, A23, and A33 */ /* columnwise. */ k2 = j - 1 + jb + j2; i__3 = j3; for (i__ = 1; i__ <= i__3; ++i__) { jj = k2 + i__; i__4 = j + jb - 1; for (ii = j + i__ - 1; ii <= i__4; ++ii) { ip = ipiv[ii]; if (ip != ii) { temp = ab[kv + 1 + ii - jj + jj * ab_dim1]; ab[kv + 1 + ii - jj + jj * ab_dim1] = ab[kv + 1 + ip - jj + jj * ab_dim1]; ab[kv + 1 + ip - jj + jj * ab_dim1] = temp; } /* L100: */ } /* L110: */ } /* Update the relevant part of the trailing submatrix */ if (j2 > 0) { /* Update A12 */ i__3 = *ldab - 1; i__4 = *ldab - 1; strsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2, &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4); if (i2 > 0) { /* Update A22 */ i__3 = *ldab - 1; i__4 = *ldab - 1; i__5 = *ldab - 1; sgemm_("No transpose", "No transpose", &i2, &j2, &jb, &c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__4, &c_b31, &ab[kv + 1 + (j + jb) * ab_dim1], & i__5); } if (i3 > 0) { /* Update A32 */ i__3 = *ldab - 1; i__4 = *ldab - 1; sgemm_("No transpose", "No transpose", &i3, &j2, &jb, &c_b18, work31, &c__65, &ab[kv + 1 - jb + (j + jb) * ab_dim1], &i__3, &c_b31, &ab[kv + *kl + 1 - jb + (j + jb) * ab_dim1], &i__4); } } if (j3 > 0) { /* Copy the lower triangle of A13 into the work array */ /* WORK13 */ i__3 = j3; for (jj = 1; jj <= i__3; ++jj) { i__4 = jb; for (ii = jj; ii <= i__4; ++ii) { work13[ii + jj * 65 - 66] = ab[ii - jj + 1 + (jj + j + kv - 1) * ab_dim1]; /* L120: */ } /* L130: */ } /* Update A13 in the work array */ i__3 = *ldab - 1; strsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3, &c_b31, &ab[kv + 1 + j * ab_dim1], &i__3, work13, &c__65); if (i2 > 0) { /* Update A23 */ i__3 = *ldab - 1; i__4 = *ldab - 1; sgemm_("No transpose", "No transpose", &i2, &j3, &jb, &c_b18, &ab[kv + 1 + jb + j * ab_dim1], &i__3, work13, &c__65, &c_b31, &ab[jb + 1 + (j + kv) * ab_dim1], &i__4); } if (i3 > 0) { /* Update A33 */ i__3 = *ldab - 1; sgemm_("No transpose", "No transpose", &i3, &j3, &jb, &c_b18, work31, &c__65, work13, &c__65, & c_b31, &ab[*kl + 1 + (j + kv) * ab_dim1], & i__3); } /* Copy the lower triangle of A13 back into place */ i__3 = j3; for (jj = 1; jj <= i__3; ++jj) { i__4 = jb; for (ii = jj; ii <= i__4; ++ii) { ab[ii - jj + 1 + (jj + j + kv - 1) * ab_dim1] = work13[ii + jj * 65 - 66]; /* L140: */ } /* L150: */ } } } else { /* Adjust the pivot indices. */ i__3 = j + jb - 1; for (i__ = j; i__ <= i__3; ++i__) { ipiv[i__] = ipiv[i__] + j - 1; /* L160: */ } } /* Partially undo the interchanges in the current block to */ /* restore the upper triangular form of A31 and copy the upper */ /* triangle of A31 back into place */ i__3 = j; for (jj = j + jb - 1; jj >= i__3; --jj) { jp = ipiv[jj] - jj + 1; if (jp != 1) { /* Apply interchange to columns J to JJ-1 */ if (jp + jj - 1 < j + *kl) { /* The interchange does not affect A31 */ i__4 = jj - j; i__5 = *ldab - 1; i__6 = *ldab - 1; sswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], & i__5, &ab[kv + jp + jj - j + j * ab_dim1], & i__6); } else { /* The interchange does affect A31 */ i__4 = jj - j; i__5 = *ldab - 1; sswap_(&i__4, &ab[kv + 1 + jj - j + j * ab_dim1], & i__5, &work31[jp + jj - j - *kl - 1], &c__65); } } /* Copy the current column of A31 back into place */ /* Computing MIN */ i__4 = i3, i__5 = jj - j + 1; nw = min(i__4,i__5); if (nw > 0) { scopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &ab[ kv + *kl + 1 - jj + j + jj * ab_dim1], &c__1); } /* L170: */ } /* L180: */ } } return 0; /* End of SGBTRF */ } /* sgbtrf_ */
/* Subroutine */ int ssygvx_(integer *itype, char *jobz, char *range, char * uplo, integer *n, real *a, integer *lda, real *b, integer *ldb, real * vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, real *w, real *z__, integer *ldz, real *work, integer *lwork, integer *iwork, integer *ifail, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, z_dim1, z_offset, i__1, i__2; /* Local variables */ integer nb; char trans[1]; logical upper; logical wantz; logical alleig, indeig, valeig; integer lwkmin; integer lwkopt; logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* November 2006 */ /* Purpose */ /* ======= */ /* SSYGVX computes selected eigenvalues, and optionally, eigenvectors */ /* of a real generalized symmetric-definite eigenproblem, of the form */ /* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A */ /* and B are assumed to be symmetric and B is also positive definite. */ /* Eigenvalues and eigenvectors can be selected by specifying either a */ /* range of values or a range of indices for the desired eigenvalues. */ /* Arguments */ /* ========= */ /* ITYPE (input) INTEGER */ /* Specifies the problem type to be solved: */ /* = 1: A*x = (lambda)*B*x */ /* = 2: A*B*x = (lambda)*x */ /* = 3: B*A*x = (lambda)*x */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* RANGE (input) CHARACTER*1 */ /* = 'A': all eigenvalues will be found. */ /* = 'V': all eigenvalues in the half-open interval (VL,VU] */ /* will be found. */ /* = 'I': the IL-th through IU-th eigenvalues will be found. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A and B are stored; */ /* = 'L': Lower triangle of A and B are stored. */ /* N (input) INTEGER */ /* The order of the matrix pencil (A,B). N >= 0. */ /* A (input/output) REAL array, dimension (LDA, N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of A contains the */ /* upper triangular part of the matrix A. If UPLO = 'L', */ /* the leading N-by-N lower triangular part of A contains */ /* the lower triangular part of the matrix A. */ /* On exit, the lower triangle (if UPLO='L') or the upper */ /* triangle (if UPLO='U') of A, including the diagonal, is */ /* destroyed. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) REAL array, dimension (LDA, N) */ /* On entry, the symmetric matrix B. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of B contains the */ /* upper triangular part of the matrix B. If UPLO = 'L', */ /* the leading N-by-N lower triangular part of B contains */ /* the lower triangular part of the matrix B. */ /* On exit, if INFO <= N, the part of B containing the matrix is */ /* overwritten by the triangular factor U or L from the Cholesky */ /* factorization B = U**T*U or B = L*L**T. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* VL (input) REAL */ /* VU (input) REAL */ /* If RANGE='V', the lower and upper bounds of the interval to */ /* be searched for eigenvalues. VL < VU. */ /* Not referenced if RANGE = 'A' or 'I'. */ /* IL (input) INTEGER */ /* IU (input) INTEGER */ /* If RANGE='I', the indices (in ascending order) of the */ /* smallest and largest eigenvalues to be returned. */ /* 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */ /* Not referenced if RANGE = 'A' or 'V'. */ /* ABSTOL (input) REAL */ /* The absolute error tolerance for the eigenvalues. */ /* An approximate eigenvalue is accepted as converged */ /* when it is determined to lie in an interval [a,b] */ /* of width less than or equal to */ /* ABSTOL + EPS * max( |a|,|b| ) , */ /* where EPS is the machine precision. If ABSTOL is less than */ /* or equal to zero, then EPS*|T| will be used in its place, */ /* where |T| is the 1-norm of the tridiagonal matrix obtained */ /* by reducing A to tridiagonal form. */ /* Eigenvalues will be computed most accurately when ABSTOL is */ /* set to twice the underflow threshold 2*DLAMCH('S'), not zero. */ /* If this routine returns with INFO>0, indicating that some */ /* eigenvectors did not converge, try setting ABSTOL to */ /* 2*SLAMCH('S'). */ /* M (output) INTEGER */ /* The total number of eigenvalues found. 0 <= M <= N. */ /* If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */ /* W (output) REAL array, dimension (N) */ /* On normal exit, the first M elements contain the selected */ /* eigenvalues in ascending order. */ /* Z (output) REAL array, dimension (LDZ, max(1,M)) */ /* If JOBZ = 'N', then Z is not referenced. */ /* If JOBZ = 'V', then if INFO = 0, the first M columns of Z */ /* contain the orthonormal eigenvectors of the matrix A */ /* corresponding to the selected eigenvalues, with the i-th */ /* column of Z holding the eigenvector associated with W(i). */ /* The eigenvectors are normalized as follows: */ /* if ITYPE = 1 or 2, Z**T*B*Z = I; */ /* if ITYPE = 3, Z**T*inv(B)*Z = I. */ /* If an eigenvector fails to converge, then that column of Z */ /* contains the latest approximation to the eigenvector, and the */ /* index of the eigenvector is returned in IFAIL. */ /* Note: the user must ensure that at least max(1,M) columns are */ /* supplied in the array Z; if RANGE = 'V', the exact value of M */ /* is not known in advance and an upper bound must be used. */ /* LDZ (input) INTEGER */ /* The leading dimension of the array Z. LDZ >= 1, and if */ /* JOBZ = 'V', LDZ >= max(1,N). */ /* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The length of the array WORK. LWORK >= max(1,8*N). */ /* For optimal efficiency, LWORK >= (NB+3)*N, */ /* where NB is the blocksize for SSYTRD returned by ILAENV. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* IWORK (workspace) INTEGER array, dimension (5*N) */ /* IFAIL (output) INTEGER array, dimension (N) */ /* If JOBZ = 'V', then if INFO = 0, the first M elements of */ /* IFAIL are zero. If INFO > 0, then IFAIL contains the */ /* indices of the eigenvectors that failed to converge. */ /* If JOBZ = 'N', then IFAIL is not referenced. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: SPOTRF or SSYEVX returned an error code: */ /* <= N: if INFO = i, SSYEVX failed to converge; */ /* i eigenvectors failed to converge. Their indices */ /* are stored in array IFAIL. */ /* > N: if INFO = N + i, for 1 <= i <= N, then the leading */ /* minor of order i of B is not positive definite. */ /* The factorization of B could not be completed and */ /* no eigenvalues or eigenvectors were computed. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ /* ===================================================================== */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --w; z_dim1 = *ldz; z_offset = 1 + z_dim1; z__ -= z_offset; --work; --iwork; --ifail; /* Function Body */ upper = lsame_(uplo, "U"); wantz = lsame_(jobz, "V"); alleig = lsame_(range, "A"); valeig = lsame_(range, "V"); indeig = lsame_(range, "I"); lquery = *lwork == -1; *info = 0; if (*itype < 1 || *itype > 3) { *info = -1; } else if (! (wantz || lsame_(jobz, "N"))) { *info = -2; } else if (! (alleig || valeig || indeig)) { *info = -3; } else if (! (upper || lsame_(uplo, "L"))) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } else { if (valeig) { if (*n > 0 && *vu <= *vl) { *info = -11; } } else if (indeig) { if (*il < 1 || *il > max(1,*n)) { *info = -12; } else if (*iu < min(*n,*il) || *iu > *n) { *info = -13; } } } if (*info == 0) { if (*ldz < 1 || wantz && *ldz < *n) { *info = -18; } } if (*info == 0) { /* Computing MAX */ i__1 = 1, i__2 = *n << 3; lwkmin = max(i__1,i__2); nb = ilaenv_(&c__1, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); /* Computing MAX */ i__1 = lwkmin, i__2 = (nb + 3) * *n; lwkopt = max(i__1,i__2); work[1] = (real) lwkopt; if (*lwork < lwkmin && ! lquery) { *info = -20; } } if (*info != 0) { i__1 = -(*info); xerbla_("SSYGVX", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ *m = 0; if (*n == 0) { return 0; } /* Form a Cholesky factorization of B. */ spotrf_(uplo, n, &b[b_offset], ldb, info); if (*info != 0) { *info = *n + *info; return 0; } /* Transform problem to standard eigenvalue problem and solve. */ ssygst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); ssyevx_(jobz, range, uplo, n, &a[a_offset], lda, vl, vu, il, iu, abstol, m, &w[1], &z__[z_offset], ldz, &work[1], lwork, &iwork[1], &ifail[ 1], info); if (wantz) { /* Backtransform eigenvectors to the original problem. */ if (*info > 0) { *m = *info - 1; } if (*itype == 1 || *itype == 2) { /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ /* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ if (upper) { *(unsigned char *)trans = 'N'; } else { *(unsigned char *)trans = 'T'; } strsm_("Left", uplo, trans, "Non-unit", n, m, &c_b19, &b[b_offset] , ldb, &z__[z_offset], ldz); } else if (*itype == 3) { /* For B*A*x=(lambda)*x; */ /* backtransform eigenvectors: x = L*y or U'*y */ if (upper) { *(unsigned char *)trans = 'T'; } else { *(unsigned char *)trans = 'N'; } strmm_("Left", uplo, trans, "Non-unit", n, m, &c_b19, &b[b_offset] , ldb, &z__[z_offset], ldz); } } /* Set WORK(1) to optimal workspace size. */ work[1] = (real) lwkopt; return 0; /* End of SSYGVX */ } /* ssygvx_ */
/* Subroutine */ int strtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, integer * info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University March 31, 1993 Purpose ======= STRTRS solves a triangular system of the form A * X = B or A**T * X = B, where A is a triangular matrix of order N, and B is an N-by-NRHS matrix. A check is made to verify that A is nonsingular. Arguments ========= UPLO (input) CHARACTER*1 = 'U': A is upper triangular; = 'L': A is lower triangular. TRANS (input) CHARACTER*1 Specifies the form of the system of equations: = 'N': A * X = B (No transpose) = 'T': A**T * X = B (Transpose) = 'C': A**H * X = B (Conjugate transpose = Transpose) DIAG (input) CHARACTER*1 = 'N': A is non-unit triangular; = 'U': A is unit triangular. N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. A (input) REAL array, dimension (LDA,N) The triangular matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of the array A contains the upper triangular matrix, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of the array A contains the lower triangular matrix, and the strictly upper triangular part of A is not referenced. If DIAG = 'U', the diagonal elements of A are also not referenced and are assumed to be 1. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input/output) REAL array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, if INFO = 0, the solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, the i-th diagonal element of A is zero, indicating that the matrix is singular and the solutions X have not been computed. ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static real c_b12 = 1.f; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ extern logical lsame_(char *, char *); extern /* Subroutine */ int strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), xerbla_(char *, integer *); static logical nounit; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; /* Function Body */ *info = 0; nounit = lsame_(diag, "N"); if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { *info = -1; } else if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C")) { *info = -2; } else if (! nounit && ! lsame_(diag, "U")) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*nrhs < 0) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } if (*info != 0) { i__1 = -(*info); xerbla_("STRTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Check for singularity. */ if (nounit) { i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (a_ref(*info, *info) == 0.f) { return 0; } /* L10: */ } } *info = 0; /* Solve A * x = b or A' * x = b. */ strsm_("Left", uplo, trans, diag, n, nrhs, &c_b12, &a[a_offset], lda, &b[ b_offset], ldb); return 0; /* End of STRTRS */ } /* strtrs_ */
/* Subroutine */ int sgerqs_(integer *m, integer *n, integer *nrhs, real *a, integer *lda, real *tau, real *b, integer *ldb, real *work, integer * lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ extern /* Subroutine */ int strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), xerbla_(char *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *), sormrq_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer * , real *, integer *, integer *); #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= Compute a minimum-norm solution min || A*X - B || using the RQ factorization A = R*Q computed by SGERQF. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= M >= 0. NRHS (input) INTEGER The number of columns of B. NRHS >= 0. A (input) REAL array, dimension (LDA,N) Details of the RQ factorization of the original matrix A as returned by SGERQF. LDA (input) INTEGER The leading dimension of the array A. LDA >= M. TAU (input) REAL array, dimension (M) Details of the orthogonal matrix Q. B (input/output) REAL array, dimension (LDB,NRHS) On entry, the right hand side vectors for the linear system. On exit, the solution vectors X. Each solution vector is contained in rows 1:N of a column of B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). WORK (workspace) REAL array, dimension (LWORK) LWORK (input) INTEGER The length of the array WORK. LWORK must be at least NRHS, and should be at least NRHS*NB, where NB is the block size for this environment. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input parameters. Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --tau; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0 || *m > *n) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -8; } else if (*lwork < 1 || *lwork < *nrhs && *m > 0 && *n > 0) { *info = -10; } if (*info != 0) { i__1 = -(*info); xerbla_("SGERQS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0 || *m == 0) { return 0; } /* Solve R*X = B(n-m+1:n,:) */ strsm_("Left", "Upper", "No transpose", "Non-unit", m, nrhs, &c_b7, & a_ref(1, *n - *m + 1), lda, &b_ref(*n - *m + 1, 1), ldb); /* Set B(1:n-m,:) to zero */ i__1 = *n - *m; slaset_("Full", &i__1, nrhs, &c_b9, &c_b9, &b[b_offset], ldb); /* B := Q' * B */ sormrq_("Left", "Transpose", n, nrhs, m, &a[a_offset], lda, &tau[1], &b[ b_offset], ldb, &work[1], lwork, info); return 0; /* End of SGERQS */ } /* sgerqs_ */
/* Subroutine */ int sgerqs_(integer *m, integer *n, integer *nrhs, real *a, integer *lda, real *tau, real *b, integer *ldb, real *work, integer * lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ /* -- LAPACK routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* Compute a minimum-norm solution */ /* min || A*X - B || */ /* using the RQ factorization */ /* A = R*Q */ /* computed by SGERQF. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= M >= 0. */ /* NRHS (input) INTEGER */ /* The number of columns of B. NRHS >= 0. */ /* A (input) REAL array, dimension (LDA,N) */ /* Details of the RQ factorization of the original matrix A as */ /* returned by SGERQF. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= M. */ /* TAU (input) REAL array, dimension (M) */ /* Details of the orthogonal matrix Q. */ /* B (input/output) REAL array, dimension (LDB,NRHS) */ /* On entry, the right hand side vectors for the linear system. */ /* On exit, the solution vectors X. Each solution vector */ /* is contained in rows 1:N of a column of B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* WORK (workspace) REAL array, dimension (LWORK) */ /* LWORK (input) INTEGER */ /* The length of the array WORK. LWORK must be at least NRHS, */ /* and should be at least NRHS*NB, where NB is the block size */ /* for this environment. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --tau; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --work; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0 || *m > *n) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -8; } else if (*lwork < 1 || *lwork < *nrhs && *m > 0 && *n > 0) { *info = -10; } if (*info != 0) { i__1 = -(*info); this_xerbla_("SGERQS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0 || *m == 0) { return 0; } /* Solve R*X = B(n-m+1:n,:) */ strsm_("Left", "Upper", "No transpose", "Non-unit", m, nrhs, &c_b7, &a[(* n - *m + 1) * a_dim1 + 1], lda, &b[*n - *m + 1 + b_dim1], ldb); /* Set B(1:n-m,:) to zero */ i__1 = *n - *m; slaset_("Full", &i__1, nrhs, &c_b9, &c_b9, &b[b_offset], ldb); /* B := Q' * B */ sormrq_("Left", "Transpose", n, nrhs, m, &a[a_offset], lda, &tau[1], &b[ b_offset], ldb, &work[1], lwork, info); return 0; /* End of SGERQS */ } /* sgerqs_ */
void sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U, int *perm_c, int *perm_r, SuperMatrix *B, SuperLUStat_t *stat, int *info) { /* * Purpose * ======= * * SGSTRS solves a system of linear equations A*X=B or A'*X=B * with A sparse and B dense, using the LU factorization computed by * SGSTRF. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * trans (input) trans_t * Specifies the form of the system of equations: * = NOTRANS: A * X = B (No transpose) * = TRANS: A'* X = B (Transpose) * = CONJ: A**H * X = B (Conjugate transpose) * * L (input) SuperMatrix* * The factor L from the factorization Pr*A*Pc=L*U as computed by * sgstrf(). Use compressed row subscripts storage for supernodes, * i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU. * * U (input) SuperMatrix* * The factor U from the factorization Pr*A*Pc=L*U as computed by * sgstrf(). Use column-wise storage scheme, i.e., U has types: * Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU. * * perm_c (input) int*, dimension (L->ncol) * Column permutation vector, which defines the * permutation matrix Pc; perm_c[i] = j means column i of A is * in position j in A*Pc. * * perm_r (input) int*, dimension (L->nrow) * Row permutation vector, which defines the permutation matrix Pr; * perm_r[i] = j means row i of A is in position j in Pr*A. * * B (input/output) SuperMatrix* * B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE. * On entry, the right hand side matrix. * On exit, the solution matrix if info = 0; * * stat (output) SuperLUStat_t* * Record the statistics on runtime and floating-point operation count. * See util.h for the definition of 'SuperLUStat_t'. * * info (output) int* * = 0: successful exit * < 0: if info = -i, the i-th argument had an illegal value * */ #ifdef _CRAY _fcd ftcs1, ftcs2, ftcs3, ftcs4; #endif int incx = 1, incy = 1; #ifdef USE_VENDOR_BLAS float alpha = 1.0, beta = 1.0; float *work_col; #endif DNformat *Bstore; float *Bmat; SCformat *Lstore; NCformat *Ustore; float *Lval, *Uval; int fsupc, nrow, nsupr, nsupc, luptr, istart, irow; int i, j, k, iptr, jcol, n, ldb, nrhs; float *work, *rhs_work, *soln; flops_t solve_ops; void sprint_soln(); /* Test input parameters ... */ *info = 0; Bstore = B->Store; ldb = Bstore->lda; nrhs = B->ncol; if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1; else if ( L->nrow != L->ncol || L->nrow < 0 || L->Stype != SLU_SC || L->Dtype != SLU_S || L->Mtype != SLU_TRLU ) *info = -2; else if ( U->nrow != U->ncol || U->nrow < 0 || U->Stype != SLU_NC || U->Dtype != SLU_S || U->Mtype != SLU_TRU ) *info = -3; else if ( ldb < SUPERLU_MAX(0, L->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE ) *info = -6; if ( *info ) { i = -(*info); xerbla_("sgstrs", &i); return; } n = L->nrow; work = floatCalloc(n * nrhs); if ( !work ) ABORT("Malloc fails for local work[]."); soln = floatMalloc(n); if ( !soln ) ABORT("Malloc fails for local soln[]."); Bmat = Bstore->nzval; Lstore = L->Store; Lval = Lstore->nzval; Ustore = U->Store; Uval = Ustore->nzval; solve_ops = 0; if ( trans == NOTRANS ) { /* Permute right hand sides to form Pr*B */ for (i = 0; i < nrhs; i++) { rhs_work = &Bmat[i*ldb]; for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k]; for (k = 0; k < n; k++) rhs_work[k] = soln[k]; } /* Forward solve PLy=Pb. */ for (k = 0; k <= Lstore->nsuper; k++) { fsupc = L_FST_SUPC(k); istart = L_SUB_START(fsupc); nsupr = L_SUB_START(fsupc+1) - istart; nsupc = L_FST_SUPC(k+1) - fsupc; nrow = nsupr - nsupc; solve_ops += nsupc * (nsupc - 1) * nrhs; solve_ops += 2 * nrow * nsupc * nrhs; if ( nsupc == 1 ) { for (j = 0; j < nrhs; j++) { rhs_work = &Bmat[j*ldb]; luptr = L_NZ_START(fsupc); for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){ irow = L_SUB(iptr); ++luptr; rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr]; } } } else { luptr = L_NZ_START(fsupc); #ifdef USE_VENDOR_BLAS #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd("N", strlen("N")); ftcs3 = _cptofcd("U", strlen("U")); STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha, &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb); SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, &beta, &work[0], &n ); #else strsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha, &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb); sgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, &beta, &work[0], &n ); #endif for (j = 0; j < nrhs; j++) { rhs_work = &Bmat[j*ldb]; work_col = &work[j*n]; iptr = istart + nsupc; for (i = 0; i < nrow; i++) { irow = L_SUB(iptr); rhs_work[irow] -= work_col[i]; /* Scatter */ work_col[i] = 0.0; iptr++; } } #else for (j = 0; j < nrhs; j++) { rhs_work = &Bmat[j*ldb]; slsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]); smatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc], &rhs_work[fsupc], &work[0] ); iptr = istart + nsupc; for (i = 0; i < nrow; i++) { irow = L_SUB(iptr); rhs_work[irow] -= work[i]; work[i] = 0.0; iptr++; } } #endif } /* else ... */ } /* for L-solve */ #ifdef DEBUG printf("After L-solve: y=\n"); sprint_soln(n, nrhs, Bmat); #endif /* * Back solve Ux=y. */ for (k = Lstore->nsuper; k >= 0; k--) { fsupc = L_FST_SUPC(k); istart = L_SUB_START(fsupc); nsupr = L_SUB_START(fsupc+1) - istart; nsupc = L_FST_SUPC(k+1) - fsupc; luptr = L_NZ_START(fsupc); solve_ops += nsupc * (nsupc + 1) * nrhs; if ( nsupc == 1 ) { rhs_work = &Bmat[0]; for (j = 0; j < nrhs; j++) { rhs_work[fsupc] /= Lval[luptr]; rhs_work += ldb; } } else { #ifdef USE_VENDOR_BLAS #ifdef _CRAY ftcs1 = _cptofcd("L", strlen("L")); ftcs2 = _cptofcd("U", strlen("U")); ftcs3 = _cptofcd("N", strlen("N")); STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha, &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb); #else strsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha, &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb); #endif #else for (j = 0; j < nrhs; j++) susolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] ); #endif } for (j = 0; j < nrhs; ++j) { rhs_work = &Bmat[j*ldb]; for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) { solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol)); for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){ irow = U_SUB(i); rhs_work[irow] -= rhs_work[jcol] * Uval[i]; } } } } /* for U-solve */ #ifdef DEBUG printf("After U-solve: x=\n"); sprint_soln(n, nrhs, Bmat); #endif /* Compute the final solution X := Pc*X. */ for (i = 0; i < nrhs; i++) { rhs_work = &Bmat[i*ldb]; for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]]; for (k = 0; k < n; k++) rhs_work[k] = soln[k]; } stat->ops[SOLVE] = solve_ops; } else { /* Solve A'*X=B or CONJ(A)*X=B */ /* Permute right hand sides to form Pc'*B. */ for (i = 0; i < nrhs; i++) { rhs_work = &Bmat[i*ldb]; for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k]; for (k = 0; k < n; k++) rhs_work[k] = soln[k]; } stat->ops[SOLVE] = 0; for (k = 0; k < nrhs; ++k) { /* Multiply by inv(U'). */ sp_strsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info); /* Multiply by inv(L'). */ sp_strsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info); } /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */ for (i = 0; i < nrhs; i++) { rhs_work = &Bmat[i*ldb]; for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]]; for (k = 0; k < n; k++) rhs_work[k] = soln[k]; } } SUPERLU_FREE(work); SUPERLU_FREE(soln); }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; side_t side; uplo_t uplo; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; if( argc < 7 ) { printf("Usage:\n"); printf("test_foo.x m n k p_begin p_inc p_end:\n"); exit; } int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); m_input = strtol( argv[1], NULL, 10 ); n_input = strtol( argv[2], NULL, 10 ); p_begin = strtol( argv[4], NULL, 10 ); p_inc = strtol( argv[5], NULL, 10 ); p_end = strtol( argv[6], NULL, 10 ); #if 1 dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; #else dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_FLOAT; //dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_SCOMPLEX; #endif side = BLIS_LEFT; //side = BLIS_RIGHT; uplo = BLIS_LOWER; //uplo = BLIS_UPPER; for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt_a, m, m, 0, 0, &a ); else bli_obj_create( dt_a, n, n, 0, 0, &a ); bli_obj_create( dt_b, m, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uplo, &a ); //bli_obj_set_diag( BLIS_UNIT_DIAG, &a ); bli_randm( &a ); bli_randm( &c ); bli_randm( &b ); /* { obj_t a2; bli_obj_alias_to( &a, &a2 ); bli_obj_toggle_uplo( &a2 ); bli_obj_inc_diag_offset( 1, &a2 ); bli_setm( &BLIS_ZERO, &a2 ); bli_obj_inc_diag_offset( -2, &a2 ); bli_obj_toggle_uplo( &a2 ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a2 ); bli_scalm( &BLIS_TWO, &a2 ); //bli_scalm( &BLIS_TWO, &a ); } */ bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT /* obj_t ar, ai; bli_obj_alias_to( &a, &ar ); bli_obj_alias_to( &a, &ai ); bli_obj_set_dt( BLIS_DOUBLE, &ar ); ar.rs *= 2; ar.cs *= 2; bli_obj_set_dt( BLIS_DOUBLE, &ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1; bli_printm( "ar", &ar, "%4.1f", "" ); bli_printm( "ai", &ai, "%4.1f", "" ); */ bli_invertd( &a ); bli_printm( "a", &a, "%4.1f", "" ); bli_invertd( &a ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_trsm( side, //bli_trsm4m( side, //bli_trsm3m( side, &alpha, &a, &c ); #else if ( bli_is_real( dt_a ) ) { f77_char side = 'L'; f77_char uplo = 'L'; f77_char transa = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float * alphap = bli_obj_buffer( &alpha ); float * ap = bli_obj_buffer( &a ); float * cp = bli_obj_buffer( &c ); strsm_( &side, &uplo, &transa, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } else // if ( bli_is_complex( dt_a ) ) { f77_char side = 'L'; f77_char uplo = 'L'; f77_char transa = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); scomplex* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); scomplex* cp = bli_obj_buffer( &c ); ctrsm_( &side, //ztrsm_( &side, &uplo, &transa, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt_a ) ) gflops *= 4.0; #ifdef BLIS printf( "data_trsm_blis" ); #else printf( "data_trsm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, c; obj_t c_save; obj_t alpha; dim_t m, n; dim_t p; dim_t p_begin, p_max, p_inc; int m_input, n_input; ind_t ind; num_t dt; char dt_ch; int r, n_repeats; side_t side; uplo_t uploa; trans_t transa; diag_t diaga; f77_char f77_side; f77_char f77_uploa; f77_char f77_transa; f77_char f77_diaga; double dtime; double dtime_save; double gflops; //bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; dt = DT; ind = IND; p_begin = P_BEGIN; p_max = P_MAX; p_inc = P_INC; m_input = -1; n_input = -1; // Supress compiler warnings about unused variable 'ind'. ( void )ind; #if 0 cntx_t* cntx; ind_t ind_mod = ind; // A hack to use 3m1 as 1mpb (with 1m as 1mbp). if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; // Initialize a context for the current induced method and datatype. cntx = bli_gks_query_ind_cntx( ind_mod, dt ); // Set k to the kc blocksize for the current datatype. k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); #elif 1 //k_input = 256; #endif // Choose the char corresponding to the requested datatype. if ( bli_is_float( dt ) ) dt_ch = 's'; else if ( bli_is_double( dt ) ) dt_ch = 'd'; else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; else dt_ch = 'z'; #if 0 side = BLIS_LEFT; #else side = BLIS_RIGHT; #endif #if 0 uploa = BLIS_LOWER; #else uploa = BLIS_UPPER; #endif transa = BLIS_NO_TRANSPOSE; diaga = BLIS_NONUNIT_DIAG; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); bli_param_map_blis_to_netlib_trans( transa, &f77_transa ); bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga ); // Begin with initializing the last entry to zero so that // matlab allocates space for the entire array once up-front. for ( p = p_begin; p + p_inc <= p_max; p += p_inc ) ; printf( "data_%s_%ctrsm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )0, ( unsigned long )0, 0.0 ); for ( p = p_begin; p <= p_max; p += p_inc ) { if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &c ); //bli_obj_create( dt, m, n, n, 1, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &c ); bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( uploa, &a ); bli_obj_set_conjtrans( transa, &a ); bli_obj_set_diag( diaga, &a ); bli_randm( &a ); bli_mktrim( &a ); // Load the diagonal of A to make it more likely to be invertible. bli_shiftd( &BLIS_TWO, &a ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_copym( &c, &c_save ); #if 0 //def BLIS bli_ind_disable_all_dt( dt ); bli_ind_enable_dt( ind, dt ); #endif dtime_save = DBL_MAX; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_trsm( side, &alpha, &a, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* cp = ( float* )bli_obj_buffer( &c ); strsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* cp = ( double* )bli_obj_buffer( &c ); dtrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN float* alphap = ( float* )bli_obj_buffer( &alpha ); float* ap = ( float* )bli_obj_buffer( &a ); float* cp = ( float* )bli_obj_buffer( &c ); #else scomplex* alphap = ( scomplex* )bli_obj_buffer( &alpha ); scomplex* ap = ( scomplex* )bli_obj_buffer( &a ); scomplex* cp = ( scomplex* )bli_obj_buffer( &c ); #endif ctrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( &c ); f77_int kk = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldc = bli_obj_col_stride( &c ); #ifdef EIGEN double* alphap = ( double* )bli_obj_buffer( &alpha ); double* ap = ( double* )bli_obj_buffer( &a ); double* cp = ( double* )bli_obj_buffer( &c ); #else dcomplex* alphap = ( dcomplex* )bli_obj_buffer( &alpha ); dcomplex* ap = ( dcomplex* )bli_obj_buffer( &a ); dcomplex* cp = ( dcomplex* )bli_obj_buffer( &c ); #endif ztrsm_( &f77_side, &f77_uploa, &f77_transa, &f77_diaga, &mm, &kk, alphap, ap, &lda, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%4.1f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; printf( "data_%s_%ctrsm_%s", THR_STR, dt_ch, STR ); printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, gflops ); bli_obj_free( &alpha ); bli_obj_free( &a ); bli_obj_free( &c ); bli_obj_free( &c_save ); } //bli_finalize(); return 0; }
/* Subroutine */ int ssygv_(integer *itype, char *jobz, char *uplo, integer * n, real *a, integer *lda, real *b, integer *ldb, real *w, real *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; /* Local variables */ integer nb, neig; char trans[1]; logical upper; logical wantz; integer lwkmin; integer lwkopt; logical lquery; /* -- LAPACK driver routine (version 3.2) -- */ /* November 2006 */ /* Purpose */ /* ======= */ /* SSYGV computes all the eigenvalues, and optionally, the eigenvectors */ /* of a real generalized symmetric-definite eigenproblem, of the form */ /* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. */ /* Here A and B are assumed to be symmetric and B is also */ /* positive definite. */ /* Arguments */ /* ========= */ /* ITYPE (input) INTEGER */ /* Specifies the problem type to be solved: */ /* = 1: A*x = (lambda)*B*x */ /* = 2: A*B*x = (lambda)*x */ /* = 3: B*A*x = (lambda)*x */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangles of A and B are stored; */ /* = 'L': Lower triangles of A and B are stored. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* A (input/output) REAL array, dimension (LDA, N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of A contains the */ /* upper triangular part of the matrix A. If UPLO = 'L', */ /* the leading N-by-N lower triangular part of A contains */ /* the lower triangular part of the matrix A. */ /* On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ /* matrix Z of eigenvectors. The eigenvectors are normalized */ /* as follows: */ /* if ITYPE = 1 or 2, Z**T*B*Z = I; */ /* if ITYPE = 3, Z**T*inv(B)*Z = I. */ /* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */ /* or the lower triangle (if UPLO='L') of A, including the */ /* diagonal, is destroyed. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) REAL array, dimension (LDB, N) */ /* On entry, the symmetric positive definite matrix B. */ /* If UPLO = 'U', the leading N-by-N upper triangular part of B */ /* contains the upper triangular part of the matrix B. */ /* If UPLO = 'L', the leading N-by-N lower triangular part of B */ /* contains the lower triangular part of the matrix B. */ /* On exit, if INFO <= N, the part of B containing the matrix is */ /* overwritten by the triangular factor U or L from the Cholesky */ /* factorization B = U**T*U or B = L*L**T. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* W (output) REAL array, dimension (N) */ /* If INFO = 0, the eigenvalues in ascending order. */ /* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The length of the array WORK. LWORK >= max(1,3*N-1). */ /* For optimal efficiency, LWORK >= (NB+2)*N, */ /* where NB is the blocksize for SSYTRD returned by ILAENV. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: SPOTRF or SSYEV returned an error code: */ /* <= N: if INFO = i, SSYEV failed to converge; */ /* i off-diagonal elements of an intermediate */ /* tridiagonal form did not converge to zero; */ /* > N: if INFO = N + i, for 1 <= i <= N, then the leading */ /* minor of order i of B is not positive definite. */ /* The factorization of B could not be completed and */ /* no eigenvalues or eigenvectors were computed. */ /* ===================================================================== */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --w; --work; /* Function Body */ wantz = lsame_(jobz, "V"); upper = lsame_(uplo, "U"); lquery = *lwork == -1; *info = 0; if (*itype < 1 || *itype > 3) { *info = -1; } else if (! (wantz || lsame_(jobz, "N"))) { *info = -2; } else if (! (upper || lsame_(uplo, "L"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldb < max(1,*n)) { *info = -8; } if (*info == 0) { /* Computing MAX */ i__1 = 1, i__2 = *n * 3 - 1; lwkmin = max(i__1,i__2); nb = ilaenv_(&c__1, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1); /* Computing MAX */ i__1 = lwkmin, i__2 = (nb + 2) * *n; lwkopt = max(i__1,i__2); work[1] = (real) lwkopt; if (*lwork < lwkmin && ! lquery) { *info = -11; } } if (*info != 0) { i__1 = -(*info); xerbla_("SSYGV ", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Form a Cholesky factorization of B. */ spotrf_(uplo, n, &b[b_offset], ldb, info); if (*info != 0) { *info = *n + *info; return 0; } /* Transform problem to standard eigenvalue problem and solve. */ ssygst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); ssyev_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, info); if (wantz) { /* Backtransform eigenvectors to the original problem. */ neig = *n; if (*info > 0) { neig = *info - 1; } if (*itype == 1 || *itype == 2) { /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ /* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ if (upper) { *(unsigned char *)trans = 'N'; } else { *(unsigned char *)trans = 'T'; } strsm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b16, &b[ b_offset], ldb, &a[a_offset], lda); } else if (*itype == 3) { /* For B*A*x=(lambda)*x; */ /* backtransform eigenvectors: x = L*y or U'*y */ if (upper) { *(unsigned char *)trans = 'T'; } else { *(unsigned char *)trans = 'N'; } strmm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b16, &b[ b_offset], ldb, &a[a_offset], lda); } } work[1] = (real) lwkopt; return 0; /* End of SSYGV */ } /* ssygv_ */
/* Subroutine */ int ssygst_(integer *itype, char *uplo, integer *n, real *a, integer *lda, real *b, integer *ldb, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ integer k, kb, nb; extern logical lsame_(char *, char *); logical upper; extern /* Subroutine */ int strmm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), ssymm_(char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real * , real *, integer *), strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real * , integer *), ssygs2_(integer *, char *, integer *, real *, integer *, real *, integer *, integer * ), ssyr2k_(char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SSYGST reduces a real symmetric-definite generalized eigenproblem */ /* to standard form. */ /* If ITYPE = 1, the problem is A*x = lambda*B*x, */ /* and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) */ /* If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */ /* B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. */ /* B must have been previously factorized as U**T*U or L*L**T by SPOTRF. */ /* Arguments */ /* ========= */ /* ITYPE (input) INTEGER */ /* = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); */ /* = 2 or 3: compute U*A*U**T or L**T*A*L. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored and B is factored as */ /* U**T*U; */ /* = 'L': Lower triangle of A is stored and B is factored as */ /* L*L**T. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* A (input/output) REAL array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* N-by-N upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if INFO = 0, the transformed matrix, stored in the */ /* same format as A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input) REAL array, dimension (LDB,N) */ /* The triangular factor from the Cholesky factorization of B, */ /* as returned by SPOTRF. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (*itype < 1 || *itype > 3) { *info = -1; } else if (! upper && ! lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); xerbla_("SSYGST", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine the block size for this environment. */ nb = ilaenv_(&c__1, "SSYGST", uplo, n, &c_n1, &c_n1, &c_n1); if (nb <= 1 || nb >= *n) { /* Use unblocked code */ ssygs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); } else { /* Use blocked code */ if (*itype == 1) { if (upper) { /* Compute inv(U')*A*inv(U) */ i__1 = *n; i__2 = nb; for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the upper triangle of A(k:n,k:n) */ ssygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, info); if (k + kb <= *n) { i__3 = *n - k - kb + 1; strsm_("Left", uplo, "Transpose", "Non-unit", &kb, & i__3, &c_b14, &b[k + k * b_dim1], ldb, &a[k + (k + kb) * a_dim1], lda); i__3 = *n - k - kb + 1; ssymm_("Left", uplo, &kb, &i__3, &c_b16, &a[k + k * a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, &c_b14, &a[k + (k + kb) * a_dim1], lda); i__3 = *n - k - kb + 1; ssyr2k_(uplo, "Transpose", &i__3, &kb, &c_b19, &a[k + (k + kb) * a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, &c_b14, &a[k + kb + (k + kb) * a_dim1], lda); i__3 = *n - k - kb + 1; ssymm_("Left", uplo, &kb, &i__3, &c_b16, &a[k + k * a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, &c_b14, &a[k + (k + kb) * a_dim1], lda); i__3 = *n - k - kb + 1; strsm_("Right", uplo, "No transpose", "Non-unit", &kb, &i__3, &c_b14, &b[k + kb + (k + kb) * b_dim1] , ldb, &a[k + (k + kb) * a_dim1], lda); } /* L10: */ } } else { /* Compute inv(L)*A*inv(L') */ i__2 = *n; i__1 = nb; for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the lower triangle of A(k:n,k:n) */ ssygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, info); if (k + kb <= *n) { i__3 = *n - k - kb + 1; strsm_("Right", uplo, "Transpose", "Non-unit", &i__3, &kb, &c_b14, &b[k + k * b_dim1], ldb, &a[k + kb + k * a_dim1], lda); i__3 = *n - k - kb + 1; ssymm_("Right", uplo, &i__3, &kb, &c_b16, &a[k + k * a_dim1], lda, &b[k + kb + k * b_dim1], ldb, & c_b14, &a[k + kb + k * a_dim1], lda); i__3 = *n - k - kb + 1; ssyr2k_(uplo, "No transpose", &i__3, &kb, &c_b19, &a[ k + kb + k * a_dim1], lda, &b[k + kb + k * b_dim1], ldb, &c_b14, &a[k + kb + (k + kb) * a_dim1], lda); i__3 = *n - k - kb + 1; ssymm_("Right", uplo, &i__3, &kb, &c_b16, &a[k + k * a_dim1], lda, &b[k + kb + k * b_dim1], ldb, & c_b14, &a[k + kb + k * a_dim1], lda); i__3 = *n - k - kb + 1; strsm_("Left", uplo, "No transpose", "Non-unit", & i__3, &kb, &c_b14, &b[k + kb + (k + kb) * b_dim1], ldb, &a[k + kb + k * a_dim1], lda); } /* L20: */ } } } else { if (upper) { /* Compute U*A*U' */ i__1 = *n; i__2 = nb; for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */ i__3 = k - 1; strmm_("Left", uplo, "No transpose", "Non-unit", &i__3, & kb, &c_b14, &b[b_offset], ldb, &a[k * a_dim1 + 1], lda) ; i__3 = k - 1; ssymm_("Right", uplo, &i__3, &kb, &c_b52, &a[k + k * a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, &a[ k * a_dim1 + 1], lda); i__3 = k - 1; ssyr2k_(uplo, "No transpose", &i__3, &kb, &c_b14, &a[k * a_dim1 + 1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, &a[a_offset], lda); i__3 = k - 1; ssymm_("Right", uplo, &i__3, &kb, &c_b52, &a[k + k * a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, &a[ k * a_dim1 + 1], lda); i__3 = k - 1; strmm_("Right", uplo, "Transpose", "Non-unit", &i__3, &kb, &c_b14, &b[k + k * b_dim1], ldb, &a[k * a_dim1 + 1], lda); ssygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, info); /* L30: */ } } else { /* Compute L'*A*L */ i__2 = *n; i__1 = nb; for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */ i__3 = k - 1; strmm_("Right", uplo, "No transpose", "Non-unit", &kb, & i__3, &c_b14, &b[b_offset], ldb, &a[k + a_dim1], lda); i__3 = k - 1; ssymm_("Left", uplo, &kb, &i__3, &c_b52, &a[k + k * a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[k + a_dim1], lda); i__3 = k - 1; ssyr2k_(uplo, "Transpose", &i__3, &kb, &c_b14, &a[k + a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[ a_offset], lda); i__3 = k - 1; ssymm_("Left", uplo, &kb, &i__3, &c_b52, &a[k + k * a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[k + a_dim1], lda); i__3 = k - 1; strmm_("Left", uplo, "Transpose", "Non-unit", &kb, &i__3, &c_b14, &b[k + k * b_dim1], ldb, &a[k + a_dim1], lda); ssygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + k * b_dim1], ldb, info); /* L40: */ } } } } return 0; /* End of SSYGST */ } /* ssygst_ */
/* Subroutine */ int sgelsy_(integer *m, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, integer *jpvt, real *rcond, integer *rank, real *work, integer *lwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2; real r__1, r__2; /* Local variables */ integer i__, j; real c1, c2, s1, s2; integer nb, mn, nb1, nb2, nb3, nb4; real anrm, bnrm, smin, smax; integer iascl, ibscl, ismin, ismax; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); real wsize; extern /* Subroutine */ int strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), slaic1_(integer *, integer *, real *, real *, real *, real *, real *, real *, real *), sgeqp3_( integer *, integer *, real *, integer *, integer *, real *, real * , integer *, integer *), slabad_(real *, real *); extern doublereal slamch_(char *), slange_(char *, integer *, integer *, real *, integer *, real *); extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *); real bignum; extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *); integer lwkmin; real sminpr, smaxpr, smlnum; integer lwkopt; logical lquery; extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *, integer *), sormrz_(char *, char *, integer *, integer *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *, integer *), stzrzf_(integer *, integer *, real *, integer *, real *, real *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SGELSY computes the minimum-norm solution to a real linear least */ /* squares problem: */ /* minimize || A * X - B || */ /* using a complete orthogonal factorization of A. A is an M-by-N */ /* matrix which may be rank-deficient. */ /* Several right hand side vectors b and solution vectors x can be */ /* handled in a single call; they are stored as the columns of the */ /* M-by-NRHS right hand side matrix B and the N-by-NRHS solution */ /* matrix X. */ /* The routine first computes a QR factorization with column pivoting: */ /* A * P = Q * [ R11 R12 ] */ /* [ 0 R22 ] */ /* with R11 defined as the largest leading submatrix whose estimated */ /* condition number is less than 1/RCOND. The order of R11, RANK, */ /* is the effective rank of A. */ /* Then, R22 is considered to be negligible, and R12 is annihilated */ /* by orthogonal transformations from the right, arriving at the */ /* complete orthogonal factorization: */ /* A * P = Q * [ T11 0 ] * Z */ /* [ 0 0 ] */ /* The minimum-norm solution is then */ /* X = P * Z' [ inv(T11)*Q1'*B ] */ /* [ 0 ] */ /* where Q1 consists of the first RANK columns of Q. */ /* This routine is basically identical to the original xGELSX except */ /* three differences: */ /* o The call to the subroutine xGEQPF has been substituted by the */ /* the call to the subroutine xGEQP3. This subroutine is a Blas-3 */ /* version of the QR factorization with column pivoting. */ /* o Matrix B (the right hand side) is updated with Blas-3. */ /* o The permutation of matrix B (the right hand side) is faster and */ /* more simple. */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of */ /* columns of matrices B and X. NRHS >= 0. */ /* A (input/output) REAL array, dimension (LDA,N) */ /* On entry, the M-by-N matrix A. */ /* On exit, A has been overwritten by details of its */ /* complete orthogonal factorization. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* B (input/output) REAL array, dimension (LDB,NRHS) */ /* On entry, the M-by-NRHS right hand side matrix B. */ /* On exit, the N-by-NRHS solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,M,N). */ /* JPVT (input/output) INTEGER array, dimension (N) */ /* On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted */ /* to the front of AP, otherwise column i is a free column. */ /* On exit, if JPVT(i) = k, then the i-th column of AP */ /* was the k-th column of A. */ /* RCOND (input) REAL */ /* RCOND is used to determine the effective rank of A, which */ /* is defined as the order of the largest leading triangular */ /* submatrix R11 in the QR factorization with pivoting of A, */ /* whose estimated condition number < 1/RCOND. */ /* RANK (output) INTEGER */ /* The effective rank of A, i.e., the order of the submatrix */ /* R11. This is the same as the order of the submatrix T11 */ /* in the complete orthogonal factorization of A. */ /* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* The unblocked strategy requires that: */ /* LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ), */ /* where MN = min( M, N ). */ /* The block algorithm requires that: */ /* LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ), */ /* where NB is an upper bound on the blocksize returned */ /* by ILAENV for the routines SGEQP3, STZRZF, STZRQF, SORMQR, */ /* and SORMRZ. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal size of the WORK array, returns */ /* this value as the first entry of the WORK array, and no error */ /* message related to LWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: If INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */ /* E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */ /* G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --jpvt; --work; /* Function Body */ mn = min(*m,*n); ismin = mn + 1; ismax = (mn << 1) + 1; /* Test the input arguments. */ *info = 0; lquery = *lwork == -1; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = max(1,*m); if (*ldb < max(i__1,*n)) { *info = -7; } } /* Figure out optimal block size */ if (*info == 0) { if (mn == 0 || *nrhs == 0) { lwkmin = 1; lwkopt = 1; } else { nb1 = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1); nb2 = ilaenv_(&c__1, "SGERQF", " ", m, n, &c_n1, &c_n1); nb3 = ilaenv_(&c__1, "SORMQR", " ", m, n, nrhs, &c_n1); nb4 = ilaenv_(&c__1, "SORMRQ", " ", m, n, nrhs, &c_n1); /* Computing MAX */ i__1 = max(nb1,nb2), i__1 = max(i__1,nb3); nb = max(i__1,nb4); /* Computing MAX */ i__1 = mn << 1, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = mn + *nrhs; lwkmin = mn + max(i__1,i__2); /* Computing MAX */ i__1 = lwkmin, i__2 = mn + (*n << 1) + nb * (*n + 1), i__1 = max( i__1,i__2), i__2 = (mn << 1) + nb * *nrhs; lwkopt = max(i__1,i__2); } work[1] = (real) lwkopt; if (*lwork < lwkmin && ! lquery) { *info = -12; } } if (*info != 0) { i__1 = -(*info); xerbla_("SGELSY", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (mn == 0 || *nrhs == 0) { *rank = 0; return 0; } /* Get machine parameters */ smlnum = slamch_("S") / slamch_("P"); bignum = 1.f / smlnum; slabad_(&smlnum, &bignum); /* Scale A, B if max entries outside range [SMLNUM,BIGNUM] */ anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]); iascl = 0; if (anrm > 0.f && anrm < smlnum) { /* Scale matrix norm up to SMLNUM */ slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, info); iascl = 1; } else if (anrm > bignum) { /* Scale matrix norm down to BIGNUM */ slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, info); iascl = 2; } else if (anrm == 0.f) { /* Matrix all zero. Return zero solution. */ i__1 = max(*m,*n); slaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb); *rank = 0; goto L70; } bnrm = slange_("M", m, nrhs, &b[b_offset], ldb, &work[1]); ibscl = 0; if (bnrm > 0.f && bnrm < smlnum) { /* Scale matrix norm up to SMLNUM */ slascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, info); ibscl = 1; } else if (bnrm > bignum) { /* Scale matrix norm down to BIGNUM */ slascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, info); ibscl = 2; } /* Compute QR factorization with column pivoting of A: */ /* A * P = Q * R */ i__1 = *lwork - mn; sgeqp3_(m, n, &a[a_offset], lda, &jpvt[1], &work[1], &work[mn + 1], &i__1, info); wsize = mn + work[mn + 1]; /* workspace: MN+2*N+NB*(N+1). */ /* Details of Householder rotations stored in WORK(1:MN). */ /* Determine RANK using incremental condition estimation */ work[ismin] = 1.f; work[ismax] = 1.f; smax = (r__1 = a[a_dim1 + 1], dabs(r__1)); smin = smax; if ((r__1 = a[a_dim1 + 1], dabs(r__1)) == 0.f) { *rank = 0; i__1 = max(*m,*n); slaset_("F", &i__1, nrhs, &c_b31, &c_b31, &b[b_offset], ldb); goto L70; } else { *rank = 1; } L10: if (*rank < mn) { i__ = *rank + 1; slaic1_(&c__2, rank, &work[ismin], &smin, &a[i__ * a_dim1 + 1], &a[ i__ + i__ * a_dim1], &sminpr, &s1, &c1); slaic1_(&c__1, rank, &work[ismax], &smax, &a[i__ * a_dim1 + 1], &a[ i__ + i__ * a_dim1], &smaxpr, &s2, &c2); if (smaxpr * *rcond <= sminpr) { i__1 = *rank; for (i__ = 1; i__ <= i__1; ++i__) { work[ismin + i__ - 1] = s1 * work[ismin + i__ - 1]; work[ismax + i__ - 1] = s2 * work[ismax + i__ - 1]; /* L20: */ } work[ismin + *rank] = c1; work[ismax + *rank] = c2; smin = sminpr; smax = smaxpr; ++(*rank); goto L10; } } /* workspace: 3*MN. */ /* Logically partition R = [ R11 R12 ] */ /* [ 0 R22 ] */ /* where R11 = R(1:RANK,1:RANK) */ /* [R11,R12] = [ T11, 0 ] * Y */ if (*rank < *n) { i__1 = *lwork - (mn << 1); stzrzf_(rank, n, &a[a_offset], lda, &work[mn + 1], &work[(mn << 1) + 1], &i__1, info); } /* workspace: 2*MN. */ /* Details of Householder rotations stored in WORK(MN+1:2*MN) */ /* B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) */ i__1 = *lwork - (mn << 1); sormqr_("Left", "Transpose", m, nrhs, &mn, &a[a_offset], lda, &work[1], & b[b_offset], ldb, &work[(mn << 1) + 1], &i__1, info); /* Computing MAX */ r__1 = wsize, r__2 = (mn << 1) + work[(mn << 1) + 1]; wsize = dmax(r__1,r__2); /* workspace: 2*MN+NB*NRHS. */ /* B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) */ strsm_("Left", "Upper", "No transpose", "Non-unit", rank, nrhs, &c_b54, & a[a_offset], lda, &b[b_offset], ldb); i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = *rank + 1; i__ <= i__2; ++i__) { b[i__ + j * b_dim1] = 0.f; /* L30: */ } /* L40: */ } /* B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) */ if (*rank < *n) { i__1 = *n - *rank; i__2 = *lwork - (mn << 1); sormrz_("Left", "Transpose", n, nrhs, rank, &i__1, &a[a_offset], lda, &work[mn + 1], &b[b_offset], ldb, &work[(mn << 1) + 1], &i__2, info); } /* workspace: 2*MN+NRHS. */ /* B(1:N,1:NRHS) := P * B(1:N,1:NRHS) */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[jpvt[i__]] = b[i__ + j * b_dim1]; /* L50: */ } scopy_(n, &work[1], &c__1, &b[j * b_dim1 + 1], &c__1); /* L60: */ } /* workspace: N. */ /* Undo scaling */ if (iascl == 1) { slascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, info); slascl_("U", &c__0, &c__0, &smlnum, &anrm, rank, rank, &a[a_offset], lda, info); } else if (iascl == 2) { slascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, info); slascl_("U", &c__0, &c__0, &bignum, &anrm, rank, rank, &a[a_offset], lda, info); } if (ibscl == 1) { slascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } else if (ibscl == 2) { slascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, info); } L70: work[1] = (real) lwkopt; return 0; /* End of SGELSY */ } /* sgelsy_ */
/* Subroutine */ int ssygvd_(integer *itype, char *jobz, char *uplo, integer * n, real *a, integer *lda, real *b, integer *ldb, real *w, real *work, integer *lwork, integer *iwork, integer *liwork, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; real r__1, r__2; /* Local variables */ integer lopt; extern logical lsame_(char *, char *); integer lwmin; char trans[1]; integer liopt; logical upper; extern /* Subroutine */ int strmm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ); logical wantz; extern /* Subroutine */ int strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), xerbla_(char *, integer *); integer liwmin; extern /* Subroutine */ int spotrf_(char *, integer *, real *, integer *, integer *), ssyevd_(char *, char *, integer *, real *, integer *, real *, real *, integer *, integer *, integer *, integer *); logical lquery; extern /* Subroutine */ int ssygst_(integer *, char *, integer *, real *, integer *, real *, integer *, integer *); /* -- LAPACK driver routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SSYGVD computes all the eigenvalues, and optionally, the eigenvectors */ /* of a real generalized symmetric-definite eigenproblem, of the form */ /* A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x. Here A and */ /* B are assumed to be symmetric and B is also positive definite. */ /* If eigenvectors are desired, it uses a divide and conquer algorithm. */ /* The divide and conquer algorithm makes very mild assumptions about */ /* floating point arithmetic. It will work on machines with a guard */ /* digit in add/subtract, or on those binary machines without guard */ /* digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */ /* Cray-2. It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. */ /* Arguments */ /* ========= */ /* ITYPE (input) INTEGER */ /* Specifies the problem type to be solved: */ /* = 1: A*x = (lambda)*B*x */ /* = 2: A*B*x = (lambda)*x */ /* = 3: B*A*x = (lambda)*x */ /* JOBZ (input) CHARACTER*1 */ /* = 'N': Compute eigenvalues only; */ /* = 'V': Compute eigenvalues and eigenvectors. */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangles of A and B are stored; */ /* = 'L': Lower triangles of A and B are stored. */ /* N (input) INTEGER */ /* The order of the matrices A and B. N >= 0. */ /* A (input/output) REAL array, dimension (LDA, N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of A contains the */ /* upper triangular part of the matrix A. If UPLO = 'L', */ /* the leading N-by-N lower triangular part of A contains */ /* the lower triangular part of the matrix A. */ /* On exit, if JOBZ = 'V', then if INFO = 0, A contains the */ /* matrix Z of eigenvectors. The eigenvectors are normalized */ /* as follows: */ /* if ITYPE = 1 or 2, Z**T*B*Z = I; */ /* if ITYPE = 3, Z**T*inv(B)*Z = I. */ /* If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */ /* or the lower triangle (if UPLO='L') of A, including the */ /* diagonal, is destroyed. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) REAL array, dimension (LDB, N) */ /* On entry, the symmetric matrix B. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of B contains the */ /* upper triangular part of the matrix B. If UPLO = 'L', */ /* the leading N-by-N lower triangular part of B contains */ /* the lower triangular part of the matrix B. */ /* On exit, if INFO <= N, the part of B containing the matrix is */ /* overwritten by the triangular factor U or L from the Cholesky */ /* factorization B = U**T*U or B = L*L**T. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* W (output) REAL array, dimension (N) */ /* If INFO = 0, the eigenvalues in ascending order. */ /* WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) */ /* On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */ /* LWORK (input) INTEGER */ /* The dimension of the array WORK. */ /* If N <= 1, LWORK >= 1. */ /* If JOBZ = 'N' and N > 1, LWORK >= 2*N+1. */ /* If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. */ /* If LWORK = -1, then a workspace query is assumed; the routine */ /* only calculates the optimal sizes of the WORK and IWORK */ /* arrays, returns these values as the first entries of the WORK */ /* and IWORK arrays, and no error message related to LWORK or */ /* LIWORK is issued by XERBLA. */ /* IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */ /* On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */ /* LIWORK (input) INTEGER */ /* The dimension of the array IWORK. */ /* If N <= 1, LIWORK >= 1. */ /* If JOBZ = 'N' and N > 1, LIWORK >= 1. */ /* If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. */ /* If LIWORK = -1, then a workspace query is assumed; the */ /* routine only calculates the optimal sizes of the WORK and */ /* IWORK arrays, returns these values as the first entries of */ /* the WORK and IWORK arrays, and no error message related to */ /* LWORK or LIWORK is issued by XERBLA. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: SPOTRF or SSYEVD returned an error code: */ /* <= N: if INFO = i and JOBZ = 'N', then the algorithm */ /* failed to converge; i off-diagonal elements of an */ /* intermediate tridiagonal form did not converge to */ /* zero; */ /* if INFO = i and JOBZ = 'V', then the algorithm */ /* failed to compute an eigenvalue while working on */ /* the submatrix lying in rows and columns INFO/(N+1) */ /* through mod(INFO,N+1); */ /* > N: if INFO = N + i, for 1 <= i <= N, then the leading */ /* minor of order i of B is not positive definite. */ /* The factorization of B could not be completed and */ /* no eigenvalues or eigenvectors were computed. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */ /* Modified so that no backsubstitution is performed if SSYEVD fails to */ /* converge (NEIG in old code could be greater than N causing out of */ /* bounds reference to A - reported by Ralf Meyer). Also corrected the */ /* description of INFO and the test on ITYPE. Sven, 16 Feb 05. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --w; --work; --iwork; /* Function Body */ wantz = lsame_(jobz, "V"); upper = lsame_(uplo, "U"); lquery = *lwork == -1 || *liwork == -1; *info = 0; if (*n <= 1) { liwmin = 1; lwmin = 1; } else if (wantz) { liwmin = *n * 5 + 3; /* Computing 2nd power */ i__1 = *n; lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); } else { liwmin = 1; lwmin = (*n << 1) + 1; } lopt = lwmin; liopt = liwmin; if (*itype < 1 || *itype > 3) { *info = -1; } else if (! (wantz || lsame_(jobz, "N"))) { *info = -2; } else if (! (upper || lsame_(uplo, "L"))) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldb < max(1,*n)) { *info = -8; } if (*info == 0) { work[1] = (real) lopt; iwork[1] = liopt; if (*lwork < lwmin && ! lquery) { *info = -11; } else if (*liwork < liwmin && ! lquery) { *info = -13; } } if (*info != 0) { i__1 = -(*info); xerbla_("SSYGVD", &i__1); return 0; } else if (lquery) { return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Form a Cholesky factorization of B. */ spotrf_(uplo, n, &b[b_offset], ldb, info); if (*info != 0) { *info = *n + *info; return 0; } /* Transform problem to standard eigenvalue problem and solve. */ ssygst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); ssyevd_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &iwork[ 1], liwork, info); /* Computing MAX */ r__1 = (real) lopt; lopt = dmax(r__1,work[1]); /* Computing MAX */ r__1 = (real) liopt, r__2 = (real) iwork[1]; liopt = dmax(r__1,r__2); if (wantz && *info == 0) { /* Backtransform eigenvectors to the original problem. */ if (*itype == 1 || *itype == 2) { /* For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */ /* backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */ if (upper) { *(unsigned char *)trans = 'N'; } else { *(unsigned char *)trans = 'T'; } strsm_("Left", uplo, trans, "Non-unit", n, n, &c_b11, &b[b_offset] , ldb, &a[a_offset], lda); } else if (*itype == 3) { /* For B*A*x=(lambda)*x; */ /* backtransform eigenvectors: x = L*y or U'*y */ if (upper) { *(unsigned char *)trans = 'T'; } else { *(unsigned char *)trans = 'N'; } strmm_("Left", uplo, trans, "Non-unit", n, n, &c_b11, &b[b_offset] , ldb, &a[a_offset], lda); } } work[1] = (real) lopt; iwork[1] = liopt; return 0; /* End of SSYGVD */ } /* ssygvd_ */
/* Subroutine */ int ssygst_(integer *itype, char *uplo, integer *n, real *a, integer *lda, real *b, integer *ldb, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= SSYGST reduces a real symmetric-definite generalized eigenproblem to standard form. If ITYPE = 1, the problem is A*x = lambda*B*x, and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. B must have been previously factorized as U**T*U or L*L**T by SPOTRF. Arguments ========= ITYPE (input) INTEGER = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); = 2 or 3: compute U*A*U**T or L**T*A*L. UPLO (input) CHARACTER = 'U': Upper triangle of A is stored and B is factored as U**T*U; = 'L': Lower triangle of A is stored and B is factored as L*L**T. N (input) INTEGER The order of the matrices A and B. N >= 0. A (input/output) REAL array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if INFO = 0, the transformed matrix, stored in the same format as A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input) REAL array, dimension (LDB,N) The triangular factor from the Cholesky factorization of B, as returned by SPOTRF. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static real c_b14 = 1.f; static real c_b16 = -.5f; static real c_b19 = -1.f; static real c_b52 = .5f; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ static integer k; extern logical lsame_(char *, char *); static logical upper; extern /* Subroutine */ int strmm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), ssymm_(char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real * , real *, integer *), strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real * , integer *); static integer kb, nb; extern /* Subroutine */ int ssygs2_(integer *, char *, integer *, real *, integer *, real *, integer *, integer *), ssyr2k_(char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), xerbla_( char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (*itype < 1 || *itype > 3) { *info = -1; } else if (! upper && ! lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); xerbla_("SSYGST", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine the block size for this environment. */ nb = ilaenv_(&c__1, "SSYGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); if (nb <= 1 || nb >= *n) { /* Use unblocked code */ ssygs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); } else { /* Use blocked code */ if (*itype == 1) { if (upper) { /* Compute inv(U')*A*inv(U) */ i__1 = *n; i__2 = nb; for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the upper triangle of A(k:n,k:n) */ ssygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k), ldb, info); if (k + kb <= *n) { i__3 = *n - k - kb + 1; strsm_("Left", uplo, "Transpose", "Non-unit", &kb, & i__3, &c_b14, &b_ref(k, k), ldb, &a_ref(k, k + kb), lda); i__3 = *n - k - kb + 1; ssymm_("Left", uplo, &kb, &i__3, &c_b16, &a_ref(k, k), lda, &b_ref(k, k + kb), ldb, &c_b14, &a_ref( k, k + kb), lda); i__3 = *n - k - kb + 1; ssyr2k_(uplo, "Transpose", &i__3, &kb, &c_b19, &a_ref( k, k + kb), lda, &b_ref(k, k + kb), ldb, & c_b14, &a_ref(k + kb, k + kb), lda); i__3 = *n - k - kb + 1; ssymm_("Left", uplo, &kb, &i__3, &c_b16, &a_ref(k, k), lda, &b_ref(k, k + kb), ldb, &c_b14, &a_ref( k, k + kb), lda); i__3 = *n - k - kb + 1; strsm_("Right", uplo, "No transpose", "Non-unit", &kb, &i__3, &c_b14, &b_ref(k + kb, k + kb), ldb, & a_ref(k, k + kb), lda); } /* L10: */ } } else { /* Compute inv(L)*A*inv(L') */ i__2 = *n; i__1 = nb; for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the lower triangle of A(k:n,k:n) */ ssygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k), ldb, info); if (k + kb <= *n) { i__3 = *n - k - kb + 1; strsm_("Right", uplo, "Transpose", "Non-unit", &i__3, &kb, &c_b14, &b_ref(k, k), ldb, &a_ref(k + kb, k), lda); i__3 = *n - k - kb + 1; ssymm_("Right", uplo, &i__3, &kb, &c_b16, &a_ref(k, k) , lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref( k + kb, k), lda); i__3 = *n - k - kb + 1; ssyr2k_(uplo, "No transpose", &i__3, &kb, &c_b19, & a_ref(k + kb, k), lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref(k + kb, k + kb), lda); i__3 = *n - k - kb + 1; ssymm_("Right", uplo, &i__3, &kb, &c_b16, &a_ref(k, k) , lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref( k + kb, k), lda); i__3 = *n - k - kb + 1; strsm_("Left", uplo, "No transpose", "Non-unit", & i__3, &kb, &c_b14, &b_ref(k + kb, k + kb), ldb, &a_ref(k + kb, k), lda); } /* L20: */ } } } else { if (upper) { /* Compute U*A*U' */ i__1 = *n; i__2 = nb; for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */ i__3 = k - 1; strmm_("Left", uplo, "No transpose", "Non-unit", &i__3, & kb, &c_b14, &b[b_offset], ldb, &a_ref(1, k), lda); i__3 = k - 1; ssymm_("Right", uplo, &i__3, &kb, &c_b52, &a_ref(k, k), lda, &b_ref(1, k), ldb, &c_b14, &a_ref(1, k), lda); i__3 = k - 1; ssyr2k_(uplo, "No transpose", &i__3, &kb, &c_b14, &a_ref( 1, k), lda, &b_ref(1, k), ldb, &c_b14, &a[ a_offset], lda); i__3 = k - 1; ssymm_("Right", uplo, &i__3, &kb, &c_b52, &a_ref(k, k), lda, &b_ref(1, k), ldb, &c_b14, &a_ref(1, k), lda); i__3 = k - 1; strmm_("Right", uplo, "Transpose", "Non-unit", &i__3, &kb, &c_b14, &b_ref(k, k), ldb, &a_ref(1, k), lda); ssygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k), ldb, info); /* L30: */ } } else { /* Compute L'*A*L */ i__2 = *n; i__1 = nb; for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */ i__3 = k - 1; strmm_("Right", uplo, "No transpose", "Non-unit", &kb, & i__3, &c_b14, &b[b_offset], ldb, &a_ref(k, 1), lda); i__3 = k - 1; ssymm_("Left", uplo, &kb, &i__3, &c_b52, &a_ref(k, k), lda, &b_ref(k, 1), ldb, &c_b14, &a_ref(k, 1), lda); i__3 = k - 1; ssyr2k_(uplo, "Transpose", &i__3, &kb, &c_b14, &a_ref(k, 1), lda, &b_ref(k, 1), ldb, &c_b14, &a[a_offset], lda); i__3 = k - 1; ssymm_("Left", uplo, &kb, &i__3, &c_b52, &a_ref(k, k), lda, &b_ref(k, 1), ldb, &c_b14, &a_ref(k, 1), lda); i__3 = k - 1; strmm_("Left", uplo, "Transpose", "Non-unit", &kb, &i__3, &c_b14, &b_ref(k, k), ldb, &a_ref(k, 1), lda); ssygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k), ldb, info); /* L40: */ } } } } return 0; /* End of SSYGST */ } /* ssygst_ */
/* Subroutine */ int strtri_(char *uplo, char *diag, integer *n, real *a, integer *lda, integer *info, ftnlen uplo_len, ftnlen diag_len) { /* System generated locals */ address a__1[2]; integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5; char ch__1[2]; /* Builtin functions */ /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ static integer j, jb, nb, nn; extern logical lsame_(char *, char *, ftnlen, ftnlen); static logical upper; extern /* Subroutine */ int strmm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * , ftnlen, ftnlen, ftnlen, ftnlen), strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen, ftnlen), strti2_(char *, char * , integer *, real *, integer *, integer *, ftnlen, ftnlen), xerbla_(char *, integer *, ftnlen); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static logical nounit; /* -- LAPACK routine (version 3.0) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ /* Courant Institute, Argonne National Lab, and Rice University */ /* March 31, 1993 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* STRTRI computes the inverse of a real upper or lower triangular */ /* matrix A. */ /* This is the Level 3 BLAS version of the algorithm. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* DIAG (input) CHARACTER*1 */ /* = 'N': A is non-unit triangular; */ /* = 'U': A is unit triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) REAL array, dimension (LDA,N) */ /* On entry, the triangular matrix A. If UPLO = 'U', the */ /* leading N-by-N upper triangular part of the array A contains */ /* the upper triangular matrix, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of the array A contains */ /* the lower triangular matrix, and the strictly upper */ /* triangular part of A is not referenced. If DIAG = 'U', the */ /* diagonal elements of A are also not referenced and are */ /* assumed to be 1. */ /* On exit, the (triangular) inverse of the original matrix, in */ /* the same storage format. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, A(i,i) is exactly zero. The triangular */ /* matrix is singular and its inverse can not be computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1); nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1); if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) { *info = -1; } else if (! nounit && ! lsame_(diag, "U", (ftnlen)1, (ftnlen)1)) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } if (*info != 0) { i__1 = -(*info); xerbla_("STRTRI", &i__1, (ftnlen)6); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Check for singularity if non-unit. */ if (nounit) { i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (a[*info + *info * a_dim1] == 0.f) { return 0; } /* L10: */ } *info = 0; } /* Determine the block size for this environment. */ /* Writing concatenation */ i__2[0] = 1, a__1[0] = uplo; i__2[1] = 1, a__1[1] = diag; s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); nb = ilaenv_(&c__1, "STRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)2); if (nb <= 1 || nb >= *n) { /* Use unblocked code */ strti2_(uplo, diag, n, &a[a_offset], lda, info, (ftnlen)1, (ftnlen)1); } else { /* Use blocked code */ if (upper) { /* Compute inverse of upper triangular matrix */ i__1 = *n; i__3 = nb; for (j = 1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) { /* Computing MIN */ i__4 = nb, i__5 = *n - j + 1; jb = min(i__4,i__5); /* Compute rows 1:j-1 of current block column */ i__4 = j - 1; strmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & c_b18, &a[a_offset], lda, &a[j * a_dim1 + 1], lda, ( ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)1); i__4 = j - 1; strsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & c_b22, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)1); /* Compute inverse of current diagonal block */ strti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info, ( ftnlen)5, (ftnlen)1); /* L20: */ } } else { /* Compute inverse of lower triangular matrix */ nn = (*n - 1) / nb * nb + 1; i__3 = -nb; for (j = nn; i__3 < 0 ? j >= 1 : j <= 1; j += i__3) { /* Computing MIN */ i__1 = nb, i__4 = *n - j + 1; jb = min(i__1,i__4); if (j + jb <= *n) { /* Compute rows j+jb:n of current block column */ i__1 = *n - j - jb + 1; strmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, &c_b18, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + jb + j * a_dim1], lda, (ftnlen)4, (ftnlen)5, ( ftnlen)12, (ftnlen)1); i__1 = *n - j - jb + 1; strsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, &c_b22, &a[j + j * a_dim1], lda, &a[j + jb + j * a_dim1], lda, (ftnlen)5, (ftnlen)5, (ftnlen)12, ( ftnlen)1); } /* Compute inverse of current diagonal block */ strti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info, ( ftnlen)5, (ftnlen)1); /* L30: */ } } } return 0; /* End of STRTRI */ } /* strtri_ */
/* Subroutine */ int strtrs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, real *a, integer *lda, real *b, integer *ldb, integer * info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1; /* Local variables */ extern logical lsame_(char *, char *); extern /* Subroutine */ int strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), xerbla_(char *, integer *); logical nounit; /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* STRTRS solves a triangular system of the form */ /* A * X = B or A**T * X = B, */ /* where A is a triangular matrix of order N, and B is an N-by-NRHS */ /* matrix. A check is made to verify that A is nonsingular. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': A is upper triangular; */ /* = 'L': A is lower triangular. */ /* TRANS (input) CHARACTER*1 */ /* Specifies the form of the system of equations: */ /* = 'N': A * X = B (No transpose) */ /* = 'T': A**T * X = B (Transpose) */ /* = 'C': A**H * X = B (Conjugate transpose = Transpose) */ /* DIAG (input) CHARACTER*1 */ /* = 'N': A is non-unit triangular; */ /* = 'U': A is unit triangular. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of right hand sides, i.e., the number of columns */ /* of the matrix B. NRHS >= 0. */ /* A (input) REAL array, dimension (LDA,N) */ /* The triangular matrix A. If UPLO = 'U', the leading N-by-N */ /* upper triangular part of the array A contains the upper */ /* triangular matrix, and the strictly lower triangular part of */ /* A is not referenced. If UPLO = 'L', the leading N-by-N lower */ /* triangular part of the array A contains the lower triangular */ /* matrix, and the strictly upper triangular part of A is not */ /* referenced. If DIAG = 'U', the diagonal elements of A are */ /* also not referenced and are assumed to be 1. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* B (input/output) REAL array, dimension (LDB,NRHS) */ /* On entry, the right hand side matrix B. */ /* On exit, if INFO = 0, the solution matrix X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the i-th diagonal element of A is zero, */ /* indicating that the matrix is singular and the solutions */ /* X have not been computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; /* Function Body */ *info = 0; nounit = lsame_(diag, "N"); if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { *info = -1; } else if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C")) { *info = -2; } else if (! nounit && ! lsame_(diag, "U")) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*nrhs < 0) { *info = -5; } else if (*lda < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -9; } if (*info != 0) { i__1 = -(*info); xerbla_("STRTRS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Check for singularity. */ if (nounit) { i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (a[*info + *info * a_dim1] == 0.f) { return 0; } /* L10: */ } } *info = 0; /* Solve A * x = b or A' * x = b. */ strsm_("Left", uplo, trans, diag, n, nrhs, &c_b12, &a[a_offset], lda, &b[ b_offset], ldb); return 0; /* End of STRTRS */ } /* strtrs_ */
/* Subroutine */ int spotrf_(char *uplo, integer *n, real *a, integer *lda, integer *info, ftnlen uplo_len) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ static integer j, jb, nb; extern logical lsame_(char *, char *, ftnlen, ftnlen); extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen, ftnlen); static logical upper; extern /* Subroutine */ int strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * , ftnlen, ftnlen, ftnlen, ftnlen), ssyrk_(char *, char *, integer *, integer *, real *, real *, integer *, real *, real *, integer * , ftnlen, ftnlen), spotf2_(char *, integer *, real *, integer *, integer *, ftnlen), xerbla_(char *, integer *, ftnlen); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); /* -- LAPACK routine (version 3.0) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ /* Courant Institute, Argonne National Lab, and Rice University */ /* March 31, 1993 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SPOTRF computes the Cholesky factorization of a real symmetric */ /* positive definite matrix A. */ /* The factorization has the form */ /* A = U**T * U, if UPLO = 'U', or */ /* A = L * L**T, if UPLO = 'L', */ /* where U is an upper triangular matrix and L is lower triangular. */ /* This is the block version of the algorithm, calling Level 3 BLAS. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* = 'U': Upper triangle of A is stored; */ /* = 'L': Lower triangle of A is stored. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) REAL array, dimension (LDA,N) */ /* On entry, the symmetric matrix A. If UPLO = 'U', the leading */ /* N-by-N upper triangular part of A contains the upper */ /* triangular part of the matrix A, and the strictly lower */ /* triangular part of A is not referenced. If UPLO = 'L', the */ /* leading N-by-N lower triangular part of A contains the lower */ /* triangular part of the matrix A, and the strictly upper */ /* triangular part of A is not referenced. */ /* On exit, if INFO = 0, the factor U or L from the Cholesky */ /* factorization A = U**T*U or A = L*L**T. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, the leading minor of order i is not */ /* positive definite, and the factorization could not be */ /* completed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1); if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); xerbla_("SPOTRF", &i__1, (ftnlen)6); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine the block size for this environment. */ nb = ilaenv_(&c__1, "SPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); if (nb <= 1 || nb >= *n) { /* Use unblocked code. */ spotf2_(uplo, n, &a[a_offset], lda, info, (ftnlen)1); } else { /* Use blocked code. */ if (upper) { /* Compute the Cholesky factorization A = U'*U. */ i__1 = *n; i__2 = nb; for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Update and factorize the current diagonal block and test */ /* for non-positive-definiteness. */ /* Computing MIN */ i__3 = nb, i__4 = *n - j + 1; jb = min(i__3,i__4); i__3 = j - 1; ssyrk_("Upper", "Transpose", &jb, &i__3, &c_b13, &a[j * a_dim1 + 1], lda, &c_b14, &a[j + j * a_dim1], lda, ( ftnlen)5, (ftnlen)9); spotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info, (ftnlen) 5); if (*info != 0) { goto L30; } if (j + jb <= *n) { /* Compute the current block row. */ i__3 = *n - j - jb + 1; i__4 = j - 1; sgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & c_b13, &a[j * a_dim1 + 1], lda, &a[(j + jb) * a_dim1 + 1], lda, &c_b14, &a[j + (j + jb) * a_dim1], lda, (ftnlen)9, (ftnlen)12); i__3 = *n - j - jb + 1; strsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & i__3, &c_b14, &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda, (ftnlen)4, (ftnlen)5, ( ftnlen)9, (ftnlen)8); } /* L10: */ } } else { /* Compute the Cholesky factorization A = L*L'. */ i__2 = *n; i__1 = nb; for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Update and factorize the current diagonal block and test */ /* for non-positive-definiteness. */ /* Computing MIN */ i__3 = nb, i__4 = *n - j + 1; jb = min(i__3,i__4); i__3 = j - 1; ssyrk_("Lower", "No transpose", &jb, &i__3, &c_b13, &a[j + a_dim1], lda, &c_b14, &a[j + j * a_dim1], lda, ( ftnlen)5, (ftnlen)12); spotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info, (ftnlen) 5); if (*info != 0) { goto L30; } if (j + jb <= *n) { /* Compute the current block column. */ i__3 = *n - j - jb + 1; i__4 = j - 1; sgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & c_b13, &a[j + jb + a_dim1], lda, &a[j + a_dim1], lda, &c_b14, &a[j + jb + j * a_dim1], lda, ( ftnlen)12, (ftnlen)9); i__3 = *n - j - jb + 1; strsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & jb, &c_b14, &a[j + j * a_dim1], lda, &a[j + jb + j * a_dim1], lda, (ftnlen)5, (ftnlen)5, (ftnlen)9, (ftnlen)8); } /* L20: */ } } } goto L40; L30: *info = *info + j - 1; L40: return 0; /* End of SPOTRF */ } /* spotrf_ */
/* Subroutine */ int sgetrf_(integer *m, integer *n, real *a, integer *lda, integer *ipiv, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; real r__1; /* Local variables */ integer i__, j, ipivstart, jpivstart, jp; real tmp; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); integer kcols; real sfmin; integer nstep; extern /* Subroutine */ int strsm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ); integer kahead; extern doublereal slamch_(char *); extern /* Subroutine */ int xerbla_(char *, integer *); extern integer isamax_(integer *, real *, integer *); integer npived; extern logical sisnan_(real *); integer kstart; extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer *, integer *, integer *, integer *); integer ntopiv; /* -- LAPACK routine (version 3.X) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* May 2008 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SGETRF computes an LU factorization of a general M-by-N matrix A */ /* using partial pivoting with row interchanges. */ /* The factorization has the form */ /* A = P * L * U */ /* where P is a permutation matrix, L is lower triangular with unit */ /* diagonal elements (lower trapezoidal if m > n), and U is upper */ /* triangular (upper trapezoidal if m < n). */ /* This code implements an iterative version of Sivan Toledo's recursive */ /* LU algorithm[1]. For square matrices, this iterative versions should */ /* be within a factor of two of the optimum number of memory transfers. */ /* The pattern is as follows, with the large blocks of U being updated */ /* in one call to STRSM, and the dotted lines denoting sections that */ /* have had all pending permutations applied: */ /* 1 2 3 4 5 6 7 8 */ /* +-+-+---+-------+------ */ /* | |1| | | */ /* |.+-+ 2 | | */ /* | | | | | */ /* |.|.+-+-+ 4 | */ /* | | | |1| | */ /* | | |.+-+ | */ /* | | | | | | */ /* |.|.|.|.+-+-+---+ 8 */ /* | | | | | |1| | */ /* | | | | |.+-+ 2 | */ /* | | | | | | | | */ /* | | | | |.|.+-+-+ */ /* | | | | | | | |1| */ /* | | | | | | |.+-+ */ /* | | | | | | | | | */ /* |.|.|.|.|.|.|.|.+----- */ /* | | | | | | | | | */ /* The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in */ /* the binary expansion of the current column. Each Schur update is */ /* applied as soon as the necessary portion of U is available. */ /* [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with */ /* Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997), */ /* 1065-1081. http://dx.doi.org/10.1137/S0895479896297744 */ /* Arguments */ /* ========= */ /* M (input) INTEGER */ /* The number of rows of the matrix A. M >= 0. */ /* N (input) INTEGER */ /* The number of columns of the matrix A. N >= 0. */ /* A (input/output) REAL array, dimension (LDA,N) */ /* On entry, the M-by-N matrix to be factored. */ /* On exit, the factors L and U from the factorization */ /* A = P*L*U; the unit diagonal elements of L are not stored. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,M). */ /* IPIV (output) INTEGER array, dimension (min(M,N)) */ /* The pivot indices; for 1 <= i <= min(M,N), row i of the */ /* matrix was interchanged with row IPIV(i). */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, U(i,i) is exactly zero. The factorization */ /* has been completed, but the factor U is exactly */ /* singular, and division by zero will occur if it is used */ /* to solve a system of equations. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; /* Function Body */ *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); xerbla_("SGETRF", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { return 0; } /* Compute machine safe minimum */ sfmin = slamch_("S"); nstep = min(*m,*n); i__1 = nstep; for (j = 1; j <= i__1; ++j) { kahead = j & -j; kstart = j + 1 - kahead; /* Computing MIN */ i__2 = kahead, i__3 = *m - j; kcols = min(i__2,i__3); /* Find pivot. */ i__2 = *m - j + 1; jp = j - 1 + isamax_(&i__2, &a[j + j * a_dim1], &c__1); ipiv[j] = jp; /* Permute just this column. */ if (jp != j) { tmp = a[j + j * a_dim1]; a[j + j * a_dim1] = a[jp + j * a_dim1]; a[jp + j * a_dim1] = tmp; } /* Apply pending permutations to L */ ntopiv = 1; ipivstart = j; jpivstart = j - ntopiv; while(ntopiv < kahead) { slaswp_(&ntopiv, &a[jpivstart * a_dim1 + 1], lda, &ipivstart, &j, &ipiv[1], &c__1); ipivstart -= ntopiv; ntopiv <<= 1; jpivstart -= ntopiv; } /* Permute U block to match L */ slaswp_(&kcols, &a[(j + 1) * a_dim1 + 1], lda, &kstart, &j, &ipiv[1], &c__1); /* Factor the current column */ if (a[j + j * a_dim1] != 0.f && ! sisnan_(&a[j + j * a_dim1])) { if ((r__1 = a[j + j * a_dim1], dabs(r__1)) >= sfmin) { i__2 = *m - j; r__1 = 1.f / a[j + j * a_dim1]; sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); } else { i__2 = *m - j; for (i__ = 1; i__ <= i__2; ++i__) { a[j + i__ + j * a_dim1] /= a[j + j * a_dim1]; } } } else if (a[j + j * a_dim1] == 0.f && *info == 0) { *info = j; } /* Solve for U block. */ strsm_("Left", "Lower", "No transpose", "Unit", &kahead, &kcols, & c_b12, &a[kstart + kstart * a_dim1], lda, &a[kstart + (j + 1) * a_dim1], lda); /* Schur complement. */ i__2 = *m - j; sgemm_("No transpose", "No transpose", &i__2, &kcols, &kahead, &c_b15, &a[j + 1 + kstart * a_dim1], lda, &a[kstart + (j + 1) * a_dim1], lda, &c_b12, &a[j + 1 + (j + 1) * a_dim1], lda); } /* Handle pivot permutations on the way out of the recursion */ npived = nstep & -nstep; j = nstep - npived; while(j > 0) { ntopiv = j & -j; i__1 = j + 1; slaswp_(&ntopiv, &a[(j - ntopiv + 1) * a_dim1 + 1], lda, &i__1, & nstep, &ipiv[1], &c__1); j -= ntopiv; } /* If short and wide, handle the rest of the columns. */ if (*m < *n) { i__1 = *n - *m; slaswp_(&i__1, &a[(*m + kcols + 1) * a_dim1 + 1], lda, &c__1, m, & ipiv[1], &c__1); i__1 = *n - *m; strsm_("Left", "Lower", "No transpose", "Unit", m, &i__1, &c_b12, &a[ a_offset], lda, &a[(*m + kcols + 1) * a_dim1 + 1], lda); } return 0; /* End of SGETRF */ } /* sgetrf_ */