/*
 * Dot product of two strided vectors:
 *     sum over i in [0, n) of x[i*incx] * y[i*incy].
 *
 * When compiled with USE_BLAS for a float or double `real`, and n, incx and
 * incy do not exceed INT_MAX (only this upper bound is tested), the call is
 * forwarded to sdot_/ddot_ with the arguments narrowed to int. In every
 * other case the sum is accumulated sequentially in a `real`.
 */
real THBlas_(dot)(int64_t n, real *x, int64_t incx, real *y, int64_t incy)
{
  /* A one-element product only ever touches index 0, so the strides are
     reset to 1 before the range test below looks at them. */
  if (n == 1)
  {
    incx = 1;
    incy = 1;
  }

#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  if ((n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX))
  {
    int blas_n = (int)n;
    int blas_incx = (int)incx;
    int blas_incy = (int)incy;

#if defined(TH_REAL_IS_DOUBLE)
    return (real) ddot_(&blas_n, x, &blas_incx, y, &blas_incy);
#else
    return (real) sdot_(&blas_n, x, &blas_incx, y, &blas_incy);
#endif
  }
#endif

  /* Portable fallback: left-to-right accumulation. */
  {
    real acc = 0;
    int64_t k = 0;
    while (k < n)
    {
      acc += x[k*incx]*y[k*incy];
      ++k;
    }
    return acc;
  }
}
// Self-check for the linked sdot_: builds two 100-element float vectors,
// computes their dot product with a plain loop, and compares sdot_'s answer
// against it using a relative tolerance of 1e-6.
// Prints "SUCCESS" and exits with 0 on agreement, "FAILED" and 1 otherwise.
int main()
{
  const unsigned int N = 100;
  const unsigned int ONE = 1;

  float a[N];
  float b[N];
  float exact_result = 0.;

  // Fill the inputs and accumulate the reference value in index order.
  for (unsigned int idx = 0; idx < N; ++idx)
    {
      a[idx] = float(idx);
      b[idx] = a[idx] / 2.;
      exact_result += a[idx] * b[idx];
    }

  const float result = sdot_(&N, a, &ONE, b, &ONE);

  const bool agrees =
    std::abs(result - exact_result) < 1e-6 * fabs(exact_result);

  if (!agrees)
    {
      std::cout << "FAILED" << std::endl;
      return 1;
    }

  std::cout << "SUCCESS" << std::endl;
  return 0;
}
/*
 * Wrapper exposing sdot_ with a doublereal return type: every argument is
 * forwarded unchanged and the value sdot_ yields is returned converted to
 * doublereal.
 */
doublereal f2c_sdot(integer* N, real* X, integer* incX,
                    real* Y, integer* incY)
{
    doublereal product;

    product = sdot_(N, X, incX, Y, incY);
    return product;
}
/* Forms the "pure" pseudoinverse Agi = V * S^-1 * U^T from the factors of a
   singular value decomposition U*S*V^T.

   Inputs:  U   - m x n matrix (array of m row pointers)
            s   - n singular values
            V   - n x n matrix (array of n row pointers)
   Output:  Agi - n x m matrix, overwritten with the pseudoinverse

   U, s and V are only read. Singular values that are not strictly greater
   than (largest singular value) * FLT_EPSILON are treated as zero, i.e.
   their reciprocal is replaced by 0. A scratch vector of n floats is
   allocated internally and released before returning; if that allocation
   fails, elog_die is called.

   Returns the number of singular values that passed the cutoff.

   Author: Gary L. Pavlis */
int pseudoinverse(float **U, float *s, float **V, int m, int n, float **Agi)
{
	int row, col, k;	/* loop counters */
	float *column;		/* scratch copy of one column of Agi */
	float largest;		/* largest singular value found */
	float recip;		/* reciprocal singular value, or 0 */
	double threshold;	/* smallest singular value kept */
	int kept = 0;		/* count of singular values used */
#ifndef SUNPERF
	int unit = 1;		/* stride handed to the Fortran BLAS */
#endif

	column = (float *)calloc(n,sizeof(float));
	if(column == NULL)
		elog_die(1,"Pseudoinverse computation: cannot alloc work array of length %d\n", n);

	/* Locate the largest singular value; the cutoff is expressed
	   relative to it. */
	largest = 0.0;
	for(k=0;k<n;++k)
	{
		if(s[k] > largest) largest = s[k];
	}
	threshold = (double)largest*FLT_EPSILON;

	/* Agi <- U^T */
	for(row=0;row<m;++row)
	{
		for(col=0;col<n;++col)
			Agi[col][row] = U[row][col];
	}

	/* Agi <- S^-1 * U^T.  Row k of Agi is a contiguous run of m floats,
	   so one scaling call per singular value does the job. */
	for(k=0;k<n;++k)
	{
		if( (double)s[k] > threshold)
		{
			recip = 1.0/s[k];
			++kept;
		}
		else
		{
			recip = 0.0;
		}
#ifdef SUNPERF
		sscal(m,recip,Agi[k],1);
#else
		sscal_(&m,&recip,Agi[k],&unit);
#endif
	}

	/* Agi <- V * Agi, one column at a time.  The column is copied out
	   first because it is overwritten while being used. */
	for(col=0;col<m;++col)
	{
		for(k=0;k<n;++k)
			column[k] = Agi[k][col];
		for(row=0;row<n;++row)
		{
#ifdef SUNPERF
			Agi[row][col] = sdot(n,column,1,V[row],1);
#else
			Agi[row][col] = sdot_(&n,column,&unit,V[row],&unit);
#endif
		}
	}

	free(column);
	return(kept);
}
/*
 * Single-precision dot product through the Fortran-style sdot_ entry point.
 * The by-value arguments are passed on by address.
 */
float STARPU_SDOT(const int n, const float *x, const int incx, const float *y, const int incy)
{
	/* GOTOBLAS hands back a FLOATRET, which is a double rather than a
	 * float, hence the explicit narrowing. */
	return (float)sdot_(&n, x, &incx, y, &incy);
}
/**
 * Dot product of two single-precision vectors, computed by sdot_ with unit
 * stride on both operands.
 *
 * @param x first vector
 * @param y second vector; must report the same size as x
 * @return the value returned by sdot_
 * @throws gException(gurls::Exception_Inconsistent_Size) when the sizes differ
 */
GURLS_EXPORT float dot(const gVec<float>& x, const gVec<float>& y)
{
    const bool sameLength = (x.getSize() == y.getSize());
    if (!sameLength)
    {
        throw gException(gurls::Exception_Inconsistent_Size);
    }

    int length = x.getSize();
    int stride = 1;

    // sdot_ takes non-const pointers, so the constness of the data is cast away.
    float* lhs = const_cast<float*>(x.getData());
    float* rhs = const_cast<float*>(y.getData());

    return sdot_(&length, lhs, &stride, rhs, &stride);
}
/* -- LAPACK auxiliary routine (version 3.4.2) -- */
/* -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
/*    September 2012 */

/* SLAPLL: given two strided vectors X and Y of length N, computes the QR */
/* factorization of the N-by-2 matrix ( X Y ) and stores in SSMIN the */
/* smaller value produced by SLAS2 for the resulting 2-by-2 upper */
/* triangular factor. */

/* N      number of elements in X and Y; for N <= 1, SSMIN is set to 0 */
/* X      overwritten by this routine */
/* INCX   stride between elements of X */
/* Y      overwritten by this routine */
/* INCY   stride between elements of Y */
/* SSMIN  output value */

/* Always returns 0. */
/* Subroutine */ int slapll_(integer *n, real *x, integer *incx, real *y, 
	integer *incy, real *ssmin)
{
    /* External routines */
    extern real sdot_(integer *, real *, integer *, real *, integer *);
    extern /* Subroutine */ int slas2_(real *, real *, real *, real *, real *)
	    ;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *), slarfg_(integer *, real *, real *, integer *, 
	    real *);

    /* Local variables */
    integer remaining;
    real tau, scale;
    real r11, r12, r22;
    real sigma_max;

    /* Quick return if possible */
    if (*n <= 1) {
	*ssmin = 0.f;
	return 0;
    }

    /* Compute the QR factorization of the N-by-2 matrix ( X Y ). */
    /* Elements are addressed 0-based here: x[0] is the first entry and */
    /* x[*incx] the second. */
    slarfg_(n, &x[0], &x[*incx], incx, &tau);
    r11 = x[0];
    x[0] = 1.f;

    scale = -tau * sdot_(n, &x[0], incx, &y[0], incy);
    saxpy_(n, &scale, &x[0], incx, &y[0], incy);

    remaining = *n - 1;
    slarfg_(&remaining, &y[*incy], &y[*incy << 1], incy, &tau);

    r12 = y[0];
    r22 = y[*incy];

    /* Compute the SVD of the 2-by-2 upper triangular matrix. */
    slas2_(&r11, &r12, &r22, ssmin, &sigma_max);

    return 0;

/*     End of SLAPLL */
}
/* SSPGST: reduce a real symmetric-definite generalized eigenproblem to */
/* standard form, with A and the Cholesky factor of B held in packed */
/* storage. Errors are reported through *info (and xerbla_); the function */
/* value is always 0. */
/* Subroutine */ int sspgst_(integer *itype, char *uplo, integer *n, real *ap,
	 real *bp, integer *info)
{
    /* System generated locals */
    integer i__1, i__2;
    real r__1;

    /* Local variables */
    integer j, k, j1, k1, jj, kk;
    real ct, ajj;
    integer j1j1;
    real akk;
    integer k1k1;
    real bjj, bkk;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    extern /* Subroutine */ int sspr2_(char *, integer *, real *, real *, 
	    integer *, real *, integer *, real *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    logical upper;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *), sspmv_(char *, integer *, real *, real *, 
	    real *, integer *, real *, real *, integer *), stpmv_(
	    char *, char *, char *, integer *, real *, real *, integer *), 
	    stpsv_(char *, char *, char *, integer *, real *, real *, integer 
	    *), xerbla_(char *, integer *);


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SSPGST reduces a real symmetric-definite generalized eigenproblem */
/*  to standard form, using packed storage. */

/*  If ITYPE = 1, the problem is A*x = lambda*B*x, */
/*  and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) */

/*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */
/*  B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. */

/*  B must have been previously factorized as U**T*U or L*L**T by SPPTRF. */

/*  Arguments */
/*  ========= */

/*  ITYPE   (input) INTEGER */
/*          = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); */
/*          = 2 or 3: compute U*A*U**T or L**T*A*L. */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  Upper triangle of A is stored and B is factored as */
/*                  U**T*U; */
/*          = 'L':  Lower triangle of A is stored and B is factored as */
/*                  L*L**T. */

/*  N       (input) INTEGER */
/*          The order of the matrices A and B.  N >= 0. */

/*  AP      (input/output) REAL array, dimension (N*(N+1)/2) */
/*          On entry, the upper or lower triangle of the symmetric matrix */
/*          A, packed columnwise in a linear array.  The j-th column of A */
/*          is stored in the array AP as follows: */
/*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */
/*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */

/*          On exit, if INFO = 0, the transformed matrix, stored in the */
/*          same format as A. */

/*  BP      (input) REAL array, dimension (N*(N+1)/2) */
/*          The triangular factor from the Cholesky factorization of B, */
/*          stored in the same format as A, as returned by SPPTRF. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  Implementation notes */
/*  ==================== */

/*  c__1, c_b9 and c_b11 are constants defined outside this routine and */
/*  are not visible here. NOTE(review): the comments below on each branch */
/*  describe the intended product; presumably c__1 = 1, c_b9 = -1.f and */
/*  c_b11 = 1.f as in reference SSPGST -- confirm against the file head. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments: shift both arrays so that the body can use */
    /* the 1-based indices of the packed-storage formulas above. */
    --bp;
    --ap;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (*itype < 1 || *itype > 3) {
	*info = -1;
    } else if (! upper && ! lsame_(uplo, "L")) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    }
    if (*info != 0) {
	/* Report the (positive) position of the offending argument. */
	i__1 = -(*info);
	xerbla_("SSPGST", &i__1);
	return 0;
    }

    if (*itype == 1) {
	if (upper) {

/*           Compute inv(U')*A*inv(U) */

/*           J1 and JJ are the indices of A(1,j) and A(j,j) */

	    jj = 0;
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		j1 = jj + 1;
		jj += j;

/*              Compute the j-th column of the upper triangle of A */

		/* bjj is read before the column is touched. */
		bjj = bp[jj];
		stpsv_(uplo, "Transpose", "Nonunit", &j, &bp[1], &ap[j1], &
			c__1);
		i__2 = j - 1;
		sspmv_(uplo, &i__2, &c_b9, &ap[1], &bp[j1], &c__1, &c_b11, &
			ap[j1], &c__1);
		i__2 = j - 1;
		r__1 = 1.f / bjj;
		sscal_(&i__2, &r__1, &ap[j1], &c__1);
		i__2 = j - 1;
		/* Diagonal entry: subtract the dot product of the first */
		/* j-1 entries of the A and B columns, then divide by bjj. */
		ap[jj] = (ap[jj] - sdot_(&i__2, &ap[j1], &c__1, &bp[j1], &
			c__1)) / bjj;
/* L10: */
	    }
	} else {

/*           Compute inv(L)*A*inv(L') */

/*           KK and K1K1 are the indices of A(k,k) and A(k+1,k+1) */

	    kk = 1;
	    i__1 = *n;
	    for (k = 1; k <= i__1; ++k) {
		k1k1 = kk + *n - k + 1;

/*              Update the lower triangle of A(k:n,k:n) */

		akk = ap[kk];
		bkk = bp[kk];
/* Computing 2nd power */
		r__1 = bkk;
		akk /= r__1 * r__1;
		ap[kk] = akk;
		if (k < *n) {
		    i__2 = *n - k;
		    r__1 = 1.f / bkk;
		    sscal_(&i__2, &r__1, &ap[kk + 1], &c__1);
		    ct = akk * -.5f;
		    /* The same saxpy_ call is issued before and after the */
		    /* sspr2_ rank-2 update, each adding ct times the B */
		    /* column. */
		    i__2 = *n - k;
		    saxpy_(&i__2, &ct, &bp[kk + 1], &c__1, &ap[kk + 1], &c__1)
			    ;
		    i__2 = *n - k;
		    sspr2_(uplo, &i__2, &c_b9, &ap[kk + 1], &c__1, &bp[kk + 1]
, &c__1, &ap[k1k1]);
		    i__2 = *n - k;
		    saxpy_(&i__2, &ct, &bp[kk + 1], &c__1, &ap[kk + 1], &c__1)
			    ;
		    i__2 = *n - k;
		    stpsv_(uplo, "No transpose", "Non-unit", &i__2, &bp[k1k1], 
			     &ap[kk + 1], &c__1);
		}
		kk = k1k1;
/* L20: */
	    }
	}
    } else {
	if (upper) {

/*           Compute U*A*U' */

/*           K1 and KK are the indices of A(1,k) and A(k,k) */

	    kk = 0;
	    i__1 = *n;
	    for (k = 1; k <= i__1; ++k) {
		k1 = kk + 1;
		kk += k;

/*              Update the upper triangle of A(1:k,1:k) */

		akk = ap[kk];
		bkk = bp[kk];
		i__2 = k - 1;
		stpmv_(uplo, "No transpose", "Non-unit", &i__2, &bp[1], &ap[
			k1], &c__1);
		ct = akk * .5f;
		/* As in the lower ITYPE = 1 branch, the saxpy_ call is */
		/* issued on both sides of the sspr2_ update. */
		i__2 = k - 1;
		saxpy_(&i__2, &ct, &bp[k1], &c__1, &ap[k1], &c__1);
		i__2 = k - 1;
		sspr2_(uplo, &i__2, &c_b11, &ap[k1], &c__1, &bp[k1], &c__1, &
			ap[1]);
		i__2 = k - 1;
		saxpy_(&i__2, &ct, &bp[k1], &c__1, &ap[k1], &c__1);
		i__2 = k - 1;
		sscal_(&i__2, &bkk, &ap[k1], &c__1);
/* Computing 2nd power */
		r__1 = bkk;
		ap[kk] = akk * (r__1 * r__1);
/* L30: */
	    }
	} else {

/*           Compute L'*A*L */

/*           JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1) */

	    jj = 1;
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		j1j1 = jj + *n - j + 1;

/*              Compute the j-th column of the lower triangle of A */

		ajj = ap[jj];
		bjj = bp[jj];
		i__2 = *n - j;
		ap[jj] = ajj * bjj + sdot_(&i__2, &ap[jj + 1], &c__1, &bp[jj 
			+ 1], &c__1);
		i__2 = *n - j;
		sscal_(&i__2, &bjj, &ap[jj + 1], &c__1);
		i__2 = *n - j;
		sspmv_(uplo, &i__2, &c_b11, &ap[j1j1], &bp[jj + 1], &c__1, &
			c_b11, &ap[jj + 1], &c__1);
		/* This call covers n-j+1 entries, i.e. it includes the */
		/* diagonal element written above. */
		i__2 = *n - j + 1;
		stpmv_(uplo, "Transpose", "Non-unit", &i__2, &bp[jj], &ap[jj], 
			 &c__1);
		jj = j1j1;
/* L40: */
	    }
	}
    }
    return 0;

/*     End of SSPGST */

} /* sspgst_ */
/*  -- LAPACK auxiliary test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*  SLARFY applies an elementary reflector, or Householder matrix, H, */
/*  to an n x n symmetric matrix C, from both the left and the right. */

/*  H is represented in the form */

/*     H = I - tau * v * v' */

/*  where tau is a scalar and v is a vector. */
/*  If tau is zero, H is taken to be the unit matrix and C is left as is. */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U': the upper triangle of C is stored */
/*          = 'L': the lower triangle of C is stored */
/*  N       (input) INTEGER */
/*          The number of rows and columns of C.  N >= 0. */
/*  V       (input) REAL array, dimension (1 + (N-1)*abs(INCV)) */
/*          The vector v described above. */
/*  INCV    (input) INTEGER */
/*          The increment between successive elements of v; must not be 0. */
/*  TAU     (input) REAL */
/*          The value tau described above. */
/*  C       (input/output) REAL array, dimension (LDC, N) */
/*          On entry, the matrix C; on exit, H * C * H'. */
/*  LDC     (input) INTEGER */
/*          The leading dimension of C.  LDC >= max( 1, N ). */
/*  WORK    (workspace) REAL array, dimension (N) */

/*  The function value is always 0. */
/* Subroutine */ int slarfy_(char *uplo, integer *n, real *v, integer *incv, 
	real *tau, real *c__, integer *ldc, real *work)
{
    /* External routines */
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, integer *);
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *), ssymv_(char *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, real *, integer *);

    /* Local variables */
    real alpha;
    real minus_tau;

    /* Nothing to apply when H is the identity. */
    if (*tau == 0.f) {
	return 0;
    }

/*     Form  w:= C * v */

    /* The arrays are handed to the BLAS by their base addresses; no */
    /* element of v, C or work is indexed directly in this routine. */
    ssymv_(uplo, n, &c_b2, c__, ldc, v, incv, &c_b3, work, &c__1);

    alpha = *tau * -.5f * sdot_(n, work, &c__1, v, incv);
    saxpy_(n, &alpha, v, incv, work, &c__1);

/*     C := C - v * w' - w * v' */

    minus_tau = -(*tau);
    ssyr2_(uplo, n, &minus_tau, v, incv, work, &c__1, c__, ldc);

    return 0;

/*     End of SLARFY */

} /* slarfy_ */
/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*  SPPTRI computes the inverse of a real symmetric positive definite */
/*  matrix A using the Cholesky factorization A = U**T*U or A = L*L**T */
/*  computed by SPPTRF. */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  Upper triangular factor is stored in AP; */
/*          = 'L':  Lower triangular factor is stored in AP. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  AP      (input/output) REAL array, dimension (N*(N+1)/2) */
/*          On entry, the triangular factor U or L, packed columnwise: */
/*          if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j; */
/*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n. */
/*          On exit, the upper or lower triangle of the (symmetric) */
/*          inverse of A, overwriting the input factor. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
/*          > 0:  if INFO = i, the (i,i) element of the factor U or L is */
/*                zero, and the inverse could not be computed. */

/*  The function value is always 0. */
/* Subroutine */ int spptri_(char *uplo, integer *n, real *ap, integer *info)
{
    /* External routines */
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    extern /* Subroutine */ int sspr_(char *, integer *, real *, real *, 
	    integer *, real *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    extern /* Subroutine */ int stpmv_(char *, char *, char *, integer *, 
	    real *, real *, integer *), xerbla_(char *
, integer *), stptri_(char *, char *, integer *, real *, integer *);

    /* Local variables */
    integer col;		/* current column, 1-based */
    integer col_start;		/* packed index of the first entry of col */
    integer diag;		/* packed index of the diagonal entry of col */
    integer next_diag;		/* packed index of the next diagonal entry */
    integer count;		/* length argument for the BLAS calls */
    integer last;		/* loop bound, read from *n once */
    integer bad_arg;		/* argument position passed to xerbla_ */
    real diag_val;
    logical is_upper;

    /* Shift the array so that packed indices can be used 1-based. */
    --ap;

/*     Test the input parameters. */

    *info = 0;
    is_upper = lsame_(uplo, "U");
    if (! is_upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    }
    if (*info != 0) {
	bad_arg = -(*info);
	xerbla_("SPPTRI", &bad_arg);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Invert the triangular Cholesky factor U or L. */

    stptri_(uplo, "Non-unit", n, &ap[1], info);
    if (*info > 0) {
	return 0;
    }

    last = *n;

    if (! is_upper) {

/*        Compute the product inv(L)' * inv(L). */

	diag = 1;
	for (col = 1; col <= last; ++col) {
	    next_diag = diag + *n - col + 1;

	    /* Diagonal entry: dot product of the column tail with itself. */
	    count = *n - col + 1;
	    ap[diag] = sdot_(&count, &ap[diag], &c__1, &ap[diag], &c__1);

	    if (col < *n) {
		count = *n - col;
		stpmv_("Lower", "Transpose", "Non-unit", &count, &ap[next_diag]
, &ap[diag + 1], &c__1);
	    }
	    diag = next_diag;
	}
	return 0;
    }

/*     Compute the product inv(U) * inv(U)'. */

    diag = 0;
    for (col = 1; col <= last; ++col) {
	col_start = diag + 1;
	diag += col;

	if (col > 1) {
	    count = col - 1;
	    sspr_("Upper", &count, &c_b8, &ap[col_start], &c__1, &ap[1]);
	}

	/* Scale the whole column, diagonal included, by its diagonal. */
	diag_val = ap[diag];
	sscal_(&col, &diag_val, &ap[col_start], &c__1);
    }

    return 0;

/*     End of SPPTRI */

} /* spptri_ */
/* SSYTD2: reduce a real symmetric matrix to symmetric tridiagonal form by */
/* an orthogonal similarity transformation (unblocked code). Errors are */
/* reported through *info (and xerbla_); the function value is always 0. */
/* Subroutine */ int ssytd2_(char *uplo, integer *n, real *a, integer *lda, 
	real *d, real *e, real *tau, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1992   


    Purpose   
    =======   

    SSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal   
    form T by an orthogonal similarity transformation: Q' * A * Q = T.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the upper or lower triangular part of the   
            symmetric matrix A is stored:   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading 
  
            n-by-n upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading n-by-n lower triangular part of A contains the lower 
  
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   
            On exit, if UPLO = 'U', the diagonal and first superdiagonal 
  
            of A are overwritten by the corresponding elements of the   
            tridiagonal matrix T, and the elements above the first   
            superdiagonal, with the array TAU, represent the orthogonal   
            matrix Q as a product of elementary reflectors; if UPLO   
            = 'L', the diagonal and first subdiagonal of A are over-   
            written by the corresponding elements of the tridiagonal   
            matrix T, and the elements below the first subdiagonal, with 
  
            the array TAU, represent the orthogonal matrix Q as a product 
  
            of elementary reflectors. See Further Details.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    D       (output) REAL array, dimension (N)   
            The diagonal elements of the tridiagonal matrix T:   
            D(i) = A(i,i).   

    E       (output) REAL array, dimension (N-1)   
            The off-diagonal elements of the tridiagonal matrix T:   
            E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. 
  

    TAU     (output) REAL array, dimension (N-1)   
            The scalar factors of the elementary reflectors (see Further 
  
            Details).   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    Further Details   
    ===============   

    If UPLO = 'U', the matrix Q is represented as a product of elementary 
  
    reflectors   

       Q = H(n-1) . . . H(2) H(1).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a real scalar, and v is a real vector with   
    v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in   
    A(1:i-1,i+1), and tau in TAU(i).   

    If UPLO = 'L', the matrix Q is represented as a product of elementary 
  
    reflectors   

       Q = H(1) H(2) . . . H(n-1).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a real scalar, and v is a real vector with   
    v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), 
  
    and tau in TAU(i).   

    The contents of A on exit are illustrated by the following examples   
    with n = 5:   

    if UPLO = 'U':                       if UPLO = 'L':   

      (  d   e   v2  v3  v4 )              (  d                  )   
      (      d   e   v3  v4 )              (  e   d              )   
      (          d   e   v4 )              (  v1  e   d          )   
      (              d   e  )              (  v1  v2  e   d      )   
      (                  d  )              (  v1  v2  v3  e   d  )   

    where d and e denote diagonal and off-diagonal elements of T, and vi 
  
    denotes an element of the vector defining H(i).   

    Implementation notes   
    ====================   

    The scalar locals taui, i, alpha and upper are declared static, so   
    their storage persists between calls and is shared by all callers.   
    a_dim1, a_offset and i__3 are declared or assigned but never read;   
    i__1 is read only on the error path.   
    max() and min() are not defined in this routine; NOTE(review):   
    presumably the f2c.h macros -- confirm.   

    ===================================================================== 
  


       Test the input parameters   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    static real c_b8 = 0.f;
    static real c_b14 = -1.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    /* Local variables */
    static real taui;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    static integer i;
    extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, integer *);
    static real alpha;
    extern logical lsame_(char *, char *);
    static logical upper;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *), ssymv_(char *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, real *, integer *), 
	    xerbla_(char *, integer *), slarfg_(integer *, real *, 
	    real *, integer *, real *);


/* 1-based accessors over the 0-based C arrays; A(I,J) addresses element */
/* (I,J) with consecutive rows adjacent and columns *lda apart. */
#define D(I) d[(I)-1]
#define E(I) e[(I)-1]
#define TAU(I) tau[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	/* Report the (positive) position of the offending argument. */
	i__1 = -(*info);
	xerbla_("SSYTD2", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n <= 0) {
	return 0;
    }

    if (upper) {

/*        Reduce the upper triangle of A */

	for (i = *n - 1; i >= 1; --i) {

/*           Generate elementary reflector H(i) = I - tau * v * v'   
             to annihilate A(1:i-1,i+1) */

	    slarfg_(&i, &A(i,i+1), &A(1,i+1), &
		    c__1, &taui);
	    E(i) = A(i,i+1);

	    if (taui != 0.f) {

/*              Apply H(i) from both sides to A(1:i,1:i) */

		/* Temporarily store 1 in the slot holding E(i); it is */
		/* restored after the update below. */
		A(i,i+1) = 1.f;

/*              Compute  x := tau * A * v  storing x in TAU(1:i) */

		ssymv_(uplo, &i, &taui, &A(1,1), lda, &A(1,i+1), &c__1, &c_b8, &TAU(1), &c__1);

/*              Compute  w := x - 1/2 * tau * (x'*v) * v */

		alpha = taui * -.5f * sdot_(&i, &TAU(1), &c__1, &A(1,i+1), &c__1);
		saxpy_(&i, &alpha, &A(1,i+1), &c__1, &TAU(1), &
			c__1);

/*              Apply the transformation as a rank-2 update:   
                   A := A - v * w' - w * v' */

		ssyr2_(uplo, &i, &c_b14, &A(1,i+1), &c__1, &
			TAU(1), &c__1, &A(1,1), lda);

		A(i,i+1) = E(i);
	    }
	    D(i + 1) = A(i+1,i+1);
	    /* TAU(1:i) was used as workspace above; the final TAU(i) is */
	    /* written only now. */
	    TAU(i) = taui;
/* L10: */
	}
	D(1) = A(1,1);
    } else {

/*        Reduce the lower triangle of A */

	i__1 = *n - 1;
	for (i = 1; i <= *n-1; ++i) {

/*           Generate elementary reflector H(i) = I - tau * v * v'   
             to annihilate A(i+2:n,i) */

	    i__2 = *n - i;
/* Computing MIN */
	    i__3 = i + 2;
	    /* min(i+2,*n) keeps the row index within 1..n on the last */
	    /* iteration, where i+2 would be n+1. */
	    slarfg_(&i__2, &A(i+1,i), &A(min(i+2,*n),i), &c__1, &taui);
	    E(i) = A(i+1,i);

	    if (taui != 0.f) {

/*              Apply H(i) from both sides to A(i+1:n,i+1:n) */

		A(i+1,i) = 1.f;

/*              Compute  x := tau * A * v  storing x in TAU(i:n-1) */

		i__2 = *n - i;
		ssymv_(uplo, &i__2, &taui, &A(i+1,i+1), lda, &A(i+1,i), &c__1, &c_b8, &TAU(i), &c__1);

/*              Compute  w := x - 1/2 * tau * (x'*v) * v */

		i__2 = *n - i;
		alpha = taui * -.5f * sdot_(&i__2, &TAU(i), &c__1, &A(i+1,i), &c__1);
		i__2 = *n - i;
		saxpy_(&i__2, &alpha, &A(i+1,i), &c__1, &TAU(i), &c__1);

/*              Apply the transformation as a rank-2 update:   
                   A := A - v * w' - w * v' */

		i__2 = *n - i;
		ssyr2_(uplo, &i__2, &c_b14, &A(i+1,i), &c__1, &
			TAU(i), &c__1, &A(i+1,i+1), lda);

		A(i+1,i) = E(i);
	    }
	    D(i) = A(i,i);
	    TAU(i) = taui;
/* L20: */
	}
	D(*n) = A(*n,*n);
    }

    return 0;

/*     End of SSYTD2 */

} /* ssytd2_ */
/* SLATDF: using the LU factorization of Z, solve Z * x = b with b chosen */
/* to make x large, and fold the result into the running sum of squares */
/* (RDSUM, RDSCAL). The function value is always 0. */
/* Subroutine */ int slatdf_(integer *ijob, integer *n, real *z__, integer *
	ldz, real *rhs, real *rdsum, real *rdscal, integer *ipiv, integer *
	jpiv)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    SLATDF uses the LU factorization of the n-by-n matrix Z computed by   
    SGETC2 and computes a contribution to the reciprocal Dif-estimate   
    by solving Z * x = b for x, and choosing the r.h.s. b such that   
    the norm of x is as large as possible. On entry RHS = b holds the   
    contribution from earlier solved sub-systems, and on return RHS = x.   

    The factorization of Z returned by SGETC2 has the form Z = P*L*U*Q,   
    where P and Q are permutation matrices. L is lower triangular with   
    unit diagonal elements and U is upper triangular.   

    Arguments   
    =========   

    IJOB    (input) INTEGER   
            IJOB = 2: First compute an approximate null-vector e   
                of Z using SGECON, e is normalized and solve for   
                Zx = +-e - f with the sign giving the greater value   
                of 2-norm(x). About 5 times as expensive as Default.   
            IJOB .ne. 2: Local look ahead strategy where all entries of   
                the r.h.s. b are chosen as either +1 or -1 (Default).   

    N       (input) INTEGER   
            The number of columns of the matrix Z.   

    Z       (input) REAL array, dimension (LDZ, N)   
            On entry, the LU part of the factorization of the n-by-n   
            matrix Z computed by SGETC2:  Z = P * L * U * Q   

    LDZ     (input) INTEGER   
            The leading dimension of the array Z.  LDZ >= max(1, N).   

    RHS     (input/output) REAL array, dimension N.   
            On entry, RHS contains contributions from other subsystems.   
            On exit, RHS contains the solution of the subsystem with   
            entries according to the value of IJOB (see above).   

    RDSUM   (input/output) REAL   
            On entry, the sum of squares of computed contributions to   
            the Dif-estimate under computation by STGSYL, where the   
            scaling factor RDSCAL (see below) has been factored out.   
            On exit, the corresponding sum of squares updated with the   
            contributions from the current sub-system.   
            If TRANS = 'T' RDSUM is not touched.   
            NOTE: RDSUM only makes sense when STGSY2 is called by STGSYL.   

    RDSCAL  (input/output) REAL   
            On entry, scaling factor used to prevent overflow in RDSUM.   
            On exit, RDSCAL is updated w.r.t. the current contributions   
            in RDSUM.   
            If TRANS = 'T', RDSCAL is not touched.   
            NOTE: RDSCAL only makes sense when STGSY2 is called by   
                  STGSYL.   

    IPIV    (input) INTEGER array, dimension (N).   
            The pivot indices; for 1 <= i <= N, row i of the   
            matrix has been interchanged with row IPIV(i).   

    JPIV    (input) INTEGER array, dimension (N).   
            The pivot indices; for 1 <= j <= N, column j of the   
            matrix has been interchanged with column JPIV(j).   

    Further Details   
    ===============   

    Based on contributions by   
       Bo Kagstrom and Peter Poromaa, Department of Computing Science,   
       Umea University, S-901 87 Umea, Sweden.   

    This routine is a further developed implementation of algorithm   
    BSOLVE in [1] using complete pivoting in the LU factorization.   

    [1] Bo Kagstrom and Lars Westin,   
        Generalized Schur Methods with Condition Estimators for   
        Solving the Generalized Sylvester Equation, IEEE Transactions   
        on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751.   

    [2] Peter Poromaa,   
        On Efficient and Robust Estimators for the Separation   
        between two Regular Matrix Pairs with Applications in   
        Condition Estimation. Report IMINF-95.05, Department of   
        Computing Science, Umea University, S-901 87 Umea, Sweden, 1995.   

    Implementation notes   
    ====================   

    All scalar locals and the scratch arrays work[32], iwork[8], xm[8]   
    and xp[8] are declared static, so their storage persists between   
    calls and is shared by all callers. xm and xp are indexed up to   
    *n - 1, so as written the routine can only address N up to 8.   
    dabs() is not defined in this routine; NOTE(review): presumably the   
    f2c.h absolute-value macro -- confirm.   

    =====================================================================   


       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static real c_b23 = 1.f;
    static real c_b37 = -1.f;
    
    /* System generated locals */
    integer z_dim1, z_offset, i__1, i__2;
    real r__1;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static integer info;
    static real temp;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    static real work[32];
    static integer i__, j, k;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static real pmone;
    extern doublereal sasum_(integer *, real *, integer *);
    static real sminu;
    static integer iwork[8];
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *), saxpy_(integer *, real *, real *, integer *, real *, 
	    integer *);
    static real splus;
    extern /* Subroutine */ int sgesc2_(integer *, real *, integer *, real *, 
	    integer *, integer *, real *);
    static real bm, bp, xm[8], xp[8];
    extern /* Subroutine */ int sgecon_(char *, integer *, real *, integer *, 
	    real *, real *, real *, integer *, integer *), slassq_(
	    integer *, real *, integer *, real *, real *), slaswp_(integer *, 
	    real *, integer *, integer *, integer *, integer *, integer *);
/* 1-based (row, column) accessor for Z; valid once z__ has been shifted */
/* by z_offset below. */
#define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]


    /* Shift z__, rhs, ipiv and jpiv so the body can index them 1-based. */
    /* xm, xp, work and iwork are local arrays and stay 0-based. */
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --rhs;
    --ipiv;
    --jpiv;

    /* Function Body */
    if (*ijob != 2) {

/*        Apply permutations IPIV to RHS */

	i__1 = *n - 1;
	slaswp_(&c__1, &rhs[1], ldz, &c__1, &i__1, &ipiv[1], &c__1);

/*        Solve for L-part choosing RHS either to +1 or -1. */

	pmone = -1.f;

	i__1 = *n - 1;
	for (j = 1; j <= i__1; ++j) {
	    /* The two candidate values for RHS(J). */
	    bp = rhs[j] + 1.f;
	    bm = rhs[j] - 1.f;
	    splus = 1.f;

/*           Look-ahead for L-part RHS(1:N-1) = + or -1, SPLUS and   
             SMIN computed more efficiently than in BSOLVE [1]. */

	    i__2 = *n - j;
	    splus += sdot_(&i__2, &z___ref(j + 1, j), &c__1, &z___ref(j + 1, 
		    j), &c__1);
	    i__2 = *n - j;
	    sminu = sdot_(&i__2, &z___ref(j + 1, j), &c__1, &rhs[j + 1], &
		    c__1);
	    splus *= rhs[j];
	    if (splus > sminu) {
		rhs[j] = bp;
	    } else if (sminu > splus) {
		rhs[j] = bm;
	    } else {

/*              In this case the updating sums are equal and we can   
                choose RHS(J) +1 or -1. The first time this happens   
                we choose -1, thereafter +1. This is a simple way to   
                get good estimates of matrices like Byers well-known   
                example (see [1]). (Not done in BSOLVE.) */

		rhs[j] += pmone;
		pmone = 1.f;
	    }

/*           Compute the remaining r.h.s. */

	    temp = -rhs[j];
	    i__2 = *n - j;
	    saxpy_(&i__2, &temp, &z___ref(j + 1, j), &c__1, &rhs[j + 1], &
		    c__1);

/* L10: */
	}

/*        Solve for U-part, look-ahead for RHS(N) = +-1. This is not done   
          in BSOLVE and will hopefully give us a better estimate because   
          any ill-conditioning of the original matrix is transferred to U 
  
          and not to L. U(N, N) is an approximation to sigma_min(LU). */

	/* xp carries the "+1" candidate and rhs the "-1" candidate; both */
	/* share the first N-1 entries. */
	i__1 = *n - 1;
	scopy_(&i__1, &rhs[1], &c__1, xp, &c__1);
	xp[*n - 1] = rhs[*n] + 1.f;
	rhs[*n] += -1.f;
	splus = 0.f;
	sminu = 0.f;
	/* Back substitution with U on both candidates at once, */
	/* accumulating the sum of absolute values of each solution. */
	for (i__ = *n; i__ >= 1; --i__) {
	    temp = 1.f / z___ref(i__, i__);
	    xp[i__ - 1] *= temp;
	    rhs[i__] *= temp;
	    i__1 = *n;
	    for (k = i__ + 1; k <= i__1; ++k) {
		xp[i__ - 1] -= xp[k - 1] * (z___ref(i__, k) * temp);
		rhs[i__] -= rhs[k] * (z___ref(i__, k) * temp);
/* L20: */
	    }
	    splus += (r__1 = xp[i__ - 1], dabs(r__1));
	    sminu += (r__1 = rhs[i__], dabs(r__1));
/* L30: */
	}
	/* Keep whichever candidate has the larger sum. */
	if (splus > sminu) {
	    scopy_(n, xp, &c__1, &rhs[1], &c__1);
	}

/*        Apply the permutations JPIV to the computed solution (RHS) */

	i__1 = *n - 1;
	slaswp_(&c__1, &rhs[1], ldz, &c__1, &i__1, &jpiv[1], &c_n1);

/*        Compute the sum of squares */

	slassq_(n, &rhs[1], &c__1, rdscal, rdsum);

    } else {

/*        IJOB = 2, Compute approximate nullvector XM of Z */

	sgecon_("I", n, &z__[z_offset], ldz, &c_b23, &temp, work, iwork, &
		info);
	/* XM is taken from work starting at 0-based offset N. */
	scopy_(n, &work[*n], &c__1, xm, &c__1);

/*        Compute RHS */

	i__1 = *n - 1;
	slaswp_(&c__1, xm, ldz, &c__1, &i__1, &ipiv[1], &c_n1);
	/* Scale XM by the reciprocal of its 2-norm. */
	temp = 1.f / sqrt(sdot_(n, xm, &c__1, xm, &c__1));
	sscal_(n, &temp, xm, &c__1);
	/* xp := xm + rhs and rhs := rhs - xm are the two candidates. */
	scopy_(n, xm, &c__1, xp, &c__1);
	saxpy_(n, &c_b23, &rhs[1], &c__1, xp, &c__1);
	saxpy_(n, &c_b37, xm, &c__1, &rhs[1], &c__1);
	sgesc2_(n, &z__[z_offset], ldz, &rhs[1], &ipiv[1], &jpiv[1], &temp);
	sgesc2_(n, &z__[z_offset], ldz, xp, &ipiv[1], &jpiv[1], &temp);
	/* Keep the candidate with the larger sasum_ value. */
	if (sasum_(n, xp, &c__1) > sasum_(n, &rhs[1], &c__1)) {
	    scopy_(n, xp, &c__1, &rhs[1], &c__1);
	}

/*        Compute the sum of squares */

	slassq_(n, &rhs[1], &c__1, rdscal, rdsum);

    }

    return 0;

/*     End of SLATDF */

} /* slatdf_ */
/* Subroutine */ int spst01_(char *uplo, integer *n, real *a, integer *lda,
        real *afac, integer *ldafac, real *perm, integer *ldperm,
        integer *piv, real *rwork, real *resid, integer *rank)
{
    /* System generated locals */
    integer a_dim1, a_offset, afac_dim1, afac_offset, perm_dim1, perm_offset,
            i__1, i__2;

    /* Local variables */
    integer i__, j, k;
    real t, eps;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    extern /* Subroutine */ int ssyr_(char *, integer *, real *, real *,
            integer *, real *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    real anorm;
    extern /* Subroutine */ int strmv_(char *, char *, char *, integer *,
            real *, integer *, real *, integer *);
    extern doublereal slamch_(char *), slansy_(char *, char *, integer *,
            real *, integer *, real *);

/*  -- LAPACK test routine (version 3.1) -- */
/*     Craig Lucas, University of Manchester / NAG Ltd. */
/*     October, 2008 */

/*  Purpose */
/*  ======= */

/*  SPST01 reconstructs a symmetric positive semidefinite matrix A */
/*  from its L or U factors and the permutation matrix P and computes */
/*  the residual */
/*     norm( P*L*L'*P' - A ) / ( N * norm(A) * EPS ) or */
/*     norm( P*U'*U*P' - A ) / ( N * norm(A) * EPS ), */
/*  where EPS is the machine epsilon. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the upper or lower triangular part of the */
/*          symmetric matrix A is stored: */
/*          = 'U':  Upper triangular */
/*          = 'L':  Lower triangular */

/*  N       (input) INTEGER */
/*          The number of rows and columns of the matrix A.  N >= 0. */

/*  A       (input) REAL array, dimension (LDA,N) */
/*          The original symmetric matrix A. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N) */

/*  AFAC    (input/output) REAL array, dimension (LDAFAC,N) */
/*          On entry, the factor L or U from the L*L' or U'*U */
/*          factorization of A.  This routine overwrites the stored */
/*          triangle with the product U'*U or L*L'. */

/*  LDAFAC  (input) INTEGER */
/*          The leading dimension of the array AFAC.  LDAFAC >= max(1,N). */

/*  PERM    (output) REAL array, dimension (LDPERM,N) */
/*          Overwritten with the reconstructed matrix, and then with the */
/*          difference P*L*L'*P' - A (or P*U'*U*P' - A) */

/*  LDPERM  (input) INTEGER */
/*          The leading dimension of the array PERM. */
/*          LDPERM >= max(1,N). */

/*  PIV     (input) INTEGER array, dimension (N) */
/*          PIV is such that the nonzero entries are */
/*          P( PIV( K ), K ) = 1. */

/*  RWORK   (workspace) REAL array, dimension (N) */

/*  RESID   (output) REAL */
/*          If UPLO = 'L', norm(P*L*L'*P' - A) / ( N * norm(A) * EPS ) */
/*          If UPLO = 'U', norm(P*U'*U*P' - A) / ( N * norm(A) * EPS ) */
/*          Set to 0 when N <= 0 and to 1/EPS when norm(A) <= 0. */

/*  RANK    (input) INTEGER */
/*          When RANK < N, the entries of the stored triangle of AFAC in */
/*          rows and columns RANK+1..N are set to zero before the */
/*          product is formed. */

/*  ===================================================================== */

    /* Parameter adjustments (switch to Fortran-style 1-based indexing) */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    afac_dim1 = *ldafac;
    afac_offset = 1 + afac_dim1;
    afac -= afac_offset;
    perm_dim1 = *ldperm;
    perm_offset = 1 + perm_dim1;
    perm -= perm_offset;
    --piv;
    --rwork;

    /* Function Body */

    /* Quick exit if N = 0. */
    if (*n <= 0) {
        *resid = 0.f;
        return 0;
    }

    /* Exit with RESID = 1/EPS if ANORM = 0. */
    eps = slamch_("Epsilon");
    anorm = slansy_("1", uplo, n, &a[a_offset], lda, &rwork[1]);
    if (anorm <= 0.f) {
        *resid = 1.f / eps;
        return 0;
    }

    if (lsame_(uplo, "U")) {

        /* Zero the trailing part of the upper triangle beyond RANK. */
        if (*rank < *n) {
            i__1 = *n;
            for (j = *rank + 1; j <= i__1; ++j) {
                i__2 = j;
                for (i__ = *rank + 1; i__ <= i__2; ++i__) {
                    afac[i__ + j * afac_dim1] = 0.f;
                }
            }
        }

        /* Compute the product U'*U, overwriting U. */
        for (k = *n; k >= 1; --k) {

            /* Compute the (K,K) element of the result. */
            t = sdot_(&k, &afac[k * afac_dim1 + 1], &c__1,
                    &afac[k * afac_dim1 + 1], &c__1);
            afac[k + k * afac_dim1] = t;

            /* Compute the rest of column K. */
            i__1 = k - 1;
            strmv_("Upper", "Transpose", "Non-unit", &i__1,
                    &afac[afac_offset], ldafac, &afac[k * afac_dim1 + 1],
                    &c__1);
        }

    } else {

        /* Zero the trailing part of the lower triangle beyond RANK. */
        if (*rank < *n) {
            i__1 = *n;
            for (j = *rank + 1; j <= i__1; ++j) {
                i__2 = *n;
                for (i__ = j; i__ <= i__2; ++i__) {
                    afac[i__ + j * afac_dim1] = 0.f;
                }
            }
        }

        /* Compute the product L*L', overwriting L. */
        for (k = *n; k >= 1; --k) {

            /* Add a multiple of column K of the factor L to each of */
            /* columns K+1 through N.  (c_b18 is a file-level constant */
            /* defined outside this routine; presumably 1.f -- verify.) */
            if (k + 1 <= *n) {
                i__1 = *n - k;
                ssyr_("Lower", &i__1, &c_b18, &afac[k + 1 + k * afac_dim1],
                        &c__1, &afac[k + 1 + (k + 1) * afac_dim1], ldafac);
            }

            /* Scale column K by the diagonal element. */
            t = afac[k + k * afac_dim1];
            i__1 = *n - k + 1;
            sscal_(&i__1, &t, &afac[k + k * afac_dim1], &c__1);
        }
    }

    /* Form P*L*L'*P' or P*U'*U*P'.  Only the triangle selected by UPLO */
    /* of PERM is written; the source element is taken from the stored */
    /* triangle of AFAC by swapping indices where needed. */
    if (lsame_(uplo, "U")) {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__2 = *n;
            for (i__ = 1; i__ <= i__2; ++i__) {
                if (piv[i__] <= piv[j]) {
                    if (i__ <= j) {
                        perm[piv[i__] + piv[j] * perm_dim1] =
                                afac[i__ + j * afac_dim1];
                    } else {
                        perm[piv[i__] + piv[j] * perm_dim1] =
                                afac[j + i__ * afac_dim1];
                    }
                }
            }
        }
    } else {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__2 = *n;
            for (i__ = 1; i__ <= i__2; ++i__) {
                if (piv[i__] >= piv[j]) {
                    if (i__ >= j) {
                        perm[piv[i__] + piv[j] * perm_dim1] =
                                afac[i__ + j * afac_dim1];
                    } else {
                        perm[piv[i__] + piv[j] * perm_dim1] =
                                afac[j + i__ * afac_dim1];
                    }
                }
            }
        }
    }

    /* Compute the difference P*L*L'*P' - A (or P*U'*U*P' - A). */
    if (lsame_(uplo, "U")) {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__2 = j;
            for (i__ = 1; i__ <= i__2; ++i__) {
                perm[i__ + j * perm_dim1] -= a[i__ + j * a_dim1];
            }
        }
    } else {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__2 = *n;
            for (i__ = j; i__ <= i__2; ++i__) {
                perm[i__ + j * perm_dim1] -= a[i__ + j * a_dim1];
            }
        }
    }

    /* Compute norm( P*L*L'*P' - A ) / ( N * norm(A) * EPS ), or */
    /* norm( P*U'*U*P' - A ) / ( N * norm(A) * EPS ). */
    /* The matrix being measured is PERM, so its own leading dimension */
    /* LDPERM must be passed (not LDAFAC). */
    *resid = slansy_("1", uplo, n, &perm[perm_offset], ldperm, &rwork[1]);
    *resid = *resid / (real) (*n) / anorm / eps;

    return 0;

/*     End of SPST01 */

} /* spst01_ */
/* Subroutine */ int slatrd_(char *uplo, int *n, int *nb, real *a, int *lda,
        real *e, real *tau, real *w, int *ldw)
{
/*  -- LAPACK auxiliary routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       October 31, 1992


    Purpose
    =======

    SLATRD reduces NB rows and columns of a real symmetric matrix A to
    symmetric tridiagonal form by an orthogonal similarity
    transformation Q' * A * Q, and returns the matrices V and W which are
    needed to apply the transformation to the unreduced part of A.

    If UPLO = 'U', SLATRD reduces the last NB rows and columns of a
    matrix, of which the upper triangle is supplied;
    if UPLO = 'L', SLATRD reduces the first NB rows and columns of a
    matrix, of which the lower triangle is supplied.

    This is an auxiliary routine called by SSYTRD.

    Arguments
    =========

    UPLO    (input) CHARACTER
            Specifies whether the upper or lower triangular part of the
            symmetric matrix A is stored:
            = 'U': Upper triangular
            = 'L': Lower triangular

    N       (input) INTEGER
            The order of the matrix A.

    NB      (input) INTEGER
            The number of rows and columns to be reduced.

    A       (input/output) REAL array, dimension (LDA,N)
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading
            n-by-n upper triangular part of A contains the upper
            triangular part of the matrix A, and the strictly lower
            triangular part of A is not referenced.  If UPLO = 'L', the
            leading n-by-n lower triangular part of A contains the lower
            triangular part of the matrix A, and the strictly upper
            triangular part of A is not referenced.
            On exit:
            if UPLO = 'U', the last NB columns have been reduced to
              tridiagonal form, with the diagonal elements overwriting
              the diagonal elements of A; the elements above the diagonal
              with the array TAU, represent the orthogonal matrix Q as a
              product of elementary reflectors;
            if UPLO = 'L', the first NB columns have been reduced to
              tridiagonal form, with the diagonal elements overwriting
              the diagonal elements of A; the elements below the diagonal
              with the array TAU, represent the orthogonal matrix Q as a
              product of elementary reflectors.
            See Further Details.

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    E       (output) REAL array, dimension (N-1)
            If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal
            elements of the last NB columns of the reduced matrix;
            if UPLO = 'L', E(1:nb) contains the subdiagonal elements of
            the first NB columns of the reduced matrix.

    TAU     (output) REAL array, dimension (N-1)
            The scalar factors of the elementary reflectors, stored in
            TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'.
            See Further Details.

    W       (output) REAL array, dimension (LDW,NB)
            The n-by-nb matrix W required to update the unreduced part
            of A.

    LDW     (input) INTEGER
            The leading dimension of the array W.  LDW >= max(1,N).

    Further Details
    ===============

    If UPLO = 'U', the matrix Q is represented as a product of elementary
    reflectors

       Q = H(n) H(n-1) . . . H(n-nb+1).

    Each H(i) has the form

       H(i) = I - tau * v * v'

    where tau is a real scalar, and v is a real vector with
    v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),
    and tau in TAU(i-1).

    If UPLO = 'L', the matrix Q is represented as a product of elementary
    reflectors

       Q = H(1) H(2) . . . H(nb).

    Each H(i) has the form

       H(i) = I - tau * v * v'

    where tau is a real scalar, and v is a real vector with
    v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
    and tau in TAU(i).

    The elements of the vectors v together form the n-by-nb matrix V
    which is needed, with W, to apply the transformation to the unreduced
    part of the matrix, using a symmetric rank-2k update of the form:
    A := A - V*W' - W*V'.

    The contents of A on exit are illustrated by the following examples
    with n = 5 and nb = 2:

    if UPLO = 'U':                       if UPLO = 'L':

      (  a   a   a   v4  v5 )              (  d                  )
      (      a   a   v4  v5 )              (  1   d              )
      (          a   1   v5 )              (  v1  1   a          )
      (              d   1  )              (  v1  v2  a   a      )
      (                  d  )              (  v1  v2  a   a   a  )

    where d denotes a diagonal element of the reduced matrix, a denotes
    an element of the original matrix that is unchanged, and vi denotes
    an element of the vector defining H(i).

    =====================================================================
*/

    /* Table of constant values (passed by address to the BLAS calls) */
    static real c_b5 = -1.f;
    static real c_b6 = 1.f;
    static int c__1 = 1;
    static real c_b16 = 0.f;

    /* Scratch integers whose addresses are handed to BLAS/LAPACK calls */
    int i__2, i__3;

    /* Local variables.  These are plain automatics: each one is assigned
       before it is read on every call, so no state needs to persist
       between calls. */
    int i;
    int iw;
    real alpha;

    extern doublereal sdot_(int *, real *, int *, real *, int *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(int *, real *, real *, int *),
            sgemv_(char *, int *, int *, real *, real *, int *, real *,
            int *, real *, real *, int *),
            saxpy_(int *, real *, real *, int *, real *, int *),
            ssymv_(char *, int *, real *, real *, int *, real *, int *,
            real *, real *, int *);
    extern /* Subroutine */ int slarfg_(int *, real *, real *, int *,
            real *);

    /* Fortran-style 1-based, column-major accessors.  They are local to
       this routine and are removed again with #undef after its body. */
#define E(I) e[(I)-1]
#define TAU(I) tau[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define W(I,J) w[(I)-1 + ((J)-1)* ( *ldw)]

    /* Quick return if possible */
    if (*n <= 0) {
        return 0;
    }

    if (lsame_(uplo, "U")) {

        /* Reduce last NB columns of upper triangle */
        for (i = *n; i >= *n - *nb + 1; --i) {
            iw = i - *n + *nb;
            if (i < *n) {

                /* Update A(1:i,i) */
                i__2 = *n - i;
                sgemv_("No transpose", &i, &i__2, &c_b5, &A(1,i+1), lda,
                        &W(i,iw+1), ldw, &c_b6, &A(1,i), &c__1);
                i__2 = *n - i;
                sgemv_("No transpose", &i, &i__2, &c_b5, &W(1,iw+1), ldw,
                        &A(i,i+1), lda, &c_b6, &A(1,i), &c__1);
            }
            if (i > 1) {

                /* Generate elementary reflector H(i) to annihilate
                   A(1:i-2,i) */
                i__2 = i - 1;
                slarfg_(&i__2, &A(i-1,i), &A(1,i), &c__1, &TAU(i - 1));
                E(i - 1) = A(i-1,i);
                A(i-1,i) = 1.f;

                /* Compute W(1:i-1,i) */
                i__2 = i - 1;
                ssymv_("Upper", &i__2, &c_b6, &A(1,1), lda, &A(1,i), &c__1,
                        &c_b16, &W(1,iw), &c__1);
                if (i < *n) {
                    i__2 = i - 1;
                    i__3 = *n - i;
                    sgemv_("Transpose", &i__2, &i__3, &c_b6, &W(1,iw+1),
                            ldw, &A(1,i), &c__1, &c_b16, &W(i+1,iw), &c__1);
                    i__2 = i - 1;
                    i__3 = *n - i;
                    sgemv_("No transpose", &i__2, &i__3, &c_b5, &A(1,i+1),
                            lda, &W(i+1,iw), &c__1, &c_b6, &W(1,iw), &c__1);
                    i__2 = i - 1;
                    i__3 = *n - i;
                    sgemv_("Transpose", &i__2, &i__3, &c_b6, &A(1,i+1),
                            lda, &A(1,i), &c__1, &c_b16, &W(i+1,iw), &c__1);
                    i__2 = i - 1;
                    i__3 = *n - i;
                    sgemv_("No transpose", &i__2, &i__3, &c_b5, &W(1,iw+1),
                            ldw, &W(i+1,iw), &c__1, &c_b6, &W(1,iw), &c__1);
                }
                i__2 = i - 1;
                sscal_(&i__2, &TAU(i - 1), &W(1,iw), &c__1);
                i__2 = i - 1;
                alpha = TAU(i - 1) * -.5f * sdot_(&i__2, &W(1,iw), &c__1,
                        &A(1,i), &c__1);
                i__2 = i - 1;
                saxpy_(&i__2, &alpha, &A(1,i), &c__1, &W(1,iw), &c__1);
            }
        }
    } else {

        /* Reduce first NB columns of lower triangle */
        for (i = 1; i <= *nb; ++i) {

            /* Update A(i:n,i) */
            i__2 = *n - i + 1;
            i__3 = i - 1;
            sgemv_("No transpose", &i__2, &i__3, &c_b5, &A(i,1), lda,
                    &W(i,1), ldw, &c_b6, &A(i,i), &c__1);
            i__2 = *n - i + 1;
            i__3 = i - 1;
            sgemv_("No transpose", &i__2, &i__3, &c_b5, &W(i,1), ldw,
                    &A(i,1), lda, &c_b6, &A(i,i), &c__1);
            if (i < *n) {

                /* Generate elementary reflector H(i) to annihilate
                   A(i+2:n,i).  min() keeps the vector start inside the
                   matrix when i+2 > n. */
                i__2 = *n - i;
                slarfg_(&i__2, &A(i+1,i), &A(min(i+2,*n),i), &c__1,
                        &TAU(i));
                E(i) = A(i+1,i);
                A(i+1,i) = 1.f;

                /* Compute W(i+1:n,i) */
                i__2 = *n - i;
                ssymv_("Lower", &i__2, &c_b6, &A(i+1,i+1), lda, &A(i+1,i),
                        &c__1, &c_b16, &W(i+1,i), &c__1);
                i__2 = *n - i;
                i__3 = i - 1;
                sgemv_("Transpose", &i__2, &i__3, &c_b6, &W(i+1,1), ldw,
                        &A(i+1,i), &c__1, &c_b16, &W(1,i), &c__1);
                i__2 = *n - i;
                i__3 = i - 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b5, &A(i+1,1), lda,
                        &W(1,i), &c__1, &c_b6, &W(i+1,i), &c__1);
                i__2 = *n - i;
                i__3 = i - 1;
                sgemv_("Transpose", &i__2, &i__3, &c_b6, &A(i+1,1), lda,
                        &A(i+1,i), &c__1, &c_b16, &W(1,i), &c__1);
                i__2 = *n - i;
                i__3 = i - 1;
                sgemv_("No transpose", &i__2, &i__3, &c_b5, &W(i+1,1), ldw,
                        &W(1,i), &c__1, &c_b6, &W(i+1,i), &c__1);
                i__2 = *n - i;
                sscal_(&i__2, &TAU(i), &W(i+1,i), &c__1);
                i__2 = *n - i;
                alpha = TAU(i) * -.5f * sdot_(&i__2, &W(i+1,i), &c__1,
                        &A(i+1,i), &c__1);
                i__2 = *n - i;
                saxpy_(&i__2, &alpha, &A(i+1,i), &c__1, &W(i+1,i), &c__1);
            }
        }
    }

    return 0;

/*     End of SLATRD */

} /* slatrd_ */

/* Do not let the routine-local accessor macros leak into later code. */
#undef E
#undef TAU
#undef A
#undef W
/* Subroutine */ int ssytri_rook_(char *uplo, integer *n, real *a, integer *
        lda, integer *ipiv, real *work, integer *info)
{
/*  -- LAPACK computational routine (version 3.4.1) -- */
/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
/*     April 2012 */

/*  Overwrites the stored triangle of A (selected by UPLO) with the */
/*  inverse, using the block-diagonal factorization A = U*D*U**T or */
/*  A = L*D*L**T held in A together with the pivot vector IPIV. */
/*  IPIV(k) > 0 marks a 1 x 1 diagonal block, otherwise the block is */
/*  2 x 2.  WORK is scratch space used through WORK(1:N-1). */

/*  INFO on return: */
/*     0   normal completion, */
/*    -1   UPLO is neither 'U' nor 'L', */
/*    -2   N < 0, */
/*    -4   LDA < max(1,N), */
/*    >0   index of a 1 x 1 block of D whose diagonal entry is exactly */
/*         zero; A is left untouched in that case. */

/*  c__1, c_b11 and c_b13 are file-level constants defined outside this */
/*  routine (presumably 1, -1.f and 0.f as in the reference SSYMV calls */
/*  -- verify against their definitions). */

    /* Index helpers for 1-based column-major addressing */
    integer a_dim1, a_offset;

    /* Scratch count whose address is handed to the BLAS calls */
    integer cnt;

    integer k, kp;
    real offdiag, scale, det, ak, akp1, akkp1, held;
    logical upper;

    extern real sdot_(integer *, real *, integer *, real *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
            integer *), sswap_(integer *, real *, integer *, real *, integer *
            ), ssymv_(char *, integer *, real *, real *, integer *, real *,
            integer *, real *, real *, integer *), xerbla_(char *, integer *);

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --ipiv;
    --work;

    /* Test the input parameters. */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1,*n)) {
        *info = -4;
    }
    if (*info != 0) {
        cnt = -(*info);
        xerbla_("SSYTRI_ROOK", &cnt);
        return 0;
    }

    /* Quick return if possible */
    if (*n == 0) {
        return 0;
    }

    /* Check that the diagonal matrix D is nonsingular.  INFO itself is */
    /* the loop counter so that an early return reports the offending */
    /* index to the caller. */
    if (upper) {

        /* Upper triangular storage: examine D from bottom to top */
        for (*info = *n; *info >= 1; --(*info)) {
            if (ipiv[*info] > 0 && a[*info + *info * a_dim1] == 0.f) {
                return 0;
            }
        }
    } else {

        /* Lower triangular storage: examine D from top to bottom. */
        for (*info = 1; *info <= *n; ++(*info)) {
            if (ipiv[*info] > 0 && a[*info + *info * a_dim1] == 0.f) {
                return 0;
            }
        }
    }
    *info = 0;

    if (upper) {

        /* Compute inv(A) from the factorization A = U*D*U**T. */
        /* K is the main loop index, increasing from 1 to N in steps of */
        /* 1 or 2, depending on the size of the diagonal blocks. */
        k = 1;
        while (k <= *n) {

            if (ipiv[k] > 0) {

                /* 1 x 1 diagonal block: invert it. */
                a[k + k * a_dim1] = 1.f / a[k + k * a_dim1];

                /* Compute column K of the inverse. */
                if (k > 1) {
                    cnt = k - 1;
                    scopy_(&cnt, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1);
                    cnt = k - 1;
                    ssymv_(uplo, &cnt, &c_b11, &a[a_offset], lda, &work[1],
                            &c__1, &c_b13, &a[k * a_dim1 + 1], &c__1);
                    cnt = k - 1;
                    a[k + k * a_dim1] -= sdot_(&cnt, &work[1], &c__1,
                            &a[k * a_dim1 + 1], &c__1);
                }

                /* Interchange rows and columns K and IPIV(K) in the */
                /* leading submatrix A(1:k+1,1:k+1) */
                kp = ipiv[k];
                if (kp != k) {
                    if (kp > 1) {
                        cnt = kp - 1;
                        sswap_(&cnt, &a[k * a_dim1 + 1], &c__1,
                                &a[kp * a_dim1 + 1], &c__1);
                    }
                    cnt = k - kp - 1;
                    sswap_(&cnt, &a[kp + 1 + k * a_dim1], &c__1,
                            &a[kp + (kp + 1) * a_dim1], lda);
                    held = a[k + k * a_dim1];
                    a[k + k * a_dim1] = a[kp + kp * a_dim1];
                    a[kp + kp * a_dim1] = held;
                }

            } else {

                /* 2 x 2 diagonal block: invert it, scaling by the */
                /* magnitude of the off-diagonal entry. */
                offdiag = a[k + (k + 1) * a_dim1];
                scale = f2c_abs(offdiag);
                ak = a[k + k * a_dim1] / scale;
                akp1 = a[k + 1 + (k + 1) * a_dim1] / scale;
                akkp1 = a[k + (k + 1) * a_dim1] / scale;
                det = scale * (ak * akp1 - 1.f);
                a[k + k * a_dim1] = akp1 / det;
                a[k + 1 + (k + 1) * a_dim1] = ak / det;
                a[k + (k + 1) * a_dim1] = -akkp1 / det;

                /* Compute columns K and K+1 of the inverse. */
                if (k > 1) {
                    cnt = k - 1;
                    scopy_(&cnt, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1);
                    cnt = k - 1;
                    ssymv_(uplo, &cnt, &c_b11, &a[a_offset], lda, &work[1],
                            &c__1, &c_b13, &a[k * a_dim1 + 1], &c__1);
                    cnt = k - 1;
                    a[k + k * a_dim1] -= sdot_(&cnt, &work[1], &c__1,
                            &a[k * a_dim1 + 1], &c__1);
                    cnt = k - 1;
                    a[k + (k + 1) * a_dim1] -= sdot_(&cnt,
                            &a[k * a_dim1 + 1], &c__1,
                            &a[(k + 1) * a_dim1 + 1], &c__1);
                    cnt = k - 1;
                    scopy_(&cnt, &a[(k + 1) * a_dim1 + 1], &c__1, &work[1],
                            &c__1);
                    cnt = k - 1;
                    ssymv_(uplo, &cnt, &c_b11, &a[a_offset], lda, &work[1],
                            &c__1, &c_b13, &a[(k + 1) * a_dim1 + 1], &c__1);
                    cnt = k - 1;
                    a[k + 1 + (k + 1) * a_dim1] -= sdot_(&cnt, &work[1],
                            &c__1, &a[(k + 1) * a_dim1 + 1], &c__1);
                }

                /* Interchange rows and columns K and K+1 with -IPIV(K) */
                /* and -IPIV(K+1) in the leading submatrix A(1:k+1,1:k+1) */
                kp = -ipiv[k];
                if (kp != k) {
                    if (kp > 1) {
                        cnt = kp - 1;
                        sswap_(&cnt, &a[k * a_dim1 + 1], &c__1,
                                &a[kp * a_dim1 + 1], &c__1);
                    }
                    cnt = k - kp - 1;
                    sswap_(&cnt, &a[kp + 1 + k * a_dim1], &c__1,
                            &a[kp + (kp + 1) * a_dim1], lda);

                    held = a[k + k * a_dim1];
                    a[k + k * a_dim1] = a[kp + kp * a_dim1];
                    a[kp + kp * a_dim1] = held;
                    held = a[k + (k + 1) * a_dim1];
                    a[k + (k + 1) * a_dim1] = a[kp + (k + 1) * a_dim1];
                    a[kp + (k + 1) * a_dim1] = held;
                }

                /* Second column of the 2 x 2 block. */
                ++k;
                kp = -ipiv[k];
                if (kp != k) {
                    if (kp > 1) {
                        cnt = kp - 1;
                        sswap_(&cnt, &a[k * a_dim1 + 1], &c__1,
                                &a[kp * a_dim1 + 1], &c__1);
                    }
                    cnt = k - kp - 1;
                    sswap_(&cnt, &a[kp + 1 + k * a_dim1], &c__1,
                            &a[kp + (kp + 1) * a_dim1], lda);
                    held = a[k + k * a_dim1];
                    a[k + k * a_dim1] = a[kp + kp * a_dim1];
                    a[kp + kp * a_dim1] = held;
                }
            }

            ++k;
        }

    } else {

        /* Compute inv(A) from the factorization A = L*D*L**T. */
        /* K is the main loop index, decreasing from N to 1 in steps of */
        /* 1 or 2, depending on the size of the diagonal blocks. */
        k = *n;
        while (k >= 1) {

            if (ipiv[k] > 0) {

                /* 1 x 1 diagonal block: invert it. */
                a[k + k * a_dim1] = 1.f / a[k + k * a_dim1];

                /* Compute column K of the inverse. */
                if (k < *n) {
                    cnt = *n - k;
                    scopy_(&cnt, &a[k + 1 + k * a_dim1], &c__1, &work[1],
                            &c__1);
                    cnt = *n - k;
                    ssymv_(uplo, &cnt, &c_b11, &a[k + 1 + (k + 1) * a_dim1],
                            lda, &work[1], &c__1, &c_b13,
                            &a[k + 1 + k * a_dim1], &c__1);
                    cnt = *n - k;
                    a[k + k * a_dim1] -= sdot_(&cnt, &work[1], &c__1,
                            &a[k + 1 + k * a_dim1], &c__1);
                }

                /* Interchange rows and columns K and IPIV(K) in the */
                /* trailing submatrix A(k-1:n,k-1:n) */
                kp = ipiv[k];
                if (kp != k) {
                    if (kp < *n) {
                        cnt = *n - kp;
                        sswap_(&cnt, &a[kp + 1 + k * a_dim1], &c__1,
                                &a[kp + 1 + kp * a_dim1], &c__1);
                    }
                    cnt = kp - k - 1;
                    sswap_(&cnt, &a[k + 1 + k * a_dim1], &c__1,
                            &a[kp + (k + 1) * a_dim1], lda);
                    held = a[k + k * a_dim1];
                    a[k + k * a_dim1] = a[kp + kp * a_dim1];
                    a[kp + kp * a_dim1] = held;
                }

            } else {

                /* 2 x 2 diagonal block: invert it, scaling by the */
                /* magnitude of the off-diagonal entry. */
                offdiag = a[k + (k - 1) * a_dim1];
                scale = f2c_abs(offdiag);
                ak = a[k - 1 + (k - 1) * a_dim1] / scale;
                akp1 = a[k + k * a_dim1] / scale;
                akkp1 = a[k + (k - 1) * a_dim1] / scale;
                det = scale * (ak * akp1 - 1.f);
                a[k - 1 + (k - 1) * a_dim1] = akp1 / det;
                a[k + k * a_dim1] = ak / det;
                a[k + (k - 1) * a_dim1] = -akkp1 / det;

                /* Compute columns K-1 and K of the inverse. */
                if (k < *n) {
                    cnt = *n - k;
                    scopy_(&cnt, &a[k + 1 + k * a_dim1], &c__1, &work[1],
                            &c__1);
                    cnt = *n - k;
                    ssymv_(uplo, &cnt, &c_b11, &a[k + 1 + (k + 1) * a_dim1],
                            lda, &work[1], &c__1, &c_b13,
                            &a[k + 1 + k * a_dim1], &c__1);
                    cnt = *n - k;
                    a[k + k * a_dim1] -= sdot_(&cnt, &work[1], &c__1,
                            &a[k + 1 + k * a_dim1], &c__1);
                    cnt = *n - k;
                    a[k + (k - 1) * a_dim1] -= sdot_(&cnt,
                            &a[k + 1 + k * a_dim1], &c__1,
                            &a[k + 1 + (k - 1) * a_dim1], &c__1);
                    cnt = *n - k;
                    scopy_(&cnt, &a[k + 1 + (k - 1) * a_dim1], &c__1,
                            &work[1], &c__1);
                    cnt = *n - k;
                    ssymv_(uplo, &cnt, &c_b11, &a[k + 1 + (k + 1) * a_dim1],
                            lda, &work[1], &c__1, &c_b13,
                            &a[k + 1 + (k - 1) * a_dim1], &c__1);
                    cnt = *n - k;
                    a[k - 1 + (k - 1) * a_dim1] -= sdot_(&cnt, &work[1],
                            &c__1, &a[k + 1 + (k - 1) * a_dim1], &c__1);
                }

                /* Interchange rows and columns K and K-1 with -IPIV(K) */
                /* and -IPIV(K-1) in the trailing submatrix A(k-1:n,k-1:n) */
                kp = -ipiv[k];
                if (kp != k) {
                    if (kp < *n) {
                        cnt = *n - kp;
                        sswap_(&cnt, &a[kp + 1 + k * a_dim1], &c__1,
                                &a[kp + 1 + kp * a_dim1], &c__1);
                    }
                    cnt = kp - k - 1;
                    sswap_(&cnt, &a[k + 1 + k * a_dim1], &c__1,
                            &a[kp + (k + 1) * a_dim1], lda);

                    held = a[k + k * a_dim1];
                    a[k + k * a_dim1] = a[kp + kp * a_dim1];
                    a[kp + kp * a_dim1] = held;
                    held = a[k + (k - 1) * a_dim1];
                    a[k + (k - 1) * a_dim1] = a[kp + (k - 1) * a_dim1];
                    a[kp + (k - 1) * a_dim1] = held;
                }

                /* Second column of the 2 x 2 block. */
                --k;
                kp = -ipiv[k];
                if (kp != k) {
                    if (kp < *n) {
                        cnt = *n - kp;
                        sswap_(&cnt, &a[kp + 1 + k * a_dim1], &c__1,
                                &a[kp + 1 + kp * a_dim1], &c__1);
                    }
                    cnt = kp - k - 1;
                    sswap_(&cnt, &a[k + 1 + k * a_dim1], &c__1,
                            &a[kp + (k + 1) * a_dim1], lda);
                    held = a[k + k * a_dim1];
                    a[k + k * a_dim1] = a[kp + kp * a_dim1];
                    a[kp + kp * a_dim1] = held;
                }
            }

            --k;
        }
    }

    return 0;

/*     End of SSYTRI_ROOK */

}
/* Subroutine */ int snaitr_(integer *ido, char *bmat, integer *n, integer *k, integer *np, integer *nb, real *resid, real *rnorm, real *v, integer *ldv, real *h__, integer *ldh, integer *ipntr, real *workd, integer * info, ftnlen bmat_len) { /* Initialized data */ static logical first = TRUE_; /* System generated locals */ integer h_dim1, h_offset, v_dim1, v_offset, i__1, i__2; real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer i__, j; static real t0, t1, t2, t3, t4, t5; static integer jj, ipj, irj, ivj; static real ulp, tst1; static integer ierr, iter; static real unfl, ovfl; extern doublereal sdot_(integer *, real *, integer *, real *, integer *); static integer itry; static real temp1; static logical orth1, orth2, step3, step4; extern doublereal snrm2_(integer *, real *, integer *); static real betaj; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static integer infol; extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *, ftnlen); static real xtemp[2]; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); static real wnorm; extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, real *, integer *), ivout_(integer *, integer *, integer *, integer *, char *, ftnlen), smout_(integer *, integer *, integer * , real *, integer *, integer *, char *, ftnlen), svout_(integer *, integer *, real *, integer *, char *, ftnlen), sgetv0_(integer *, char *, integer *, logical *, integer *, integer *, real *, integer *, real *, real *, integer *, real *, integer *, ftnlen); static real rnorm1; extern /* Subroutine */ int slabad_(real *, real *); extern doublereal slamch_(char *, ftnlen); extern /* Subroutine */ int second_(real *), slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer * , integer *, ftnlen); static logical rstart; 
static integer msglvl; static real smlnum; extern doublereal slanhs_(char *, integer *, real *, integer *, real *, ftnlen); /* %----------------------------------------------------% */ /* | Include files for debugging and timing information | */ /* %----------------------------------------------------% */ /* \SCCS Information: @(#) */ /* FILE: debug.h SID: 2.3 DATE OF SID: 11/16/95 RELEASE: 2 */ /* %---------------------------------% */ /* | See debug.doc for documentation | */ /* %---------------------------------% */ /* %------------------% */ /* | Scalar Arguments | */ /* %------------------% */ /* %--------------------------------% */ /* | See stat.doc for documentation | */ /* %--------------------------------% */ /* \SCCS Information: @(#) */ /* FILE: stat.h SID: 2.2 DATE OF SID: 11/16/95 RELEASE: 2 */ /* %-----------------% */ /* | Array Arguments | */ /* %-----------------% */ /* %------------% */ /* | Parameters | */ /* %------------% */ /* %---------------% */ /* | Local Scalars | */ /* %---------------% */ /* %-----------------------% */ /* | Local Array Arguments | */ /* %-----------------------% */ /* %----------------------% */ /* | External Subroutines | */ /* %----------------------% */ /* %--------------------% */ /* | External Functions | */ /* %--------------------% */ /* %---------------------% */ /* | Intrinsic Functions | */ /* %---------------------% */ /* %-----------------% */ /* | Data statements | */ /* %-----------------% */ /* Parameter adjustments */ --workd; --resid; v_dim1 = *ldv; v_offset = 1 + v_dim1; v -= v_offset; h_dim1 = *ldh; h_offset = 1 + h_dim1; h__ -= h_offset; --ipntr; /* Function Body */ /* %-----------------------% */ /* | Executable Statements | */ /* %-----------------------% */ if (first) { /* %-----------------------------------------% */ /* | Set machine-dependent constants for the | */ /* | the splitting and deflation criterion. | */ /* | If norm(H) <= sqrt(OVFL), | */ /* | overflow should not occur. 
| */ /* | REFERENCE: LAPACK subroutine slahqr | */ /* %-----------------------------------------% */ unfl = slamch_("safe minimum", (ftnlen)12); ovfl = 1.f / unfl; slabad_(&unfl, &ovfl); ulp = slamch_("precision", (ftnlen)9); smlnum = unfl * (*n / ulp); first = FALSE_; } if (*ido == 0) { /* %-------------------------------% */ /* | Initialize timing statistics | */ /* | & message level for debugging | */ /* %-------------------------------% */ second_(&t0); msglvl = debug_1.mnaitr; /* %------------------------------% */ /* | Initial call to this routine | */ /* %------------------------------% */ *info = 0; step3 = FALSE_; step4 = FALSE_; rstart = FALSE_; orth1 = FALSE_; orth2 = FALSE_; j = *k + 1; ipj = 1; irj = ipj + *n; ivj = irj + *n; } /* %-------------------------------------------------% */ /* | When in reverse communication mode one of: | */ /* | STEP3, STEP4, ORTH1, ORTH2, RSTART | */ /* | will be .true. when .... | */ /* | STEP3: return from computing OP*v_{j}. | */ /* | STEP4: return from computing B-norm of OP*v_{j} | */ /* | ORTH1: return from computing B-norm of r_{j+1} | */ /* | ORTH2: return from computing B-norm of | */ /* | correction to the residual vector. | */ /* | RSTART: return from OP computations needed by | */ /* | sgetv0. 
| */ /* %-------------------------------------------------% */ if (step3) { goto L50; } if (step4) { goto L60; } if (orth1) { goto L70; } if (orth2) { goto L90; } if (rstart) { goto L30; } /* %-----------------------------% */ /* | Else this is the first step | */ /* %-----------------------------% */ /* %--------------------------------------------------------------% */ /* | | */ /* | A R N O L D I I T E R A T I O N L O O P | */ /* | | */ /* | Note: B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) | */ /* %--------------------------------------------------------------% */ L1000: if (msglvl > 1) { ivout_(&debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: generat" "ing Arnoldi vector number", (ftnlen)40); svout_(&debug_1.logfil, &c__1, rnorm, &debug_1.ndigit, "_naitr: B-no" "rm of the current residual is", (ftnlen)41); } /* %---------------------------------------------------% */ /* | STEP 1: Check if the B norm of j-th residual | */ /* | vector is zero. Equivalent to determing whether | */ /* | an exact j-step Arnoldi factorization is present. | */ /* %---------------------------------------------------% */ betaj = *rnorm; if (*rnorm > 0.f) { goto L40; } /* %---------------------------------------------------% */ /* | Invariant subspace found, generate a new starting | */ /* | vector which is orthogonal to the current Arnoldi | */ /* | basis and continue the iteration. | */ /* %---------------------------------------------------% */ if (msglvl > 0) { ivout_(&debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: ****** " "RESTART AT STEP ******", (ftnlen)37); } /* %---------------------------------------------% */ /* | ITRY is the loop variable that controls the | */ /* | maximum amount of times that a restart is | */ /* | attempted. 
NRSTRT is used by stat.h | */ /* %---------------------------------------------% */ betaj = 0.f; ++timing_1.nrstrt; itry = 1; L20: rstart = TRUE_; *ido = 0; L30: /* %--------------------------------------% */ /* | If in reverse communication mode and | */ /* | RSTART = .true. flow returns here. | */ /* %--------------------------------------% */ sgetv0_(ido, bmat, &itry, &c_false, n, &j, &v[v_offset], ldv, &resid[1], rnorm, &ipntr[1], &workd[1], &ierr, (ftnlen)1); if (*ido != 99) { goto L9000; } if (ierr < 0) { ++itry; if (itry <= 3) { goto L20; } /* %------------------------------------------------% */ /* | Give up after several restart attempts. | */ /* | Set INFO to the size of the invariant subspace | */ /* | which spans OP and exit. | */ /* %------------------------------------------------% */ *info = j - 1; second_(&t1); timing_1.tnaitr += t1 - t0; *ido = 99; goto L9000; } L40: /* %---------------------------------------------------------% */ /* | STEP 2: v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm | */ /* | Note that p_{j} = B*r_{j-1}. In order to avoid overflow | */ /* | when reciprocating a small RNORM, test against lower | */ /* | machine bound. | */ /* %---------------------------------------------------------% */ scopy_(n, &resid[1], &c__1, &v[j * v_dim1 + 1], &c__1); if (*rnorm >= unfl) { temp1 = 1.f / *rnorm; sscal_(n, &temp1, &v[j * v_dim1 + 1], &c__1); sscal_(n, &temp1, &workd[ipj], &c__1); } else { /* %-----------------------------------------% */ /* | To scale both v_{j} and p_{j} carefully | */ /* | use LAPACK routine SLASCL | */ /* %-----------------------------------------% */ slascl_("General", &i__, &i__, rnorm, &c_b25, n, &c__1, &v[j * v_dim1 + 1], n, &infol, (ftnlen)7); slascl_("General", &i__, &i__, rnorm, &c_b25, n, &c__1, &workd[ipj], n, &infol, (ftnlen)7); } /* %------------------------------------------------------% */ /* | STEP 3: r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} | */ /* | Note that this is not quite yet r_{j}. 
See STEP 4 | */ /* %------------------------------------------------------% */ step3 = TRUE_; ++timing_1.nopx; second_(&t2); scopy_(n, &v[j * v_dim1 + 1], &c__1, &workd[ivj], &c__1); ipntr[1] = ivj; ipntr[2] = irj; ipntr[3] = ipj; *ido = 1; /* %-----------------------------------% */ /* | Exit in order to compute OP*v_{j} | */ /* %-----------------------------------% */ goto L9000; L50: /* %----------------------------------% */ /* | Back from reverse communication; | */ /* | WORKD(IRJ:IRJ+N-1) := OP*v_{j} | */ /* | if step3 = .true. | */ /* %----------------------------------% */ second_(&t3); timing_1.tmvopx += t3 - t2; step3 = FALSE_; /* %------------------------------------------% */ /* | Put another copy of OP*v_{j} into RESID. | */ /* %------------------------------------------% */ scopy_(n, &workd[irj], &c__1, &resid[1], &c__1); /* %---------------------------------------% */ /* | STEP 4: Finish extending the Arnoldi | */ /* | factorization to length j. | */ /* %---------------------------------------% */ second_(&t2); if (*(unsigned char *)bmat == 'G') { ++timing_1.nbx; step4 = TRUE_; ipntr[1] = irj; ipntr[2] = ipj; *ido = 2; /* %-------------------------------------% */ /* | Exit in order to compute B*OP*v_{j} | */ /* %-------------------------------------% */ goto L9000; } else if (*(unsigned char *)bmat == 'I') { scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1); } L60: /* %----------------------------------% */ /* | Back from reverse communication; | */ /* | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} | */ /* | if step4 = .true. | */ /* %----------------------------------% */ if (*(unsigned char *)bmat == 'G') { second_(&t3); timing_1.tmvbx += t3 - t2; } step4 = FALSE_; /* %-------------------------------------% */ /* | The following is needed for STEP 5. | */ /* | Compute the B-norm of OP*v_{j}. 
| */ /* %-------------------------------------% */ if (*(unsigned char *)bmat == 'G') { wnorm = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1); wnorm = sqrt((dabs(wnorm))); } else if (*(unsigned char *)bmat == 'I') { wnorm = snrm2_(n, &resid[1], &c__1); } /* %-----------------------------------------% */ /* | Compute the j-th residual corresponding | */ /* | to the j step factorization. | */ /* | Use Classical Gram Schmidt and compute: | */ /* | w_{j} <- V_{j}^T * B * OP * v_{j} | */ /* | r_{j} <- OP*v_{j} - V_{j} * w_{j} | */ /* %-----------------------------------------% */ /* %------------------------------------------% */ /* | Compute the j Fourier coefficients w_{j} | */ /* | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}. | */ /* %------------------------------------------% */ sgemv_("T", n, &j, &c_b25, &v[v_offset], ldv, &workd[ipj], &c__1, &c_b47, &h__[j * h_dim1 + 1], &c__1, (ftnlen)1); /* %--------------------------------------% */ /* | Orthogonalize r_{j} against V_{j}. | */ /* | RESID contains OP*v_{j}. See STEP 3. | */ /* %--------------------------------------% */ sgemv_("N", n, &j, &c_b50, &v[v_offset], ldv, &h__[j * h_dim1 + 1], &c__1, &c_b25, &resid[1], &c__1, (ftnlen)1); if (j > 1) { h__[j + (j - 1) * h_dim1] = betaj; } second_(&t4); orth1 = TRUE_; second_(&t2); if (*(unsigned char *)bmat == 'G') { ++timing_1.nbx; scopy_(n, &resid[1], &c__1, &workd[irj], &c__1); ipntr[1] = irj; ipntr[2] = ipj; *ido = 2; /* %----------------------------------% */ /* | Exit in order to compute B*r_{j} | */ /* %----------------------------------% */ goto L9000; } else if (*(unsigned char *)bmat == 'I') { scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1); } L70: /* %---------------------------------------------------% */ /* | Back from reverse communication if ORTH1 = .true. | */ /* | WORKD(IPJ:IPJ+N-1) := B*r_{j}. 
| */ /* %---------------------------------------------------% */ if (*(unsigned char *)bmat == 'G') { second_(&t3); timing_1.tmvbx += t3 - t2; } orth1 = FALSE_; /* %------------------------------% */ /* | Compute the B-norm of r_{j}. | */ /* %------------------------------% */ if (*(unsigned char *)bmat == 'G') { *rnorm = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1); *rnorm = sqrt((dabs(*rnorm))); } else if (*(unsigned char *)bmat == 'I') { *rnorm = snrm2_(n, &resid[1], &c__1); } /* %-----------------------------------------------------------% */ /* | STEP 5: Re-orthogonalization / Iterative refinement phase | */ /* | Maximum NITER_ITREF tries. | */ /* | | */ /* | s = V_{j}^T * B * r_{j} | */ /* | r_{j} = r_{j} - V_{j}*s | */ /* | alphaj = alphaj + s_{j} | */ /* | | */ /* | The stopping criteria used for iterative refinement is | */ /* | discussed in Parlett's book SEP, page 107 and in Gragg & | */ /* | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990. | */ /* | Determine if we need to correct the residual. The goal is | */ /* | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} || | */ /* | The following test determines whether the sine of the | */ /* | angle between OP*x and the computed residual is less | */ /* | than or equal to 0.717. | */ /* %-----------------------------------------------------------% */ if (*rnorm > wnorm * .717f) { goto L100; } iter = 0; ++timing_1.nrorth; /* %---------------------------------------------------% */ /* | Enter the Iterative refinement phase. If further | */ /* | refinement is necessary, loop back here. The loop | */ /* | variable is ITER. 
Perform a step of Classical | */ /* | Gram-Schmidt using all the Arnoldi vectors V_{j} | */ /* %---------------------------------------------------% */ L80: if (msglvl > 2) { xtemp[0] = wnorm; xtemp[1] = *rnorm; svout_(&debug_1.logfil, &c__2, xtemp, &debug_1.ndigit, "_naitr: re-o" "rthonalization; wnorm and rnorm are", (ftnlen)47); svout_(&debug_1.logfil, &j, &h__[j * h_dim1 + 1], &debug_1.ndigit, "_naitr: j-th column of H", (ftnlen)24); } /* %----------------------------------------------------% */ /* | Compute V_{j}^T * B * r_{j}. | */ /* | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). | */ /* %----------------------------------------------------% */ sgemv_("T", n, &j, &c_b25, &v[v_offset], ldv, &workd[ipj], &c__1, &c_b47, &workd[irj], &c__1, (ftnlen)1); /* %---------------------------------------------% */ /* | Compute the correction to the residual: | */ /* | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). | */ /* | The correction to H is v(:,1:J)*H(1:J,1:J) | */ /* | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j. | */ /* %---------------------------------------------% */ sgemv_("N", n, &j, &c_b50, &v[v_offset], ldv, &workd[irj], &c__1, &c_b25, &resid[1], &c__1, (ftnlen)1); saxpy_(&j, &c_b25, &workd[irj], &c__1, &h__[j * h_dim1 + 1], &c__1); orth2 = TRUE_; second_(&t2); if (*(unsigned char *)bmat == 'G') { ++timing_1.nbx; scopy_(n, &resid[1], &c__1, &workd[irj], &c__1); ipntr[1] = irj; ipntr[2] = ipj; *ido = 2; /* %-----------------------------------% */ /* | Exit in order to compute B*r_{j}. | */ /* | r_{j} is the corrected residual. | */ /* %-----------------------------------% */ goto L9000; } else if (*(unsigned char *)bmat == 'I') { scopy_(n, &resid[1], &c__1, &workd[ipj], &c__1); } L90: /* %---------------------------------------------------% */ /* | Back from reverse communication if ORTH2 = .true. 
| */ /* %---------------------------------------------------% */ if (*(unsigned char *)bmat == 'G') { second_(&t3); timing_1.tmvbx += t3 - t2; } /* %-----------------------------------------------------% */ /* | Compute the B-norm of the corrected residual r_{j}. | */ /* %-----------------------------------------------------% */ if (*(unsigned char *)bmat == 'G') { rnorm1 = sdot_(n, &resid[1], &c__1, &workd[ipj], &c__1); rnorm1 = sqrt((dabs(rnorm1))); } else if (*(unsigned char *)bmat == 'I') { rnorm1 = snrm2_(n, &resid[1], &c__1); } if (msglvl > 0 && iter > 0) { ivout_(&debug_1.logfil, &c__1, &j, &debug_1.ndigit, "_naitr: Iterati" "ve refinement for Arnoldi residual", (ftnlen)49); if (msglvl > 2) { xtemp[0] = *rnorm; xtemp[1] = rnorm1; svout_(&debug_1.logfil, &c__2, xtemp, &debug_1.ndigit, "_naitr: " "iterative refinement ; rnorm and rnorm1 are", (ftnlen)51); } } /* %-----------------------------------------% */ /* | Determine if we need to perform another | */ /* | step of re-orthogonalization. | */ /* %-----------------------------------------% */ if (rnorm1 > *rnorm * .717f) { /* %---------------------------------------% */ /* | No need for further refinement. | */ /* | The cosine of the angle between the | */ /* | corrected residual vector and the old | */ /* | residual vector is greater than 0.717 | */ /* | In other words the corrected residual | */ /* | and the old residual vector share an | */ /* | angle of less than arcCOS(0.717) | */ /* %---------------------------------------% */ *rnorm = rnorm1; } else { /* %-------------------------------------------% */ /* | Another step of iterative refinement step | */ /* | is required. 
NITREF is used by stat.h | */ /* %-------------------------------------------% */ ++timing_1.nitref; *rnorm = rnorm1; ++iter; if (iter <= 1) { goto L80; } /* %-------------------------------------------------% */ /* | Otherwise RESID is numerically in the span of V | */ /* %-------------------------------------------------% */ i__1 = *n; for (jj = 1; jj <= i__1; ++jj) { resid[jj] = 0.f; /* L95: */ } *rnorm = 0.f; } /* %----------------------------------------------% */ /* | Branch here directly if iterative refinement | */ /* | wasn't necessary or after at most NITER_REF | */ /* | steps of iterative refinement. | */ /* %----------------------------------------------% */ L100: rstart = FALSE_; orth2 = FALSE_; second_(&t5); timing_1.titref += t5 - t4; /* %------------------------------------% */ /* | STEP 6: Update j = j+1; Continue | */ /* %------------------------------------% */ ++j; if (j > *k + *np) { second_(&t1); timing_1.tnaitr += t1 - t0; *ido = 99; i__1 = *k + *np - 1; for (i__ = max(1,*k); i__ <= i__1; ++i__) { /* %--------------------------------------------% */ /* | Check for splitting and deflation. 
| */ /* | Use a standard test as in the QR algorithm | */ /* | REFERENCE: LAPACK subroutine slahqr | */ /* %--------------------------------------------% */ tst1 = (r__1 = h__[i__ + i__ * h_dim1], dabs(r__1)) + (r__2 = h__[ i__ + 1 + (i__ + 1) * h_dim1], dabs(r__2)); if (tst1 == 0.f) { i__2 = *k + *np; tst1 = slanhs_("1", &i__2, &h__[h_offset], ldh, &workd[*n + 1] , (ftnlen)1); } /* Computing MAX */ r__2 = ulp * tst1; if ((r__1 = h__[i__ + 1 + i__ * h_dim1], dabs(r__1)) <= dmax(r__2, smlnum)) { h__[i__ + 1 + i__ * h_dim1] = 0.f; } /* L110: */ } if (msglvl > 2) { i__1 = *k + *np; i__2 = *k + *np; smout_(&debug_1.logfil, &i__1, &i__2, &h__[h_offset], ldh, & debug_1.ndigit, "_naitr: Final upper Hessenberg matrix H" " of order K+NP", (ftnlen)53); } goto L9000; } /* %--------------------------------------------------------% */ /* | Loop back to extend the factorization by another step. | */ /* %--------------------------------------------------------% */ goto L1000; /* %---------------------------------------------------------------% */ /* | | */ /* | E N D O F M A I N I T E R A T I O N L O O P | */ /* | | */ /* %---------------------------------------------------------------% */ L9000: return 0; /* %---------------% */ /* | End of snaitr | */ /* %---------------% */ } /* snaitr_ */
/**
 * Single-precision dot product, forwarded to the BLAS routine sdot_.
 *
 * @param N     value passed to sdot_ as the element count
 * @param X     first input vector (not modified by this wrapper)
 * @param incX  value passed to sdot_ as the stride of X
 * @param Y     second input vector (not modified by this wrapper)
 * @param incY  value passed to sdot_ as the stride of Y
 * @return      the result of sdot_, converted to float
 */
GURLS_EXPORT float dot(const int N, const float *X, const int incX, const float *Y, const int incY)
{
    // sdot_ receives every argument through a non-const pointer, so the
    // constness of the parameters is cast away for the duration of the call.
    int *const countPtr = const_cast<int*>(&N);
    float *const lhsPtr = const_cast<float*>(X);
    int *const lhsStridePtr = const_cast<int*>(&incX);
    float *const rhsPtr = const_cast<float*>(Y);
    int *const rhsStridePtr = const_cast<int*>(&incY);

    return sdot_(countPtr, lhsPtr, lhsStridePtr, rhsPtr, rhsStridePtr);
}
/*
 * CGGBAL -- LAPACK routine (version 3.2), machine-translated from Fortran.
 * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd., November 2006
 *
 * Purpose
 * =======
 * CGGBAL balances a pair of general complex matrices (A,B).  This
 * involves, first, permuting A and B by similarity transformations to
 * isolate eigenvalues in the first 1 to ILO-1 and last IHI+1 to N
 * elements on the diagonal; and second, applying a diagonal similarity
 * transformation to rows and columns ILO to IHI to make the rows
 * and columns as close in norm as possible.  Both steps are optional.
 * Balancing may reduce the 1-norm of the matrices, and improve the
 * accuracy of the computed eigenvalues and/or eigenvectors in the
 * generalized eigenvalue problem A*x = lambda*B*x.
 *
 * Arguments
 * =========
 * JOB     (input) CHARACTER*1
 *         Specifies the operations to be performed on A and B:
 *         = 'N': none: simply set ILO = 1, IHI = N, LSCALE(I) = 1.0
 *                and RSCALE(I) = 1.0 for i=1,...,N;
 *         = 'P': permute only;
 *         = 'S': scale only;
 *         = 'B': both permute and scale.
 * N       (input) INTEGER
 *         The order of the matrices A and B.  N >= 0.
 * A       (input/output) COMPLEX array, dimension (LDA,N)
 *         On entry, the input matrix A.
 *         On exit, A is overwritten by the balanced matrix.
 *         If JOB = 'N', A is not referenced.
 * LDA     (input) INTEGER
 *         The leading dimension of the array A.  LDA >= MAX(1,N).
 * B       (input/output) COMPLEX array, dimension (LDB,N)
 *         On entry, the input matrix B.
 *         On exit, B is overwritten by the balanced matrix.
 *         If JOB = 'N', B is not referenced.
 * LDB     (input) INTEGER
 *         The leading dimension of the array B.  LDB >= MAX(1,N).
 * ILO     (output) INTEGER
 * IHI     (output) INTEGER
 *         ILO and IHI are set to integers such that on exit
 *         A(i,j) = 0 and B(i,j) = 0 if i > j and
 *         j = 1,...,ILO-1 or i = IHI+1,...,N.
 *         If JOB = 'N' or 'S', ILO = 1 and IHI = N.
 * LSCALE  (output) REAL array, dimension (N)
 *         Details of the permutations and scaling factors applied
 *         to the left side of A and B.  If P(j) is the index of the
 *         row interchanged with row j, and D(j) is the scaling factor
 *         applied to row j, then
 *           LSCALE(j) = P(j)    for J = 1,...,ILO-1
 *                     = D(j)    for J = ILO,...,IHI
 *                     = P(j)    for J = IHI+1,...,N.
 *         The order in which the interchanges are made is N to IHI+1,
 *         then 1 to ILO-1.
 * RSCALE  (output) REAL array, dimension (N)
 *         Details of the permutations and scaling factors applied
 *         to the right side of A and B.  If P(j) is the index of the
 *         column interchanged with column j, and D(j) is the scaling
 *         factor applied to column j, then
 *           RSCALE(j) = P(j)    for J = 1,...,ILO-1
 *                     = D(j)    for J = ILO,...,IHI
 *                     = P(j)    for J = IHI+1,...,N.
 *         The order in which the interchanges are made is N to IHI+1,
 *         then 1 to ILO-1.
 * WORK    (workspace) REAL array, dimension (lwork)
 *         lwork must be at least MAX(1,6*N) when JOB = 'S' or 'B', and
 *         at least 1 when JOB = 'N' or 'P'.
 *         The scaling phase addresses it as six consecutive segments of
 *         length N (offsets 0, N, 2N, 3N, 4N and 5N).
 * INFO    (output) INTEGER
 *         = 0:  successful exit
 *         < 0:  if INFO = -i, the i-th argument had an illegal value.
 *
 * The function itself returns 0 on every path; errors are reported
 * through INFO (and xerbla_).
 *
 * Further Details
 * ===============
 * See R.C. WARD, Balancing the generalized eigenvalue problem,
 *                SIAM J. Sci. Stat. Comp. 2 (1981), 141-152.
 *
 * NOTE(review): c__1, c_b36 and c_b72 are constants defined outside this
 * block.  The surrounding comments and usage suggest 1, 10.f and .5f
 * respectively -- confirm against their definitions.
 */
int cggbal_(char *job, int *n, complex *a, int *lda,
            complex *b, int *ldb, int *ilo, int *ihi,
            float *lscale, float *rscale, float *work, int *info)
{
    /* System generated locals */
    int a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
    float r__1, r__2, r__3;

    /* Builtin functions */
    double r_lg10(float *), r_imag(complex *), c_abs(complex *),
           r_sign(float *, float *), pow_ri(float *, int *);

    /* Local variables */
    int i__, j, k, l, m;
    float t;
    int jc;
    float ta, tb, tc;
    int ir;
    float ew;
    int it, nr, ip1, jp1, lm1;
    float cab, rab, ewc, cor, sum;
    int nrp2, icab, lcab;
    float beta, coef;
    int irab, lrab;
    float basl, cmax;
    extern double sdot_(int *, float *, int *, float *, int *);
    float coef2, coef5, gamma, alpha;
    extern int lsame_(char *, char *);
    extern int sscal_(int *, float *, float *, int *);
    float sfmin;
    extern int cswap_(int *, complex *, int *, complex *, int *);
    float sfmax;
    int iflow, kount;
    extern int saxpy_(int *, float *, float *, int *, float *, int *);
    float pgamma;
    extern int icamax_(int *, complex *, int *);
    extern double slamch_(char *);
    extern int csscal_(int *, float *, complex *, int *),
               xerbla_(char *, int *);
    int lsfmin, lsfmax;

    /* Parameter adjustments: shift the base pointers so that the 1-based */
    /* Fortran indices can be used directly, i.e. a[i + j * a_dim1] is */
    /* element (i,j) and lscale[1] is the first element. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --lscale;
    --rscale;
    --work;

    /* Function Body */

    /* Test the input parameters */
    *info = 0;
    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") && !
        lsame_(job, "B")) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < MAX(1,*n)) {
        *info = -4;
    } else if (*ldb < MAX(1,*n)) {
        *info = -6;
    }
    if (*info != 0) {
        /* xerbla_ is given the positive position of the offending argument. */
        i__1 = -(*info);
        xerbla_("CGGBAL", &i__1);
        return 0;
    }

    /* Quick return if possible */
    if (*n == 0) {
        *ilo = 1;
        *ihi = *n;
        return 0;
    }
    if (*n == 1) {
        *ilo = 1;
        *ihi = *n;
        lscale[1] = 1.f;
        rscale[1] = 1.f;
        return 0;
    }
    if (lsame_(job, "N")) {
        /* Nothing to do: report the full range and unit scale factors. */
        *ilo = 1;
        *ihi = *n;
        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            lscale[i__] = 1.f;
            rscale[i__] = 1.f;
            /* L10: */
        }
        return 0;
    }

    /* K and L delimit the active submatrix; they end up in ILO and IHI. */
    k = 1;
    l = *n;
    if (lsame_(job, "S")) {
        /* Scale only: skip the permutation phase. */
        goto L190;
    }
    goto L30;

    /* Permute the matrices A and B to isolate the eigenvalues. */
    /* Find row with one nonzero in columns 1 through L */
L20:
    /* Re-entered from L180 after a row-search swap (iflow == 1): */
    /* shrink the active range from the bottom. */
    l = lm1;
    if (l != 1) {
        goto L30;
    }
    rscale[1] = 1.f;
    lscale[1] = 1.f;
    goto L190;

L30:
    lm1 = l - 1;
    for (i__ = l; i__ >= 1; --i__) {
        /* Look for the first nonzero of row i__ (in A or B) in columns 1..L-1. */
        i__1 = lm1;
        for (j = 1; j <= i__1; ++j) {
            jp1 = j + 1;
            i__2 = i__ + j * a_dim1;
            i__3 = i__ + j * b_dim1;
            if (a[i__2].r != 0.f || a[i__2].i != 0.f ||
                (b[i__3].r != 0.f || b[i__3].i != 0.f)) {
                goto L50;
            }
            /* L40: */
        }
        /* No nonzero found in columns 1..L-1. */
        j = l;
        goto L70;
L50:
        /* A nonzero was found at column jp1-1; row i__ qualifies only if */
        /* columns jp1..L hold no further nonzero. */
        i__1 = l;
        for (j = jp1; j <= i__1; ++j) {
            i__2 = i__ + j * a_dim1;
            i__3 = i__ + j * b_dim1;
            if (a[i__2].r != 0.f || a[i__2].i != 0.f ||
                (b[i__3].r != 0.f || b[i__3].i != 0.f)) {
                goto L80;
            }
            /* L60: */
        }
        j = jp1 - 1;
L70:
        /* Swap row i__ / column j into position L, then resume at L20. */
        m = l;
        iflow = 1;
        goto L160;
L80:
        ;
    }
    goto L100;

    /* Find column with one nonzero in rows K through N */
L90:
    /* Re-entered from L180 after a column-search swap (iflow == 2): */
    /* shrink the active range from the top. */
    ++k;
L100:
    i__1 = l;
    for (j = k; j <= i__1; ++j) {
        /* Look for the first nonzero of column j (in A or B) in rows K..L-1. */
        i__2 = lm1;
        for (i__ = k; i__ <= i__2; ++i__) {
            ip1 = i__ + 1;
            i__3 = i__ + j * a_dim1;
            i__4 = i__ + j * b_dim1;
            if (a[i__3].r != 0.f || a[i__3].i != 0.f ||
                (b[i__4].r != 0.f || b[i__4].i != 0.f)) {
                goto L120;
            }
            /* L110: */
        }
        /* No nonzero found in rows K..L-1. */
        i__ = l;
        goto L140;
L120:
        /* A nonzero was found at row ip1-1; column j qualifies only if */
        /* rows ip1..L hold no further nonzero. */
        i__2 = l;
        for (i__ = ip1; i__ <= i__2; ++i__) {
            i__3 = i__ + j * a_dim1;
            i__4 = i__ + j * b_dim1;
            if (a[i__3].r != 0.f || a[i__3].i != 0.f ||
                (b[i__4].r != 0.f || b[i__4].i != 0.f)) {
                goto L150;
            }
            /* L130: */
        }
        i__ = ip1 - 1;
L140:
        /* Swap row i__ / column j into position K, then resume at L90. */
        m = k;
        iflow = 2;
        goto L160;
L150:
        ;
    }
    goto L190;

    /* Permute rows M and I */
L160:
    /* Record the interchanged row index (stored as a float) in LSCALE. */
    lscale[m] = (float) i__;
    if (i__ == m) {
        goto L170;
    }
    i__1 = *n - k + 1;
    cswap_(&i__1, &a[i__ + k * a_dim1], lda, &a[m + k * a_dim1], lda);
    i__1 = *n - k + 1;
    cswap_(&i__1, &b[i__ + k * b_dim1], ldb, &b[m + k * b_dim1], ldb);

    /* Permute columns M and J */
L170:
    /* Record the interchanged column index (stored as a float) in RSCALE. */
    rscale[m] = (float) j;
    if (j == m) {
        goto L180;
    }
    cswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1);
    cswap_(&l, &b[j * b_dim1 + 1], &c__1, &b[m * b_dim1 + 1], &c__1);

L180:
    /* Return to whichever search requested the swap. */
    switch (iflow) {
        case 1: goto L20;
        case 2: goto L90;
    }

L190:
    *ilo = k;
    *ihi = l;

    if (lsame_(job, "P")) {
        /* Permute only: the active range gets unit scale factors. */
        i__1 = *ihi;
        for (i__ = *ilo; i__ <= i__1; ++i__) {
            lscale[i__] = 1.f;
            rscale[i__] = 1.f;
            /* L195: */
        }
        return 0;
    }

    if (*ilo == *ihi) {
        return 0;
    }

    /* Balance the submatrix in rows ILO to IHI. */
    nr = *ihi - *ilo + 1;
    i__1 = *ihi;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
        /* Clear the scale accumulators and all six WORK segments. */
        rscale[i__] = 0.f;
        lscale[i__] = 0.f;

        work[i__] = 0.f;
        work[i__ + *n] = 0.f;
        work[i__ + (*n << 1)] = 0.f;
        work[i__ + *n * 3] = 0.f;
        work[i__ + (*n << 2)] = 0.f;
        work[i__ + *n * 5] = 0.f;
        /* L200: */
    }

    /* Compute right side vector in resulting linear equations */
    /* basl = log10(c_b36); dividing a log10 by basl converts it to a */
    /* logarithm in base c_b36. */
    basl = r_lg10(&c_b36);
    i__1 = *ihi;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
        i__2 = *ihi;
        for (j = *ilo; j <= i__2; ++j) {
            /* ta: scaled log of |Re|+|Im| of A(i,j), or 0 for a zero entry. */
            i__3 = i__ + j * a_dim1;
            if (a[i__3].r == 0.f && a[i__3].i == 0.f) {
                ta = 0.f;
                goto L210;
            }
            i__3 = i__ + j * a_dim1;
            r__3 = (r__1 = a[i__3].r, ABS(r__1)) +
                   (r__2 = r_imag(&a[i__ + j * a_dim1]), ABS(r__2));
            ta = r_lg10(&r__3) / basl;

L210:
            /* tb: the same quantity for B(i,j). */
            i__3 = i__ + j * b_dim1;
            if (b[i__3].r == 0.f && b[i__3].i == 0.f) {
                tb = 0.f;
                goto L220;
            }
            i__3 = i__ + j * b_dim1;
            r__3 = (r__1 = b[i__3].r, ABS(r__1)) +
                   (r__2 = r_imag(&b[i__ + j * b_dim1]), ABS(r__2));
            tb = r_lg10(&r__3) / basl;

L220:
            /* Accumulate per-row sums in segment 4N and per-column sums */
            /* in segment 5N. */
            work[i__ + (*n << 2)] = work[i__ + (*n << 2)] - ta - tb;
            work[j + *n * 5] = work[j + *n * 5] - ta - tb;
            /* L230: */
        }
        /* L240: */
    }

    coef = 1.f / (float) (nr << 1);
    coef2 = coef * coef;
    coef5 = coef2 * .5f;
    nrp2 = nr + 2;
    beta = 0.f;
    it = 1;

    /* Start generalized conjugate gradient iteration */
    /* (at most nrp2 = nr + 2 passes, see the test before L350) */
L250:
    gamma = sdot_(&nr, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + (*n << 2)]
            , &c__1) + sdot_(&nr, &work[*ilo + *n * 5], &c__1, &work[*ilo + *
            n * 5], &c__1);

    ew = 0.f;
    ewc = 0.f;
    i__1 = *ihi;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
        ew += work[i__ + (*n << 2)];
        ewc += work[i__ + *n * 5];
        /* L260: */
    }

    /* Computing 2nd power */
    r__1 = ew;
    /* Computing 2nd power */
    r__2 = ewc;
    /* Computing 2nd power */
    r__3 = ew - ewc;
    gamma = coef * gamma - coef2 * (r__1 * r__1 + r__2 * r__2) - coef5 * (
            r__3 * r__3);
    if (gamma == 0.f) {
        goto L350;
    }
    if (it != 1) {
        /* pgamma holds gamma from the previous pass; it is first */
        /* assigned at the end of pass 1, hence the guard. */
        beta = gamma / pgamma;
    }
    t = coef5 * (ewc - ew * 3.f);
    tc = coef5 * (ew - ewc * 3.f);

    sscal_(&nr, &beta, &work[*ilo], &c__1);
    sscal_(&nr, &beta, &work[*ilo + *n], &c__1);

    saxpy_(&nr, &coef, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + *n], &
            c__1);
    saxpy_(&nr, &coef, &work[*ilo + *n * 5], &c__1, &work[*ilo], &c__1);

    i__1 = *ihi;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
        work[i__] += tc;
        work[i__ + *n] += t;
        /* L270: */
    }

    /* Apply matrix to vector */
    /* Row pass: kount counts the nonzero entries of A and B in row i__. */
    i__1 = *ihi;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
        kount = 0;
        sum = 0.f;
        i__2 = *ihi;
        for (j = *ilo; j <= i__2; ++j) {
            i__3 = i__ + j * a_dim1;
            if (a[i__3].r == 0.f && a[i__3].i == 0.f) {
                goto L280;
            }
            ++kount;
            sum += work[j];
L280:
            i__3 = i__ + j * b_dim1;
            if (b[i__3].r == 0.f && b[i__3].i == 0.f) {
                goto L290;
            }
            ++kount;
            sum += work[j];
L290:
            ;
        }
        work[i__ + (*n << 1)] = (float) kount * work[i__ + *n] + sum;
        /* L300: */
    }

    /* Column pass: the same computation with rows and columns exchanged. */
    i__1 = *ihi;
    for (j = *ilo; j <= i__1; ++j) {
        kount = 0;
        sum = 0.f;
        i__2 = *ihi;
        for (i__ = *ilo; i__ <= i__2; ++i__) {
            i__3 = i__ + j * a_dim1;
            if (a[i__3].r == 0.f && a[i__3].i == 0.f) {
                goto L310;
            }
            ++kount;
            sum += work[i__ + *n];
L310:
            i__3 = i__ + j * b_dim1;
            if (b[i__3].r == 0.f && b[i__3].i == 0.f) {
                goto L320;
            }
            ++kount;
            sum += work[i__ + *n];
L320:
            ;
        }
        work[j + *n * 3] = (float) kount * work[j] + sum;
        /* L330: */
    }

    sum = sdot_(&nr, &work[*ilo + *n], &c__1, &work[*ilo + (*n << 1)], &c__1)
          + sdot_(&nr, &work[*ilo], &c__1, &work[*ilo + *n * 3], &c__1);
    alpha = gamma / sum;

    /* Determine correction to current iteration */
    /* cmax tracks the largest absolute correction applied in this pass. */
    cmax = 0.f;
    i__1 = *ihi;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
        cor = alpha * work[i__ + *n];
        if (ABS(cor) > cmax) {
            cmax = ABS(cor);
        }
        lscale[i__] += cor;
        cor = alpha * work[i__];
        if (ABS(cor) > cmax) {
            cmax = ABS(cor);
        }
        rscale[i__] += cor;
        /* L340: */
    }
    if (cmax < .5f) {
        /* Every correction is below one half: stop iterating. */
        goto L350;
    }

    r__1 = -alpha;
    saxpy_(&nr, &r__1, &work[*ilo + (*n << 1)], &c__1, &work[*ilo + (*n << 2)]
            , &c__1);
    r__1 = -alpha;
    saxpy_(&nr, &r__1, &work[*ilo + *n * 3], &c__1, &work[*ilo + *n * 5], &
            c__1);

    pgamma = gamma;
    ++it;
    if (it <= nrp2) {
        goto L250;
    }

    /* End generalized conjugate gradient iteration */

L350:
    /* Turn the accumulated exponents into scale factors: round each one */
    /* to an integer, clamp it, and raise c_b36 to that power. */
    sfmin = slamch_("S");
    sfmax = 1.f / sfmin;
    lsfmin = (int) (r_lg10(&sfmin) / basl + 1.f);
    lsfmax = (int) (r_lg10(&sfmax) / basl);
    i__1 = *ihi;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
        /* rab: largest modulus in row i__ (columns ILO..N) of A and B. */
        i__2 = *n - *ilo + 1;
        irab = icamax_(&i__2, &a[i__ + *ilo * a_dim1], lda);
        rab = c_abs(&a[i__ + (irab + *ilo - 1) * a_dim1]);
        i__2 = *n - *ilo + 1;
        irab = icamax_(&i__2, &b[i__ + *ilo * b_dim1], ldb);
        /* Computing MAX */
        r__1 = rab, r__2 = c_abs(&b[i__ + (irab + *ilo - 1) * b_dim1]);
        rab = MAX(r__1,r__2);
        r__1 = rab + sfmin;
        lrab = (int) (r_lg10(&r__1) / basl + 1.f);
        /* Adding r_sign(c_b72, .) before the truncating conversion to int */
        /* rounds the exponent (assuming c_b72 is .5f -- see header note). */
        ir = lscale[i__] + r_sign(&c_b72, &lscale[i__]);
        /* Computing MIN */
        i__2 = MAX(ir,lsfmin), i__2 = MIN(i__2,lsfmax), i__3 = lsfmax - lrab;
        ir = MIN(i__2,i__3);
        lscale[i__] = pow_ri(&c_b36, &ir);

        /* cab: largest modulus in column i__ (rows 1..IHI) of A and B. */
        icab = icamax_(ihi, &a[i__ * a_dim1 + 1], &c__1);
        cab = c_abs(&a[icab + i__ * a_dim1]);
        icab = icamax_(ihi, &b[i__ * b_dim1 + 1], &c__1);
        /* Computing MAX */
        r__1 = cab, r__2 = c_abs(&b[icab + i__ * b_dim1]);
        cab = MAX(r__1,r__2);
        r__1 = cab + sfmin;
        lcab = (int) (r_lg10(&r__1) / basl + 1.f);
        jc = rscale[i__] + r_sign(&c_b72, &rscale[i__]);
        /* Computing MIN */
        i__2 = MAX(jc,lsfmin), i__2 = MIN(i__2,lsfmax), i__3 = lsfmax - lcab;
        jc = MIN(i__2,i__3);
        rscale[i__] = pow_ri(&c_b36, &jc);
        /* L360: */
    }

    /* Row scaling of matrices A and B */
    i__1 = *ihi;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
        i__2 = *n - *ilo + 1;
        csscal_(&i__2, &lscale[i__], &a[i__ + *ilo * a_dim1], lda);
        i__2 = *n - *ilo + 1;
        csscal_(&i__2, &lscale[i__], &b[i__ + *ilo * b_dim1], ldb);
        /* L370: */
    }

    /* Column scaling of matrices A and B */
    i__1 = *ihi;
    for (j = *ilo; j <= i__1; ++j) {
        csscal_(ihi, &rscale[j], &a[j * a_dim1 + 1], &c__1);
        csscal_(ihi, &rscale[j], &b[j * b_dim1 + 1], &c__1);
        /* L380: */
    }

    return 0;

    /* End of CGGBAL */
} /* cggbal_ */
/*
 * SLATRS -- LAPACK auxiliary routine (version 3.4.2), translated from
 * Fortran.  LAPACK is a software package provided by Univ. of Tennessee,
 * Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.
 * September 2012.
 *
 * Solves the triangular system A * x = b or A**T * x = b in place, with
 * scaling to keep intermediate results from overflowing.  When the growth
 * bound computed below is safe, the solve is delegated to the Level 2 BLAS
 * routine STRSV; otherwise a column-by-column Level 1 BLAS solve is used
 * and every rescaling of x is folded into *scale.
 *
 * Arguments
 *   uplo    "U": A is upper triangular; "L": A is lower triangular.
 *   trans   "N": solve A * x = b; "T" or "C": solve A**T * x = b.
 *   diag    "N": A is non-unit triangular; "U": A is unit triangular.
 *   normin  "N": the column norms are computed here and stored in cnorm;
 *           "Y": cnorm is used as supplied by the caller.
 *   n       Order of A.  n >= 0; n == 0 returns immediately.
 *   a, lda  Triangular matrix, addressed as A(i,j) = a[i + j*lda] with
 *           1-based i, j after the pointer adjustment.  lda >= max(1,n).
 *   x       Right-hand side on entry, overwritten with the solution.
 *   scale   Output: scaling factor accumulated while solving.  Set to 0
 *           when a zero diagonal element is met, in which case x holds a
 *           solution of the homogeneous system.
 *   cnorm   1-norms of the off-diagonal part of each column of A.
 *   info    0 on success; -i if the i-th argument had an illegal value.
 *
 * The function itself returns 0 on every path.
 *
 * NOTE(review): c__1 and c_b36 are constants defined outside this block;
 * their use ("Scale x by 1/2") suggests 1 and .5f -- confirm against their
 * definitions.
 */
/* Subroutine */ int slatrs_(char *uplo, char *trans, char *diag, char *normin,
                             integer *n, real *a, integer *lda, real *x,
                             real *scale, real *cnorm, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    real r__1, r__2, r__3;

    /* Local variables */
    integer i__, j;
    real xj, rec, tjj;
    integer jinc;
    real xbnd;
    integer imax;
    real tmax, tjjs;
    extern real sdot_(integer *, real *, integer *, real *, integer *);
    real xmax, grow, sumj;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    real tscal, uscal;
    integer jlast;
    extern real sasum_(integer *, real *, integer *);
    logical upper;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *,
                                       real *, integer *),
                                strsv_(char *, char *, char *, integer *,
                                       real *, integer *, real *, integer *);
    extern real slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    real bignum;
    extern integer isamax_(integer *, real *, integer *);
    logical notran;
    integer jfirst;
    real smlnum;
    logical nounit;

    /* Parameter adjustments: shift the base pointers so that the 1-based */
    /* Fortran indices can be used directly. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --x;
    --cnorm;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    notran = lsame_(trans, "N");
    nounit = lsame_(diag, "N");

    /* Test the input parameters. */
    if (! upper && ! lsame_(uplo, "L")) {
        *info = -1;
    } else if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, "C")) {
        *info = -2;
    } else if (! nounit && ! lsame_(diag, "U")) {
        *info = -3;
    } else if (! lsame_(normin, "Y") && ! lsame_(normin, "N")) {
        *info = -4;
    } else if (*n < 0) {
        *info = -5;
    } else if (*lda < max(1,*n)) {
        *info = -7;
    }
    if (*info != 0) {
        /* xerbla_ is given the positive position of the offending argument. */
        i__1 = -(*info);
        xerbla_("SLATRS", &i__1);
        return 0;
    }

    /* Quick return if possible */
    if (*n == 0) {
        return 0;
    }

    /* Determine machine dependent parameters to control overflow. */
    smlnum = slamch_("Safe minimum") / slamch_("Precision");
    bignum = 1.f / smlnum;
    *scale = 1.f;

    if (lsame_(normin, "N")) {

        /* Compute the 1-norm of each column, not including the diagonal. */
        if (upper) {

            /* A is upper triangular. */
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                i__2 = j - 1;
                cnorm[j] = sasum_(&i__2, &a[j * a_dim1 + 1], &c__1);
            }
        } else {

            /* A is lower triangular. */
            i__1 = *n - 1;
            for (j = 1; j <= i__1; ++j) {
                i__2 = *n - j;
                cnorm[j] = sasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1);
            }
            cnorm[*n] = 0.f;
        }
    }

    /* Scale the column norms by TSCAL if the maximum element in CNORM is */
    /* greater than BIGNUM. */
    imax = isamax_(n, &cnorm[1], &c__1);
    tmax = cnorm[imax];
    if (tmax <= bignum) {
        tscal = 1.f;
    } else {
        tscal = 1.f / (smlnum * tmax);
        sscal_(n, &tscal, &cnorm[1], &c__1);
    }

    /* Compute a bound on the computed solution vector to see if the */
    /* Level 2 BLAS routine STRSV can be used. */
    j = isamax_(n, &x[1], &c__1);
    xmax = (r__1 = x[j], f2c_abs(r__1));
    xbnd = xmax;
    if (notran) {

        /* Compute the growth in A * x = b. */
        if (upper) {
            jfirst = *n;
            jlast = 1;
            jinc = -1;
        } else {
            jfirst = 1;
            jlast = *n;
            jinc = 1;
        }

        if (tscal != 1.f) {
            grow = 0.f;
            goto L50;
        }

        if (nounit) {

            /* A is non-unit triangular. */
            /* Compute GROW = 1/G(j) and XBND = 1/M(j). */
            /* Initially, G(0) = max{ x(i), i=1,...,n} . */
            grow = 1.f / max(xbnd,smlnum);
            xbnd = grow;
            i__1 = jlast;
            i__2 = jinc;
            for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

                /* Exit the loop if the growth factor is too small. */
                if (grow <= smlnum) {
                    goto L50;
                }

                /* M(j) = G(j-1) / f2c_abs(A(j,j)) */
                tjj = (r__1 = a[j + j * a_dim1], f2c_abs(r__1));
                /* Computing MIN */
                r__1 = xbnd;
                r__2 = min(1.f,tjj) * grow;
                xbnd = min(r__1,r__2);
                if (tjj + cnorm[j] >= smlnum) {

                    /* G(j) = G(j-1)*( 1 + CNORM(j) / f2c_abs(A(j,j)) ) */
                    grow *= tjj / (tjj + cnorm[j]);
                } else {

                    /* G(j) could overflow, set GROW to 0. */
                    grow = 0.f;
                }
            }
            grow = xbnd;
        } else {

            /* A is unit triangular. */
            /* Compute GROW = 1/G(j), where G(0) = max{ x(i), i=1,...,n} . */
            /* Computing MIN */
            r__1 = 1.f;
            r__2 = 1.f / max(xbnd,smlnum);
            grow = min(r__1,r__2);
            i__2 = jlast;
            i__1 = jinc;
            for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

                /* Exit the loop if the growth factor is too small. */
                if (grow <= smlnum) {
                    goto L50;
                }

                /* G(j) = G(j-1)*( 1 + CNORM(j) ) */
                grow *= 1.f / (cnorm[j] + 1.f);
            }
        }
L50:
        ;
    } else {

        /* Compute the growth in A**T * x = b. */
        if (upper) {
            jfirst = 1;
            jlast = *n;
            jinc = 1;
        } else {
            jfirst = *n;
            jlast = 1;
            jinc = -1;
        }

        if (tscal != 1.f) {
            grow = 0.f;
            goto L80;
        }

        if (nounit) {

            /* A is non-unit triangular. */
            /* Compute GROW = 1/G(j) and XBND = 1/M(j). */
            /* Initially, M(0) = max{ x(i), i=1,...,n} . */
            grow = 1.f / max(xbnd,smlnum);
            xbnd = grow;
            i__1 = jlast;
            i__2 = jinc;
            for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

                /* Exit the loop if the growth factor is too small. */
                if (grow <= smlnum) {
                    goto L80;
                }

                /* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */
                xj = cnorm[j] + 1.f;
                /* Computing MIN */
                r__1 = grow;
                r__2 = xbnd / xj;
                grow = min(r__1,r__2);

                /* M(j) = M(j-1)*( 1 + CNORM(j) ) / f2c_abs(A(j,j)) */
                tjj = (r__1 = a[j + j * a_dim1], f2c_abs(r__1));
                if (xj > tjj) {
                    xbnd *= tjj / xj;
                }
            }
            grow = min(grow,xbnd);
        } else {

            /* A is unit triangular. */
            /* Compute GROW = 1/G(j), where G(0) = max{ x(i), i=1,...,n} . */
            /* Computing MIN */
            r__1 = 1.f;
            r__2 = 1.f / max(xbnd,smlnum);
            grow = min(r__1,r__2);
            i__2 = jlast;
            i__1 = jinc;
            for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

                /* Exit the loop if the growth factor is too small. */
                if (grow <= smlnum) {
                    goto L80;
                }

                /* G(j) = ( 1 + CNORM(j) )*G(j-1) */
                xj = cnorm[j] + 1.f;
                grow /= xj;
            }
        }
L80:
        ;
    }

    if (grow * tscal > smlnum) {

        /* Use the Level 2 BLAS solve if the reciprocal of the bound on */
        /* elements of X is not too small. */
        strsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1);
    } else {

        /* Use a Level 1 BLAS solve, scaling intermediate results. */
        if (xmax > bignum) {

            /* Scale X so that its components are less than or equal to */
            /* BIGNUM in absolute value. */
            *scale = bignum / xmax;
            sscal_(n, scale, &x[1], &c__1);
            xmax = bignum;
        }

        if (notran) {

            /* Solve A * x = b */
            i__1 = jlast;
            i__2 = jinc;
            for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

                /* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */
                xj = (r__1 = x[j], f2c_abs(r__1));
                if (nounit) {
                    tjjs = a[j + j * a_dim1] * tscal;
                } else {
                    tjjs = tscal;
                    if (tscal == 1.f) {
                        /* Unit diagonal and no scaling: nothing to divide by. */
                        goto L95;
                    }
                }
                tjj = f2c_abs(tjjs);
                if (tjj > smlnum) {

                    /* f2c_abs(A(j,j)) > SMLNUM: */
                    if (tjj < 1.f) {
                        if (xj > tjj * bignum) {

                            /* Scale x by 1/b(j). */
                            rec = 1.f / xj;
                            sscal_(n, &rec, &x[1], &c__1);
                            *scale *= rec;
                            xmax *= rec;
                        }
                    }
                    x[j] /= tjjs;
                    xj = (r__1 = x[j], f2c_abs(r__1));
                } else if (tjj > 0.f) {

                    /* 0 < f2c_abs(A(j,j)) <= SMLNUM: */
                    if (xj > tjj * bignum) {

                        /* Scale x by (1/f2c_abs(x(j)))*f2c_abs(A(j,j))*BIGNUM */
                        /* to avoid overflow when dividing by A(j,j). */
                        rec = tjj * bignum / xj;
                        if (cnorm[j] > 1.f) {

                            /* Scale by 1/CNORM(j) to avoid overflow when */
                            /* multiplying x(j) times column j. */
                            rec /= cnorm[j];
                        }
                        sscal_(n, &rec, &x[1], &c__1);
                        *scale *= rec;
                        xmax *= rec;
                    }
                    x[j] /= tjjs;
                    xj = (r__1 = x[j], f2c_abs(r__1));
                } else {

                    /* A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and */
                    /* scale = 0, and compute a solution to A*x = 0. */
                    i__3 = *n;
                    for (i__ = 1; i__ <= i__3; ++i__) {
                        x[i__] = 0.f;
                    }
                    x[j] = 1.f;
                    xj = 1.f;
                    *scale = 0.f;
                    xmax = 0.f;
                }
L95:

                /* Scale x if necessary to avoid overflow when adding a */
                /* multiple of column j of A. */
                if (xj > 1.f) {
                    rec = 1.f / xj;
                    if (cnorm[j] > (bignum - xmax) * rec) {

                        /* Scale x by 1/(2*f2c_abs(x(j))). */
                        rec *= .5f;
                        sscal_(n, &rec, &x[1], &c__1);
                        *scale *= rec;
                    }
                } else if (xj * cnorm[j] > bignum - xmax) {

                    /* Scale x by 1/2. */
                    sscal_(n, &c_b36, &x[1], &c__1);
                    *scale *= .5f;
                }

                if (upper) {
                    if (j > 1) {

                        /* Compute the update */
                        /* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) */
                        i__3 = j - 1;
                        r__1 = -x[j] * tscal;
                        saxpy_(&i__3, &r__1, &a[j * a_dim1 + 1], &c__1,
                               &x[1], &c__1);
                        i__3 = j - 1;
                        i__ = isamax_(&i__3, &x[1], &c__1);
                        xmax = (r__1 = x[i__], f2c_abs(r__1));
                    }
                } else {
                    if (j < *n) {

                        /* Compute the update */
                        /* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) */
                        i__3 = *n - j;
                        r__1 = -x[j] * tscal;
                        saxpy_(&i__3, &r__1, &a[j + 1 + j * a_dim1], &c__1,
                               &x[j + 1], &c__1);
                        i__3 = *n - j;
                        i__ = j + isamax_(&i__3, &x[j + 1], &c__1);
                        xmax = (r__1 = x[i__], f2c_abs(r__1));
                    }
                }
            }

        } else {

            /* Solve A**T * x = b */
            i__2 = jlast;
            i__1 = jinc;
            for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

                /* Compute x(j) = b(j) - sum A(k,j)*x(k). */
                /*                       k<>j */
                xj = (r__1 = x[j], f2c_abs(r__1));
                uscal = tscal;
                rec = 1.f / max(xmax,1.f);
                if (cnorm[j] > (bignum - xj) * rec) {

                    /* If x(j) could overflow, scale x by 1/(2*XMAX). */
                    rec *= .5f;
                    if (nounit) {
                        tjjs = a[j + j * a_dim1] * tscal;
                    } else {
                        tjjs = tscal;
                    }
                    tjj = f2c_abs(tjjs);
                    if (tjj > 1.f) {

                        /* Divide by A(j,j) when scaling x if A(j,j) > 1. */
                        /* Computing MIN */
                        r__1 = 1.f;
                        r__2 = rec * tjj;
                        rec = min(r__1,r__2);
                        uscal /= tjjs;
                    }
                    if (rec < 1.f) {
                        sscal_(n, &rec, &x[1], &c__1);
                        *scale *= rec;
                        xmax *= rec;
                    }
                }

                sumj = 0.f;
                if (uscal == 1.f) {

                    /* If the scaling needed for A in the dot product is 1, */
                    /* call SDOT to perform the dot product. */
                    if (upper) {
                        i__3 = j - 1;
                        sumj = sdot_(&i__3, &a[j * a_dim1 + 1], &c__1,
                                     &x[1], &c__1);
                    } else if (j < *n) {
                        i__3 = *n - j;
                        sumj = sdot_(&i__3, &a[j + 1 + j * a_dim1], &c__1,
                                     &x[j + 1], &c__1);
                    }
                } else {

                    /* Otherwise, use in-line code for the dot product. */
                    if (upper) {
                        i__3 = j - 1;
                        for (i__ = 1; i__ <= i__3; ++i__) {
                            sumj += a[i__ + j * a_dim1] * uscal * x[i__];
                        }
                    } else if (j < *n) {
                        i__3 = *n;
                        for (i__ = j + 1; i__ <= i__3; ++i__) {
                            sumj += a[i__ + j * a_dim1] * uscal * x[i__];
                        }
                    }
                }

                if (uscal == tscal) {

                    /* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) */
                    /* was not used to scale the dotproduct. */
                    x[j] -= sumj;
                    xj = (r__1 = x[j], f2c_abs(r__1));
                    if (nounit) {
                        tjjs = a[j + j * a_dim1] * tscal;
                    } else {
                        tjjs = tscal;
                        if (tscal == 1.f) {
                            /* Unit diagonal and no scaling: no division needed. */
                            goto L135;
                        }
                    }

                    /* Compute x(j) = x(j) / A(j,j), scaling if necessary. */
                    tjj = f2c_abs(tjjs);
                    if (tjj > smlnum) {

                        /* f2c_abs(A(j,j)) > SMLNUM: */
                        if (tjj < 1.f) {
                            if (xj > tjj * bignum) {

                                /* Scale X by 1/f2c_abs(x(j)). */
                                rec = 1.f / xj;
                                sscal_(n, &rec, &x[1], &c__1);
                                *scale *= rec;
                                xmax *= rec;
                            }
                        }
                        x[j] /= tjjs;
                    } else if (tjj > 0.f) {

                        /* 0 < f2c_abs(A(j,j)) <= SMLNUM: */
                        if (xj > tjj * bignum) {

                            /* Scale x by (1/f2c_abs(x(j)))*f2c_abs(A(j,j))*BIGNUM. */
                            rec = tjj * bignum / xj;
                            sscal_(n, &rec, &x[1], &c__1);
                            *scale *= rec;
                            xmax *= rec;
                        }
                        x[j] /= tjjs;
                    } else {

                        /* A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and */
                        /* scale = 0, and compute a solution to A**T*x = 0. */
                        i__3 = *n;
                        for (i__ = 1; i__ <= i__3; ++i__) {
                            x[i__] = 0.f;
                        }
                        x[j] = 1.f;
                        *scale = 0.f;
                        xmax = 0.f;
                    }
L135:
                    ;
                } else {

                    /* Compute x(j) := x(j) / A(j,j)  - sumj if the dot */
                    /* product has already been divided by 1/A(j,j). */
                    /* tjjs was set in the overflow branch above, the only */
                    /* place where uscal is changed from tscal. */
                    x[j] = x[j] / tjjs - sumj;
                }
                /* Computing MAX */
                r__2 = xmax;
                r__3 = (r__1 = x[j], f2c_abs(r__1));
                xmax = max(r__2,r__3);
            }
        }
        *scale /= tscal;
    }

    /* Scale the column norms by 1/TSCAL for return. */
    if (tscal != 1.f) {
        r__1 = 1.f / tscal;
        sscal_(n, &r__1, &cnorm[1], &c__1);
    }

    return 0;

    /* End of SLATRS */
} /* slatrs_ */
/* Subroutine */ int sstein_(integer *n, real *d, real *e, integer *m, real * w,
        integer *iblock, integer *isplit, real *z, integer *ldz, real * work,
        integer *iwork, integer *ifail, integer *info) {
/*  -- LAPACK routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994

    Purpose
    =======

    SSTEIN computes the eigenvectors of a real symmetric tridiagonal
    matrix T corresponding to specified eigenvalues, using inverse
    iteration.

    The maximum number of iterations allowed for each eigenvector is
    specified by an internal parameter MAXITS (currently set to 5).

    Arguments
    =========

    N       (input) INTEGER
            The order of the matrix.  N >= 0.

    D       (input) REAL array, dimension (N)
            The n diagonal elements of the tridiagonal matrix T.

    E       (input) REAL array, dimension (N)
            The (n-1) subdiagonal elements of the tridiagonal matrix
            T, in elements 1 to N-1.  E(N) need not be set.

    M       (input) INTEGER
            The number of eigenvectors to be found.  0 <= M <= N.

    W       (input) REAL array, dimension (N)
            The first M elements of W contain the eigenvalues for
            which eigenvectors are to be computed.  The eigenvalues
            should be grouped by split-off block and ordered from
            smallest to largest within the block.  ( The output array
            W from SSTEBZ with ORDER = 'B' is expected here. )

    IBLOCK  (input) INTEGER array, dimension (N)
            The submatrix indices associated with the corresponding
            eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to
            the first submatrix from the top, =2 if W(i) belongs to
            the second submatrix, etc.  ( The output array IBLOCK
            from SSTEBZ is expected here. )

    ISPLIT  (input) INTEGER array, dimension (N)
            The splitting points, at which T breaks up into submatrices.
            The first submatrix consists of rows/columns 1 to
            ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1
            through ISPLIT( 2 ), etc.
            ( The output array ISPLIT from SSTEBZ is expected here. )

    Z       (output) REAL array, dimension (LDZ, M)
            The computed eigenvectors.  The eigenvector associated
            with the eigenvalue W(i) is stored in the i-th column of
            Z.  Any vector which fails to converge is set to its current
            iterate after MAXITS iterations.

    LDZ     (input) INTEGER
            The leading dimension of the array Z.  LDZ >= max(1,N).

    WORK    (workspace) REAL array, dimension (5*N)

    IWORK   (workspace) INTEGER array, dimension (N)

    IFAIL   (output) INTEGER array, dimension (M)
            On normal exit, all elements of IFAIL are zero.
            If one or more eigenvectors fail to converge after
            MAXITS iterations, then their indices are stored in
            array IFAIL.

    INFO    (output) INTEGER
            = 0: successful exit.
            < 0: if INFO = -i, the i-th argument had an illegal value
            > 0: if INFO = i, then i eigenvectors failed to converge
                 in MAXITS iterations.  Their indices are stored in
                 array IFAIL.

    Internal Parameters
    ===================

    MAXITS  INTEGER, default = 5
            The maximum number of iterations performed.

    EXTRA   INTEGER, default = 2
            The number of iterations performed after norm growth
            criterion is satisfied, should be at least 1.

    Return value
    ============

    The C function itself always returns 0; status is reported through
    INFO and IFAIL only.

    Implementation notes
    ====================

    - Nearly every scalar local below is declared static, so the routine
      keeps its scratch state in static storage between calls and is not
      reentrant.
    - The accessor macros defined below (D, E, W, Z, WORK, ...) are not
      #undef'd at the end of the function, so they stay defined for the
      rest of the translation unit.

    =====================================================================
*/
    /* Table of constant values (passed by address to the Fortran-style
       routines called below) */
    static integer c__2 = 2;
    static integer c__1 = 1;
    static integer c_n1 = -1;

    /* System generated locals.  z_dim1 and z_offset are declared but never
       used; i__1, i__2, i__3 mostly receive loop bounds that the loops then
       re-read from their source expressions instead. */
    integer z_dim1, z_offset, i__1, i__2, i__3;
    real r__1, r__2, r__3, r__4, r__5;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    static integer jblk, nblk, jmax;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *),
            snrm2_(integer *, real *, integer *);
    static integer i, j, iseed[4], gpind, iinfo;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static integer b1;
    extern doublereal sasum_(integer *, real *, integer *);
    static integer j1;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
            integer *);
    static real ortol;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *,
            real *, integer *);
    static integer indrv1, indrv2, indrv3, indrv4, indrv5, bn;
    static real xj;
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *), slagtf_( integer *,
            real *, real *, real *, real *, real *, real *, integer *,
            integer *);
    static integer nrmchk;
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */ int slagts_(integer *, integer *, real *, real *,
            real *, real *, integer *, real *, real *, integer *);
    static integer blksiz;
    static real onenrm, pertol;
    extern /* Subroutine */ int slarnv_(integer *, integer *, integer *,
            real *);
    static real stpcrt, scl, eps, ctr, sep, nrm, tol;
    static integer its;
    static real xjm, eps1;

/* Accessor macros.  ISEED indexes iseed[] directly (0-based); all the others
   map a 1-based index onto the 0-based C arrays.  Z(I,J) addresses row I,
   column J with *ldz elements between consecutive columns. */
#define ISEED(I) iseed[(I)]
#define D(I) d[(I)-1]
#define E(I) e[(I)-1]
#define W(I) w[(I)-1]
#define IBLOCK(I) iblock[(I)-1]
#define ISPLIT(I) isplit[(I)-1]
#define WORK(I) work[(I)-1]
#define IWORK(I) iwork[(I)-1]
#define IFAIL(I) ifail[(I)-1]
#define Z(I,J) z[(I)-1 + ((J)-1)* ( *ldz)]

/*     Test the input parameters. */

    *info = 0;
    i__1 = *m; /* leftover bound temporary; the loop below reads *m directly */
    for (i = 1; i <= *m; ++i) {
        IFAIL(i) = 0;
/* L10: */
    }

    /* The negative codes are argument positions: n is argument 1, m is 4,
       w is 5, iblock is 6 and ldz is 9. */
    if (*n < 0) {
        *info = -1;
    } else if (*m < 0 || *m > *n) {
        *info =
                -4;
    } else if (*ldz < max(1,*n)) {
        *info = -9;
    } else {
        /* Block indices must be non-decreasing, and eigenvalues must be
           non-decreasing within a block; stop at the first violation. */
        i__1 = *m;
        for (j = 2; j <= *m; ++j) {
            if (IBLOCK(j) < IBLOCK(j - 1)) {
                *info = -6;
                goto L30;
            }
            if (IBLOCK(j) == IBLOCK(j - 1) && W(j) < W(j - 1)) {
                *info = -5;
                goto L30;
            }
/* L20: */
        }
L30:
        ;
    }

    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("SSTEIN", &i__1);
        return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *m == 0) {
        return 0;
    } else if (*n == 1) {
        Z(1,1) = 1.f;
        return 0;
    }

/*     Get machine constants. */

    eps = slamch_("Precision");

/*     Initialize seed for random number generator SLARNV. */

    for (i = 1; i <= 4; ++i) {
        ISEED(i - 1) = 1;
/* L40: */
    }

/*     Initialize pointers.  These are offsets that split WORK into five
       consecutive segments of length n.  As used below: segment 1 holds the
       current iterate, segments 2 and 3 hold copies of E, segment 4 holds a
       copy of D, and segment 5 is handed to SLAGTF/SLAGTS. */

    indrv1 = 0;
    indrv2 = indrv1 + *n;
    indrv3 = indrv2 + *n;
    indrv4 = indrv3 + *n;
    indrv5 = indrv4 + *n;

/*     Compute eigenvectors of matrix blocks. */

    j1 = 1; /* index of the first eigenvalue not yet processed */
    i__1 = IBLOCK(*m);
    for (nblk = 1; nblk <= IBLOCK(*m); ++nblk) {

/*        Find starting and ending indices of block nblk. */

        if (nblk == 1) {
            b1 = 1;
        } else {
            b1 = ISPLIT(nblk - 1) + 1;
        }
        bn = ISPLIT(nblk);
        blksiz = bn - b1 + 1;
        if (blksiz == 1) {
            /* A 1x1 block skips the norm/tolerance setup below; the
               eigenvalue loop then jumps straight to L120. */
            goto L60;
        }
        gpind = b1;

/*        Compute reorthogonalization criterion and stopping criterion.
          onenrm becomes the largest absolute row sum over the first row,
          the last row and every interior row of the block. */

        onenrm = (r__1 = D(b1), dabs(r__1)) + (r__2 = E(b1), dabs(r__2));
/* Computing MAX */
        r__3 = onenrm, r__4 = (r__1 = D(bn), dabs(r__1)) + (r__2 = E(bn - 1),
                dabs(r__2));
        onenrm = dmax(r__3,r__4);
        i__2 = bn - 1;
        for (i = b1 + 1; i <= bn-1; ++i) {
/* Computing MAX */
            r__4 = onenrm, r__5 = (r__1 = D(i), dabs(r__1)) + (r__2 = E(i - 1)
                    , dabs(r__2)) + (r__3 = E(i), dabs(r__3));
            onenrm = dmax(r__4,r__5);
/* L50: */
        }
        ortol = onenrm * .001f;

        stpcrt = sqrt(.1f / blksiz);

/*        Loop through eigenvalues of block nblk. */

L60:
        jblk = 0; /* number of eigenvalues of this block handled so far */
        i__2 = *m;
        for (j = j1; j <= *m; ++j) {
            if (IBLOCK(j) != nblk) {
                /* First eigenvalue of a later block: remember it and move
                   on to the next block. */
                j1 = j;
                goto L160;
            }
            ++jblk;
            xj = W(j);

/*           Skip all the work if the block size is one. */

            if (blksiz == 1) {
                WORK(indrv1 + 1) = 1.f;
                goto L120;
            }

/*           If eigenvalues j and j-1 are too close, add a relatively
             small perturbation.  xjm is only read here when jblk > 1, i.e.
             after the previous pass of this loop has stored it. */

            if (jblk > 1) {
                eps1 = (r__1 = eps * xj, dabs(r__1));
                pertol = eps1 * 10.f;
                sep = xj - xjm;
                if (sep < pertol) {
                    xj = xjm + pertol;
                }
            }

            its = 0;
            nrmchk = 0;

/*           Get random starting vector. */

            slarnv_(&c__2, iseed, &blksiz, &WORK(indrv1 + 1));

/*           Copy the matrix T so it won't be destroyed in factorization. */

            scopy_(&blksiz, &D(b1), &c__1, &WORK(indrv4 + 1), &c__1);
            i__3 = blksiz - 1;
            scopy_(&i__3, &E(b1), &c__1, &WORK(indrv2 + 2), &c__1);
            i__3 = blksiz - 1;
            scopy_(&i__3, &E(b1), &c__1, &WORK(indrv3 + 1), &c__1);

/*           Compute LU factors with partial pivoting  ( PT = LU ) */

            tol = 0.f;
            slagtf_(&blksiz, &WORK(indrv4 + 1), &xj, &WORK(indrv2 + 2), &WORK(
                    indrv3 + 1), &tol, &WORK(indrv5 + 1), &IWORK(1), &iinfo);

/*           Update iteration count.  The literal 5 is the MAXITS value
             documented in the header. */

L70:
            ++its;
            if (its > 5) {
                goto L100;
            }

/*           Normalize and scale the righthand side vector Pb.
   Computing MAX */
            r__2 = eps, r__3 = (r__1 = WORK(indrv4 + blksiz), dabs(r__1));
            scl = blksiz * onenrm * dmax(r__2,r__3) / sasum_(&blksiz, &WORK(
                    indrv1 + 1), &c__1);
            sscal_(&blksiz, &scl, &WORK(indrv1 + 1), &c__1);

/*           Solve the system LU = Pb. */

            slagts_(&c_n1, &blksiz, &WORK(indrv4 + 1), &WORK(indrv2 + 2), &
                    WORK(indrv3 + 1), &WORK(indrv5 + 1), &IWORK(1), &WORK(
                    indrv1 + 1), &tol, &iinfo);

/*           Reorthogonalize by modified Gram-Schmidt if eigenvalues are
             close enough.  gpind is the first column of Z to orthogonalize
             against; it is reset to j when the gap to the previous shift
             exceeds ortol, which disables the loop below. */

            if (jblk == 1) {
                goto L90;
            }
            if ((r__1 = xj - xjm, dabs(r__1)) > ortol) {
                gpind = j;
            }
            if (gpind != j) {
                i__3 = j - 1;
                for (i = gpind; i <= j-1; ++i) {
                    ctr = -(doublereal)sdot_(&blksiz, &WORK(indrv1 + 1), &
                            c__1, &Z(b1,i), &c__1);
                    saxpy_(&blksiz, &ctr, &Z(b1,i), &c__1, &WORK(
                            indrv1 + 1), &c__1);
/* L80: */
                }
            }

/*           Check the infinity norm of the iterate. */

L90:
            jmax = isamax_(&blksiz, &WORK(indrv1 + 1), &c__1);
            nrm = (r__1 = WORK(indrv1 + jmax), dabs(r__1));

/*           Continue for additional iterations after norm reaches
             stopping criterion.  The literal 3 is presumably EXTRA + 1 with
             the documented EXTRA = 2 -- confirm against the Fortran
             source. */

            if (nrm < stpcrt) {
                goto L70;
            }
            ++nrmchk;
            if (nrmchk < 3) {
                goto L70;
            }

            goto L110;

/*           If stopping criterion was not satisfied, update info and
             store eigenvector number in array ifail. */

L100:
            ++(*info);
            IFAIL(*info) = j;

/*           Accept iterate as jth eigenvector: scale it by the reciprocal
             of its SNRM2 value, with the sign chosen so that the entry
             selected by ISAMAX is non-negative. */

L110:
            scl = 1.f / snrm2_(&blksiz, &WORK(indrv1 + 1), &c__1);
            jmax = isamax_(&blksiz, &WORK(indrv1 + 1), &c__1);
            if (WORK(indrv1 + jmax) < 0.f) {
                scl = -(doublereal)scl;
            }
            sscal_(&blksiz, &scl, &WORK(indrv1 + 1), &c__1);
L120:
            /* Zero all n rows of column j, then copy the block's vector into
               rows b1 .. b1+blksiz-1. */
            i__3 = *n;
            for (i = 1; i <= *n; ++i) {
                Z(i,j) = 0.f;
/* L130: */
            }
            i__3 = blksiz;
            for (i = 1; i <= blksiz; ++i) {
                Z(b1+i-1,j) = WORK(indrv1 + i);
/* L140: */
            }

/*           Save the shift to check eigenvalue spacing at next
             iteration. */

            xjm = xj;

/* L150: */
        }
L160:
        ;
    }

    return 0;

/*     End of SSTEIN */

} /* sstein_ */
/* Subroutine */ int ssytri_(char *uplo, integer *n, real *a, integer *lda,
        integer *ipiv, real *work, integer *info)
{
/*  -- LAPACK routine (version 3.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       March 31, 1993

    Purpose
    =======

    SSYTRI overwrites a real symmetric indefinite matrix, given in the
    factored form A = U*D*U**T or A = L*D*L**T produced by SSYTRF, with
    its inverse.

    Arguments
    =========

    UPLO    (input) CHARACTER*1
            Which triangle holds the factorization:
            = 'U':  upper triangular, A = U*D*U**T;
            = 'L':  lower triangular, A = L*D*L**T.

    N       (input) INTEGER
            The order of the matrix A.  N >= 0.

    A       (input/output) REAL array, dimension (LDA,N)
            On entry, the block diagonal matrix D and the multipliers
            used to obtain the factor U or L as computed by SSYTRF.
            On exit, if INFO = 0, the (symmetric) inverse of the original
            matrix.  Only the triangle selected by UPLO is formed; the
            other triangle is not referenced.

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    IPIV    (input) INTEGER array, dimension (N)
            Details of the interchanges and the block structure of D
            as determined by SSYTRF.

    WORK    (workspace) REAL array, dimension (N)

    INFO    (output) INTEGER
            = 0: successful exit
            < 0: if INFO = -i, the i-th argument had an illegal value
            > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
                 inverse could not be computed.

    The C function itself always returns 0; status is reported via INFO.

    =====================================================================
*/
    /* Constants that must be passed by address to the callees */
    static integer c__1 = 1;
    static real c_b11 = -1.f;
    static real c_b13 = 0.f;

    /* Column stride and index shift for 1-based access to A, plus a
       scratch vector length handed to the BLAS calls */
    integer a_dim1, a_offset, cnt;

    /* Local variables */
    static real temp;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    static real akkp1, d__;
    static integer k;
    static real t;
    extern logical lsame_(char *, char *);
    static integer kstep;
    static logical upper;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
            integer *), sswap_(integer *, real *, integer *, real *, integer *
            ), ssymv_(char *, integer *, real *, real *, integer *, real *,
            integer *, real *, real *, integer *);
    static real ak;
    static integer kp;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static real akp1;

/* a_ref(i,j) is element (i,j) of A, 1-based, once `a` has been shifted */
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]

    /* Shift the array pointers so that 1-based subscripts can be used. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    --work;

    /* Validate the arguments; a negative INFO names the offending one. */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! (upper || lsame_(uplo, "L"))) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1,*n)) {
        *info = -4;
    }
    if (*info != 0) {
        cnt = -(*info);
        xerbla_("SSYTRI", &cnt);
        return 0;
    }

    /* Nothing to do for an empty matrix. */
    if (*n == 0) {
        return 0;
    }

    /* Make sure D is nonsingular.  INFO itself is the scan index, so an
       early return leaves the position of the zero pivot in INFO. */
    if (upper) {
        /* Upper storage: walk the diagonal from bottom to top. */
        *info = *n;
        while (*info >= 1) {
            if (ipiv[*info] > 0 && a_ref(*info, *info) == 0.f) {
                return 0;
            }
            --(*info);
        }
    } else {
        /* Lower storage: walk the diagonal from top to bottom. */
        cnt = *n;
        *info = 1;
        while (*info <= cnt) {
            if (ipiv[*info] > 0 && a_ref(*info, *info) == 0.f) {
                return 0;
            }
            ++(*info);
        }
    }
    *info = 0;

    if (upper) {

        /* inv(A) from A = U*D*U'.  K runs upward from 1 to N, advancing by
           the size (1 or 2) of the diagonal block just processed. */
        for (k = 1; k <= *n; k += kstep) {

            if (ipiv[k] > 0) {

                /* 1 x 1 diagonal block: invert it ... */
                a_ref(k, k) = 1.f / a_ref(k, k);

                /* ... and build column K of the inverse. */
                if (k > 1) {
                    /* One count serves all three calls; this assumes the
                       BLAS routines only read their length argument. */
                    cnt = k - 1;
                    scopy_(&cnt, &a_ref(1, k), &c__1, &work[1], &c__1);
                    ssymv_(uplo, &cnt, &c_b11, &a[a_offset], lda, &work[1],
                            &c__1, &c_b13, &a_ref(1, k), &c__1);
                    a_ref(k, k) -= sdot_(&cnt, &work[1], &c__1,
                            &a_ref(1, k), &c__1);
                }
                kstep = 1;

            } else {

                /* 2 x 2 diagonal block: invert it, scaling by the
                   magnitude of the off-diagonal entry ... */
                t = dabs(a_ref(k, k + 1));
                ak = a_ref(k, k) / t;
                akp1 = a_ref(k + 1, k + 1) / t;
                akkp1 = a_ref(k, k + 1) / t;
                d__ = t * (ak * akp1 - 1.f);
                a_ref(k, k) = akp1 / d__;
                a_ref(k + 1, k + 1) = ak / d__;
                a_ref(k, k + 1) = -akkp1 / d__;

                /* ... and build columns K and K+1 of the inverse. */
                if (k > 1) {
                    cnt = k - 1;

                    scopy_(&cnt, &a_ref(1, k), &c__1, &work[1], &c__1);
                    ssymv_(uplo, &cnt, &c_b11, &a[a_offset], lda, &work[1],
                            &c__1, &c_b13, &a_ref(1, k), &c__1);
                    a_ref(k, k) -= sdot_(&cnt, &work[1], &c__1,
                            &a_ref(1, k), &c__1);
                    a_ref(k, k + 1) -= sdot_(&cnt, &a_ref(1, k), &c__1,
                            &a_ref(1, k + 1), &c__1);

                    scopy_(&cnt, &a_ref(1, k + 1), &c__1, &work[1], &c__1);
                    ssymv_(uplo, &cnt, &c_b11, &a[a_offset], lda, &work[1],
                            &c__1, &c_b13, &a_ref(1, k + 1), &c__1);
                    a_ref(k + 1, k + 1) -= sdot_(&cnt, &work[1], &c__1,
                            &a_ref(1, k + 1), &c__1);
                }
                kstep = 2;
            }

            kp = abs(ipiv[k]);
            if (kp != k) {

                /* Interchange rows and columns K and KP in the leading
                   submatrix A(1:k+1,1:k+1). */
                cnt = kp - 1;
                sswap_(&cnt, &a_ref(1, k), &c__1, &a_ref(1, kp), &c__1);
                cnt = k - kp - 1;
                sswap_(&cnt, &a_ref(kp + 1, k), &c__1, &a_ref(kp, kp + 1),
                        lda);

                temp = a_ref(k, k);
                a_ref(k, k) = a_ref(kp, kp);
                a_ref(kp, kp) = temp;

                if (kstep == 2) {
                    temp = a_ref(k, k + 1);
                    a_ref(k, k + 1) = a_ref(kp, k + 1);
                    a_ref(kp, k + 1) = temp;
                }
            }
        }

    } else {

        /* inv(A) from A = L*D*L'.  K runs downward from N to 1, stepping
           back by the size (1 or 2) of the diagonal block just processed. */
        for (k = *n; k >= 1; k -= kstep) {

            if (ipiv[k] > 0) {

                /* 1 x 1 diagonal block: invert it ... */
                a_ref(k, k) = 1.f / a_ref(k, k);

                /* ... and build column K of the inverse. */
                if (k < *n) {
                    /* Same assumption as above: the count is input-only. */
                    cnt = *n - k;
                    scopy_(&cnt, &a_ref(k + 1, k), &c__1, &work[1], &c__1);
                    ssymv_(uplo, &cnt, &c_b11, &a_ref(k + 1, k + 1), lda,
                            &work[1], &c__1, &c_b13, &a_ref(k + 1, k), &c__1);
                    a_ref(k, k) -= sdot_(&cnt, &work[1], &c__1,
                            &a_ref(k + 1, k), &c__1);
                }
                kstep = 1;

            } else {

                /* 2 x 2 diagonal block occupying rows/columns K-1 and K:
                   invert it ... */
                t = dabs(a_ref(k, k - 1));
                ak = a_ref(k - 1, k - 1) / t;
                akp1 = a_ref(k, k) / t;
                akkp1 = a_ref(k, k - 1) / t;
                d__ = t * (ak * akp1 - 1.f);
                a_ref(k - 1, k - 1) = akp1 / d__;
                a_ref(k, k) = ak / d__;
                a_ref(k, k - 1) = -akkp1 / d__;

                /* ... and build columns K-1 and K of the inverse. */
                if (k < *n) {
                    cnt = *n - k;

                    scopy_(&cnt, &a_ref(k + 1, k), &c__1, &work[1], &c__1);
                    ssymv_(uplo, &cnt, &c_b11, &a_ref(k + 1, k + 1), lda,
                            &work[1], &c__1, &c_b13, &a_ref(k + 1, k), &c__1);
                    a_ref(k, k) -= sdot_(&cnt, &work[1], &c__1,
                            &a_ref(k + 1, k), &c__1);
                    a_ref(k, k - 1) -= sdot_(&cnt, &a_ref(k + 1, k), &c__1,
                            &a_ref(k + 1, k - 1), &c__1);

                    scopy_(&cnt, &a_ref(k + 1, k - 1), &c__1, &work[1],
                            &c__1);
                    ssymv_(uplo, &cnt, &c_b11, &a_ref(k + 1, k + 1), lda,
                            &work[1], &c__1, &c_b13, &a_ref(k + 1, k - 1),
                            &c__1);
                    a_ref(k - 1, k - 1) -= sdot_(&cnt, &work[1], &c__1,
                            &a_ref(k + 1, k - 1), &c__1);
                }
                kstep = 2;
            }

            kp = abs(ipiv[k]);
            if (kp != k) {

                /* Interchange rows and columns K and KP in the trailing
                   submatrix A(k-1:n,k-1:n). */
                if (kp < *n) {
                    cnt = *n - kp;
                    sswap_(&cnt, &a_ref(kp + 1, k), &c__1,
                            &a_ref(kp + 1, kp), &c__1);
                }
                cnt = kp - k - 1;
                sswap_(&cnt, &a_ref(k + 1, k), &c__1, &a_ref(kp, k + 1),
                        lda);

                temp = a_ref(k, k);
                a_ref(k, k) = a_ref(kp, kp);
                a_ref(kp, kp) = temp;

                if (kstep == 2) {
                    temp = a_ref(k, k - 1);
                    a_ref(k, k - 1) = a_ref(kp, k - 1);
                    a_ref(kp, k - 1) = temp;
                }
            }
        }
    }

    return 0;

/*     End of SSYTRI */

} /* ssytri_ */
/* Subroutine */ int slaqtr_(logical *ltran, logical *lreal, integer *n, real *t, integer *ldt, real *b, real *w, real *scale, real *x, real *work, integer *info) { /* System generated locals */ integer t_dim1, t_offset, i__1, i__2; real r__1, r__2, r__3, r__4, r__5, r__6; /* Local variables */ real d__[4] /* was [2][2] */; integer i__, j, k; real v[4] /* was [2][2] */, z__; integer j1, j2, n1, n2; real si, xj, sr, rec, eps, tjj, tmp; integer ierr; real smin; extern doublereal sdot_(integer *, real *, integer *, real *, integer *); real xmax; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); integer jnext; extern doublereal sasum_(integer *, real *, integer *); real sminw, xnorm; extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, real *, integer *), slaln2_(logical *, integer *, integer *, real *, real *, real *, integer *, real *, real *, real *, integer *, real *, real *, real *, integer *, real *, real *, integer *); real scaloc; extern doublereal slamch_(char *), slange_(char *, integer *, integer *, real *, integer *, real *); real bignum; extern integer isamax_(integer *, real *, integer *); extern /* Subroutine */ int sladiv_(real *, real *, real *, real *, real * , real *); logical notran; real smlnum; /* -- LAPACK auxiliary routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLAQTR solves the real quasi-triangular system */ /* op(T)*p = scale*c, if LREAL = .TRUE. */ /* or the complex quasi-triangular systems */ /* op(T + iB)*(p+iq) = scale*(c+id), if LREAL = .FALSE. */ /* in real arithmetic, where T is upper quasi-triangular. */ /* If LREAL = .FALSE., then the first diagonal block of T must be */ /* 1 by 1, B is the specially structured matrix */ /* B = [ b(1) b(2) ... b(n) ] */ /* [ w ] */ /* [ w ] */ /* [ . 
] */ /* [ w ] */ /* op(A) = A or A', A' denotes the conjugate transpose of */ /* matrix A. */ /* On input, X = [ c ]. On output, X = [ p ]. */ /* [ d ] [ q ] */ /* This subroutine is designed for the condition number estimation */ /* in routine STRSNA. */ /* Arguments */ /* ========= */ /* LTRAN (input) LOGICAL */ /* On entry, LTRAN specifies the option of conjugate transpose: */ /* = .FALSE., op(T+i*B) = T+i*B, */ /* = .TRUE., op(T+i*B) = (T+i*B)'. */ /* LREAL (input) LOGICAL */ /* On entry, LREAL specifies the input matrix structure: */ /* = .FALSE., the input is complex */ /* = .TRUE., the input is real */ /* N (input) INTEGER */ /* On entry, N specifies the order of T+i*B. N >= 0. */ /* T (input) REAL array, dimension (LDT,N) */ /* On entry, T contains a matrix in Schur canonical form. */ /* If LREAL = .FALSE., then the first diagonal block of T must */ /* be 1 by 1. */ /* LDT (input) INTEGER */ /* The leading dimension of the matrix T. LDT >= max(1,N). */ /* B (input) REAL array, dimension (N) */ /* On entry, B contains the elements to form the matrix */ /* B as described above. */ /* If LREAL = .TRUE., B is not referenced. */ /* W (input) REAL */ /* On entry, W is the diagonal element of the matrix B. */ /* If LREAL = .TRUE., W is not referenced. */ /* SCALE (output) REAL */ /* On exit, SCALE is the scale factor. */ /* X (input/output) REAL array, dimension (2*N) */ /* On entry, X contains the right hand side of the system. */ /* On exit, X is overwritten by the solution. */ /* WORK (workspace) REAL array, dimension (N) */ /* INFO (output) INTEGER */ /* On exit, INFO is set to */ /* 0: successful exit. */ /* 1: the some diagonal 1 by 1 block has been perturbed by */ /* a small number SMIN to keep nonsingularity. */ /* 2: the some diagonal 2 by 2 block has been perturbed by */ /* a small number in SLALN2 to keep nonsingularity. */ /* NOTE: In the interests of speed, this routine does not */ /* check the inputs for errors. 
*/ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Do not test the input parameters for errors */ /* Parameter adjustments */ t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; --b; --x; --work; /* Function Body */ notran = ! (*ltran); *info = 0; /* Quick return if possible */ if (*n == 0) { return 0; } /* Set constants to control overflow */ eps = slamch_("P"); smlnum = slamch_("S") / eps; bignum = 1.f / smlnum; xnorm = slange_("M", n, n, &t[t_offset], ldt, d__); if (! (*lreal)) { /* Computing MAX */ r__1 = xnorm, r__2 = dabs(*w), r__1 = max(r__1,r__2), r__2 = slange_( "M", n, &c__1, &b[1], n, d__); xnorm = dmax(r__1,r__2); } /* Computing MAX */ r__1 = smlnum, r__2 = eps * xnorm; smin = dmax(r__1,r__2); /* Compute 1-norm of each column of strictly upper triangular */ /* part of T to control overflow in triangular solver. */ work[1] = 0.f; i__1 = *n; for (j = 2; j <= i__1; ++j) { i__2 = j - 1; work[j] = sasum_(&i__2, &t[j * t_dim1 + 1], &c__1); /* L10: */ } if (! (*lreal)) { i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { work[i__] += (r__1 = b[i__], dabs(r__1)); /* L20: */ } } n2 = *n << 1; n1 = *n; if (! 
(*lreal)) { n1 = n2; } k = isamax_(&n1, &x[1], &c__1); xmax = (r__1 = x[k], dabs(r__1)); *scale = 1.f; if (xmax > bignum) { *scale = bignum / xmax; sscal_(&n1, scale, &x[1], &c__1); xmax = bignum; } if (*lreal) { if (notran) { /* Solve T*p = scale*c */ jnext = *n; for (j = *n; j >= 1; --j) { if (j > jnext) { goto L30; } j1 = j; j2 = j; jnext = j - 1; if (j > 1) { if (t[j + (j - 1) * t_dim1] != 0.f) { j1 = j - 1; jnext = j - 2; } } if (j1 == j2) { /* Meet 1 by 1 diagonal block */ /* Scale to avoid overflow when computing */ /* x(j) = b(j)/T(j,j) */ xj = (r__1 = x[j1], dabs(r__1)); tjj = (r__1 = t[j1 + j1 * t_dim1], dabs(r__1)); tmp = t[j1 + j1 * t_dim1]; if (tjj < smin) { tmp = smin; tjj = smin; *info = 1; } if (xj == 0.f) { goto L30; } if (tjj < 1.f) { if (xj > bignum * tjj) { rec = 1.f / xj; sscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } x[j1] /= tmp; xj = (r__1 = x[j1], dabs(r__1)); /* Scale x if necessary to avoid overflow when adding a */ /* multiple of column j1 of T. */ if (xj > 1.f) { rec = 1.f / xj; if (work[j1] > (bignum - xmax) * rec) { sscal_(n, &rec, &x[1], &c__1); *scale *= rec; } } if (j1 > 1) { i__1 = j1 - 1; r__1 = -x[j1]; saxpy_(&i__1, &r__1, &t[j1 * t_dim1 + 1], &c__1, &x[1] , &c__1); i__1 = j1 - 1; k = isamax_(&i__1, &x[1], &c__1); xmax = (r__1 = x[k], dabs(r__1)); } } else { /* Meet 2 by 2 diagonal block */ /* Call 2 by 2 linear system solve, to take */ /* care of possible overflow by scaling factor. */ d__[0] = x[j1]; d__[1] = x[j2]; slaln2_(&c_false, &c__2, &c__1, &smin, &c_b21, &t[j1 + j1 * t_dim1], ldt, &c_b21, &c_b21, d__, &c__2, & c_b25, &c_b25, v, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 2; } if (scaloc != 1.f) { sscal_(n, &scaloc, &x[1], &c__1); *scale *= scaloc; } x[j1] = v[0]; x[j2] = v[1]; /* Scale V(1,1) (= X(J1)) and/or V(2,1) (=X(J2)) */ /* to avoid overflow in updating right-hand side. 
*/ /* Computing MAX */ r__1 = dabs(v[0]), r__2 = dabs(v[1]); xj = dmax(r__1,r__2); if (xj > 1.f) { rec = 1.f / xj; /* Computing MAX */ r__1 = work[j1], r__2 = work[j2]; if (dmax(r__1,r__2) > (bignum - xmax) * rec) { sscal_(n, &rec, &x[1], &c__1); *scale *= rec; } } /* Update right-hand side */ if (j1 > 1) { i__1 = j1 - 1; r__1 = -x[j1]; saxpy_(&i__1, &r__1, &t[j1 * t_dim1 + 1], &c__1, &x[1] , &c__1); i__1 = j1 - 1; r__1 = -x[j2]; saxpy_(&i__1, &r__1, &t[j2 * t_dim1 + 1], &c__1, &x[1] , &c__1); i__1 = j1 - 1; k = isamax_(&i__1, &x[1], &c__1); xmax = (r__1 = x[k], dabs(r__1)); } } L30: ; } } else { /* Solve T'*p = scale*c */ jnext = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (j < jnext) { goto L40; } j1 = j; j2 = j; jnext = j + 1; if (j < *n) { if (t[j + 1 + j * t_dim1] != 0.f) { j2 = j + 1; jnext = j + 2; } } if (j1 == j2) { /* 1 by 1 diagonal block */ /* Scale if necessary to avoid overflow in forming the */ /* right-hand side element by inner product. */ xj = (r__1 = x[j1], dabs(r__1)); if (xmax > 1.f) { rec = 1.f / xmax; if (work[j1] > (bignum - xj) * rec) { sscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } i__2 = j1 - 1; x[j1] -= sdot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, &x[1], & c__1); xj = (r__1 = x[j1], dabs(r__1)); tjj = (r__1 = t[j1 + j1 * t_dim1], dabs(r__1)); tmp = t[j1 + j1 * t_dim1]; if (tjj < smin) { tmp = smin; tjj = smin; *info = 1; } if (tjj < 1.f) { if (xj > bignum * tjj) { rec = 1.f / xj; sscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } x[j1] /= tmp; /* Computing MAX */ r__2 = xmax, r__3 = (r__1 = x[j1], dabs(r__1)); xmax = dmax(r__2,r__3); } else { /* 2 by 2 diagonal block */ /* Scale if necessary to avoid overflow in forming the */ /* right-hand side elements by inner product. 
*/ /* Computing MAX */ r__3 = (r__1 = x[j1], dabs(r__1)), r__4 = (r__2 = x[j2], dabs(r__2)); xj = dmax(r__3,r__4); if (xmax > 1.f) { rec = 1.f / xmax; /* Computing MAX */ r__1 = work[j2], r__2 = work[j1]; if (dmax(r__1,r__2) > (bignum - xj) * rec) { sscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } i__2 = j1 - 1; d__[0] = x[j1] - sdot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, &x[1], &c__1); i__2 = j1 - 1; d__[1] = x[j2] - sdot_(&i__2, &t[j2 * t_dim1 + 1], &c__1, &x[1], &c__1); slaln2_(&c_true, &c__2, &c__1, &smin, &c_b21, &t[j1 + j1 * t_dim1], ldt, &c_b21, &c_b21, d__, &c__2, &c_b25, &c_b25, v, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 2; } if (scaloc != 1.f) { sscal_(n, &scaloc, &x[1], &c__1); *scale *= scaloc; } x[j1] = v[0]; x[j2] = v[1]; /* Computing MAX */ r__3 = (r__1 = x[j1], dabs(r__1)), r__4 = (r__2 = x[j2], dabs(r__2)), r__3 = max(r__3,r__4); xmax = dmax(r__3,xmax); } L40: ; } } } else { /* Computing MAX */ r__1 = eps * dabs(*w); sminw = dmax(r__1,smin); if (notran) { /* Solve (T + iB)*(p+iq) = c+id */ jnext = *n; for (j = *n; j >= 1; --j) { if (j > jnext) { goto L70; } j1 = j; j2 = j; jnext = j - 1; if (j > 1) { if (t[j + (j - 1) * t_dim1] != 0.f) { j1 = j - 1; jnext = j - 2; } } if (j1 == j2) { /* 1 by 1 diagonal block */ /* Scale if necessary to avoid overflow in division */ z__ = *w; if (j1 == 1) { z__ = b[1]; } xj = (r__1 = x[j1], dabs(r__1)) + (r__2 = x[*n + j1], dabs(r__2)); tjj = (r__1 = t[j1 + j1 * t_dim1], dabs(r__1)) + dabs(z__) ; tmp = t[j1 + j1 * t_dim1]; if (tjj < sminw) { tmp = sminw; tjj = sminw; *info = 1; } if (xj == 0.f) { goto L70; } if (tjj < 1.f) { if (xj > bignum * tjj) { rec = 1.f / xj; sscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } sladiv_(&x[j1], &x[*n + j1], &tmp, &z__, &sr, &si); x[j1] = sr; x[*n + j1] = si; xj = (r__1 = x[j1], dabs(r__1)) + (r__2 = x[*n + j1], dabs(r__2)); /* Scale x if necessary to avoid overflow when adding a */ /* multiple of column j1 of T. 
*/ if (xj > 1.f) { rec = 1.f / xj; if (work[j1] > (bignum - xmax) * rec) { sscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; } } if (j1 > 1) { i__1 = j1 - 1; r__1 = -x[j1]; saxpy_(&i__1, &r__1, &t[j1 * t_dim1 + 1], &c__1, &x[1] , &c__1); i__1 = j1 - 1; r__1 = -x[*n + j1]; saxpy_(&i__1, &r__1, &t[j1 * t_dim1 + 1], &c__1, &x[* n + 1], &c__1); x[1] += b[j1] * x[*n + j1]; x[*n + 1] -= b[j1] * x[j1]; xmax = 0.f; i__1 = j1 - 1; for (k = 1; k <= i__1; ++k) { /* Computing MAX */ r__3 = xmax, r__4 = (r__1 = x[k], dabs(r__1)) + ( r__2 = x[k + *n], dabs(r__2)); xmax = dmax(r__3,r__4); /* L50: */ } } } else { /* Meet 2 by 2 diagonal block */ d__[0] = x[j1]; d__[1] = x[j2]; d__[2] = x[*n + j1]; d__[3] = x[*n + j2]; r__1 = -(*w); slaln2_(&c_false, &c__2, &c__2, &sminw, &c_b21, &t[j1 + j1 * t_dim1], ldt, &c_b21, &c_b21, d__, &c__2, & c_b25, &r__1, v, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 2; } if (scaloc != 1.f) { i__1 = *n << 1; sscal_(&i__1, &scaloc, &x[1], &c__1); *scale = scaloc * *scale; } x[j1] = v[0]; x[j2] = v[1]; x[*n + j1] = v[2]; x[*n + j2] = v[3]; /* Scale X(J1), .... to avoid overflow in */ /* updating right hand side. */ /* Computing MAX */ r__1 = dabs(v[0]) + dabs(v[2]), r__2 = dabs(v[1]) + dabs( v[3]); xj = dmax(r__1,r__2); if (xj > 1.f) { rec = 1.f / xj; /* Computing MAX */ r__1 = work[j1], r__2 = work[j2]; if (dmax(r__1,r__2) > (bignum - xmax) * rec) { sscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; } } /* Update the right-hand side. 
*/ if (j1 > 1) { i__1 = j1 - 1; r__1 = -x[j1]; saxpy_(&i__1, &r__1, &t[j1 * t_dim1 + 1], &c__1, &x[1] , &c__1); i__1 = j1 - 1; r__1 = -x[j2]; saxpy_(&i__1, &r__1, &t[j2 * t_dim1 + 1], &c__1, &x[1] , &c__1); i__1 = j1 - 1; r__1 = -x[*n + j1]; saxpy_(&i__1, &r__1, &t[j1 * t_dim1 + 1], &c__1, &x[* n + 1], &c__1); i__1 = j1 - 1; r__1 = -x[*n + j2]; saxpy_(&i__1, &r__1, &t[j2 * t_dim1 + 1], &c__1, &x[* n + 1], &c__1); x[1] = x[1] + b[j1] * x[*n + j1] + b[j2] * x[*n + j2]; x[*n + 1] = x[*n + 1] - b[j1] * x[j1] - b[j2] * x[j2]; xmax = 0.f; i__1 = j1 - 1; for (k = 1; k <= i__1; ++k) { /* Computing MAX */ r__3 = (r__1 = x[k], dabs(r__1)) + (r__2 = x[k + * n], dabs(r__2)); xmax = dmax(r__3,xmax); /* L60: */ } } } L70: ; } } else { /* Solve (T + iB)'*(p+iq) = c+id */ jnext = 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { if (j < jnext) { goto L80; } j1 = j; j2 = j; jnext = j + 1; if (j < *n) { if (t[j + 1 + j * t_dim1] != 0.f) { j2 = j + 1; jnext = j + 2; } } if (j1 == j2) { /* 1 by 1 diagonal block */ /* Scale if necessary to avoid overflow in forming the */ /* right-hand side element by inner product. 
*/ xj = (r__1 = x[j1], dabs(r__1)) + (r__2 = x[j1 + *n], dabs(r__2)); if (xmax > 1.f) { rec = 1.f / xmax; if (work[j1] > (bignum - xj) * rec) { sscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } i__2 = j1 - 1; x[j1] -= sdot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, &x[1], & c__1); i__2 = j1 - 1; x[*n + j1] -= sdot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, &x[ *n + 1], &c__1); if (j1 > 1) { x[j1] -= b[j1] * x[*n + 1]; x[*n + j1] += b[j1] * x[1]; } xj = (r__1 = x[j1], dabs(r__1)) + (r__2 = x[j1 + *n], dabs(r__2)); z__ = *w; if (j1 == 1) { z__ = b[1]; } /* Scale if necessary to avoid overflow in */ /* complex division */ tjj = (r__1 = t[j1 + j1 * t_dim1], dabs(r__1)) + dabs(z__) ; tmp = t[j1 + j1 * t_dim1]; if (tjj < sminw) { tmp = sminw; tjj = sminw; *info = 1; } if (tjj < 1.f) { if (xj > bignum * tjj) { rec = 1.f / xj; sscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } r__1 = -z__; sladiv_(&x[j1], &x[*n + j1], &tmp, &r__1, &sr, &si); x[j1] = sr; x[j1 + *n] = si; /* Computing MAX */ r__3 = (r__1 = x[j1], dabs(r__1)) + (r__2 = x[j1 + *n], dabs(r__2)); xmax = dmax(r__3,xmax); } else { /* 2 by 2 diagonal block */ /* Scale if necessary to avoid overflow in forming the */ /* right-hand side element by inner product. 
*/ /* Computing MAX */ r__5 = (r__1 = x[j1], dabs(r__1)) + (r__2 = x[*n + j1], dabs(r__2)), r__6 = (r__3 = x[j2], dabs(r__3)) + ( r__4 = x[*n + j2], dabs(r__4)); xj = dmax(r__5,r__6); if (xmax > 1.f) { rec = 1.f / xmax; /* Computing MAX */ r__1 = work[j1], r__2 = work[j2]; if (dmax(r__1,r__2) > (bignum - xj) / xmax) { sscal_(&n2, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } i__2 = j1 - 1; d__[0] = x[j1] - sdot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, &x[1], &c__1); i__2 = j1 - 1; d__[1] = x[j2] - sdot_(&i__2, &t[j2 * t_dim1 + 1], &c__1, &x[1], &c__1); i__2 = j1 - 1; d__[2] = x[*n + j1] - sdot_(&i__2, &t[j1 * t_dim1 + 1], & c__1, &x[*n + 1], &c__1); i__2 = j1 - 1; d__[3] = x[*n + j2] - sdot_(&i__2, &t[j2 * t_dim1 + 1], & c__1, &x[*n + 1], &c__1); d__[0] -= b[j1] * x[*n + 1]; d__[1] -= b[j2] * x[*n + 1]; d__[2] += b[j1] * x[1]; d__[3] += b[j2] * x[1]; slaln2_(&c_true, &c__2, &c__2, &sminw, &c_b21, &t[j1 + j1 * t_dim1], ldt, &c_b21, &c_b21, d__, &c__2, & c_b25, w, v, &c__2, &scaloc, &xnorm, &ierr); if (ierr != 0) { *info = 2; } if (scaloc != 1.f) { sscal_(&n2, &scaloc, &x[1], &c__1); *scale = scaloc * *scale; } x[j1] = v[0]; x[j2] = v[1]; x[*n + j1] = v[2]; x[*n + j2] = v[3]; /* Computing MAX */ r__5 = (r__1 = x[j1], dabs(r__1)) + (r__2 = x[*n + j1], dabs(r__2)), r__6 = (r__3 = x[j2], dabs(r__3)) + ( r__4 = x[*n + j2], dabs(r__4)), r__5 = max(r__5, r__6); xmax = dmax(r__5,xmax); } L80: ; } } } return 0; /* End of SLAQTR */ } /* slaqtr_ */
/*
 * SLAIC1 -- one step of incremental condition estimation, simplest version.
 * f2c translation of the LAPACK auxiliary routine (version 3.2),
 * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd., November 2006.
 *
 * Let x, twonorm(x) = 1, be an approximate singular vector of a j-by-j lower
 * triangular matrix L with twonorm(L*x) = sest.  The routine computes
 * sestpr, s and c such that
 *
 *            [ s*x ]
 *     xhat = [  c  ]
 *
 * is an approximate singular vector of
 *
 *            [ L    0     ]
 *     Lhat = [ w'   gamma ]
 *
 * in the sense that twonorm(Lhat*xhat) = sestpr.  [s c]' and sestpr**2 form
 * an eigenpair of diag(sest*sest, 0) + [alpha gamma]' * [alpha gamma],
 * where alpha = x'*w.
 *
 * Arguments
 *   job    (input)  1: estimate the largest singular value,
 *                   2: estimate the smallest singular value.
 *                   Any other value leaves sestpr, s and c untouched.
 *   j      (input)  length of x and w.
 *   x      (input)  the j-vector x.
 *   sest   (input)  estimated singular value of the j-by-j matrix L.
 *   w      (input)  the j-vector w.
 *   gamma  (input)  the diagonal element gamma.
 *   sestpr (output) estimated singular value of the (j+1)-by-(j+1) matrix Lhat.
 *   s      (output) sine needed in forming xhat.
 *   c__    (output) cosine needed in forming xhat.
 *
 * Always returns 0.
 *
 * NOTE(review): relies on the file-scope constants c__1 (vector stride) and
 * c_b5 (magnitude handed to r_sign); they are defined outside this block --
 * presumably 1 and 1.f, confirm against the top of the file.
 */
/* Subroutine */ int slaic1_(integer *job, integer *j, real *x, real *sest,
                             real *w, real *gamma, real *sestpr, real *s,
                             real *c__)
{
    /* Builtin functions */
    double sqrt(doublereal), r_sign(real *, real *);

    /* External routines */
    extern doublereal slamch_(char *);
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);

    /* Local variables */
    real macheps, alpha, mag_alpha, mag_gamma, mag_sest;
    real z1, z2, half_b, root, sn, cs;
    real scale, nrm, q1, q2, ratio;
    real cross, bound1, bound2, anorm, disc, arg;

    /* x and w are only ever handed to sdot_, so they are passed as received */
    /* instead of being shifted to 1-based indexing first.                   */
    macheps = slamch_("Epsilon");
    alpha = sdot_(j, x, &c__1, w, &c__1);

    mag_alpha = dabs(alpha);
    mag_gamma = dabs(*gamma);
    mag_sest = dabs(*sest);

    if (*job == 1) {

        /* ---- Estimating the largest singular value ---- */

        /* special case: sest is exactly zero */
        if (*sest == 0.f) {
            scale = dmax(mag_gamma,mag_alpha);
            if (scale == 0.f) {
                *s = 0.f;
                *c__ = 1.f;
                *sestpr = 0.f;
            } else {
                *s = alpha / scale;
                *c__ = *gamma / scale;
                nrm = sqrt(*s * *s + *c__ * *c__);
                *s /= nrm;
                *c__ /= nrm;
                *sestpr = scale * nrm;
            }
            return 0;
        }

        /* special case: gamma negligible relative to sest */
        if (mag_gamma <= macheps * mag_sest) {
            *s = 1.f;
            *c__ = 0.f;
            nrm = dmax(mag_sest,mag_alpha);
            q1 = mag_sest / nrm;
            q2 = mag_alpha / nrm;
            *sestpr = nrm * sqrt(q1 * q1 + q2 * q2);
            return 0;
        }

        /* special case: alpha negligible relative to sest */
        if (mag_alpha <= macheps * mag_sest) {
            if (mag_gamma <= mag_sest) {
                *s = 1.f;
                *c__ = 0.f;
                *sestpr = mag_sest;
            } else {
                *s = 0.f;
                *c__ = 1.f;
                *sestpr = mag_gamma;
            }
            return 0;
        }

        /* special case: sest negligible relative to alpha or gamma */
        if (mag_sest <= macheps * mag_alpha || mag_sest <= macheps * mag_gamma) {
            if (mag_gamma <= mag_alpha) {
                ratio = mag_gamma / mag_alpha;
                *s = sqrt(ratio * ratio + 1.f);
                *sestpr = mag_alpha * *s;
                *c__ = *gamma / mag_alpha / *s;
                *s = r_sign(&c_b5, &alpha) / *s;
            } else {
                ratio = mag_alpha / mag_gamma;
                *c__ = sqrt(ratio * ratio + 1.f);
                *sestpr = mag_gamma * *c__;
                *s = alpha / mag_gamma / *c__;
                *c__ = r_sign(&c_b5, gamma) / *c__;
            }
            return 0;
        }

        /* normal case (*c__ doubles as scratch storage, as in the reference) */
        z1 = alpha / mag_sest;
        z2 = *gamma / mag_sest;

        half_b = (1.f - z1 * z1 - z2 * z2) * .5f;
        *c__ = z1 * z1;
        if (half_b > 0.f) {
            root = *c__ / (half_b + sqrt(half_b * half_b + *c__));
        } else {
            root = sqrt(half_b * half_b + *c__) - half_b;
        }

        sn = -z1 / root;
        cs = -z2 / (root + 1.f);
        nrm = sqrt(sn * sn + cs * cs);
        *s = sn / nrm;
        *c__ = cs / nrm;
        *sestpr = sqrt(root + 1.f) * mag_sest;
        return 0;
    }

    if (*job == 2) {

        /* ---- Estimating the smallest singular value ---- */

        /* special case: sest is exactly zero */
        if (*sest == 0.f) {
            *sestpr = 0.f;
            if (dmax(mag_gamma,mag_alpha) == 0.f) {
                sn = 1.f;
                cs = 0.f;
            } else {
                sn = -(*gamma);
                cs = alpha;
            }
            /* Computing MAX */
            bound1 = dabs(sn);
            bound2 = dabs(cs);
            scale = dmax(bound1,bound2);
            *s = sn / scale;
            *c__ = cs / scale;
            nrm = sqrt(*s * *s + *c__ * *c__);
            *s /= nrm;
            *c__ /= nrm;
            return 0;
        }

        /* special case: gamma negligible relative to sest */
        if (mag_gamma <= macheps * mag_sest) {
            *s = 0.f;
            *c__ = 1.f;
            *sestpr = mag_gamma;
            return 0;
        }

        /* special case: alpha negligible relative to sest */
        if (mag_alpha <= macheps * mag_sest) {
            if (mag_gamma <= mag_sest) {
                *s = 0.f;
                *c__ = 1.f;
                *sestpr = mag_gamma;
            } else {
                *s = 1.f;
                *c__ = 0.f;
                *sestpr = mag_sest;
            }
            return 0;
        }

        /* special case: sest negligible relative to alpha or gamma */
        if (mag_sest <= macheps * mag_alpha || mag_sest <= macheps * mag_gamma) {
            if (mag_gamma <= mag_alpha) {
                ratio = mag_gamma / mag_alpha;
                *c__ = sqrt(ratio * ratio + 1.f);
                *sestpr = mag_sest * (ratio / *c__);
                *s = -(*gamma / mag_alpha) / *c__;
                *c__ = r_sign(&c_b5, &alpha) / *c__;
            } else {
                ratio = mag_alpha / mag_gamma;
                *s = sqrt(ratio * ratio + 1.f);
                *sestpr = mag_sest / *s;
                *c__ = alpha / mag_gamma / *s;
                *s = -r_sign(&c_b5, gamma) / *s;
            }
            return 0;
        }

        /* normal case */
        z1 = alpha / mag_sest;
        z2 = *gamma / mag_sest;

        /* Computing MAX */
        cross = z1 * z2;
        bound1 = z1 * z1 + 1.f + dabs(cross);
        bound2 = dabs(cross) + z2 * z2;
        anorm = dmax(bound1,bound2);

        /* See if root is closer to zero or to ONE */
        disc = (z1 - z2) * 2.f * (z1 + z2) + 1.f;
        if (disc >= 0.f) {

            /* root is close to zero, compute directly */
            half_b = (z1 * z1 + z2 * z2 + 1.f) * .5f;
            *c__ = z2 * z2;
            arg = half_b * half_b - *c__;
            root = *c__ / (half_b + sqrt(dabs(arg)));
            sn = z1 / (1.f - root);
            cs = -z2 / root;
            *sestpr = sqrt(root + macheps * 4.f * macheps * anorm) * mag_sest;
        } else {

            /* root is closer to ONE, shift by that amount */
            half_b = (z2 * z2 + z1 * z1 - 1.f) * .5f;
            *c__ = z1 * z1;
            if (half_b >= 0.f) {
                root = -(*c__) / (half_b + sqrt(half_b * half_b + *c__));
            } else {
                root = half_b - sqrt(half_b * half_b + *c__);
            }
            sn = -z1 / root;
            cs = -z2 / (root + 1.f);
            *sestpr = sqrt(root + 1.f + macheps * 4.f * macheps * anorm) * mag_sest;
        }

        nrm = sqrt(sn * sn + cs * cs);
        *s = sn / nrm;
        *c__ = cs / nrm;
        return 0;
    }

    /* job is neither 1 nor 2: nothing is computed */
    return 0;

/*     End of SLAIC1 */

} /* slaic1_ */
/*
 * By-value convenience wrapper around sdot_.
 *
 * n, incx and incy are received by value and forwarded by address, while the
 * vector pointers x and y are forwarded unchanged.  Whatever sdot_ returns is
 * converted to float and handed back to the caller.
 * (sdot_ is defined elsewhere; presumably the BLAS single-precision dot
 * product of x and y with strides incx and incy -- confirm.)
 */
float sdot(int n, float *x, int incx, float *y, int incy)
{
    float dot_xy = sdot_(&n, x, &incx, y, &incy);

    return dot_xy;
}
/* ssytri_: f2c translation of LAPACK SSYTRI. Overwrites the factored matrix in a with the inverse of the original symmetric matrix, one diagonal block (1x1 or 2x2, selected by the sign of ipiv[k]) at a time. The function value is always 0; status is reported through *info. */
/* NOTE(review): uses the file-scope constants c__1, c_b11 and c_b13, which are defined outside this block -- presumably the stride 1 and the ssymv_ scalars -1.f and 0.f; confirm against the top of the file. */
/* Subroutine */ int ssytri_(char *uplo, integer *n, real *a, integer *lda, integer *ipiv, real *work, integer *info) { /* System generated locals */ integer a_dim1, a_offset, i__1; real r__1; /* Local variables */ real d__; integer k; real t, ak; integer kp; real akp1, temp; extern doublereal sdot_(integer *, real *, integer *, real *, integer *); real akkp1; extern logical lsame_(char *, char *); integer kstep; logical upper; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer * ), ssymv_(char *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), xerbla_(char *, integer *); /* -- LAPACK routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SSYTRI computes the inverse of a real symmetric indefinite matrix */ /* A using the factorization A = U*D*U**T or A = L*D*L**T computed by */ /* SSYTRF. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the details of the factorization are stored */ /* as an upper or lower triangular matrix. */ /* = 'U': Upper triangular, form is A = U*D*U**T; */ /* = 'L': Lower triangular, form is A = L*D*L**T. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* A (input/output) REAL array, dimension (LDA,N) */ /* On entry, the block diagonal matrix D and the multipliers */ /* used to obtain the factor U or L as computed by SSYTRF. */ /* On exit, if INFO = 0, the (symmetric) inverse of the original */ /* matrix. If UPLO = 'U', the upper triangular part of the */ /* inverse is formed and the part of A below the diagonal is not */ /* referenced; if UPLO = 'L' the lower triangular part of the */ /* inverse is formed and the part of A above the diagonal is */ /* not referenced. 
*/ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* Details of the interchanges and the block structure of D */ /* as determined by SSYTRF. */ /* WORK (workspace) REAL array, dimension (N) */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -i, the i-th argument had an illegal value */ /* > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its */ /* inverse could not be computed. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; --ipiv; --work; /* After the shifts above, a, ipiv and work are indexed 1-based: element (i,j) of A is a[i + j * a_dim1]. */ /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } if (*info != 0) { i__1 = -(*info); xerbla_("SSYTRI", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Check that the diagonal matrix D is nonsingular. */ /* *info itself is the loop counter below, so an early return leaves the index of the zero 1 x 1 pivot in *info. */ if (upper) { /* Upper triangular storage: examine D from bottom to top */ for (*info = *n; *info >= 1; --(*info)) { if (ipiv[*info] > 0 && a[*info + *info * a_dim1] == 0.f) { return 0; } /* L10: */ } } else { /* Lower triangular storage: examine D from top to bottom. */ i__1 = *n; for (*info = 1; *info <= i__1; ++(*info)) { if (ipiv[*info] > 0 && a[*info + *info * a_dim1] == 0.f) { return 0; } /* L20: */ } } *info = 0; if (upper) { /* Compute inv(A) from the factorization A = U*D*U'. */ /* K is the main loop index, increasing from 1 to N in steps of */ /* 1 or 2, depending on the size of the diagonal blocks. */ k = 1; L30: /* If K > N, exit from loop. 
*/ if (k > *n) { goto L40; } if (ipiv[k] > 0) { /* 1 x 1 diagonal block */ /* Invert the diagonal block. */ a[k + k * a_dim1] = 1.f / a[k + k * a_dim1]; /* Compute column K of the inverse. */ if (k > 1) { i__1 = k - 1; scopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); i__1 = k - 1; ssymv_(uplo, &i__1, &c_b11, &a[a_offset], lda, &work[1], & c__1, &c_b13, &a[k * a_dim1 + 1], &c__1); i__1 = k - 1; a[k + k * a_dim1] -= sdot_(&i__1, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1); } kstep = 1; } else { /* 2 x 2 diagonal block */ /* Invert the diagonal block. */ t = (r__1 = a[k + (k + 1) * a_dim1], dabs(r__1)); ak = a[k + k * a_dim1] / t; akp1 = a[k + 1 + (k + 1) * a_dim1] / t; akkp1 = a[k + (k + 1) * a_dim1] / t; d__ = t * (ak * akp1 - 1.f); a[k + k * a_dim1] = akp1 / d__; a[k + 1 + (k + 1) * a_dim1] = ak / d__; a[k + (k + 1) * a_dim1] = -akkp1 / d__; /* Compute columns K and K+1 of the inverse. */ if (k > 1) { i__1 = k - 1; scopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1); i__1 = k - 1; ssymv_(uplo, &i__1, &c_b11, &a[a_offset], lda, &work[1], & c__1, &c_b13, &a[k * a_dim1 + 1], &c__1); i__1 = k - 1; a[k + k * a_dim1] -= sdot_(&i__1, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1); i__1 = k - 1; a[k + (k + 1) * a_dim1] -= sdot_(&i__1, &a[k * a_dim1 + 1], & c__1, &a[(k + 1) * a_dim1 + 1], &c__1); i__1 = k - 1; scopy_(&i__1, &a[(k + 1) * a_dim1 + 1], &c__1, &work[1], & c__1); i__1 = k - 1; ssymv_(uplo, &i__1, &c_b11, &a[a_offset], lda, &work[1], & c__1, &c_b13, &a[(k + 1) * a_dim1 + 1], &c__1); i__1 = k - 1; a[k + 1 + (k + 1) * a_dim1] -= sdot_(&i__1, &work[1], &c__1, & a[(k + 1) * a_dim1 + 1], &c__1); } kstep = 2; } kp = (i__1 = ipiv[k], abs(i__1)); if (kp != k) { /* Interchange rows and columns K and KP in the leading */ /* submatrix A(1:k+1,1:k+1) */ i__1 = kp - 1; sswap_(&i__1, &a[k * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], & c__1); i__1 = k - kp - 1; sswap_(&i__1, &a[kp + 1 + k * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda); temp = a[k + k * a_dim1]; 
a[k + k * a_dim1] = a[kp + kp * a_dim1]; a[kp + kp * a_dim1] = temp; if (kstep == 2) { temp = a[k + (k + 1) * a_dim1]; a[k + (k + 1) * a_dim1] = a[kp + (k + 1) * a_dim1]; a[kp + (k + 1) * a_dim1] = temp; } } k += kstep; goto L30; L40: ; } else { /* Compute inv(A) from the factorization A = L*D*L'. */ /* K is the main loop index, decreasing from N to 1 in steps of */ /* 1 or 2, depending on the size of the diagonal blocks. */ k = *n; L50: /* If K < 1, exit from loop. */ if (k < 1) { goto L60; } if (ipiv[k] > 0) { /* 1 x 1 diagonal block */ /* Invert the diagonal block. */ a[k + k * a_dim1] = 1.f / a[k + k * a_dim1]; /* Compute column K of the inverse. */ if (k < *n) { i__1 = *n - k; scopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); i__1 = *n - k; ssymv_(uplo, &i__1, &c_b11, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, &c_b13, &a[k + 1 + k * a_dim1], & c__1); i__1 = *n - k; a[k + k * a_dim1] -= sdot_(&i__1, &work[1], &c__1, &a[k + 1 + k * a_dim1], &c__1); } kstep = 1; } else { /* 2 x 2 diagonal block */ /* Invert the diagonal block. */ t = (r__1 = a[k + (k - 1) * a_dim1], dabs(r__1)); ak = a[k - 1 + (k - 1) * a_dim1] / t; akp1 = a[k + k * a_dim1] / t; akkp1 = a[k + (k - 1) * a_dim1] / t; d__ = t * (ak * akp1 - 1.f); a[k - 1 + (k - 1) * a_dim1] = akp1 / d__; a[k + k * a_dim1] = ak / d__; a[k + (k - 1) * a_dim1] = -akkp1 / d__; /* Compute columns K-1 and K of the inverse. 
*/ if (k < *n) { i__1 = *n - k; scopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &work[1], &c__1); i__1 = *n - k; ssymv_(uplo, &i__1, &c_b11, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, &c_b13, &a[k + 1 + k * a_dim1], & c__1); i__1 = *n - k; a[k + k * a_dim1] -= sdot_(&i__1, &work[1], &c__1, &a[k + 1 + k * a_dim1], &c__1); i__1 = *n - k; a[k + (k - 1) * a_dim1] -= sdot_(&i__1, &a[k + 1 + k * a_dim1] , &c__1, &a[k + 1 + (k - 1) * a_dim1], &c__1); i__1 = *n - k; scopy_(&i__1, &a[k + 1 + (k - 1) * a_dim1], &c__1, &work[1], & c__1); i__1 = *n - k; ssymv_(uplo, &i__1, &c_b11, &a[k + 1 + (k + 1) * a_dim1], lda, &work[1], &c__1, &c_b13, &a[k + 1 + (k - 1) * a_dim1] , &c__1); i__1 = *n - k; a[k - 1 + (k - 1) * a_dim1] -= sdot_(&i__1, &work[1], &c__1, & a[k + 1 + (k - 1) * a_dim1], &c__1); } kstep = 2; } kp = (i__1 = ipiv[k], abs(i__1)); if (kp != k) { /* Interchange rows and columns K and KP in the trailing */ /* submatrix A(k-1:n,k-1:n) */ if (kp < *n) { i__1 = *n - kp; sswap_(&i__1, &a[kp + 1 + k * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1); } i__1 = kp - k - 1; sswap_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &a[kp + (k + 1) * a_dim1], lda); temp = a[k + k * a_dim1]; a[k + k * a_dim1] = a[kp + kp * a_dim1]; a[kp + kp * a_dim1] = temp; if (kstep == 2) { temp = a[k + (k - 1) * a_dim1]; a[k + (k - 1) * a_dim1] = a[kp + (k - 1) * a_dim1]; a[kp + (k - 1) * a_dim1] = temp; } } k -= kstep; goto L50; L60: ; } return 0; /* End of SSYTRI */ } /* ssytri_ */
/*----------------------------------------------------------------------
|                 *** Preconditioned FGMRES ***
+-----------------------------------------------------------------------
| Simple restarted flexible GMRES driver (single precision), after the
| ARMS preconditioned FGMRES algorithm.  Y. S. Dec. 2000 -- Apr. 2008
+-----------------------------------------------------------------------
| on entry:
|   n       = length of rhs and sol.
|   smatvec = matrix-vector product callback, called as
|             smatvec(1, in, 0, out); presumably out = 1*A*in + 0*out
|             (confirm against the callback's definition).
|   spsolve = (right) preconditioning callback, called as
|             spsolve(n, z, v) to produce z from v; may be NULL, in which
|             case v is simply copied into z (plain GMRES).
|   rhs     = right-hand side vector.
|   sol     = initial guess on input.
|   tol     = relative tolerance for stopping the iteration.
|   im      = Krylov subspace dimension (restart length).
|   itmax   = points to the maximum number of iterations allowed.
|   fits    = NULL: no output; otherwise one "iteration  residual" line
|             is written to it per inner step.
|
| on return:
|   sol     = the approximate solution (updated in place).
|   *itmax  = the iteration counter at exit; when divergence is detected
|             after a restart it is set to (input *itmax) + 10.
|   return  = 0 if the iteration counter stayed below the input *itmax,
|             1 otherwise.
|
| internal work arrays:
|   vv = (im+1) vectors of length n   (Arnoldi basis)
|   hh = im columns, column i of length i+2 (rotated Hessenberg matrix)
|   z  = im vectors of length n       (preconditioned vectors)
|   c, s = plane rotation coefficients, rs = rhs of the small system
+---------------------------------------------------------------------*/
int sfgmr(int n, void (*smatvec) (float, float[], float, float[]),
          void (*spsolve) (int, float[], float[]),
          float *rhs, float *sol, double tol, int im, int *itmax, FILE * fits)
{
    int maxits = *itmax;
    int inc1 = 1;
    int col, nxt, row, k;
    int its = 0;
    int not_converged;
    float zero = 0.0;
    float one = 1.0;
    float eps1 = 0.0;
    float beta, nrm, nrm_prev, gam, acc;
    float **vv, **z, **hh;
    float *c, *s, *rs;

    /*---- workspace ----*/
    vv = (float **)SUPERLU_MALLOC((im + 1) * sizeof(float *));
    for (k = 0; k <= im; k++)
        vv[k] = floatMalloc(n);
    z = (float **)SUPERLU_MALLOC(im * sizeof(float *));
    hh = (float **)SUPERLU_MALLOC(im * sizeof(float *));
    for (k = 0; k < im; k++) {
        hh[k] = floatMalloc(k + 2);
        z[k] = floatMalloc(n);
    }
    c = floatMalloc(im);
    s = floatMalloc(im);
    rs = floatMalloc(im + 1);

    /*---- outer (restart) loop ----*/
    for (;;) {
        /*---- residual of the current iterate: vv[0] = rhs - A*sol ----*/
        smatvec(one, sol, zero, vv[0]);
        for (k = 0; k < n; k++)
            vv[0][k] = rhs[k] - vv[0][k];
        beta = snrm2_(&n, vv[0], &inc1);

        if (fits != NULL && its == 0)
            fprintf(fits, "%8d %10.2e\n", its, beta);

        /* negated comparison so that a NaN residual also stops here */
        if ( !(beta > tol * snrm2_(&n, rhs, &inc1)) )
            break;

        /*---- first basis vector: vv[0] / beta ----*/
        nrm = 1.0 / beta;
        for (k = 0; k < n; k++)
            vv[0][k] *= nrm;

        /* absolute target is fixed once, from the very first residual */
        if (its == 0)
            eps1 = tol * beta;

        /*---- first entry of the rhs of the Hessenberg system ----*/
        rs[0] = beta;

        for (col = 0; col < im; col++) {
            its++;
            nxt = col + 1;

            /*---- (right) preconditioning: z_j = M^{-1} v_j ----*/
            if (spsolve)
                spsolve(n, z[col], vv[col]);
            else
                scopy_(&n, vv[col], &inc1, z[col], &inc1);

            /*---- w = A z_j ----*/
            smatvec(one, z[col], zero, vv[nxt]);

            /*---- modified Gram-Schmidt against vv[0..col] ----*/
            nrm_prev = snrm2_(&n, vv[nxt], &inc1);
            for (row = 0; row <= col; row++) {
                float neg;
                acc = sdot_(&n, vv[row], &inc1, vv[nxt], &inc1);
                hh[col][row] = acc;
                neg = -acc;
                saxpy_(&n, &neg, vv[row], &inc1, vv[nxt], &inc1);
            }

            /*---- re-orthogonalize while the norm keeps more than halving ----*/
            nrm = snrm2_(&n, vv[nxt], &inc1);
            while (nrm < 0.5 * nrm_prev) {
                nrm_prev = nrm;
                for (row = 0; row <= col; row++) {
                    float neg;
                    acc = sdot_(&n, vv[row], &inc1, vv[nxt], &inc1);
                    hh[col][row] += acc;
                    neg = -acc;
                    saxpy_(&n, &neg, vv[row], &inc1, vv[nxt], &inc1);
                }
                nrm = snrm2_(&n, vv[nxt], &inc1);
            }

            /*---- h_{j+1,j} = ||w||, then normalize w unless it vanished ----*/
            hh[col][nxt] = nrm;
            if (nrm != 0.0) {
                nrm = 1.0 / nrm;
                for (k = 0; k < n; k++)
                    vv[nxt][k] *= nrm;
            }

            /*---- apply the previous rotations to the new column ----*/
            for (row = 0; row < col; row++) {
                acc = hh[col][row];
                hh[col][row] = c[row] * acc + s[row] * hh[col][row + 1];
                hh[col][row + 1] = -s[row] * acc + c[row] * hh[col][row + 1];
            }

            /*---- next plane rotation (identity when the column is zero) ----*/
            gam = sqrt(pow(hh[col][col], 2) + pow(hh[col][nxt], 2));
            if (gam == 0.0) {
                c[col] = one;
                s[col] = zero;
            } else {
                c[col] = hh[col][col] / gam;
                s[col] = hh[col][nxt] / gam;
            }
            rs[nxt] = -s[col] * rs[col];
            rs[col] = c[col] * rs[col];

            /*---- residual norm estimate and convergence test ----*/
            hh[col][col] = c[col] * hh[col][col] + s[col] * hh[col][nxt];
            beta = fabs(rs[nxt]);
            if (fits != NULL)
                fprintf(fits, "%8d %10.2e\n", its, beta);
            if (beta <= eps1 || its >= maxits)
                break;
        }

        /* the loop ran to completion: step back to the last column built */
        if (col == im)
            col--;

        /*---- back substitution on the upper triangular system ----*/
        rs[col] = rs[col] / hh[col][col];
        for (k = col - 1; k >= 0; k--) {
            acc = rs[k];
            for (row = k + 1; row <= col; row++)
                acc -= hh[row][k] * rs[row];
            rs[k] = acc / hh[k][k];
        }

        /*---- sol += sum_j rs[j] * z[j] ----*/
        for (row = 0; row <= col; row++) {
            acc = rs[row];
            for (k = 0; k < n; k++)
                sol[k] += acc * z[row][k];
        }

        /*---- true residual of the updated iterate ----*/
        smatvec(one, sol, zero, vv[0]);
        for (k = 0; k < n; k++)
            vv[0][k] = rhs[k] - vv[0][k];
        beta = snrm2_(&n, vv[0], &inc1);

        /*---- give up if the residual is no better than the initial one ----*/
        /* (eps1 / tol is the first residual norm; NaN also lands here)    */
        if ( !(beta < eps1 / tol) ) {
            its = maxits + 10;
            break;
        }

        /*---- converged, or out of iterations: stop restarting ----*/
        if (beta <= eps1 || its >= maxits)
            break;
    }

    not_converged = (its >= maxits);

    /*---- release workspace ----*/
    for (k = 0; k <= im; k++)
        SUPERLU_FREE(vv[k]);
    SUPERLU_FREE(vv);
    for (k = 0; k < im; k++) {
        SUPERLU_FREE(hh[k]);
        SUPERLU_FREE(z[k]);
    }
    SUPERLU_FREE(hh);
    SUPERLU_FREE(z);
    SUPERLU_FREE(c);
    SUPERLU_FREE(s);
    SUPERLU_FREE(rs);

    *itmax = its;

    return not_converged;
}
/*----end of fgmr ----*/
/* slaic1_: f2c translation of the operation-counting variant of LAPACK SLAIC1 (version 3.0). Performs one step of incremental condition estimation and, in the branches that do arithmetic, adds a fixed increment to the counter latime_1.ops. The function value is always 0; results are returned through sestpr, s and c__. If *job is neither 1 nor 2 the outputs are left untouched. */
/* NOTE(review): uses c__1, c_b5 and latime_1, all defined outside this block -- presumably the stride 1, the constant 1.f passed to r_sign, and a shared timing/op-count structure; confirm against the top of the file. */
/* NOTE(review): the locals are declared static (as f2c emits them), so concurrent calls would share them -- confirm before calling from more than one thread. */
/* Subroutine */ int slaic1_(integer *job, integer *j, real *x, real *sest, real *w, real *gamma, real *sestpr, real *s, real *c__) { /* System generated locals */ real r__1, r__2, r__3, r__4; /* Builtin functions */ double sqrt(doublereal), r_sign(real *, real *); /* Local variables */ static real sine; extern doublereal sdot_(integer *, real *, integer *, real *, integer *); static real test, zeta1, zeta2, b, t, alpha, norma, s1, s2, absgam, absalp; extern doublereal slamch_(char *); static real cosine, absest, eps, tmp; /* -- LAPACK auxiliary routine (instrumented to count ops, version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= SLAIC1 applies one step of incremental condition estimation in its simplest version: Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j lower triangular matrix L, such that twonorm(L*x) = sest Then SLAIC1 computes sestpr, s, c such that the vector [ s*x ] xhat = [ c ] is an approximate singular vector of [ L 0 ] Lhat = [ w' gamma ] in the sense that twonorm(Lhat*xhat) = sestpr. Depending on JOB, an estimate for the largest or smallest singular value is computed. Note that [s c]' and sestpr**2 is an eigenpair of the system diag(sest*sest, 0) + [alpha gamma] * [ alpha ] [ gamma ] where alpha = x'*w. Arguments ========= JOB (input) INTEGER = 1: an estimate for the largest singular value is computed. = 2: an estimate for the smallest singular value is computed. J (input) INTEGER Length of X and W X (input) REAL array, dimension (J) The j-vector x. SEST (input) REAL Estimated singular value of j by j matrix L W (input) REAL array, dimension (J) The j-vector w. GAMMA (input) REAL The diagonal element gamma. SESTPR (output) REAL Estimated singular value of (j+1) by (j+1) matrix Lhat. S (output) REAL Sine needed in forming xhat. C (output) REAL Cosine needed in forming xhat. 
===================================================================== Parameter adjustments */ --w; --x; /* x and w are now indexed 1-based: x[1] and w[1] are the first elements. */ /* Function Body */ eps = slamch_("Epsilon"); alpha = sdot_(j, &x[1], &c__1, &w[1], &c__1); /* eps and alpha are computed before *job is examined, i.e. for every value of *job. */ absalp = dabs(alpha); absgam = dabs(*gamma); absest = dabs(*sest); if (*job == 1) { /* Estimating largest singular value special cases */ if (*sest == 0.f) { s1 = dmax(absgam,absalp); if (s1 == 0.f) { *s = 0.f; *c__ = 1.f; *sestpr = 0.f; } else { latime_1.ops += 9; *s = alpha / s1; *c__ = *gamma / s1; tmp = sqrt(*s * *s + *c__ * *c__); *s /= tmp; *c__ /= tmp; *sestpr = s1 * tmp; } return 0; } else if (absgam <= eps * absest) { latime_1.ops += 7; *s = 1.f; *c__ = 0.f; tmp = dmax(absest,absalp); s1 = absest / tmp; s2 = absalp / tmp; *sestpr = tmp * sqrt(s1 * s1 + s2 * s2); return 0; } else if (absalp <= eps * absest) { s1 = absgam; s2 = absest; if (s1 <= s2) { *s = 1.f; *c__ = 0.f; *sestpr = s2; } else { *s = 0.f; *c__ = 1.f; *sestpr = s1; } return 0; } else if (absest <= eps * absalp || absest <= eps * absgam) { s1 = absgam; s2 = absalp; if (s1 <= s2) { latime_1.ops += 8; tmp = s1 / s2; *s = sqrt(tmp * tmp + 1.f); *sestpr = s2 * *s; *c__ = *gamma / s2 / *s; *s = r_sign(&c_b5, &alpha) / *s; } else { latime_1.ops += 8; tmp = s2 / s1; *c__ = sqrt(tmp * tmp + 1.f); *sestpr = s1 * *c__; *s = alpha / s1 / *c__; *c__ = r_sign(&c_b5, gamma) / *c__; } return 0; } else { /* normal case */ latime_1.ops += 8; zeta1 = alpha / absest; zeta2 = *gamma / absest; b = (1.f - zeta1 * zeta1 - zeta2 * zeta2) * .5f; *c__ = zeta1 * zeta1; if (b > 0.f) { latime_1.ops += 5; t = *c__ / (b + sqrt(b * b + *c__)); } else { latime_1.ops += 4; t = sqrt(b * b + *c__) - b; } latime_1.ops += 12; sine = -zeta1 / t; cosine = -zeta2 / (t + 1.f); tmp = sqrt(sine * sine + cosine * cosine); *s = sine / tmp; *c__ = cosine / tmp; *sestpr = sqrt(t + 1.f) * absest; return 0; } } else if (*job == 2) { /* Estimating smallest singular value special cases */ if (*sest == 0.f) { *sestpr = 0.f; if (dmax(absgam,absalp) == 
0.f) { sine = 1.f; cosine = 0.f; } else { sine = -(*gamma); cosine = alpha; } latime_1.ops += 7; /* Computing MAX */ r__1 = dabs(sine), r__2 = dabs(cosine); s1 = dmax(r__1,r__2); *s = sine / s1; *c__ = cosine / s1; tmp = sqrt(*s * *s + *c__ * *c__); *s /= tmp; *c__ /= tmp; return 0; } else if (absgam <= eps * absest) { *s = 0.f; *c__ = 1.f; *sestpr = absgam; return 0; } else if (absalp <= eps * absest) { s1 = absgam; s2 = absest; if (s1 <= s2) { *s = 0.f; *c__ = 1.f; *sestpr = s1; } else { *s = 1.f; *c__ = 0.f; *sestpr = s2; } return 0; } else if (absest <= eps * absalp || absest <= eps * absgam) { s1 = absgam; s2 = absalp; if (s1 <= s2) { latime_1.ops += 9; tmp = s1 / s2; *c__ = sqrt(tmp * tmp + 1.f); *sestpr = absest * (tmp / *c__); *s = -(*gamma / s2) / *c__; *c__ = r_sign(&c_b5, &alpha) / *c__; } else { latime_1.ops += 8; tmp = s2 / s1; *s = sqrt(tmp * tmp + 1.f); *sestpr = absest / *s; *c__ = alpha / s1 / *s; *s = -r_sign(&c_b5, gamma) / *s; } return 0; } else { /* normal case */ latime_1.ops += 14; zeta1 = alpha / absest; zeta2 = *gamma / absest; /* Computing MAX */ r__3 = zeta1 * zeta1 + 1.f + (r__1 = zeta1 * zeta2, dabs(r__1)), r__4 = (r__2 = zeta1 * zeta2, dabs(r__2)) + zeta2 * zeta2; norma = dmax(r__3,r__4); /* See if root is closer to zero or to ONE */ test = (zeta1 - zeta2) * 2.f * (zeta1 + zeta2) + 1.f; if (test >= 0.f) { /* root is close to zero, compute directly */ latime_1.ops += 20; b = (zeta1 * zeta1 + zeta2 * zeta2 + 1.f) * .5f; *c__ = zeta2 * zeta2; t = *c__ / (b + sqrt((r__1 = b * b - *c__, dabs(r__1)))); sine = zeta1 / (1.f - t); cosine = -zeta2 / t; *sestpr = sqrt(t + eps * 4.f * eps * norma) * absest; } else { /* root is closer to ONE, shift by that amount */ latime_1.ops += 6; b = (zeta2 * zeta2 + zeta1 * zeta1 - 1.f) * .5f; *c__ = zeta1 * zeta1; if (b >= 0.f) { latime_1.ops += 5; t = -(*c__) / (b + sqrt(b * b + *c__)); } else { latime_1.ops += 4; t = b - sqrt(b * b + *c__); } latime_1.ops += 10; sine = -zeta1 / t; cosine = -zeta2 / (t + 
1.f); *sestpr = sqrt(t + 1.f + eps * 4.f * eps * norma) * absest; } latime_1.ops += 6; /* Both sub-cases share this final normalization of (sine, cosine). */ tmp = sqrt(sine * sine + cosine * cosine); *s = sine / tmp; *c__ = cosine / tmp; return 0; } } return 0; /* End of SLAIC1 */ } /* slaic1_ */
/* Subroutine */ int ssytd2_(char *uplo, integer *n, real *a, integer *lda,
        real *d__, real *e, real *tau, integer *info)
{
/*  -- LAPACK routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  SSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal */
/*  form T by an orthogonal similarity transformation: Q' * A * Q = T. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          Selects which triangle of the symmetric matrix A is stored: */
/*          = 'U':  upper triangle */
/*          = 'L':  lower triangle */

/*  N       (input) INTEGER */
/*          Order of the matrix A.  N >= 0. */

/*  A       (input/output) REAL array, dimension (LDA,N) */
/*          On entry, the symmetric matrix A.  Only the triangle selected */
/*          by UPLO (leading n-by-n part) is referenced; the opposite */
/*          strict triangle is not touched. */
/*          On exit with UPLO = 'U', the diagonal and first superdiagonal */
/*          hold the tridiagonal matrix T, and the entries above the */
/*          first superdiagonal, together with TAU, encode the orthogonal */
/*          matrix Q as a product of elementary reflectors. */
/*          On exit with UPLO = 'L', the diagonal and first subdiagonal */
/*          hold T, and the entries below the first subdiagonal, together */
/*          with TAU, encode Q.  See Further Details. */

/*  LDA     (input) INTEGER */
/*          Leading dimension of A.  LDA >= max(1,N). */

/*  D       (output) REAL array, dimension (N) */
/*          Diagonal of T:  D(i) = A(i,i). */

/*  E       (output) REAL array, dimension (N-1) */
/*          Off-diagonal of T:  E(i) = A(i,i+1) if UPLO = 'U', */
/*          E(i) = A(i+1,i) if UPLO = 'L'. */

/*  TAU     (output) REAL array, dimension (N-1) */
/*          Scalar factors of the elementary reflectors (see Further */
/*          Details).  Also used as workspace while each reflector is */
/*          being applied. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  Further Details */
/*  =============== */

/*  UPLO = 'U':  Q = H(n-1) . . . H(2) H(1), with */
/*     H(i) = I - tau * v * v' */
/*  where tau is a real scalar and v is a real vector with v(i+1:n) = 0 */
/*  and v(i) = 1; v(1:i-1) is stored on exit in A(1:i-1,i+1), and tau in */
/*  TAU(i). */

/*  UPLO = 'L':  Q = H(1) H(2) . . . H(n-1), with */
/*     H(i) = I - tau * v * v' */
/*  where tau is a real scalar and v is a real vector with v(1:i) = 0 */
/*  and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), and tau in */
/*  TAU(i). */

/*  Layout of A on exit, illustrated for n = 5: */

/*  if UPLO = 'U':                       if UPLO = 'L': */

/*    (  d   e   v2  v3  v4 )              (  d                  ) */
/*    (      d   e   v3  v4 )              (  e   d              ) */
/*    (          d   e   v4 )              (  v1  e   d          ) */
/*    (              d   e  )              (  v1  v2  e   d      ) */
/*    (                  d  )              (  v1  v2  v3  e   d  ) */

/*  d and e are the diagonal and off-diagonal elements of T; vi is an */
/*  element of the vector defining H(i). */

/*  ===================================================================== */

    /* BLAS / LAPACK routines used by this routine */
    extern logical lsame_(char *, char *);
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *,
            real *);
    extern /* Subroutine */ int ssymv_(char *, integer *, real *, real *,
            integer *, real *, integer *, real *, real *, integer *);
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *,
            real *, integer *);
    extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *,
            integer *, real *, integer *, real *, integer *);
    extern /* Subroutine */ int xerbla_(char *, integer *);

    /* Local variables */
    integer ld;         /* column stride of A (copy of *lda) */
    integer origin;     /* offset that maps A(1,1) onto the shifted base */
    integer k;          /* index of the reflector being generated */
    integer next;       /* k + 1 */
    integer len;        /* length of the active sub-problem (lower case) */
    integer last;       /* final value of k in the lower-triangle sweep */
    integer below;      /* k + 2, clamped to N before use */
    integer arg_no;     /* positive argument number reported to XERBLA */
    real refl;          /* tau of the current reflector */
    real w_scale;       /* -1/2 * tau * (x'*v) */
    logical is_upper;
    real *v;            /* first stored element of the reflector vector */
    real *pivot;        /* A(k,k+1): off-diagonal entry in the upper case */
    real *trailing;     /* A(k+1,k+1): corner of the trailing block */

    /* Shift the array bases so that Fortran-style 1-based subscripts */
    /* can be used unchanged below. */
    ld = *lda;
    origin = 1 + ld;
    a -= origin;
    --d__;
    --e;
    --tau;

    /* Validate the input arguments */
    *info = 0;
    is_upper = lsame_(uplo, "U");
    if (! is_upper && ! lsame_(uplo, "L")) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1,*n)) {
        *info = -4;
    }
    if (*info != 0) {
        arg_no = -(*info);
        xerbla_("SSYTD2", &arg_no);
        return 0;
    }

    /* Nothing to do for an empty matrix */
    if (*n <= 0) {
        return 0;
    }

    if (! is_upper) {

        /* Reduce the lower triangle of A, sweeping k = 1, ..., n-1 */

        last = *n - 1;
        for (k = 1; k <= last; ++k) {
            next = k + 1;
            v = &a[next + k * ld];              /* A(k+1,k)   */
            trailing = &a[next + next * ld];    /* A(k+1,k+1) */

            /* Generate elementary reflector H(k) = I - tau * v * v' */
            /* to annihilate A(k+2:n,k) */

            len = *n - k;
            below = k + 2;
            slarfg_(&len, v, &a[min(below,*n) + k * ld], &c__1, &refl);
            e[k] = *v;

            if (refl != 0.f) {

                /* Apply H(k) from both sides to A(k+1:n,k+1:n). */
                /* The leading 1 of v is written into A temporarily. */

                *v = 1.f;

                /* Compute  x := tau * A * v  storing x in TAU(k:n-1). */
                /* len is re-derived before each call because it is */
                /* passed by address. */

                len = *n - k;
                ssymv_(uplo, &len, &refl, trailing, lda, v, &c__1, &c_b8,
                        &tau[k], &c__1);

                /* Compute  w := x - 1/2 * tau * (x'*v) * v */

                len = *n - k;
                w_scale = refl * -.5f * sdot_(&len, &tau[k], &c__1, v,
                        &c__1);
                len = *n - k;
                saxpy_(&len, &w_scale, v, &c__1, &tau[k], &c__1);

                /* Apply the transformation as a rank-2 update: */
                /*    A := A - v * w' - w * v' */

                len = *n - k;
                ssyr2_(uplo, &len, &c_b14, v, &c__1, &tau[k], &c__1,
                        trailing, lda);

                /* Put the off-diagonal element of T back */
                *v = e[k];
            }
            d__[k] = a[k + k * ld];
            tau[k] = refl;
        }
        d__[*n] = a[*n + *n * ld];

    } else {

        /* Reduce the upper triangle of A, sweeping k = n-1, ..., 1 */

        for (k = *n - 1; k >= 1; --k) {
            next = k + 1;
            v = &a[next * ld + 1];          /* A(1,k+1) */
            pivot = &a[k + next * ld];      /* A(k,k+1) */

            /* Generate elementary reflector H(k) = I - tau * v * v' */
            /* to annihilate A(1:k-1,k+1) */

            slarfg_(&k, pivot, v, &c__1, &refl);
            e[k] = *pivot;

            if (refl != 0.f) {

                /* Apply H(k) from both sides to A(1:k,1:k). */
                /* The trailing 1 of v is written into A temporarily. */

                *pivot = 1.f;

                /* Compute  x := tau * A * v  storing x in TAU(1:k) */

                ssymv_(uplo, &k, &refl, &a[origin], lda, v, &c__1, &c_b8,
                        &tau[1], &c__1);

                /* Compute  w := x - 1/2 * tau * (x'*v) * v */

                w_scale = refl * -.5f * sdot_(&k, &tau[1], &c__1, v, &c__1);
                saxpy_(&k, &w_scale, v, &c__1, &tau[1], &c__1);

                /* Apply the transformation as a rank-2 update: */
                /*    A := A - v * w' - w * v' */

                ssyr2_(uplo, &k, &c_b14, v, &c__1, &tau[1], &c__1,
                        &a[origin], lda);

                /* Put the off-diagonal element of T back */
                *pivot = e[k];
            }
            d__[next] = a[next + next * ld];
            tau[k] = refl;
        }
        d__[1] = a[ld + 1];
    }

    return 0;

/*     End of SSYTD2 */

} /* ssytd2_ */
/* Subroutine */ int slatps_(char *uplo, char *trans, char *diag, char *
        normin, integer *n, real *ap, real *x, real *scale, real *cnorm,
        integer *info)
{
/*  -- LAPACK auxiliary routine (version 3.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       June 30, 1992


    Purpose
    =======

    SLATPS solves one of the triangular systems

       A *x = s*b  or  A'*x = s*b

    with scaling to prevent overflow, where A is an upper or lower
    triangular matrix stored in packed form.  Here A' denotes the
    transpose of A, x and b are n-element vectors, and s is a scaling
    factor, usually less than or equal to 1, chosen so that the
    components of x will be less than the overflow threshold.  If the
    unscaled problem will not cause overflow, the Level 2 BLAS routine
    STPSV is called. If the matrix A is singular (A(j,j) = 0 for some j),
    then s is set to 0 and a non-trivial solution to A*x = 0 is returned.

    Arguments
    =========

    UPLO    (input) CHARACTER*1
            Specifies whether the matrix A is upper or lower triangular.
            = 'U':  Upper triangular
            = 'L':  Lower triangular

    TRANS   (input) CHARACTER*1
            Specifies the operation applied to A.
            = 'N':  Solve A * x = s*b  (No transpose)
            = 'T':  Solve A'* x = s*b  (Transpose)
            = 'C':  Solve A'* x = s*b  (Conjugate transpose = Transpose)

    DIAG    (input) CHARACTER*1
            Specifies whether or not the matrix A is unit triangular.
            = 'N':  Non-unit triangular
            = 'U':  Unit triangular

    NORMIN  (input) CHARACTER*1
            Specifies whether CNORM has been set or not.
            = 'Y':  CNORM contains the column norms on entry
            = 'N':  CNORM is not set on entry.  On exit, the norms will
                    be computed and stored in CNORM.

    N       (input) INTEGER
            The order of the matrix A.  N >= 0.

    AP      (input) REAL array, dimension (N*(N+1)/2)
            The upper or lower triangular matrix A, packed columnwise in
            a linear array.  The j-th column of A is stored in the array
            AP as follows:
            if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
            if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.

    X       (input/output) REAL array, dimension (N)
            On entry, the right hand side b of the triangular system.
            On exit, X is overwritten by the solution vector x.

    SCALE   (output) REAL
            The scaling factor s for the triangular system
               A * x = s*b  or  A'* x = s*b.
            If SCALE = 0, the matrix A is singular or badly scaled, and
            the vector x is an exact or approximate solution to A*x = 0.

    CNORM   (input or output) REAL array, dimension (N)

            If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
            contains the norm of the off-diagonal part of the j-th column
            of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
            to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
            must be greater than or equal to the 1-norm.

            If NORMIN = 'N', CNORM is an output argument and CNORM(j)
            returns the 1-norm of the offdiagonal part of the j-th column
            of A.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -k, the k-th argument had an illegal value

    Further Details
    ======= =======

    A rough bound on x is computed; if that is less than overflow, STPSV
    is called, otherwise, specific code is used which checks for possible
    overflow or divide-by-zero at every operation.

    A columnwise scheme is used for solving A*x = b.  The basic algorithm
    if A is lower triangular is

         x[1:n] := b[1:n]
         for j = 1, ..., n
              x(j) := x(j) / A(j,j)
              x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]
         end

    Define bounds on the components of x after j iterations of the loop:
       M(j) = bound on x[1:j]
       G(j) = bound on x[j+1:n]
    Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.

    Then for iteration j+1 we have
       M(j+1) <= G(j) / | A(j+1,j+1) |
       G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
              <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )

    where CNORM(j+1) is greater than or equal to the infinity-norm of
    column j+1 of A, not counting the diagonal.  Hence

       G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )
                    1<=i<=j
    and

       |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )
                                     1<=i< j

    Since |x(j)| <= M(j), we use the Level 2 BLAS routine STPSV if the
    reciprocal of the largest M(j), j=1,..,n, is larger than
    max(underflow, 1/overflow).

    The bound on x(j) is also used to determine when a step in the
    columnwise method can be performed without fear of overflow.  If
    the computed bound is greater than a large constant, x is scaled to
    prevent overflow, but if the bound overflows, x is set to 0, x(j) to
    1, and scale to 0, and a non-trivial solution to A*x = 0 is found.

    Similarly, a row-wise scheme is used to solve A'*x = b.  The basic
    algorithm for A upper triangular is

         for j = 1, ..., n
              x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
         end

    We simultaneously compute two bounds
         G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
         M(j) = bound on x(i), 1<=i<=j

    The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
    add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
    Then the bound on x(j) is

         M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |

              <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )
                        1<=i<=j

    and we can safely call STPSV if 1/M(n) and 1/G(n) are both greater
    than max(underflow, 1/overflow).

    Implementation note
    ===================

    All scratch variables below are automatic.  The f2c default of
    declaring them `static` left the routine with hidden state shared by
    every call, so it could not be used re-entrantly or from several
    threads at once.  Only the two read-only constants remain static.

    =====================================================================
*/
    /* Table of constant values (read-only; passed by address to BLAS) */
    static integer c__1 = 1;
    static real c_b36 = .5f;

    /* System generated locals */
    integer i__1, i__2, i__3;
    real r__1, r__2, r__3;

    /* External BLAS / LAPACK routines */
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    extern doublereal sasum_(integer *, real *, integer *);
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *,
            real *, integer *), stpsv_(char *, char *, char *, integer *,
            real *, real *, integer *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer isamax_(integer *, real *, integer *);

    /* Local variables */
    integer jinc, jlen;
    real xbnd;
    integer imax;
    real tmax;
    /* tjjs is the one scratch variable with a path that reads it before
       any assignment in this call (the A' solve when TSCAL is NaN, so
       that USCAL == TSCAL compares false).  Start it at zero, which is
       the value the former static had on a first call. */
    real tjjs = 0.f;
    real xmax, grow, sumj;
    integer i__, j;
    real tscal, uscal;
    integer jlast;
    logical upper;
    integer ip;
    real xj;
    real bignum;
    logical notran;
    integer jfirst;
    real smlnum;
    logical nounit;
    real rec, tjj;

    /* Parameter adjustments (allow Fortran-style 1-based subscripts) */
    --cnorm;
    --x;
    --ap;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    notran = lsame_(trans, "N");
    nounit = lsame_(diag, "N");

/*     Test the input parameters. */

    if (! upper && ! lsame_(uplo, "L")) {
        *info = -1;
    } else if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, "C")) {
        *info = -2;
    } else if (! nounit && ! lsame_(diag, "U")) {
        *info = -3;
    } else if (! lsame_(normin, "Y") && ! lsame_(normin, "N")) {
        *info = -4;
    } else if (*n < 0) {
        *info = -5;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("SLATPS", &i__1);
        return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
        return 0;
    }

/*     Determine machine dependent parameters to control overflow. */

    smlnum = slamch_("Safe minimum") / slamch_("Precision");
    bignum = 1.f / smlnum;
    *scale = 1.f;

    if (lsame_(normin, "N")) {

/*        Compute the 1-norm of each column, not including the diagonal. */

        if (upper) {

/*           A is upper triangular. */

            ip = 1;
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                i__2 = j - 1;
                cnorm[j] = sasum_(&i__2, &ap[ip], &c__1);
                ip += j;
/* L10: */
            }
        } else {

/*           A is lower triangular. */

            ip = 1;
            i__1 = *n - 1;
            for (j = 1; j <= i__1; ++j) {
                i__2 = *n - j;
                cnorm[j] = sasum_(&i__2, &ap[ip + 1], &c__1);
                ip = ip + *n - j + 1;
/* L20: */
            }
            cnorm[*n] = 0.f;
        }
    }

/*     Scale the column norms by TSCAL if the maximum element in CNORM is
       greater than BIGNUM. */

    imax = isamax_(n, &cnorm[1], &c__1);
    tmax = cnorm[imax];
    if (tmax <= bignum) {
        tscal = 1.f;
    } else {
        tscal = 1.f / (smlnum * tmax);
        sscal_(n, &tscal, &cnorm[1], &c__1);
    }

/*     Compute a bound on the computed solution vector to see if the
       Level 2 BLAS routine STPSV can be used. */

    j = isamax_(n, &x[1], &c__1);
    xmax = (r__1 = x[j], dabs(r__1));
    xbnd = xmax;
    if (notran) {

/*        Compute the growth in A * x = b. */

        if (upper) {
            jfirst = *n;
            jlast = 1;
            jinc = -1;
        } else {
            jfirst = 1;
            jlast = *n;
            jinc = 1;
        }

        if (tscal != 1.f) {
            grow = 0.f;
            goto L50;
        }

        if (nounit) {

/*           A is non-unit triangular.

             Compute GROW = 1/G(j) and XBND = 1/M(j).
             Initially, G(0) = max{x(i), i=1,...,n}. */

            grow = 1.f / dmax(xbnd,smlnum);
            xbnd = grow;
            ip = jfirst * (jfirst + 1) / 2;
            jlen = *n;
            i__1 = jlast;
            i__2 = jinc;
            for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Exit the loop if the growth factor is too small. */

                if (grow <= smlnum) {
                    goto L50;
                }

/*              M(j) = G(j-1) / abs(A(j,j)) */

                tjj = (r__1 = ap[ip], dabs(r__1));
/* Computing MIN */
                r__1 = xbnd, r__2 = dmin(1.f,tjj) * grow;
                xbnd = dmin(r__1,r__2);
                if (tjj + cnorm[j] >= smlnum) {

/*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */

                    grow *= tjj / (tjj + cnorm[j]);
                } else {

/*                 G(j) could overflow, set GROW to 0. */

                    grow = 0.f;
                }
                ip += jinc * jlen;
                --jlen;
/* L30: */
            }
            grow = xbnd;
        } else {

/*           A is unit triangular.

             Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */

/* Computing MIN */
            r__1 = 1.f, r__2 = 1.f / dmax(xbnd,smlnum);
            grow = dmin(r__1,r__2);
            i__2 = jlast;
            i__1 = jinc;
            for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

/*              Exit the loop if the growth factor is too small. */

                if (grow <= smlnum) {
                    goto L50;
                }

/*              G(j) = G(j-1)*( 1 + CNORM(j) ) */

                grow *= 1.f / (cnorm[j] + 1.f);
/* L40: */
            }
        }
L50:

        ;
    } else {

/*        Compute the growth in A' * x = b. */

        if (upper) {
            jfirst = 1;
            jlast = *n;
            jinc = 1;
        } else {
            jfirst = *n;
            jlast = 1;
            jinc = -1;
        }

        if (tscal != 1.f) {
            grow = 0.f;
            goto L80;
        }

        if (nounit) {

/*           A is non-unit triangular.

             Compute GROW = 1/G(j) and XBND = 1/M(j).
             Initially, M(0) = max{x(i), i=1,...,n}. */

            grow = 1.f / dmax(xbnd,smlnum);
            xbnd = grow;
            ip = jfirst * (jfirst + 1) / 2;
            jlen = 1;
            i__1 = jlast;
            i__2 = jinc;
            for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Exit the loop if the growth factor is too small. */

                if (grow <= smlnum) {
                    goto L80;
                }

/*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */

                xj = cnorm[j] + 1.f;
/* Computing MIN */
                r__1 = grow, r__2 = xbnd / xj;
                grow = dmin(r__1,r__2);

/*              M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */

                tjj = (r__1 = ap[ip], dabs(r__1));
                if (xj > tjj) {
                    xbnd *= tjj / xj;
                }
                ++jlen;
                ip += jinc * jlen;
/* L60: */
            }
            grow = dmin(grow,xbnd);
        } else {

/*           A is unit triangular.

             Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */

/* Computing MIN */
            r__1 = 1.f, r__2 = 1.f / dmax(xbnd,smlnum);
            grow = dmin(r__1,r__2);
            i__2 = jlast;
            i__1 = jinc;
            for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

/*              Exit the loop if the growth factor is too small. */

                if (grow <= smlnum) {
                    goto L80;
                }

/*              G(j) = ( 1 + CNORM(j) )*G(j-1) */

                xj = cnorm[j] + 1.f;
                grow /= xj;
/* L70: */
            }
        }
L80:
        ;
    }

    if (grow * tscal > smlnum) {

/*        Use the Level 2 BLAS solve if the reciprocal of the bound on
          elements of X is not too small. */

        stpsv_(uplo, trans, diag, n, &ap[1], &x[1], &c__1);
    } else {

/*        Use a Level 1 BLAS solve, scaling intermediate results. */

        if (xmax > bignum) {

/*           Scale X so that its components are less than or equal to
             BIGNUM in absolute value. */

            *scale = bignum / xmax;
            sscal_(n, scale, &x[1], &c__1);
            xmax = bignum;
        }

        if (notran) {

/*           Solve A * x = b */

            ip = jfirst * (jfirst + 1) / 2;
            i__1 = jlast;
            i__2 = jinc;
            for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Compute x(j) = b(j) / A(j,j), scaling x if necessary. */

                xj = (r__1 = x[j], dabs(r__1));
                if (nounit) {
                    tjjs = ap[ip] * tscal;
                } else {
                    tjjs = tscal;
                    if (tscal == 1.f) {
                        goto L95;
                    }
                }
                tjj = dabs(tjjs);
                if (tjj > smlnum) {

/*                    abs(A(j,j)) > SMLNUM: */

                    if (tjj < 1.f) {
                        if (xj > tjj * bignum) {

/*                          Scale x by 1/b(j). */

                            rec = 1.f / xj;
                            sscal_(n, &rec, &x[1], &c__1);
                            *scale *= rec;
                            xmax *= rec;
                        }
                    }
                    x[j] /= tjjs;
                    xj = (r__1 = x[j], dabs(r__1));
                } else if (tjj > 0.f) {

/*                    0 < abs(A(j,j)) <= SMLNUM: */

                    if (xj > tjj * bignum) {

/*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
                         to avoid overflow when dividing by A(j,j). */

                        rec = tjj * bignum / xj;
                        if (cnorm[j] > 1.f) {

/*                          Scale by 1/CNORM(j) to avoid overflow when
                            multiplying x(j) times column j. */

                            rec /= cnorm[j];
                        }
                        sscal_(n, &rec, &x[1], &c__1);
                        *scale *= rec;
                        xmax *= rec;
                    }
                    x[j] /= tjjs;
                    xj = (r__1 = x[j], dabs(r__1));
                } else {

/*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
                      scale = 0, and compute a solution to A*x = 0. */

                    i__3 = *n;
                    for (i__ = 1; i__ <= i__3; ++i__) {
                        x[i__] = 0.f;
/* L90: */
                    }
                    x[j] = 1.f;
                    xj = 1.f;
                    *scale = 0.f;
                    xmax = 0.f;
                }
L95:

/*              Scale x if necessary to avoid overflow when adding a
                multiple of column j of A. */

                if (xj > 1.f) {
                    rec = 1.f / xj;
                    if (cnorm[j] > (bignum - xmax) * rec) {

/*                    Scale x by 1/(2*abs(x(j))). */

                        rec *= .5f;
                        sscal_(n, &rec, &x[1], &c__1);
                        *scale *= rec;
                    }
                } else if (xj * cnorm[j] > bignum - xmax) {

/*                 Scale x by 1/2. */

                    sscal_(n, &c_b36, &x[1], &c__1);
                    *scale *= .5f;
                }

                if (upper) {
                    if (j > 1) {

/*                    Compute the update
                         x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) */

                        i__3 = j - 1;
                        r__1 = -x[j] * tscal;
                        saxpy_(&i__3, &r__1, &ap[ip - j + 1], &c__1, &x[1],
                                &c__1);
                        i__3 = j - 1;
                        i__ = isamax_(&i__3, &x[1], &c__1);
                        xmax = (r__1 = x[i__], dabs(r__1));
                    }
                    ip -= j;
                } else {
                    if (j < *n) {

/*                    Compute the update
                         x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) */

                        i__3 = *n - j;
                        r__1 = -x[j] * tscal;
                        saxpy_(&i__3, &r__1, &ap[ip + 1], &c__1, &x[j + 1],
                                &c__1);
                        i__3 = *n - j;
                        i__ = j + isamax_(&i__3, &x[j + 1], &c__1);
                        xmax = (r__1 = x[i__], dabs(r__1));
                    }
                    ip = ip + *n - j + 1;
                }
/* L100: */
            }

        } else {

/*           Solve A' * x = b */

            ip = jfirst * (jfirst + 1) / 2;
            jlen = 1;
            i__2 = jlast;
            i__1 = jinc;
            for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

/*              Compute x(j) = b(j) - sum A(k,j)*x(k).
                                      k<>j */

                xj = (r__1 = x[j], dabs(r__1));
                uscal = tscal;
                rec = 1.f / dmax(xmax,1.f);
                if (cnorm[j] > (bignum - xj) * rec) {

/*                 If x(j) could overflow, scale x by 1/(2*XMAX). */

                    rec *= .5f;
                    if (nounit) {
                        tjjs = ap[ip] * tscal;
                    } else {
                        tjjs = tscal;
                    }
                    tjj = dabs(tjjs);
                    if (tjj > 1.f) {

/*                       Divide by A(j,j) when scaling x if A(j,j) > 1. */

/* Computing MIN */
                        r__1 = 1.f, r__2 = rec * tjj;
                        rec = dmin(r__1,r__2);
                        uscal /= tjjs;
                    }
                    if (rec < 1.f) {
                        sscal_(n, &rec, &x[1], &c__1);
                        *scale *= rec;
                        xmax *= rec;
                    }
                }

                sumj = 0.f;
                if (uscal == 1.f) {

/*                 If the scaling needed for A in the dot product is 1,
                   call SDOT to perform the dot product. */

                    if (upper) {
                        i__3 = j - 1;
                        sumj = sdot_(&i__3, &ap[ip - j + 1], &c__1, &x[1],
                                &c__1);
                    } else if (j < *n) {
                        i__3 = *n - j;
                        sumj = sdot_(&i__3, &ap[ip + 1], &c__1, &x[j + 1],
                                &c__1);
                    }
                } else {

/*                 Otherwise, use in-line code for the dot product. */

                    if (upper) {
                        i__3 = j - 1;
                        for (i__ = 1; i__ <= i__3; ++i__) {
                            sumj += ap[ip - j + i__] * uscal * x[i__];
/* L110: */
                        }
                    } else if (j < *n) {
                        i__3 = *n - j;
                        for (i__ = 1; i__ <= i__3; ++i__) {
                            sumj += ap[ip + i__] * uscal * x[j + i__];
/* L120: */
                        }
                    }
                }

                if (uscal == tscal) {

/*                 Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j)
                   was not used to scale the dotproduct. */

                    x[j] -= sumj;
                    xj = (r__1 = x[j], dabs(r__1));
                    if (nounit) {

/*                    Compute x(j) = x(j) / A(j,j), scaling if necessary. */

                        tjjs = ap[ip] * tscal;
                    } else {
                        tjjs = tscal;
                        if (tscal == 1.f) {
                            goto L135;
                        }
                    }
                    tjj = dabs(tjjs);
                    if (tjj > smlnum) {

/*                       abs(A(j,j)) > SMLNUM: */

                        if (tjj < 1.f) {
                            if (xj > tjj * bignum) {

/*                             Scale X by 1/abs(x(j)). */

                                rec = 1.f / xj;
                                sscal_(n, &rec, &x[1], &c__1);
                                *scale *= rec;
                                xmax *= rec;
                            }
                        }
                        x[j] /= tjjs;
                    } else if (tjj > 0.f) {

/*                       0 < abs(A(j,j)) <= SMLNUM: */

                        if (xj > tjj * bignum) {

/*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */

                            rec = tjj * bignum / xj;
                            sscal_(n, &rec, &x[1], &c__1);
                            *scale *= rec;
                            xmax *= rec;
                        }
                        x[j] /= tjjs;
                    } else {

/*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
                         scale = 0, and compute a solution to A'*x = 0. */

                        i__3 = *n;
                        for (i__ = 1; i__ <= i__3; ++i__) {
                            x[i__] = 0.f;
/* L130: */
                        }
                        x[j] = 1.f;
                        *scale = 0.f;
                        xmax = 0.f;
                    }
L135:
                    ;
                } else {

/*                 Compute x(j) := x(j) / A(j,j)  - sumj if the dot
                   product has already been divided by 1/A(j,j). */

                    x[j] = x[j] / tjjs - sumj;
                }
/* Computing MAX */
                r__2 = xmax, r__3 = (r__1 = x[j], dabs(r__1));
                xmax = dmax(r__2,r__3);
                ++jlen;
                ip += jinc * jlen;
/* L140: */
            }
        }
        *scale /= tscal;
    }

/*     Scale the column norms by 1/TSCAL for return. */

    if (tscal != 1.f) {
        r__1 = 1.f / tscal;
        sscal_(n, &r__1, &cnorm[1], &c__1);
    }

    return 0;

/*     End of SLATPS */

} /* slatps_ */
/* Subroutine */ int slatbs_(char *uplo, char *trans, char *diag, char * normin, integer *n, integer *kd, real *ab, integer *ldab, real *x, real *scale, real *cnorm, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4; real r__1, r__2, r__3; /* Local variables */ integer i__, j; real xj, rec, tjj; integer jinc, jlen; real xbnd; integer imax; real tmax, tjjs; extern doublereal sdot_(integer *, real *, integer *, real *, integer *); real xmax, grow, sumj; integer maind; extern logical lsame_(char *, char *); extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); real tscal, uscal; integer jlast; extern doublereal sasum_(integer *, real *, integer *); logical upper; extern /* Subroutine */ int stbsv_(char *, char *, char *, integer *, integer *, real *, integer *, real *, integer *), saxpy_(integer *, real *, real *, integer *, real *, integer *); extern doublereal slamch_(char *); extern /* Subroutine */ int xerbla_(char *, integer *); real bignum; extern integer isamax_(integer *, real *, integer *); logical notran; integer jfirst; real smlnum; logical nounit; /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLATBS solves one of the triangular systems */ /* A *x = s*b or A'*x = s*b */ /* with scaling to prevent overflow, where A is an upper or lower */ /* triangular band matrix. Here A' denotes the transpose of A, x and b */ /* are n-element vectors, and s is a scaling factor, usually less than */ /* or equal to 1, chosen so that the components of x will be less than */ /* the overflow threshold. If the unscaled problem will not cause */ /* overflow, the Level 2 BLAS routine STBSV is called. 
If the matrix A */ /* is singular (A(j,j) = 0 for some j), then s is set to 0 and a */ /* non-trivial solution to A*x = 0 is returned. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the matrix A is upper or lower triangular. */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* TRANS (input) CHARACTER*1 */ /* Specifies the operation applied to A. */ /* = 'N': Solve A * x = s*b (No transpose) */ /* = 'T': Solve A'* x = s*b (Transpose) */ /* = 'C': Solve A'* x = s*b (Conjugate transpose = Transpose) */ /* DIAG (input) CHARACTER*1 */ /* Specifies whether or not the matrix A is unit triangular. */ /* = 'N': Non-unit triangular */ /* = 'U': Unit triangular */ /* NORMIN (input) CHARACTER*1 */ /* Specifies whether CNORM has been set or not. */ /* = 'Y': CNORM contains the column norms on entry */ /* = 'N': CNORM is not set on entry. On exit, the norms will */ /* be computed and stored in CNORM. */ /* N (input) INTEGER */ /* The order of the matrix A. N >= 0. */ /* KD (input) INTEGER */ /* The number of subdiagonals or superdiagonals in the */ /* triangular matrix A. KD >= 0. */ /* AB (input) REAL array, dimension (LDAB,N) */ /* The upper or lower triangular band matrix A, stored in the */ /* first KD+1 rows of the array. The j-th column of A is stored */ /* in the j-th column of the array AB as follows: */ /* if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */ /* if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd). */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= KD+1. */ /* X (input/output) REAL array, dimension (N) */ /* On entry, the right hand side b of the triangular system. */ /* On exit, X is overwritten by the solution vector x. */ /* SCALE (output) REAL */ /* The scaling factor s for the triangular system */ /* A * x = s*b or A'* x = s*b. 
*/ /* If SCALE = 0, the matrix A is singular or badly scaled, and */ /* the vector x is an exact or approximate solution to A*x = 0. */ /* CNORM (input or output) REAL array, dimension (N) */ /* If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */ /* contains the norm of the off-diagonal part of the j-th column */ /* of A. If TRANS = 'N', CNORM(j) must be greater than or equal */ /* to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */ /* must be greater than or equal to the 1-norm. */ /* If NORMIN = 'N', CNORM is an output argument and CNORM(j) */ /* returns the 1-norm of the offdiagonal part of the j-th column */ /* of A. */ /* INFO (output) INTEGER */ /* = 0: successful exit */ /* < 0: if INFO = -k, the k-th argument had an illegal value */ /* Further Details */ /* ======= ======= */ /* A rough bound on x is computed; if that is less than overflow, STBSV */ /* is called, otherwise, specific code is used which checks for possible */ /* overflow or divide-by-zero at every operation. */ /* A columnwise scheme is used for solving A*x = b. The basic algorithm */ /* if A is lower triangular is */ /* x[1:n] := b[1:n] */ /* for j = 1, ..., n */ /* x(j) := x(j) / A(j,j) */ /* x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] */ /* end */ /* Define bounds on the components of x after j iterations of the loop: */ /* M(j) = bound on x[1:j] */ /* G(j) = bound on x[j+1:n] */ /* Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. */ /* Then for iteration j+1 we have */ /* M(j+1) <= G(j) / | A(j+1,j+1) | */ /* G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | */ /* <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) */ /* where CNORM(j+1) is greater than or equal to the infinity-norm of */ /* column j+1 of A, not counting the diagonal. 
Hence */ /* G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) */ /* 1<=i<=j */ /* and */ /* |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) */ /* 1<=i< j */ /* Since |x(j)| <= M(j), we use the Level 2 BLAS routine STBSV if the */ /* reciprocal of the largest M(j), j=1,..,n, is larger than */ /* max(underflow, 1/overflow). */ /* The bound on x(j) is also used to determine when a step in the */ /* columnwise method can be performed without fear of overflow. If */ /* the computed bound is greater than a large constant, x is scaled to */ /* prevent overflow, but if the bound overflows, x is set to 0, x(j) to */ /* 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. */ /* Similarly, a row-wise scheme is used to solve A'*x = b. The basic */ /* algorithm for A upper triangular is */ /* for j = 1, ..., n */ /* x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) */ /* end */ /* We simultaneously compute two bounds */ /* G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j */ /* M(j) = bound on x(i), 1<=i<=j */ /* The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we */ /* add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. */ /* Then the bound on x(j) is */ /* M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | */ /* <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) */ /* 1<=i<=j */ /* and we can safely call STBSV if 1/M(n) and 1/G(n) are both greater */ /* than max(underflow, 1/overflow). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. 
*/ /* Parameter adjustments */ ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; --x; --cnorm; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); notran = lsame_(trans, "N"); nounit = lsame_(diag, "N"); /* Test the input parameters. */ if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, "C")) { *info = -2; } else if (! nounit && ! lsame_(diag, "U")) { *info = -3; } else if (! lsame_(normin, "Y") && ! lsame_(normin, "N")) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*kd < 0) { *info = -6; } else if (*ldab < *kd + 1) { *info = -8; } if (*info != 0) { i__1 = -(*info); xerbla_("SLATBS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine machine dependent parameters to control overflow. */ smlnum = slamch_("Safe minimum") / slamch_("Precision"); bignum = 1.f / smlnum; *scale = 1.f; if (lsame_(normin, "N")) { /* Compute the 1-norm of each column, not including the diagonal. */ if (upper) { /* A is upper triangular. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__2 = *kd, i__3 = j - 1; jlen = min(i__2,i__3); cnorm[j] = sasum_(&jlen, &ab[*kd + 1 - jlen + j * ab_dim1], & c__1); /* L10: */ } } else { /* A is lower triangular. */ i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MIN */ i__2 = *kd, i__3 = *n - j; jlen = min(i__2,i__3); if (jlen > 0) { cnorm[j] = sasum_(&jlen, &ab[j * ab_dim1 + 2], &c__1); } else { cnorm[j] = 0.f; } /* L20: */ } } } /* Scale the column norms by TSCAL if the maximum element in CNORM is */ /* greater than BIGNUM. */ imax = isamax_(n, &cnorm[1], &c__1); tmax = cnorm[imax]; if (tmax <= bignum) { tscal = 1.f; } else { tscal = 1.f / (smlnum * tmax); sscal_(n, &tscal, &cnorm[1], &c__1); } /* Compute a bound on the computed solution vector to see if the */ /* Level 2 BLAS routine STBSV can be used. 
*/ j = isamax_(n, &x[1], &c__1); xmax = (r__1 = x[j], dabs(r__1)); xbnd = xmax; if (notran) { /* Compute the growth in A * x = b. */ if (upper) { jfirst = *n; jlast = 1; jinc = -1; maind = *kd + 1; } else { jfirst = 1; jlast = *n; jinc = 1; maind = 1; } if (tscal != 1.f) { grow = 0.f; goto L50; } if (nounit) { /* A is non-unit triangular. */ /* Compute GROW = 1/G(j) and XBND = 1/M(j). */ /* Initially, G(0) = max{x(i), i=1,...,n}. */ grow = 1.f / dmax(xbnd,smlnum); xbnd = grow; i__1 = jlast; i__2 = jinc; for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L50; } /* M(j) = G(j-1) / abs(A(j,j)) */ tjj = (r__1 = ab[maind + j * ab_dim1], dabs(r__1)); /* Computing MIN */ r__1 = xbnd, r__2 = dmin(1.f,tjj) * grow; xbnd = dmin(r__1,r__2); if (tjj + cnorm[j] >= smlnum) { /* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ grow *= tjj / (tjj + cnorm[j]); } else { /* G(j) could overflow, set GROW to 0. */ grow = 0.f; } /* L30: */ } grow = xbnd; } else { /* A is unit triangular. */ /* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */ /* Computing MIN */ r__1 = 1.f, r__2 = 1.f / dmax(xbnd,smlnum); grow = dmin(r__1,r__2); i__2 = jlast; i__1 = jinc; for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L50; } /* G(j) = G(j-1)*( 1 + CNORM(j) ) */ grow *= 1.f / (cnorm[j] + 1.f); /* L40: */ } } L50: ; } else { /* Compute the growth in A' * x = b. */ if (upper) { jfirst = 1; jlast = *n; jinc = 1; maind = *kd + 1; } else { jfirst = *n; jlast = 1; jinc = -1; maind = 1; } if (tscal != 1.f) { grow = 0.f; goto L80; } if (nounit) { /* A is non-unit triangular. */ /* Compute GROW = 1/G(j) and XBND = 1/M(j). */ /* Initially, M(0) = max{x(i), i=1,...,n}. */ grow = 1.f / dmax(xbnd,smlnum); xbnd = grow; i__1 = jlast; i__2 = jinc; for (j = jfirst; i__2 < 0 ? 
j >= i__1 : j <= i__1; j += i__2) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L80; } /* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ xj = cnorm[j] + 1.f; /* Computing MIN */ r__1 = grow, r__2 = xbnd / xj; grow = dmin(r__1,r__2); /* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ tjj = (r__1 = ab[maind + j * ab_dim1], dabs(r__1)); if (xj > tjj) { xbnd *= tjj / xj; } /* L60: */ } grow = dmin(grow,xbnd); } else { /* A is unit triangular. */ /* Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */ /* Computing MIN */ r__1 = 1.f, r__2 = 1.f / dmax(xbnd,smlnum); grow = dmin(r__1,r__2); i__2 = jlast; i__1 = jinc; for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Exit the loop if the growth factor is too small. */ if (grow <= smlnum) { goto L80; } /* G(j) = ( 1 + CNORM(j) )*G(j-1) */ xj = cnorm[j] + 1.f; grow /= xj; /* L70: */ } } L80: ; } if (grow * tscal > smlnum) { /* Use the Level 2 BLAS solve if the reciprocal of the bound on */ /* elements of X is not too small. */ stbsv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &x[1], &c__1); } else { /* Use a Level 1 BLAS solve, scaling intermediate results. */ if (xmax > bignum) { /* Scale X so that its components are less than or equal to */ /* BIGNUM in absolute value. */ *scale = bignum / xmax; sscal_(n, scale, &x[1], &c__1); xmax = bignum; } if (notran) { /* Solve A * x = b */ i__1 = jlast; i__2 = jinc; for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { /* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */ xj = (r__1 = x[j], dabs(r__1)); if (nounit) { tjjs = ab[maind + j * ab_dim1] * tscal; } else { tjjs = tscal; if (tscal == 1.f) { goto L95; } } tjj = dabs(tjjs); if (tjj > smlnum) { /* abs(A(j,j)) > SMLNUM: */ if (tjj < 1.f) { if (xj > tjj * bignum) { /* Scale x by 1/b(j). 
*/ rec = 1.f / xj; sscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } x[j] /= tjjs; xj = (r__1 = x[j], dabs(r__1)); } else if (tjj > 0.f) { /* 0 < abs(A(j,j)) <= SMLNUM: */ if (xj > tjj * bignum) { /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM */ /* to avoid overflow when dividing by A(j,j). */ rec = tjj * bignum / xj; if (cnorm[j] > 1.f) { /* Scale by 1/CNORM(j) to avoid overflow when */ /* multiplying x(j) times column j. */ rec /= cnorm[j]; } sscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } x[j] /= tjjs; xj = (r__1 = x[j], dabs(r__1)); } else { /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ /* scale = 0, and compute a solution to A*x = 0. */ i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { x[i__] = 0.f; /* L90: */ } x[j] = 1.f; xj = 1.f; *scale = 0.f; xmax = 0.f; } L95: /* Scale x if necessary to avoid overflow when adding a */ /* multiple of column j of A. */ if (xj > 1.f) { rec = 1.f / xj; if (cnorm[j] > (bignum - xmax) * rec) { /* Scale x by 1/(2*abs(x(j))). */ rec *= .5f; sscal_(n, &rec, &x[1], &c__1); *scale *= rec; } } else if (xj * cnorm[j] > bignum - xmax) { /* Scale x by 1/2. 
*/ sscal_(n, &c_b36, &x[1], &c__1); *scale *= .5f; } if (upper) { if (j > 1) { /* Compute the update */ /* x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - */ /* x(j)* A(max(1,j-kd):j-1,j) */ /* Computing MIN */ i__3 = *kd, i__4 = j - 1; jlen = min(i__3,i__4); r__1 = -x[j] * tscal; saxpy_(&jlen, &r__1, &ab[*kd + 1 - jlen + j * ab_dim1] , &c__1, &x[j - jlen], &c__1); i__3 = j - 1; i__ = isamax_(&i__3, &x[1], &c__1); xmax = (r__1 = x[i__], dabs(r__1)); } } else if (j < *n) { /* Compute the update */ /* x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) - */ /* x(j) * A(j+1:min(j+kd,n),j) */ /* Computing MIN */ i__3 = *kd, i__4 = *n - j; jlen = min(i__3,i__4); if (jlen > 0) { r__1 = -x[j] * tscal; saxpy_(&jlen, &r__1, &ab[j * ab_dim1 + 2], &c__1, &x[ j + 1], &c__1); } i__3 = *n - j; i__ = j + isamax_(&i__3, &x[j + 1], &c__1); xmax = (r__1 = x[i__], dabs(r__1)); } /* L100: */ } } else { /* Solve A' * x = b */ i__2 = jlast; i__1 = jinc; for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { /* Compute x(j) = b(j) - sum A(k,j)*x(k). */ /* k<>j */ xj = (r__1 = x[j], dabs(r__1)); uscal = tscal; rec = 1.f / dmax(xmax,1.f); if (cnorm[j] > (bignum - xj) * rec) { /* If x(j) could overflow, scale x by 1/(2*XMAX). */ rec *= .5f; if (nounit) { tjjs = ab[maind + j * ab_dim1] * tscal; } else { tjjs = tscal; } tjj = dabs(tjjs); if (tjj > 1.f) { /* Divide by A(j,j) when scaling x if A(j,j) > 1. */ /* Computing MIN */ r__1 = 1.f, r__2 = rec * tjj; rec = dmin(r__1,r__2); uscal /= tjjs; } if (rec < 1.f) { sscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } sumj = 0.f; if (uscal == 1.f) { /* If the scaling needed for A in the dot product is 1, */ /* call SDOT to perform the dot product. 
*/ if (upper) { /* Computing MIN */ i__3 = *kd, i__4 = j - 1; jlen = min(i__3,i__4); sumj = sdot_(&jlen, &ab[*kd + 1 - jlen + j * ab_dim1], &c__1, &x[j - jlen], &c__1); } else { /* Computing MIN */ i__3 = *kd, i__4 = *n - j; jlen = min(i__3,i__4); if (jlen > 0) { sumj = sdot_(&jlen, &ab[j * ab_dim1 + 2], &c__1, & x[j + 1], &c__1); } } } else { /* Otherwise, use in-line code for the dot product. */ if (upper) { /* Computing MIN */ i__3 = *kd, i__4 = j - 1; jlen = min(i__3,i__4); i__3 = jlen; for (i__ = 1; i__ <= i__3; ++i__) { sumj += ab[*kd + i__ - jlen + j * ab_dim1] * uscal * x[j - jlen - 1 + i__]; /* L110: */ } } else { /* Computing MIN */ i__3 = *kd, i__4 = *n - j; jlen = min(i__3,i__4); i__3 = jlen; for (i__ = 1; i__ <= i__3; ++i__) { sumj += ab[i__ + 1 + j * ab_dim1] * uscal * x[j + i__]; /* L120: */ } } } if (uscal == tscal) { /* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) */ /* was not used to scale the dotproduct. */ x[j] -= sumj; xj = (r__1 = x[j], dabs(r__1)); if (nounit) { /* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ tjjs = ab[maind + j * ab_dim1] * tscal; } else { tjjs = tscal; if (tscal == 1.f) { goto L135; } } tjj = dabs(tjjs); if (tjj > smlnum) { /* abs(A(j,j)) > SMLNUM: */ if (tjj < 1.f) { if (xj > tjj * bignum) { /* Scale X by 1/abs(x(j)). */ rec = 1.f / xj; sscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } } x[j] /= tjjs; } else if (tjj > 0.f) { /* 0 < abs(A(j,j)) <= SMLNUM: */ if (xj > tjj * bignum) { /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ rec = tjj * bignum / xj; sscal_(n, &rec, &x[1], &c__1); *scale *= rec; xmax *= rec; } x[j] /= tjjs; } else { /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ /* scale = 0, and compute a solution to A'*x = 0. */ i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { x[i__] = 0.f; /* L130: */ } x[j] = 1.f; *scale = 0.f; xmax = 0.f; } L135: ; } else { /* Compute x(j) := x(j) / A(j,j) - sumj if the dot */ /* product has already been divided by 1/A(j,j). 
*/ x[j] = x[j] / tjjs - sumj; } /* Computing MAX */ r__2 = xmax, r__3 = (r__1 = x[j], dabs(r__1)); xmax = dmax(r__2,r__3); /* L140: */ } } *scale /= tscal; } /* Scale the column norms by 1/TSCAL for return. */ if (tscal != 1.f) { r__1 = 1.f / tscal; sscal_(n, &r__1, &cnorm[1], &c__1); } return 0; /* End of SLATBS */ } /* slatbs_ */