void cblas_caxpy( const integer N, const void *alpha, const void *X,
                  const integer incX, void *Y, const integer incY)
{
#define F77_N N
#define F77_incX incX
#define F77_incY incY
    caxpy_( &F77_N, alpha, X, &F77_incX, Y, &F77_incY);
}
Example #2
0
int
f2c_caxpy(integer* N,
          complex* alpha,
          complex* X, integer* incX,
          complex* Y, integer* incY)
{
    caxpy_(N, alpha, X, incX, Y, incY);
    return 0;
}
Example #3
0
/* Subroutine */ int csyrfs_(char *uplo, integer *n, integer *nrhs, complex *
	a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex *
	b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, 
	complex *work, real *rwork, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
	    x_offset, i__1, i__2, i__3, i__4, i__5;
    real r__1, r__2, r__3, r__4;
    complex q__1;

    /* Builtin functions */
    double r_imag(complex *);

    /* Local variables */
    integer i__, j, k;
    real s, xk;
    integer nz;
    real eps;
    integer kase;
    real safe1, safe2;
    extern logical lsame_(char *, char *);
    integer isave[3];
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
	    complex *, integer *), caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *);
    integer count;
    logical upper;
    extern /* Subroutine */ int csymv_(char *, integer *, complex *, complex *
, integer *, complex *, integer *, complex *, complex *, integer *
), clacn2_(integer *, complex *, complex *, real *, 
	    integer *, integer *);
    extern doublereal slamch_(char *);
    real safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    real lstres;
    extern /* Subroutine */ int csytrs_(char *, integer *, integer *, complex 
	    *, integer *, integer *, complex *, integer *, integer *);


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     Modified to call CLACN2 in place of CLACON, 10 Feb 03, SJH. */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CSYRFS improves the computed solution to a system of linear */
/*  equations when the coefficient matrix is symmetric indefinite, and */
/*  provides error bounds and backward error estimates for the solution. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  Upper triangle of A is stored; */
/*          = 'L':  Lower triangle of A is stored. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  NRHS    (input) INTEGER */
/*          The number of right hand sides, i.e., the number of columns */
/*          of the matrices B and X.  NRHS >= 0. */

/*  A       (input) COMPLEX array, dimension (LDA,N) */
/*          The symmetric matrix A.  If UPLO = 'U', the leading N-by-N */
/*          upper triangular part of A contains the upper triangular part */
/*          of the matrix A, and the strictly lower triangular part of A */
/*          is not referenced.  If UPLO = 'L', the leading N-by-N lower */
/*          triangular part of A contains the lower triangular part of */
/*          the matrix A, and the strictly upper triangular part of A is */
/*          not referenced. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  AF      (input) COMPLEX array, dimension (LDAF,N) */
/*          The factored form of the matrix A.  AF contains the block */
/*          diagonal matrix D and the multipliers used to obtain the */
/*          factor U or L from the factorization A = U*D*U**T or */
/*          A = L*D*L**T as computed by CSYTRF. */

/*  LDAF    (input) INTEGER */
/*          The leading dimension of the array AF.  LDAF >= max(1,N). */

/*  IPIV    (input) INTEGER array, dimension (N) */
/*          Details of the interchanges and the block structure of D */
/*          as determined by CSYTRF. */

/*  B       (input) COMPLEX array, dimension (LDB,NRHS) */
/*          The right hand side matrix B. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= max(1,N). */

/*  X       (input/output) COMPLEX array, dimension (LDX,NRHS) */
/*          On entry, the solution matrix X, as computed by CSYTRS. */
/*          On exit, the improved solution matrix X. */

/*  LDX     (input) INTEGER */
/*          The leading dimension of the array X.  LDX >= max(1,N). */

/*  FERR    (output) REAL array, dimension (NRHS) */
/*          The estimated forward error bound for each solution vector */
/*          X(j) (the j-th column of the solution matrix X). */
/*          If XTRUE is the true solution corresponding to X(j), FERR(j) */
/*          is an estimated upper bound for the magnitude of the largest */
/*          element in (X(j) - XTRUE) divided by the magnitude of the */
/*          largest element in X(j).  The estimate is as reliable as */
/*          the estimate for RCOND, and is almost always a slight */
/*          overestimate of the true error. */

/*  BERR    (output) REAL array, dimension (NRHS) */
/*          The componentwise relative backward error of each solution */
/*          vector X(j) (i.e., the smallest relative change in */
/*          any element of A or B that makes X(j) an exact solution). */

/*  WORK    (workspace) COMPLEX array, dimension (2*N) */

/*  RWORK   (workspace) REAL array, dimension (N) */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  Internal Parameters */
/*  =================== */

/*  ITMAX is the maximum number of steps of iterative refinement. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Statement Functions .. */
/*     .. */
/*     .. Statement Function definitions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    af_dim1 = *ldaf;
    af_offset = 1 + af_dim1;
    af -= af_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --rwork;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldaf < max(1,*n)) {
	*info = -7;
    } else if (*ldb < max(1,*n)) {
	*info = -10;
    } else if (*ldx < max(1,*n)) {
	*info = -12;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CSYRFS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
	i__1 = *nrhs;
	for (j = 1; j <= i__1; ++j) {
	    ferr[j] = 0.f;
	    berr[j] = 0.f;
/* L10: */
	}
	return 0;
    }

/*     NZ = maximum number of nonzero elements in each row of A, plus 1 */

    nz = *n + 1;
    eps = slamch_("Epsilon");
    safmin = slamch_("Safe minimum");
    safe1 = nz * safmin;
    safe2 = safe1 / eps;

/*     Do for each right hand side */

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {

	count = 1;
	lstres = 3.f;
L20:

/*        Loop until stopping criterion is satisfied. */

/*        Compute residual R = B - A * X */

	ccopy_(n, &b[j * b_dim1 + 1], &c__1, &work[1], &c__1);
	q__1.r = -1.f, q__1.i = -0.f;
	csymv_(uplo, n, &q__1, &a[a_offset], lda, &x[j * x_dim1 + 1], &c__1, &
		c_b1, &work[1], &c__1);

/*        Compute componentwise relative backward error from formula */

/*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */

/*        where abs(Z) is the componentwise absolute value of the matrix */
/*        or vector Z.  If the i-th component of the denominator is less */
/*        than SAFE2, then SAFE1 is added to the i-th components of the */
/*        numerator and denominator before dividing. */

	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = i__ + j * b_dim1;
	    rwork[i__] = (r__1 = b[i__3].r, dabs(r__1)) + (r__2 = r_imag(&b[
		    i__ + j * b_dim1]), dabs(r__2));
/* L30: */
	}

/*        Compute abs(A)*abs(X) + abs(B). */

	if (upper) {
	    i__2 = *n;
	    for (k = 1; k <= i__2; ++k) {
		s = 0.f;
		i__3 = k + j * x_dim1;
		xk = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[k + j 
			* x_dim1]), dabs(r__2));
		i__3 = k - 1;
		for (i__ = 1; i__ <= i__3; ++i__) {
		    i__4 = i__ + k * a_dim1;
		    rwork[i__] += ((r__1 = a[i__4].r, dabs(r__1)) + (r__2 = 
			    r_imag(&a[i__ + k * a_dim1]), dabs(r__2))) * xk;
		    i__4 = i__ + k * a_dim1;
		    i__5 = i__ + j * x_dim1;
		    s += ((r__1 = a[i__4].r, dabs(r__1)) + (r__2 = r_imag(&a[
			    i__ + k * a_dim1]), dabs(r__2))) * ((r__3 = x[
			    i__5].r, dabs(r__3)) + (r__4 = r_imag(&x[i__ + j *
			     x_dim1]), dabs(r__4)));
/* L40: */
		}
		i__3 = k + k * a_dim1;
		rwork[k] = rwork[k] + ((r__1 = a[i__3].r, dabs(r__1)) + (r__2 
			= r_imag(&a[k + k * a_dim1]), dabs(r__2))) * xk + s;
/* L50: */
	    }
	} else {
	    i__2 = *n;
	    for (k = 1; k <= i__2; ++k) {
		s = 0.f;
		i__3 = k + j * x_dim1;
		xk = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[k + j 
			* x_dim1]), dabs(r__2));
		i__3 = k + k * a_dim1;
		rwork[k] += ((r__1 = a[i__3].r, dabs(r__1)) + (r__2 = r_imag(&
			a[k + k * a_dim1]), dabs(r__2))) * xk;
		i__3 = *n;
		for (i__ = k + 1; i__ <= i__3; ++i__) {
		    i__4 = i__ + k * a_dim1;
		    rwork[i__] += ((r__1 = a[i__4].r, dabs(r__1)) + (r__2 = 
			    r_imag(&a[i__ + k * a_dim1]), dabs(r__2))) * xk;
		    i__4 = i__ + k * a_dim1;
		    i__5 = i__ + j * x_dim1;
		    s += ((r__1 = a[i__4].r, dabs(r__1)) + (r__2 = r_imag(&a[
			    i__ + k * a_dim1]), dabs(r__2))) * ((r__3 = x[
			    i__5].r, dabs(r__3)) + (r__4 = r_imag(&x[i__ + j *
			     x_dim1]), dabs(r__4)));
/* L60: */
		}
		rwork[k] += s;
/* L70: */
	    }
	}
	s = 0.f;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    if (rwork[i__] > safe2) {
/* Computing MAX */
		i__3 = i__;
		r__3 = s, r__4 = ((r__1 = work[i__3].r, dabs(r__1)) + (r__2 = 
			r_imag(&work[i__]), dabs(r__2))) / rwork[i__];
		s = dmax(r__3,r__4);
	    } else {
/* Computing MAX */
		i__3 = i__;
		r__3 = s, r__4 = ((r__1 = work[i__3].r, dabs(r__1)) + (r__2 = 
			r_imag(&work[i__]), dabs(r__2)) + safe1) / (rwork[i__]
			 + safe1);
		s = dmax(r__3,r__4);
	    }
/* L80: */
	}
	berr[j] = s;

/*        Test stopping criterion. Continue iterating if */
/*           1) The residual BERR(J) is larger than machine epsilon, and */
/*           2) BERR(J) decreased by at least a factor of 2 during the */
/*              last iteration, and */
/*           3) At most ITMAX iterations tried. */

	if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) {

/*           Update solution and try again. */

	    csytrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[1], 
		    n, info);
	    caxpy_(n, &c_b1, &work[1], &c__1, &x[j * x_dim1 + 1], &c__1);
	    lstres = berr[j];
	    ++count;
	    goto L20;
	}

/*        Bound error from formula */

/*        norm(X - XTRUE) / norm(X) .le. FERR = */
/*        norm( abs(inv(A))* */
/*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */

/*        where */
/*          norm(Z) is the magnitude of the largest component of Z */
/*          inv(A) is the inverse of A */
/*          abs(Z) is the componentwise absolute value of the matrix or */
/*             vector Z */
/*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
/*          EPS is machine epsilon */

/*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */
/*        is incremented by SAFE1 if the i-th component of */
/*        abs(A)*abs(X) + abs(B) is less than SAFE2. */

/*        Use CLACN2 to estimate the infinity-norm of the matrix */
/*           inv(A) * diag(W), */
/*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */

	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    if (rwork[i__] > safe2) {
		i__3 = i__;
		rwork[i__] = (r__1 = work[i__3].r, dabs(r__1)) + (r__2 = 
			r_imag(&work[i__]), dabs(r__2)) + nz * eps * rwork[
			i__];
	    } else {
		i__3 = i__;
		rwork[i__] = (r__1 = work[i__3].r, dabs(r__1)) + (r__2 = 
			r_imag(&work[i__]), dabs(r__2)) + nz * eps * rwork[
			i__] + safe1;
	    }
/* L90: */
	}

	kase = 0;
L100:
	clacn2_(n, &work[*n + 1], &work[1], &ferr[j], &kase, isave);
	if (kase != 0) {
	    if (kase == 1) {

/*              Multiply by diag(W)*inv(A'). */

		csytrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[
			1], n, info);
		i__2 = *n;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    i__3 = i__;
		    i__4 = i__;
		    i__5 = i__;
		    q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] 
			    * work[i__5].i;
		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
/* L110: */
		}
	    } else if (kase == 2) {

/*              Multiply by inv(A)*diag(W). */

		i__2 = *n;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    i__3 = i__;
		    i__4 = i__;
		    i__5 = i__;
		    q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] 
			    * work[i__5].i;
		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
/* L120: */
		}
		csytrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[
			1], n, info);
	    }
	    goto L100;
	}

/*        Normalize error. */

	lstres = 0.f;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    i__3 = i__ + j * x_dim1;
	    r__3 = lstres, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = 
		    r_imag(&x[i__ + j * x_dim1]), dabs(r__2));
	    lstres = dmax(r__3,r__4);
/* L130: */
	}
	if (lstres != 0.f) {
	    ferr[j] /= lstres;
	}

/* L140: */
    }

    return 0;

/*     End of CSYRFS */

} /* csyrfs_ */
/* Subroutine */ int cherfs_(char *uplo, integer *n, integer *nrhs, complex *
	a, integer *lda, complex *af, integer *ldaf, integer *ipiv, complex *
	b, integer *ldb, complex *x, integer *ldx, real *ferr, real *berr, 
	complex *work, real *rwork, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CHERFS improves the computed solution to a system of linear   
    equations when the coefficient matrix is Hermitian indefinite, and   
    provides error bounds and backward error estimates for the solution.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangle of A is stored;   
            = 'L':  Lower triangle of A is stored.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrices B and X.  NRHS >= 0.   

    A       (input) COMPLEX array, dimension (LDA,N)   
            The Hermitian matrix A.  If UPLO = 'U', the leading N-by-N   
            upper triangular part of A contains the upper triangular part   
            of the matrix A, and the strictly lower triangular part of A   
            is not referenced.  If UPLO = 'L', the leading N-by-N lower   
            triangular part of A contains the lower triangular part of   
            the matrix A, and the strictly upper triangular part of A is   
            not referenced.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    AF      (input) COMPLEX array, dimension (LDAF,N)   
            The factored form of the matrix A.  AF contains the block   
            diagonal matrix D and the multipliers used to obtain the   
            factor U or L from the factorization A = U*D*U**H or   
            A = L*D*L**H as computed by CHETRF.   

    LDAF    (input) INTEGER   
            The leading dimension of the array AF.  LDAF >= max(1,N).   

    IPIV    (input) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D   
            as determined by CHETRF.   

    B       (input) COMPLEX array, dimension (LDB,NRHS)   
            The right hand side matrix B.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    X       (input/output) COMPLEX array, dimension (LDX,NRHS)   
            On entry, the solution matrix X, as computed by CHETRS.   
            On exit, the improved solution matrix X.   

    LDX     (input) INTEGER   
            The leading dimension of the array X.  LDX >= max(1,N).   

    FERR    (output) REAL array, dimension (NRHS)   
            The estimated forward error bound for each solution vector   
            X(j) (the j-th column of the solution matrix X).   
            If XTRUE is the true solution corresponding to X(j), FERR(j)   
            is an estimated upper bound for the magnitude of the largest   
            element in (X(j) - XTRUE) divided by the magnitude of the   
            largest element in X(j).  The estimate is as reliable as   
            the estimate for RCOND, and is almost always a slight   
            overestimate of the true error.   

    BERR    (output) REAL array, dimension (NRHS)   
            The componentwise relative backward error of each solution   
            vector X(j) (i.e., the smallest relative change in   
            any element of A or B that makes X(j) an exact solution).   

    WORK    (workspace) COMPLEX array, dimension (2*N)   

    RWORK   (workspace) REAL array, dimension (N)   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    Internal Parameters   
    ===================   

    ITMAX is the maximum number of steps of iterative refinement.   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static complex c_b1 = {1.f,0.f};
    static integer c__1 = 1;
    
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
	    x_offset, i__1, i__2, i__3, i__4, i__5;
    real r__1, r__2, r__3, r__4;
    complex q__1;
    /* Builtin functions */
    double r_imag(complex *);
    /* Local variables */
    static integer kase;
    static real safe1, safe2;
    static integer i__, j, k;
    static real s;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int chemv_(char *, integer *, complex *, complex *
	    , integer *, complex *, integer *, complex *, complex *, integer *
	    ), ccopy_(integer *, complex *, integer *, complex *, 
	    integer *), caxpy_(integer *, complex *, complex *, integer *, 
	    complex *, integer *);
    static integer count;
    static logical upper;
    extern /* Subroutine */ int clacon_(integer *, complex *, complex *, real 
	    *, integer *);
    static real xk;
    extern doublereal slamch_(char *);
    static integer nz;
    static real safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *), chetrs_(
	    char *, integer *, integer *, complex *, integer *, integer *, 
	    complex *, integer *, integer *);
    static real lstres, eps;
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]
#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]
#define x_subscr(a_1,a_2) (a_2)*x_dim1 + a_1
#define x_ref(a_1,a_2) x[x_subscr(a_1,a_2)]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    af_dim1 = *ldaf;
    af_offset = 1 + af_dim1 * 1;
    af -= af_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --rwork;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldaf < max(1,*n)) {
	*info = -7;
    } else if (*ldb < max(1,*n)) {
	*info = -10;
    } else if (*ldx < max(1,*n)) {
	*info = -12;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CHERFS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
	i__1 = *nrhs;
	for (j = 1; j <= i__1; ++j) {
	    ferr[j] = 0.f;
	    berr[j] = 0.f;
/* L10: */
	}
	return 0;
    }

/*     NZ = maximum number of nonzero elements in each row of A, plus 1 */

    nz = *n + 1;
    eps = slamch_("Epsilon");
    safmin = slamch_("Safe minimum");
    safe1 = nz * safmin;
    safe2 = safe1 / eps;

/*     Do for each right hand side */

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {

	count = 1;
	lstres = 3.f;
L20:

/*        Loop until stopping criterion is satisfied.   

          Compute residual R = B - A * X */

	ccopy_(n, &b_ref(1, j), &c__1, &work[1], &c__1);
	q__1.r = -1.f, q__1.i = 0.f;
	chemv_(uplo, n, &q__1, &a[a_offset], lda, &x_ref(1, j), &c__1, &c_b1, 
		&work[1], &c__1);

/*        Compute componentwise relative backward error from formula   

          max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )   

          where abs(Z) is the componentwise absolute value of the matrix   
          or vector Z.  If the i-th component of the denominator is less   
          than SAFE2, then SAFE1 is added to the i-th components of the   
          numerator and denominator before dividing. */

	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = b_subscr(i__, j);
	    rwork[i__] = (r__1 = b[i__3].r, dabs(r__1)) + (r__2 = r_imag(&
		    b_ref(i__, j)), dabs(r__2));
/* L30: */
	}

/*        Compute abs(A)*abs(X) + abs(B). */

	if (upper) {
	    i__2 = *n;
	    for (k = 1; k <= i__2; ++k) {
		s = 0.f;
		i__3 = x_subscr(k, j);
		xk = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x_ref(k,
			 j)), dabs(r__2));
		i__3 = k - 1;
		for (i__ = 1; i__ <= i__3; ++i__) {
		    i__4 = a_subscr(i__, k);
		    rwork[i__] += ((r__1 = a[i__4].r, dabs(r__1)) + (r__2 = 
			    r_imag(&a_ref(i__, k)), dabs(r__2))) * xk;
		    i__4 = a_subscr(i__, k);
		    i__5 = x_subscr(i__, j);
		    s += ((r__1 = a[i__4].r, dabs(r__1)) + (r__2 = r_imag(&
			    a_ref(i__, k)), dabs(r__2))) * ((r__3 = x[i__5].r,
			     dabs(r__3)) + (r__4 = r_imag(&x_ref(i__, j)), 
			    dabs(r__4)));
/* L40: */
		}
		i__3 = a_subscr(k, k);
		rwork[k] = rwork[k] + (r__1 = a[i__3].r, dabs(r__1)) * xk + s;
/* L50: */
	    }
	} else {
	    i__2 = *n;
	    for (k = 1; k <= i__2; ++k) {
		s = 0.f;
		i__3 = x_subscr(k, j);
		xk = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x_ref(k,
			 j)), dabs(r__2));
		i__3 = a_subscr(k, k);
		rwork[k] += (r__1 = a[i__3].r, dabs(r__1)) * xk;
		i__3 = *n;
		for (i__ = k + 1; i__ <= i__3; ++i__) {
		    i__4 = a_subscr(i__, k);
		    rwork[i__] += ((r__1 = a[i__4].r, dabs(r__1)) + (r__2 = 
			    r_imag(&a_ref(i__, k)), dabs(r__2))) * xk;
		    i__4 = a_subscr(i__, k);
		    i__5 = x_subscr(i__, j);
		    s += ((r__1 = a[i__4].r, dabs(r__1)) + (r__2 = r_imag(&
			    a_ref(i__, k)), dabs(r__2))) * ((r__3 = x[i__5].r,
			     dabs(r__3)) + (r__4 = r_imag(&x_ref(i__, j)), 
			    dabs(r__4)));
/* L60: */
		}
		rwork[k] += s;
/* L70: */
	    }
	}
	s = 0.f;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    if (rwork[i__] > safe2) {
/* Computing MAX */
		i__3 = i__;
		r__3 = s, r__4 = ((r__1 = work[i__3].r, dabs(r__1)) + (r__2 = 
			r_imag(&work[i__]), dabs(r__2))) / rwork[i__];
		s = dmax(r__3,r__4);
	    } else {
/* Computing MAX */
		i__3 = i__;
		r__3 = s, r__4 = ((r__1 = work[i__3].r, dabs(r__1)) + (r__2 = 
			r_imag(&work[i__]), dabs(r__2)) + safe1) / (rwork[i__]
			 + safe1);
		s = dmax(r__3,r__4);
	    }
/* L80: */
	}
	berr[j] = s;

/*        Test stopping criterion. Continue iterating if   
             1) The residual BERR(J) is larger than machine epsilon, and   
             2) BERR(J) decreased by at least a factor of 2 during the   
                last iteration, and   
             3) At most ITMAX iterations tried. */

	if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) {

/*           Update solution and try again. */

	    chetrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[1], 
		    n, info);
	    caxpy_(n, &c_b1, &work[1], &c__1, &x_ref(1, j), &c__1);
	    lstres = berr[j];
	    ++count;
	    goto L20;
	}

/*        Bound error from formula   

          norm(X - XTRUE) / norm(X) .le. FERR =   
          norm( abs(inv(A))*   
             ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)   

          where   
            norm(Z) is the magnitude of the largest component of Z   
            inv(A) is the inverse of A   
            abs(Z) is the componentwise absolute value of the matrix or   
               vector Z   
            NZ is the maximum number of nonzeros in any row of A, plus 1   
            EPS is machine epsilon   

          The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))   
          is incremented by SAFE1 if the i-th component of   
          abs(A)*abs(X) + abs(B) is less than SAFE2.   

          Use CLACON to estimate the infinity-norm of the matrix   
             inv(A) * diag(W),   
          where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */

	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    if (rwork[i__] > safe2) {
		i__3 = i__;
		rwork[i__] = (r__1 = work[i__3].r, dabs(r__1)) + (r__2 = 
			r_imag(&work[i__]), dabs(r__2)) + nz * eps * rwork[
			i__];
	    } else {
		i__3 = i__;
		rwork[i__] = (r__1 = work[i__3].r, dabs(r__1)) + (r__2 = 
			r_imag(&work[i__]), dabs(r__2)) + nz * eps * rwork[
			i__] + safe1;
	    }
/* L90: */
	}

	kase = 0;
L100:
	clacon_(n, &work[*n + 1], &work[1], &ferr[j], &kase);
	if (kase != 0) {
	    if (kase == 1) {

/*              Multiply by diag(W)*inv(A'). */

		chetrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[
			1], n, info);
		i__2 = *n;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    i__3 = i__;
		    i__4 = i__;
		    i__5 = i__;
		    q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] 
			    * work[i__5].i;
		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
/* L110: */
		}
	    } else if (kase == 2) {

/*              Multiply by inv(A)*diag(W). */

		i__2 = *n;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    i__3 = i__;
		    i__4 = i__;
		    i__5 = i__;
		    q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] 
			    * work[i__5].i;
		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
/* L120: */
		}
		chetrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[
			1], n, info);
	    }
	    goto L100;
	}

/*        Normalize error. */

	lstres = 0.f;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    i__3 = x_subscr(i__, j);
	    r__3 = lstres, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = 
		    r_imag(&x_ref(i__, j)), dabs(r__2));
	    lstres = dmax(r__3,r__4);
/* L130: */
	}
	if (lstres != 0.f) {
	    ferr[j] /= lstres;
	}

/* L140: */
    }

    return 0;

/*     End of CHERFS */

} /* cherfs_ */
Example #5
0
/* Subroutine */ int cgbrfs_(char *trans, integer *n, integer *kl, integer *
	ku, integer *nrhs, complex *ab, integer *ldab, complex *afb, integer *
	ldafb, integer *ipiv, complex *b, integer *ldb, complex *x, integer *
	ldx, real *ferr, real *berr, complex *work, real *rwork, integer *
	info)
{
    /* System generated locals */
    integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, 
	    x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
    real r__1, r__2, r__3, r__4;
    complex q__1;

    /* Builtin functions */
    double r_imag(complex *);

    /* Local variables */
    integer i__, j, k;
    real s;
    integer kk;
    real xk;
    integer nz;
    real eps;
    integer kase;
    real safe1, safe2;
    extern /* Subroutine */ int cgbmv_(char *, integer *, integer *, integer *
, integer *, complex *, complex *, integer *, complex *, integer *
, complex *, complex *, integer *);
    extern logical lsame_(char *, char *);
    integer isave[3];
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
	    complex *, integer *), caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *);
    integer count;
    extern /* Subroutine */ int clacn2_(integer *, complex *, complex *, real 
	    *, integer *, integer *);
    extern doublereal slamch_(char *);
    real safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *), cgbtrs_(
	    char *, integer *, integer *, integer *, integer *, complex *, 
	    integer *, integer *, complex *, integer *, integer *);
    logical notran;
    char transn[1], transt[1];
    real lstres;


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     Modified to call CLACN2 in place of CLACON, 10 Feb 03, SJH. */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CGBRFS improves the computed solution to a system of linear */
/*  equations when the coefficient matrix is banded, and provides */
/*  error bounds and backward error estimates for the solution. */

/*  Arguments */
/*  ========= */

/*  TRANS   (input) CHARACTER*1 */
/*          Specifies the form of the system of equations: */
/*          = 'N':  A * X = B     (No transpose) */
/*          = 'T':  A**T * X = B  (Transpose) */
/*          = 'C':  A**H * X = B  (Conjugate transpose) */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  KL      (input) INTEGER */
/*          The number of subdiagonals within the band of A.  KL >= 0. */

/*  KU      (input) INTEGER */
/*          The number of superdiagonals within the band of A.  KU >= 0. */

/*  NRHS    (input) INTEGER */
/*          The number of right hand sides, i.e., the number of columns */
/*          of the matrices B and X.  NRHS >= 0. */

/*  AB      (input) COMPLEX array, dimension (LDAB,N) */
/*          The original band matrix A, stored in rows 1 to KL+KU+1. */
/*          The j-th column of A is stored in the j-th column of the */
/*          array AB as follows: */
/*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). */

/*  LDAB    (input) INTEGER */
/*          The leading dimension of the array AB.  LDAB >= KL+KU+1. */

/*  AFB     (input) COMPLEX array, dimension (LDAFB,N) */
/*          Details of the LU factorization of the band matrix A, as */
/*          computed by CGBTRF.  U is stored as an upper triangular band */
/*          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and */
/*          the multipliers used during the factorization are stored in */
/*          rows KL+KU+2 to 2*KL+KU+1. */

/*  LDAFB   (input) INTEGER */
/*          The leading dimension of the array AFB.  LDAFB >= 2*KL*KU+1. */

/*  IPIV    (input) INTEGER array, dimension (N) */
/*          The pivot indices from CGBTRF; for 1<=i<=N, row i of the */
/*          matrix was interchanged with row IPIV(i). */

/*  B       (input) COMPLEX array, dimension (LDB,NRHS) */
/*          The right hand side matrix B. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= max(1,N). */

/*  X       (input/output) COMPLEX array, dimension (LDX,NRHS) */
/*          On entry, the solution matrix X, as computed by CGBTRS. */
/*          On exit, the improved solution matrix X. */

/*  LDX     (input) INTEGER */
/*          The leading dimension of the array X.  LDX >= max(1,N). */

/*  FERR    (output) REAL array, dimension (NRHS) */
/*          The estimated forward error bound for each solution vector */
/*          X(j) (the j-th column of the solution matrix X). */
/*          If XTRUE is the true solution corresponding to X(j), FERR(j) */
/*          is an estimated upper bound for the magnitude of the largest */
/*          element in (X(j) - XTRUE) divided by the magnitude of the */
/*          largest element in X(j).  The estimate is as reliable as */
/*          the estimate for RCOND, and is almost always a slight */
/*          overestimate of the true error. */

/*  BERR    (output) REAL array, dimension (NRHS) */
/*          The componentwise relative backward error of each solution */
/*          vector X(j) (i.e., the smallest relative change in */
/*          any element of A or B that makes X(j) an exact solution). */

/*  WORK    (workspace) COMPLEX array, dimension (2*N) */

/*  RWORK   (workspace) REAL array, dimension (N) */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  Internal Parameters */
/*  =================== */

/*  ITMAX is the maximum number of steps of iterative refinement. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Statement Functions .. */
/*     .. */
/*     .. Statement Function definitions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1;
    ab -= ab_offset;
    afb_dim1 = *ldafb;
    afb_offset = 1 + afb_dim1;
    afb -= afb_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --rwork;

    /* Function Body */
    *info = 0;
    notran = lsame_(trans, "N");
    if (! notran && ! lsame_(trans, "T") && ! lsame_(
	    trans, "C")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*kl < 0) {
	*info = -3;
    } else if (*ku < 0) {
	*info = -4;
    } else if (*nrhs < 0) {
	*info = -5;
    } else if (*ldab < *kl + *ku + 1) {
	*info = -7;
    } else if (*ldafb < (*kl << 1) + *ku + 1) {
	*info = -9;
    } else if (*ldb < max(1,*n)) {
	*info = -12;
    } else if (*ldx < max(1,*n)) {
	*info = -14;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CGBRFS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
	i__1 = *nrhs;
	for (j = 1; j <= i__1; ++j) {
	    ferr[j] = 0.f;
	    berr[j] = 0.f;
/* L10: */
	}
	return 0;
    }

    if (notran) {
	*(unsigned char *)transn = 'N';
	*(unsigned char *)transt = 'C';
    } else {
	*(unsigned char *)transn = 'C';
	*(unsigned char *)transt = 'N';
    }

/*     NZ = maximum number of nonzero elements in each row of A, plus 1 */

/* Computing MIN */
    i__1 = *kl + *ku + 2, i__2 = *n + 1;
    nz = min(i__1,i__2);
    eps = slamch_("Epsilon");
    safmin = slamch_("Safe minimum");
    safe1 = nz * safmin;
    safe2 = safe1 / eps;

/*     Do for each right hand side */

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {

	count = 1;
	lstres = 3.f;
L20:

/*        Loop until stopping criterion is satisfied. */

/*        Compute residual R = B - op(A) * X, */
/*        where op(A) = A, A**T, or A**H, depending on TRANS. */

	ccopy_(n, &b[j * b_dim1 + 1], &c__1, &work[1], &c__1);
	q__1.r = -1.f, q__1.i = -0.f;
	cgbmv_(trans, n, n, kl, ku, &q__1, &ab[ab_offset], ldab, &x[j * 
		x_dim1 + 1], &c__1, &c_b1, &work[1], &c__1);

/*        Compute componentwise relative backward error from formula */

/*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */

/*        where abs(Z) is the componentwise absolute value of the matrix */
/*        or vector Z.  If the i-th component of the denominator is less */
/*        than SAFE2, then SAFE1 is added to the i-th components of the */
/*        numerator and denominator before dividing. */

	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = i__ + j * b_dim1;
	    rwork[i__] = (r__1 = b[i__3].r, dabs(r__1)) + (r__2 = r_imag(&b[
		    i__ + j * b_dim1]), dabs(r__2));
/* L30: */
	}

/*        Compute abs(op(A))*abs(X) + abs(B). */

	if (notran) {
	    i__2 = *n;
	    for (k = 1; k <= i__2; ++k) {
		kk = *ku + 1 - k;
		i__3 = k + j * x_dim1;
		xk = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[k + j 
			* x_dim1]), dabs(r__2));
/* Computing MAX */
		i__3 = 1, i__4 = k - *ku;
/* Computing MIN */
		i__6 = *n, i__7 = k + *kl;
		i__5 = min(i__6,i__7);
		for (i__ = max(i__3,i__4); i__ <= i__5; ++i__) {
		    i__3 = kk + i__ + k * ab_dim1;
		    rwork[i__] += ((r__1 = ab[i__3].r, dabs(r__1)) + (r__2 = 
			    r_imag(&ab[kk + i__ + k * ab_dim1]), dabs(r__2))) 
			    * xk;
/* L40: */
		}
/* L50: */
	    }
	} else {
	    i__2 = *n;
	    for (k = 1; k <= i__2; ++k) {
		s = 0.f;
		kk = *ku + 1 - k;
/* Computing MAX */
		i__5 = 1, i__3 = k - *ku;
/* Computing MIN */
		i__6 = *n, i__7 = k + *kl;
		i__4 = min(i__6,i__7);
		for (i__ = max(i__5,i__3); i__ <= i__4; ++i__) {
		    i__5 = kk + i__ + k * ab_dim1;
		    i__3 = i__ + j * x_dim1;
		    s += ((r__1 = ab[i__5].r, dabs(r__1)) + (r__2 = r_imag(&
			    ab[kk + i__ + k * ab_dim1]), dabs(r__2))) * ((
			    r__3 = x[i__3].r, dabs(r__3)) + (r__4 = r_imag(&x[
			    i__ + j * x_dim1]), dabs(r__4)));
/* L60: */
		}
		rwork[k] += s;
/* L70: */
	    }
	}
	s = 0.f;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    if (rwork[i__] > safe2) {
/* Computing MAX */
		i__4 = i__;
		r__3 = s, r__4 = ((r__1 = work[i__4].r, dabs(r__1)) + (r__2 = 
			r_imag(&work[i__]), dabs(r__2))) / rwork[i__];
		s = dmax(r__3,r__4);
	    } else {
/* Computing MAX */
		i__4 = i__;
		r__3 = s, r__4 = ((r__1 = work[i__4].r, dabs(r__1)) + (r__2 = 
			r_imag(&work[i__]), dabs(r__2)) + safe1) / (rwork[i__]
			 + safe1);
		s = dmax(r__3,r__4);
	    }
/* L80: */
	}
	berr[j] = s;

/*        Test stopping criterion. Continue iterating if */
/*           1) The residual BERR(J) is larger than machine epsilon, and */
/*           2) BERR(J) decreased by at least a factor of 2 during the */
/*              last iteration, and */
/*           3) At most ITMAX iterations tried. */

	if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) {

/*           Update solution and try again. */

	    cgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1]
, &work[1], n, info);
	    caxpy_(n, &c_b1, &work[1], &c__1, &x[j * x_dim1 + 1], &c__1);
	    lstres = berr[j];
	    ++count;
	    goto L20;
	}

/*        Bound error from formula */

/*        norm(X - XTRUE) / norm(X) .le. FERR = */
/*        norm( abs(inv(op(A)))* */
/*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */

/*        where */
/*          norm(Z) is the magnitude of the largest component of Z */
/*          inv(op(A)) is the inverse of op(A) */
/*          abs(Z) is the componentwise absolute value of the matrix or */
/*             vector Z */
/*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
/*          EPS is machine epsilon */

/*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */
/*        is incremented by SAFE1 if the i-th component of */
/*        abs(op(A))*abs(X) + abs(B) is less than SAFE2. */

/*        Use CLACN2 to estimate the infinity-norm of the matrix */
/*           inv(op(A)) * diag(W), */
/*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */

	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    if (rwork[i__] > safe2) {
		i__4 = i__;
		rwork[i__] = (r__1 = work[i__4].r, dabs(r__1)) + (r__2 = 
			r_imag(&work[i__]), dabs(r__2)) + nz * eps * rwork[
			i__];
	    } else {
		i__4 = i__;
		rwork[i__] = (r__1 = work[i__4].r, dabs(r__1)) + (r__2 = 
			r_imag(&work[i__]), dabs(r__2)) + nz * eps * rwork[
			i__] + safe1;
	    }
/* L90: */
	}

	kase = 0;
L100:
	clacn2_(n, &work[*n + 1], &work[1], &ferr[j], &kase, isave);
	if (kase != 0) {
	    if (kase == 1) {

/*              Multiply by diag(W)*inv(op(A)**H). */

		cgbtrs_(transt, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &
			ipiv[1], &work[1], n, info);
		i__2 = *n;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    i__4 = i__;
		    i__5 = i__;
		    i__3 = i__;
		    q__1.r = rwork[i__5] * work[i__3].r, q__1.i = rwork[i__5] 
			    * work[i__3].i;
		    work[i__4].r = q__1.r, work[i__4].i = q__1.i;
/* L110: */
		}
	    } else {

/*              Multiply by inv(op(A))*diag(W). */

		i__2 = *n;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    i__4 = i__;
		    i__5 = i__;
		    i__3 = i__;
		    q__1.r = rwork[i__5] * work[i__3].r, q__1.i = rwork[i__5] 
			    * work[i__3].i;
		    work[i__4].r = q__1.r, work[i__4].i = q__1.i;
/* L120: */
		}
		cgbtrs_(transn, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &
			ipiv[1], &work[1], n, info);
	    }
	    goto L100;
	}

/*        Normalize error. */

	lstres = 0.f;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    i__4 = i__ + j * x_dim1;
	    r__3 = lstres, r__4 = (r__1 = x[i__4].r, dabs(r__1)) + (r__2 = 
		    r_imag(&x[i__ + j * x_dim1]), dabs(r__2));
	    lstres = dmax(r__3,r__4);
/* L130: */
	}
	if (lstres != 0.f) {
	    ferr[j] /= lstres;
	}

/* L140: */
    }

    return 0;

/*     End of CGBRFS */

} /* cgbrfs_ */
Example #6
0
void
cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
       int *perm_r, int *perm_c, equed_t equed, float *R, float *C,
       SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr,
       Gstat_t *Gstat, int *info)
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 *
 * Purpose
 * =======   
 *
 * cgsrfs improves the computed solution to a system of linear
 * equations and provides error bounds and backward error estimates for
 * the solution.
 *
 * See supermatrix.h for the definition of 'SuperMatrix' structure.
 *
 * Arguments
 * =========
 *
 * trans   (input) trans_t
 *         Specifies the form of the system of equations:
 *         = NOTRANS:  A * X = B     (No transpose)
 *         = TRANS:    A**T * X = B  (Transpose)
 *         = CONJ:     A**H * X = B  (Conjugate transpose = Transpose)
 *
 * A       (input) SuperMatrix*
 *         The original matrix A in the system, or the scaled A if
 *         equilibration was done. The type of A can be:
 *         Stype = NC, Dtype = _D, Mtype = GE.
 *
 * L       (input) SuperMatrix*
 *         The factor L from the factorization Pr*A*Pc=L*U. Use
 *         compressed row subscripts storage for supernodes,
 *         i.e., L has types: Stype = SCP, Dtype = _D, Mtype = TRLU.
 *
 * U       (input) SuperMatrix*
 *         The factor U from the factorization Pr*A*Pc=L*U as computed by
 *         dgstrf(). Use column-wise storage scheme,
 *         i.e., U has types: Stype = NCP, Dtype = _D, Mtype = TRU.
 *
 * perm_r  (input) int*, dimension (A->nrow)
 *         Row permutation vector, which defines the permutation matrix Pr;
 *         perm_r[i] = j means row i of A is in position j in Pr*A.
 *
 * perm_c  (input) int*, dimension (A->ncol)
 *         Column permutation vector, which defines the
 *         permutation matrix Pc; perm_c[i] = j means column i of A is 
 *         in position j in A*Pc.
 *
 * equed   (input) equed_t
 *         Specifies the form of equilibration that was done.
 *         = NOEQUIL: No equilibration.
 *         = ROW:  Row equilibration, i.e., A was premultiplied by diag(R).
 *         = COL:  Column equilibration, i.e., A was postmultiplied by
 *                 diag(C).
 *         = BOTH: Both row and column equilibration, i.e., A was replaced
 *                 by diag(R)*A*diag(C).
 *
 * R       (input) double*, dimension (A->nrow)
 *         The row scale factors for A.
 *         If equed = ROW or BOTH, A is premultiplied by diag(R).
 *         If equed = NOEQUIL or COL, R is not accessed.
 *
 * C       (input) double*, dimension (A->ncol)
 *         The column scale factors for A.
 *         If equed = COL or BOTH, A is postmultiplied by diag(C).
 *         If equed = NOEQUIL or ROW, C is not accessed.
 *
 * B       (input) SuperMatrix*
 *         B has types: Stype = DN, Dtype = _D, Mtype = GE.
 *         The right hand side matrix B.
 *
 * X       (input/output) SuperMatrix*
 *         X has types: Stype = DN, Dtype = _D, Mtype = GE.
 *         On entry, the solution matrix X, as computed by dgstrs().
 *         On exit, the improved solution matrix X.
 *
 * FERR    (output) double*, dimension (B->ncol)
 *         The estimated forward error bound for each solution vector
 *         X(j) (the j-th column of the solution matrix X).
 *         If XTRUE is the true solution corresponding to X(j), FERR(j)
 *         is an estimated upper bound for the magnitude of the largest
 *         element in (X(j) - XTRUE) divided by the magnitude of the
 *         largest element in X(j).  The estimate is as reliable as
 *         the estimate for RCOND, and is almost always a slight
 *         overestimate of the true error.
 *
 * BERR    (output) double*, dimension (B->ncol)
 *         The componentwise relative backward error of each solution
 *         vector X(j) (i.e., the smallest relative change in
 *         any element of A or B that makes X(j) an exact solution).
 *
 * info    (output) int*
 *         = 0:  successful exit
 *         < 0:  if INFO = -i, the i-th argument had an illegal value
 *
 * Internal Parameters
 * ===================
 *
 * ITMAX is the maximum number of steps of iterative refinement.
 *
 */

#define ITMAX 5
    
    /* Table of constant values */
    int    ione = 1;
    complex ndone = {-1., 0.};
    complex done = {1., 0.};
    
    /* Local variables */
    NCformat *Astore;
    complex   *Aval;
    SuperMatrix Bjcol;
    DNformat *Bstore, *Xstore, *Bjcol_store;
    complex   *Bmat, *Xmat, *Bptr, *Xptr;
    int      kase;
    float   safe1, safe2;
    int      i, j, k, irow, nz, count, notran, rowequ, colequ;
    int      ldb, ldx, nrhs;
    float   s, xk, lstres, eps, safmin;
    char     transc[1];
    trans_t  transt;
    complex   *work;
    float   *rwork;
    int      *iwork;
    extern double slamch_(char *);
    extern int clacon_(int *, complex *, complex *, float *, int *);
#ifdef _CRAY
    extern int CCOPY(int *, complex *, int *, complex *, int *);
    extern int CSAXPY(int *, complex *, complex *, int *, complex *, int *);
#else
    extern int ccopy_(int *, complex *, int *, complex *, int *);
    extern int caxpy_(int *, complex *, complex *, int *, complex *, int *);
#endif

    Astore = A->Store;
    Aval   = Astore->nzval;
    Bstore = B->Store;
    Xstore = X->Store;
    Bmat   = Bstore->nzval;
    Xmat   = Xstore->nzval;
    ldb    = Bstore->lda;
    ldx    = Xstore->lda;
    nrhs   = B->ncol;
    
    /* Test the input parameters */
    *info = 0;
    notran = (trans == NOTRANS);
    if ( !notran && trans != TRANS && trans != CONJ ) *info = -1;
    else if ( A->nrow != A->ncol || A->nrow < 0 ||
	      A->Stype != SLU_NC || A->Dtype != SLU_C || A->Mtype != SLU_GE )
	*info = -2;
    else if ( L->nrow != L->ncol || L->nrow < 0 ||
 	      L->Stype != SLU_SCP || L->Dtype != SLU_C || L->Mtype != SLU_TRLU )
	*info = -3;
    else if ( U->nrow != U->ncol || U->nrow < 0 ||
 	      U->Stype != SLU_NCP || U->Dtype != SLU_C || U->Mtype != SLU_TRU )
	*info = -4;
    else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
 	      B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE )
        *info = -10;
    else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
 	      X->Stype != SLU_DN || X->Dtype != SLU_C || X->Mtype != SLU_GE )
	*info = -11;
    if (*info != 0) {
	i = -(*info);
	xerbla_("cgsrfs", &i);
	return;
    }

    /* Quick return if possible */
    if ( A->nrow == 0 || nrhs == 0) {
	for (j = 0; j < nrhs; ++j) {
	    ferr[j] = 0.;
	    berr[j] = 0.;
	}
	return;
    }

    rowequ = (equed == ROW) || (equed == BOTH);
    colequ = (equed == COL) || (equed == BOTH);
    
    /* Allocate working space */
    work = complexMalloc(2*A->nrow);
    rwork = (float *) SUPERLU_MALLOC( (size_t) A->nrow * sizeof(float) );
    iwork = intMalloc(A->nrow);
    if ( !work || !rwork || !iwork ) 
        SUPERLU_ABORT("Malloc fails for work/rwork/iwork.");
    
    if ( notran ) {
	*(unsigned char *)transc = 'N';
        transt = TRANS;
    } else {
	*(unsigned char *)transc = 'T';
	transt = NOTRANS;
    }

    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
    nz     = A->ncol + 1;
    eps    = slamch_("Epsilon");
    safmin = slamch_("Safe minimum");
    /* Set SAFE1 essentially to be the underflow threshold times the
       number of additions in each row. */
    safe1  = nz * safmin;
    safe2  = safe1 / eps;

    /* Compute the number of nonzeros in each row (or column) of A */
    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
    if ( notran ) {
	for (k = 0; k < A->ncol; ++k)
	    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 
		++iwork[Astore->rowind[i]];
    } else {
	for (k = 0; k < A->ncol; ++k)
	    iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
    }	

    /* Copy one column of RHS B into Bjcol. */
    Bjcol.Stype = B->Stype;
    Bjcol.Dtype = B->Dtype;
    Bjcol.Mtype = B->Mtype;
    Bjcol.nrow  = B->nrow;
    Bjcol.ncol  = 1;
    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
    if ( !Bjcol.Store ) SUPERLU_ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
    Bjcol_store = Bjcol.Store;
    Bjcol_store->lda = ldb;
    Bjcol_store->nzval = work; /* address aliasing */
	
    /* Do for each right hand side ... */
    for (j = 0; j < nrhs; ++j) {
	count = 0;
	lstres = 3.;
	Bptr = &Bmat[j*ldb];
	Xptr = &Xmat[j*ldx];

	while (1) { /* Loop until stopping criterion is satisfied. */

	    /* Compute residual R = B - op(A) * X,   
	       where op(A) = A, A**T, or A**H, depending on TRANS. */
	    
#ifdef _CRAY
	    CCOPY(&A->nrow, Bptr, &ione, work, &ione);
#else
	    ccopy_(&A->nrow, Bptr, &ione, work, &ione);
#endif
	    sp_cgemv(transc, ndone, A, Xptr, ione, done, work, ione);

	    /* Compute componentwise relative backward error from formula 
	       max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   
	       where abs(Z) is the componentwise absolute value of the matrix
	       or vector Z.  If the i-th component of the denominator is less
	       than SAFE2, then SAFE1 is added to the i-th component of the   
	       numerator before dividing. */

	    for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
	    
	    /* Compute abs(op(A))*abs(X) + abs(B). */
	    if (notran) {
		for (k = 0; k < A->ncol; ++k) {
		    xk = c_abs1( &Xptr[k] );
		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
			rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
		}
	    } else {
		for (k = 0; k < A->ncol; ++k) {
		    s = 0.;
		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
			irow = Astore->rowind[i];
			s += c_abs1(&Aval[i]) * c_abs1(&Xptr[irow]);
		    }
		    rwork[k] += s;
		}
	    }
	    s = 0.;
	    for (i = 0; i < A->nrow; ++i) {
		if (rwork[i] > safe2) {
		    s = SUPERLU_MAX( s, c_abs1(&work[i]) / rwork[i] );
		} else if ( rwork[i] != 0.0 ) {
		    s = SUPERLU_MAX( s, (c_abs1(&work[i]) + safe1) / rwork[i] );
                }
                /* If rwork[i] is exactly 0.0, then we know the true 
                   residual also must be exactly 0.0. */
	    }
	    berr[j] = s;

	    /* Test stopping criterion. Continue iterating if   
	       1) The residual BERR(J) is larger than machine epsilon, and   
	       2) BERR(J) decreased by at least a factor of 2 during the   
	          last iteration, and   
	       3) At most ITMAX iterations tried. */

	    if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
		/* Update solution and try again. */
		cgstrs (trans, L, U, perm_r, perm_c, &Bjcol, Gstat, info);
		
#ifdef _CRAY
		CAXPY(&A->nrow, &done, work, &ione,
		       &Xmat[j*ldx], &ione);
#else
		caxpy_(&A->nrow, &done, work, &ione,
		       &Xmat[j*ldx], &ione);
#endif
		lstres = berr[j];
		++count;
	    } else {
		break;
	    }
        
	} /* end while */

	/* Bound error from formula:
	   norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   
	   ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   
          where   
            norm(Z) is the magnitude of the largest component of Z   
            inv(op(A)) is the inverse of op(A)   
            abs(Z) is the componentwise absolute value of the matrix or
	       vector Z   
            NZ is the maximum number of nonzeros in any row of A, plus 1   
            EPS is machine epsilon   

          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
          is incremented by SAFE1 if the i-th component of   
          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   

          Use CLACON to estimate the infinity-norm of the matrix   
             inv(op(A)) * diag(W),   
          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
	
	for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
	
	/* Compute abs(op(A))*abs(X) + abs(B). */
	if ( notran ) {
	    for (k = 0; k < A->ncol; ++k) {
		xk = c_abs1( &Xptr[k] );
		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
		    rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
	    }
	} else {
	    for (k = 0; k < A->ncol; ++k) {
		s = 0.;
		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
		    irow = Astore->rowind[i];
		    xk = c_abs1( &Xptr[irow] );
		    s += c_abs1(&Aval[i]) * xk;
		}
		rwork[k] += s;
	    }
	}
	
	for (i = 0; i < A->nrow; ++i)
	    if (rwork[i] > safe2)
		rwork[i] = c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i];
	    else
		rwork[i] = c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
	kase = 0;

	do {
	    clacon_(&A->nrow, &work[A->nrow], work,
		    &ferr[j], &kase);
	    if (kase == 0) break;

	    if (kase == 1) {
		/* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
		if ( notran && colequ )
		    for (i = 0; i < A->ncol; ++i) {
		        cs_mult(&work[i], &work[i], C[i]);
	            }
		else if ( !notran && rowequ )
		    for (i = 0; i < A->nrow; ++i) {
		        cs_mult(&work[i], &work[i], R[i]);
                    }

		cgstrs (transt, L, U, perm_r, perm_c, &Bjcol, Gstat, info);
		
		for (i = 0; i < A->nrow; ++i) {
		    cs_mult(&work[i], &work[i], rwork[i]);
	 	}
	    } else {
		/* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
		for (i = 0; i < A->nrow; ++i) {
		    cs_mult(&work[i], &work[i], rwork[i]);
		}
		
		cgstrs (trans, L, U, perm_r, perm_c, &Bjcol, Gstat, info);
		
		if ( notran && colequ )
		    for (i = 0; i < A->ncol; ++i) {
		        cs_mult(&work[i], &work[i], C[i]);
		    }
		else if ( !notran && rowequ )
		    for (i = 0; i < A->ncol; ++i) {
		        cs_mult(&work[i], &work[i], R[i]);  
		    }
	    }
	    
	} while ( kase != 0 );

	/* Normalize error. */
	lstres = 0.;
 	if ( notran && colequ ) {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, C[i] * c_abs1( &Xptr[i]) );
  	} else if ( !notran && rowequ ) {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, R[i] * c_abs1( &Xptr[i]) );
	} else {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, c_abs1( &Xptr[i]) );
	}
	if ( lstres != 0. )
	    ferr[j] /= lstres;

    } /* for each RHS j ... */
    
    SUPERLU_FREE(work);
    SUPERLU_FREE(rwork);
    SUPERLU_FREE(iwork);
    SUPERLU_FREE(Bjcol.Store);

    return;

} /* cgsrfs */
Example #7
0
/* DECK CSIDI */
/* Subroutine */ int csidi_(complex *a, integer *lda, integer *n, integer *
	kpvt, complex *det, complex *work, integer *job)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    real r__1, r__2;
    complex q__1, q__2, q__3;

    /* Local variables */
    static complex d__;
    static integer j, k;
    static complex t, ak;
    static integer jb, ks, km1;
    static real ten;
    static complex akp1, temp, akkp1;
    static logical nodet;
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
	    complex *, integer *);
    extern /* Complex */ void cdotu_(complex *, integer *, complex *, integer 
	    *, complex *, integer *);
    extern /* Subroutine */ int cswap_(integer *, complex *, integer *, 
	    complex *, integer *), caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *);
    static integer kstep;
    static logical noinv;

/* ***BEGIN PROLOGUE  CSIDI */
/* ***PURPOSE  Compute the determinant and inverse of a complex symmetric */
/*            matrix using the factors from CSIFA. */
/* ***LIBRARY   SLATEC (LINPACK) */
/* ***CATEGORY  D2C1, D3C1 */
/* ***TYPE      COMPLEX (SSIDI-S, DSIDI-D, CHIDI-C, CSIDI-C) */
/* ***KEYWORDS  DETERMINANT, INVERSE, LINEAR ALGEBRA, LINPACK, MATRIX, */
/*             SYMMETRIC */
/* ***AUTHOR  Bunch, J., (UCSD) */
/* ***DESCRIPTION */

/*     CSIDI computes the determinant and inverse */
/*     of a complex symmetric matrix using the factors from CSIFA. */

/*     On Entry */

/*        A       COMPLEX(LDA,N) */
/*                the output from CSIFA. */

/*        LDA     INTEGER */
/*                the leading dimension of the array A . */

/*        N       INTEGER */
/*                the order of the matrix A . */

/*        KVPT    INTEGER(N) */
/*                the pivot vector from CSIFA. */

/*        WORK    COMPLEX(N) */
/*                work vector.  Contents destroyed. */

/*        JOB     INTEGER */
/*                JOB has the decimal expansion  AB  where */
/*                   If  B .NE. 0, the inverse is computed, */
/*                   If  A .NE. 0, the determinant is computed, */

/*                For example, JOB = 11  gives both. */

/*     On Return */

/*        Variables not requested by JOB are not used. */

/*        A      contains the upper triangle of the inverse of */
/*               the original matrix.  The strict lower triangle */
/*               is never referenced. */

/*        DET    COMPLEX(2) */
/*               determinant of original matrix. */
/*               Determinant = DET(1) * 10.0**DET(2) */
/*               with 1.0 .LE. ABS(DET(1)) .LT. 10.0 */
/*               or DET(1) = 0.0. */

/*     Error Condition */

/*        A division by zero may occur if the inverse is requested */
/*        and  CSICO  has set RCOND .EQ. 0.0 */
/*        or  CSIFA  has set  INFO .NE. 0 . */

/* ***REFERENCES  J. J. Dongarra, J. R. Bunch, C. B. Moler, and G. W. */
/*                 Stewart, LINPACK Users' Guide, SIAM, 1979. */
/* ***ROUTINES CALLED  CAXPY, CCOPY, CDOTU, CSWAP */
/* ***REVISION HISTORY  (YYMMDD) */
/*   780814  DATE WRITTEN */
/*   890531  Changed all specific intrinsics to generic.  (WRB) */
/*   890831  Modified array declarations.  (WRB) */
/*   891107  Corrected category and modified routine equivalence */
/*           list.  (WRB) */
/*   891107  REVISION DATE from Version 3.2 */
/*   891214  Prologue converted to Version 4.0 format.  (BAB) */
/*   900326  Removed duplicate information from DESCRIPTION section. */
/*           (WRB) */
/*   920501  Reformatted the REFERENCES section.  (WRB) */
/* ***END PROLOGUE  CSIDI */


/* ***FIRST EXECUTABLE STATEMENT  CSIDI */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --kpvt;
    --det;
    --work;

    /* Function Body */
    noinv = *job % 10 == 0;
    nodet = *job % 100 / 10 == 0;

    if (nodet) {
	goto L100;
    }
    det[1].r = 1.f, det[1].i = 0.f;
    det[2].r = 0.f, det[2].i = 0.f;
    ten = 10.f;
    t.r = 0.f, t.i = 0.f;
    i__1 = *n;
    for (k = 1; k <= i__1; ++k) {
	i__2 = k + k * a_dim1;
	d__.r = a[i__2].r, d__.i = a[i__2].i;

/*           CHECK IF 1 BY 1 */

	if (kpvt[k] > 0) {
	    goto L30;
	}

/*              2 BY 2 BLOCK */
/*              USE DET (D  T)  =  (D/T * C - T) * T */
/*                      (T  C) */
/*              TO AVOID UNDERFLOW/OVERFLOW TROUBLES. */
/*              TAKE TWO PASSES THROUGH SCALING.  USE  T  FOR FLAG. */

	if ((r__1 = t.r, dabs(r__1)) + (r__2 = r_imag(&t), dabs(r__2)) != 0.f)
		 {
	    goto L10;
	}
	i__2 = k + (k + 1) * a_dim1;
	t.r = a[i__2].r, t.i = a[i__2].i;
	c_div(&q__3, &d__, &t);
	i__2 = k + 1 + (k + 1) * a_dim1;
	q__2.r = q__3.r * a[i__2].r - q__3.i * a[i__2].i, q__2.i = q__3.r * a[
		i__2].i + q__3.i * a[i__2].r;
	q__1.r = q__2.r - t.r, q__1.i = q__2.i - t.i;
	d__.r = q__1.r, d__.i = q__1.i;
	goto L20;
L10:
	d__.r = t.r, d__.i = t.i;
	t.r = 0.f, t.i = 0.f;
L20:
L30:

	q__1.r = d__.r * det[1].r - d__.i * det[1].i, q__1.i = d__.r * det[1]
		.i + d__.i * det[1].r;
	det[1].r = q__1.r, det[1].i = q__1.i;
	if ((r__1 = det[1].r, dabs(r__1)) + (r__2 = r_imag(&det[1]), dabs(
		r__2)) == 0.f) {
	    goto L80;
	}
L40:
	if ((r__1 = det[1].r, dabs(r__1)) + (r__2 = r_imag(&det[1]), dabs(
		r__2)) >= 1.f) {
	    goto L50;
	}
	q__2.r = ten, q__2.i = 0.f;
	q__1.r = q__2.r * det[1].r - q__2.i * det[1].i, q__1.i = q__2.r * det[
		1].i + q__2.i * det[1].r;
	det[1].r = q__1.r, det[1].i = q__1.i;
	q__1.r = det[2].r - 1.f, q__1.i = det[2].i - 0.f;
	det[2].r = q__1.r, det[2].i = q__1.i;
	goto L40;
L50:
L60:
	if ((r__1 = det[1].r, dabs(r__1)) + (r__2 = r_imag(&det[1]), dabs(
		r__2)) < ten) {
	    goto L70;
	}
	q__2.r = ten, q__2.i = 0.f;
	c_div(&q__1, &det[1], &q__2);
	det[1].r = q__1.r, det[1].i = q__1.i;
	q__1.r = det[2].r + 1.f, q__1.i = det[2].i + 0.f;
	det[2].r = q__1.r, det[2].i = q__1.i;
	goto L60;
L70:
L80:
/* L90: */
	;
    }
L100:

/*     COMPUTE INVERSE(A) */

    if (noinv) {
	goto L230;
    }
    k = 1;
L110:
    if (k > *n) {
	goto L220;
    }
    km1 = k - 1;
    if (kpvt[k] < 0) {
	goto L140;
    }

/*              1 BY 1 */

    i__1 = k + k * a_dim1;
    c_div(&q__1, &c_b3, &a[k + k * a_dim1]);
    a[i__1].r = q__1.r, a[i__1].i = q__1.i;
    if (km1 < 1) {
	goto L130;
    }
    ccopy_(&km1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1);
    i__1 = km1;
    for (j = 1; j <= i__1; ++j) {
	i__2 = j + k * a_dim1;
	cdotu_(&q__1, &j, &a[j * a_dim1 + 1], &c__1, &work[1], &c__1);
	a[i__2].r = q__1.r, a[i__2].i = q__1.i;
	i__2 = j - 1;
	caxpy_(&i__2, &work[j], &a[j * a_dim1 + 1], &c__1, &a[k * a_dim1 + 1],
		 &c__1);
/* L120: */
    }
    i__1 = k + k * a_dim1;
    i__2 = k + k * a_dim1;
    cdotu_(&q__2, &km1, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1);
    q__1.r = a[i__2].r + q__2.r, q__1.i = a[i__2].i + q__2.i;
    a[i__1].r = q__1.r, a[i__1].i = q__1.i;
L130:
    kstep = 1;
    goto L180;
L140:

/*              2 BY 2 */

    i__1 = k + (k + 1) * a_dim1;
    t.r = a[i__1].r, t.i = a[i__1].i;
    c_div(&q__1, &a[k + k * a_dim1], &t);
    ak.r = q__1.r, ak.i = q__1.i;
    c_div(&q__1, &a[k + 1 + (k + 1) * a_dim1], &t);
    akp1.r = q__1.r, akp1.i = q__1.i;
    c_div(&q__1, &a[k + (k + 1) * a_dim1], &t);
    akkp1.r = q__1.r, akkp1.i = q__1.i;
    q__3.r = ak.r * akp1.r - ak.i * akp1.i, q__3.i = ak.r * akp1.i + ak.i * 
	    akp1.r;
    q__2.r = q__3.r - 1.f, q__2.i = q__3.i - 0.f;
    q__1.r = t.r * q__2.r - t.i * q__2.i, q__1.i = t.r * q__2.i + t.i * 
	    q__2.r;
    d__.r = q__1.r, d__.i = q__1.i;
    i__1 = k + k * a_dim1;
    c_div(&q__1, &akp1, &d__);
    a[i__1].r = q__1.r, a[i__1].i = q__1.i;
    i__1 = k + 1 + (k + 1) * a_dim1;
    c_div(&q__1, &ak, &d__);
    a[i__1].r = q__1.r, a[i__1].i = q__1.i;
    i__1 = k + (k + 1) * a_dim1;
    q__2.r = -akkp1.r, q__2.i = -akkp1.i;
    c_div(&q__1, &q__2, &d__);
    a[i__1].r = q__1.r, a[i__1].i = q__1.i;
    if (km1 < 1) {
	goto L170;
    }
    ccopy_(&km1, &a[(k + 1) * a_dim1 + 1], &c__1, &work[1], &c__1);
    i__1 = km1;
    for (j = 1; j <= i__1; ++j) {
	i__2 = j + (k + 1) * a_dim1;
	cdotu_(&q__1, &j, &a[j * a_dim1 + 1], &c__1, &work[1], &c__1);
	a[i__2].r = q__1.r, a[i__2].i = q__1.i;
	i__2 = j - 1;
	caxpy_(&i__2, &work[j], &a[j * a_dim1 + 1], &c__1, &a[(k + 1) * 
		a_dim1 + 1], &c__1);
/* L150: */
    }
    i__1 = k + 1 + (k + 1) * a_dim1;
    i__2 = k + 1 + (k + 1) * a_dim1;
    cdotu_(&q__2, &km1, &work[1], &c__1, &a[(k + 1) * a_dim1 + 1], &c__1);
    q__1.r = a[i__2].r + q__2.r, q__1.i = a[i__2].i + q__2.i;
    a[i__1].r = q__1.r, a[i__1].i = q__1.i;
    i__1 = k + (k + 1) * a_dim1;
    i__2 = k + (k + 1) * a_dim1;
    cdotu_(&q__2, &km1, &a[k * a_dim1 + 1], &c__1, &a[(k + 1) * a_dim1 + 1], &
	    c__1);
    q__1.r = a[i__2].r + q__2.r, q__1.i = a[i__2].i + q__2.i;
    a[i__1].r = q__1.r, a[i__1].i = q__1.i;
    ccopy_(&km1, &a[k * a_dim1 + 1], &c__1, &work[1], &c__1);
    i__1 = km1;
    for (j = 1; j <= i__1; ++j) {
	i__2 = j + k * a_dim1;
	cdotu_(&q__1, &j, &a[j * a_dim1 + 1], &c__1, &work[1], &c__1);
	a[i__2].r = q__1.r, a[i__2].i = q__1.i;
	i__2 = j - 1;
	caxpy_(&i__2, &work[j], &a[j * a_dim1 + 1], &c__1, &a[k * a_dim1 + 1],
		 &c__1);
/* L160: */
    }
    i__1 = k + k * a_dim1;
    i__2 = k + k * a_dim1;
    cdotu_(&q__2, &km1, &work[1], &c__1, &a[k * a_dim1 + 1], &c__1);
    q__1.r = a[i__2].r + q__2.r, q__1.i = a[i__2].i + q__2.i;
    a[i__1].r = q__1.r, a[i__1].i = q__1.i;
L170:
    kstep = 2;
L180:

/*           SWAP */

    ks = (i__1 = kpvt[k], abs(i__1));
    if (ks == k) {
	goto L210;
    }
    cswap_(&ks, &a[ks * a_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1);
    i__1 = k;
    for (jb = ks; jb <= i__1; ++jb) {
	j = k + ks - jb;
	i__2 = j + k * a_dim1;
	temp.r = a[i__2].r, temp.i = a[i__2].i;
	i__2 = j + k * a_dim1;
	i__3 = ks + j * a_dim1;
	a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i;
	i__2 = ks + j * a_dim1;
	a[i__2].r = temp.r, a[i__2].i = temp.i;
/* L190: */
    }
    if (kstep == 1) {
	goto L200;
    }
    i__1 = ks + (k + 1) * a_dim1;
    temp.r = a[i__1].r, temp.i = a[i__1].i;
    i__1 = ks + (k + 1) * a_dim1;
    i__2 = k + (k + 1) * a_dim1;
    a[i__1].r = a[i__2].r, a[i__1].i = a[i__2].i;
    i__1 = k + (k + 1) * a_dim1;
    a[i__1].r = temp.r, a[i__1].i = temp.i;
L200:
L210:
    k += kstep;
    goto L110;
L220:
L230:
    return 0;
} /* csidi_ */
Example #8
0
/* Subroutine */ int ctpt02_(char *uplo, char *trans, char *diag, integer *n, 
	integer *nrhs, complex *ap, complex *x, integer *ldx, complex *b, 
	integer *ldb, complex *work, real *rwork, real *resid)
{
    /* System generated locals */
    integer b_dim1, b_offset, x_dim1, x_offset, i__1;
    real r__1, r__2;

    /* Local variables */
    static integer j;
    extern logical lsame_(char *, char *);
    static real anorm, bnorm;
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
	    complex *, integer *), caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *), ctpmv_(char *, char *, char *, 
	    integer *, complex *, complex *, integer *);
    static real xnorm;
    extern doublereal slamch_(char *), clantp_(char *, char *, char *,
	     integer *, complex *, real *), scasum_(
	    integer *, complex *, integer *);
    static real eps;


#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]
#define x_subscr(a_1,a_2) (a_2)*x_dim1 + a_1
#define x_ref(a_1,a_2) x[x_subscr(a_1,a_2)]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    CTPT02 computes the residual for the computed solution to a   
    triangular system of linear equations  A*x = b,  A**T *x = b,  or   
    A**H *x = b, when the triangular matrix A is stored in packed format.   
    Here A**T denotes the transpose of A, A**H denotes the conjugate   
    transpose of A, and x and b are N by NRHS matrices.  The test ratio   
    is the maximum over the number of right hand sides of   
    the maximum over the number of right hand sides of   
       norm(b - op(A)*x) / ( norm(op(A)) * norm(x) * EPS ),   
    where op(A) denotes A, A**T, or A**H, and EPS is the machine epsilon.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the matrix A is upper or lower triangular.   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    TRANS   (input) CHARACTER*1   
            Specifies the operation applied to A.   
            = 'N':  A *x = b     (No transpose)   
            = 'T':  A**T *x = b  (Transpose)   
            = 'C':  A**H *x = b  (Conjugate transpose)   

    DIAG    (input) CHARACTER*1   
            Specifies whether or not the matrix A is unit triangular.   
            = 'N':  Non-unit triangular   
            = 'U':  Unit triangular   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrices X and B.  NRHS >= 0.   

    AP      (input) COMPLEX array, dimension (N*(N+1)/2)   
            The upper or lower triangular matrix A, packed columnwise in   
            a linear array.  The j-th column of A is stored in the array   
            AP as follows:   
            if UPLO = 'U', AP((j-1)*j/2 + i) = A(i,j) for 1<=i<=j;   
            if UPLO = 'L',   
               AP((j-1)*(n-j) + j*(j+1)/2 + i-j) = A(i,j) for j<=i<=n.   

    X       (input) COMPLEX array, dimension (LDX,NRHS)   
            The computed solution vectors for the system of linear   
            equations.   

    LDX     (input) INTEGER   
            The leading dimension of the array X.  LDX >= max(1,N).   

    B       (input) COMPLEX array, dimension (LDB,NRHS)   
            The right hand side vectors for the system of linear   
            equations.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    WORK    (workspace) COMPLEX array, dimension (N)   

    RWORK   (workspace) REAL array, dimension (N)   

    RESID   (output) REAL   
            The maximum over the number of right hand sides of   
            norm(op(A)*x - b) / ( norm(op(A)) * norm(x) * EPS ).   

    =====================================================================   


       Quick exit if N = 0 or NRHS = 0   

       Parameter adjustments */
    --ap;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;
    --rwork;

    /* Function Body */
    if (*n <= 0 || *nrhs <= 0) {
	*resid = 0.f;
	return 0;
    }

/*     Compute the 1-norm of A or A**H. */

    if (lsame_(trans, "N")) {
	anorm = clantp_("1", uplo, diag, n, &ap[1], &rwork[1]);
    } else {
	anorm = clantp_("I", uplo, diag, n, &ap[1], &rwork[1]);
    }

/*     Exit with RESID = 1/EPS if ANORM = 0. */

    eps = slamch_("Epsilon");
    if (anorm <= 0.f) {
	*resid = 1.f / eps;
	return 0;
    }

/*     Compute the maximum over the number of right hand sides of   
          norm(op(A)*x - b) / ( norm(op(A)) * norm(x) * EPS ). */

    *resid = 0.f;
    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
	ccopy_(n, &x_ref(1, j), &c__1, &work[1], &c__1);
	ctpmv_(uplo, trans, diag, n, &ap[1], &work[1], &c__1);
	caxpy_(n, &c_b12, &b_ref(1, j), &c__1, &work[1], &c__1);
	bnorm = scasum_(n, &work[1], &c__1);
	xnorm = scasum_(n, &x_ref(1, j), &c__1);
	if (xnorm <= 0.f) {
	    *resid = 1.f / eps;
	} else {
/* Computing MAX */
	    r__1 = *resid, r__2 = bnorm / anorm / xnorm / eps;
	    *resid = dmax(r__1,r__2);
	}
/* L10: */
    }

    return 0;

/*     End of CTPT02 */

} /* ctpt02_ */
Example #9
0
 int cgehrd_(int *n, int *ilo, int *ihi, complex *
	a, int *lda, complex *tau, complex *work, int *lwork, int 
	*info)
{
    /* System generated locals */
    int a_dim1, a_offset, i__1, i__2, i__3, i__4;
    complex q__1;

    /* Local variables */
    int i__, j;
    complex t[4160]	/* was [65][64] */;
    int ib;
    complex ei;
    int nb, nh, nx, iws;
    extern  int cgemm_(char *, char *, int *, int *, 
	    int *, complex *, complex *, int *, complex *, int *, 
	    complex *, complex *, int *);
    int nbmin, iinfo;
    extern  int ctrmm_(char *, char *, char *, char *, 
	    int *, int *, complex *, complex *, int *, complex *, 
	    int *), caxpy_(int *, 
	    complex *, complex *, int *, complex *, int *), cgehd2_(
	    int *, int *, int *, complex *, int *, complex *, 
	    complex *, int *), clahr2_(int *, int *, int *, 
	    complex *, int *, complex *, complex *, int *, complex *, 
	    int *), clarfb_(char *, char *, char *, char *, int *, 
	    int *, int *, complex *, int *, complex *, int *, 
	    complex *, int *, complex *, int *), xerbla_(char *, int *);
    extern int ilaenv_(int *, char *, char *, int *, int *, 
	    int *, int *);
    int ldwork, lwkopt;
    int lquery;


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CGEHRD reduces a complex general matrix A to upper Hessenberg form H by */
/*  an unitary similarity transformation:  Q' * A * Q = H . */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  ILO     (input) INTEGER */
/*  IHI     (input) INTEGER */
/*          It is assumed that A is already upper triangular in rows */
/*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */
/*          set by a previous call to CGEBAL; otherwise they should be */
/*          set to 1 and N respectively. See Further Details. */
/*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */

/*  A       (input/output) COMPLEX array, dimension (LDA,N) */
/*          On entry, the N-by-N general matrix to be reduced. */
/*          On exit, the upper triangle and the first subdiagonal of A */
/*          are overwritten with the upper Hessenberg matrix H, and the */
/*          elements below the first subdiagonal, with the array TAU, */
/*          represent the unitary matrix Q as a product of elementary */
/*          reflectors. See Further Details. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= MAX(1,N). */

/*  TAU     (output) COMPLEX array, dimension (N-1) */
/*          The scalar factors of the elementary reflectors (see Further */
/*          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to */
/*          zero. */

/*  WORK    (workspace/output) COMPLEX array, dimension (LWORK) */
/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */

/*  LWORK   (input) INTEGER */
/*          The length of the array WORK.  LWORK >= MAX(1,N). */
/*          For optimum performance LWORK >= N*NB, where NB is the */
/*          optimal blocksize. */

/*          If LWORK = -1, then a workspace query is assumed; the routine */
/*          only calculates the optimal size of the WORK array, returns */
/*          this value as the first entry of the WORK array, and no error */
/*          message related to LWORK is issued by XERBLA. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  Further Details */
/*  =============== */

/*  The matrix Q is represented as a product of (ihi-ilo) elementary */
/*  reflectors */

/*     Q = H(ilo) H(ilo+1) . . . H(ihi-1). */

/*  Each H(i) has the form */

/*     H(i) = I - tau * v * v' */

/*  where tau is a complex scalar, and v is a complex vector with */
/*  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */
/*  exit in A(i+2:ihi,i), and tau in TAU(i). */

/*  The contents of A are illustrated by the following example, with */
/*  n = 7, ilo = 2 and ihi = 6: */

/*  on entry,                        on exit, */

/*  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a ) */
/*  (     a   a   a   a   a   a )    (      a   h   h   h   h   a ) */
/*  (     a   a   a   a   a   a )    (      h   h   h   h   h   h ) */
/*  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h ) */
/*  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h ) */
/*  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h ) */
/*  (                         a )    (                          a ) */

/*  where a denotes an element of the original matrix A, h denotes a */
/*  modified element of the upper Hessenberg matrix H, and vi denotes an */
/*  element of the vector defining H(i). */

/*  This file is a slight modification of LAPACK-3.0's CGEHRD */
/*  subroutine incorporating improvements proposed by Quintana-Orti and */
/*  Van de Geijn (2005). */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
/* Computing MIN */
    i__1 = 64, i__2 = ilaenv_(&c__1, "CGEHRD", " ", n, ilo, ihi, &c_n1);
    nb = MIN(i__1,i__2);
    lwkopt = *n * nb;
    work[1].r = (float) lwkopt, work[1].i = 0.f;
    lquery = *lwork == -1;
    if (*n < 0) {
	*info = -1;
    } else if (*ilo < 1 || *ilo > MAX(1,*n)) {
	*info = -2;
    } else if (*ihi < MIN(*ilo,*n) || *ihi > *n) {
	*info = -3;
    } else if (*lda < MAX(1,*n)) {
	*info = -5;
    } else if (*lwork < MAX(1,*n) && ! lquery) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CGEHRD", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */

    i__1 = *ilo - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = i__;
	tau[i__2].r = 0.f, tau[i__2].i = 0.f;
/* L10: */
    }
    i__1 = *n - 1;
    for (i__ = MAX(1,*ihi); i__ <= i__1; ++i__) {
	i__2 = i__;
	tau[i__2].r = 0.f, tau[i__2].i = 0.f;
/* L20: */
    }

/*     Quick return if possible */

    nh = *ihi - *ilo + 1;
    if (nh <= 1) {
	work[1].r = 1.f, work[1].i = 0.f;
	return 0;
    }

/*     Determine the block size */

/* Computing MIN */
    i__1 = 64, i__2 = ilaenv_(&c__1, "CGEHRD", " ", n, ilo, ihi, &c_n1);
    nb = MIN(i__1,i__2);
    nbmin = 2;
    iws = 1;
    if (nb > 1 && nb < nh) {

/*        Determine when to cross over from blocked to unblocked code */
/*        (last block is always handled by unblocked code) */

/* Computing MAX */
	i__1 = nb, i__2 = ilaenv_(&c__3, "CGEHRD", " ", n, ilo, ihi, &c_n1);
	nx = MAX(i__1,i__2);
	if (nx < nh) {

/*           Determine if workspace is large enough for blocked code */

	    iws = *n * nb;
	    if (*lwork < iws) {

/*              Not enough workspace to use optimal NB:  determine the */
/*              minimum value of NB, and reduce NB or force use of */
/*              unblocked code */

/* Computing MAX */
		i__1 = 2, i__2 = ilaenv_(&c__2, "CGEHRD", " ", n, ilo, ihi, &
			c_n1);
		nbmin = MAX(i__1,i__2);
		if (*lwork >= *n * nbmin) {
		    nb = *lwork / *n;
		} else {
		    nb = 1;
		}
	    }
	}
    }
    ldwork = *n;

    if (nb < nbmin || nb >= nh) {

/*        Use unblocked code below */

	i__ = *ilo;

    } else {

/*        Use blocked code */

	i__1 = *ihi - 1 - nx;
	i__2 = nb;
	for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
	    i__3 = nb, i__4 = *ihi - i__;
	    ib = MIN(i__3,i__4);

/*           Reduce columns i:i+ib-1 to Hessenberg form, returning the */
/*           matrices V and T of the block reflector H = I - V*T*V' */
/*           which performs the reduction, and also the matrix Y = A*V*T */

	    clahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, &
		    c__65, &work[1], &ldwork);

/*           Apply the block reflector H to A(1:ihi,i+ib:ihi) from the */
/*           right, computing  A := A - Y * V'. V(i+ib,ib-1) must be set */
/*           to 1 */

	    i__3 = i__ + ib + (i__ + ib - 1) * a_dim1;
	    ei.r = a[i__3].r, ei.i = a[i__3].i;
	    i__3 = i__ + ib + (i__ + ib - 1) * a_dim1;
	    a[i__3].r = 1.f, a[i__3].i = 0.f;
	    i__3 = *ihi - i__ - ib + 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemm_("No transpose", "Conjugate transpose", ihi, &i__3, &ib, &
		    q__1, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, 
		     &c_b2, &a[(i__ + ib) * a_dim1 + 1], lda);
	    i__3 = i__ + ib + (i__ + ib - 1) * a_dim1;
	    a[i__3].r = ei.r, a[i__3].i = ei.i;

/*           Apply the block reflector H to A(1:i,i+1:i+ib-1) from the */
/*           right */

	    i__3 = ib - 1;
	    ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", &i__, &
		    i__3, &c_b2, &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &
		    ldwork);
	    i__3 = ib - 2;
	    for (j = 0; j <= i__3; ++j) {
		q__1.r = -1.f, q__1.i = -0.f;
		caxpy_(&i__, &q__1, &work[ldwork * j + 1], &c__1, &a[(i__ + j 
			+ 1) * a_dim1 + 1], &c__1);
/* L30: */
	    }

/*           Apply the block reflector H to A(i+1:ihi,i+ib:n) from the */
/*           left */

	    i__3 = *ihi - i__;
	    i__4 = *n - i__ - ib + 1;
	    clarfb_("Left", "Conjugate transpose", "Forward", "Columnwise", &
		    i__3, &i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &
		    c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &
		    ldwork);
/* L40: */
	}
    }

/*     Use unblocked code to reduce the rest of the matrix */

    cgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
    work[1].r = (float) iws, work[1].i = 0.f;

    return 0;

/*     End of CGEHRD */

} /* cgehrd_ */
Example #10
0
/* DECK CSPSL */
/* Subroutine */ int cspsl_(complex *ap, integer *n, integer *kpvt, complex *
	b)
{
    /* System generated locals */
    integer i__1, i__2, i__3;
    complex q__1, q__2, q__3;

    /* Local variables */
    static integer k;
    static complex ak, bk;
    static integer ik, kk, kp;
    static complex akm1, bkm1;
    static integer ikm1, km1k, ikp1;
    static complex temp;
    static integer km1km1;
    static complex denom;
    extern /* Complex */ void cdotu_(complex *, integer *, complex *, integer 
	    *, complex *, integer *);
    extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *);

/* ***BEGIN PROLOGUE  CSPSL */
/* ***PURPOSE  Solve a complex symmetric system using the factors obtained */
/*            from CSPFA. */
/* ***LIBRARY   SLATEC (LINPACK) */
/* ***CATEGORY  D2C1 */
/* ***TYPE      COMPLEX (SSPSL-S, DSPSL-D, CHPSL-C, CSPSL-C) */
/* ***KEYWORDS  LINEAR ALGEBRA, LINPACK, MATRIX, PACKED, SOLVE, SYMMETRIC */
/* ***AUTHOR  Bunch, J., (UCSD) */
/* ***DESCRIPTION */

/*     CSISL solves the complex symmetric system */
/*     A * X = B */
/*     using the factors computed by CSPFA. */

/*     On Entry */

/*        AP      COMPLEX(N*(N+1)/2) */
/*                the output from CSPFA. */

/*        N       INTEGER */
/*                the order of the matrix  A . */

/*        KVPT    INTEGER(N) */
/*                the pivot vector from CSPFA. */

/*        B       COMPLEX(N) */
/*                the right hand side vector. */

/*     On Return */

/*        B       the solution vector  X . */

/*     Error Condition */

/*        A division by zero may occur if  CSPCO  has set RCOND .EQ. 0.0 */
/*        or  CSPFA  has set INFO .NE. 0  . */

/*     To compute  INVERSE(A) * C  where  C  is a matrix */
/*     with  P  columns */
/*           CALL CSPFA(AP,N,KVPT,INFO) */
/*           IF (INFO .NE. 0) GO TO ... */
/*           DO 10 J = 1, P */
/*              CALL CSPSL(AP,N,KVPT,C(1,J)) */
/*        10 CONTINUE */

/* ***REFERENCES  J. J. Dongarra, J. R. Bunch, C. B. Moler, and G. W. */
/*                 Stewart, LINPACK Users' Guide, SIAM, 1979. */
/* ***ROUTINES CALLED  CAXPY, CDOTU */
/* ***REVISION HISTORY  (YYMMDD) */
/*   780814  DATE WRITTEN */
/*   890531  Changed all specific intrinsics to generic.  (WRB) */
/*   890831  Modified array declarations.  (WRB) */
/*   891107  Corrected category and modified routine equivalence */
/*           list.  (WRB) */
/*   891107  REVISION DATE from Version 3.2 */
/*   891214  Prologue converted to Version 4.0 format.  (BAB) */
/*   900326  Removed duplicate information from DESCRIPTION section. */
/*           (WRB) */
/*   920501  Reformatted the REFERENCES section.  (WRB) */
/* ***END PROLOGUE  CSPSL */


/*     LOOP BACKWARD APPLYING THE TRANSFORMATIONS AND */
/*     D INVERSE TO B. */

/* ***FIRST EXECUTABLE STATEMENT  CSPSL */
    /* Parameter adjustments */
    --b;
    --kpvt;
    --ap;

    /* Function Body */
    k = *n;
    ik = *n * (*n - 1) / 2;
L10:
    if (k == 0) {
	goto L80;
    }
    kk = ik + k;
    if (kpvt[k] < 0) {
	goto L40;
    }

/*           1 X 1 PIVOT BLOCK. */

    if (k == 1) {
	goto L30;
    }
    kp = kpvt[k];
    if (kp == k) {
	goto L20;
    }

/*                 INTERCHANGE. */

    i__1 = k;
    temp.r = b[i__1].r, temp.i = b[i__1].i;
    i__1 = k;
    i__2 = kp;
    b[i__1].r = b[i__2].r, b[i__1].i = b[i__2].i;
    i__1 = kp;
    b[i__1].r = temp.r, b[i__1].i = temp.i;
L20:

/*              APPLY THE TRANSFORMATION. */

    i__1 = k - 1;
    caxpy_(&i__1, &b[k], &ap[ik + 1], &c__1, &b[1], &c__1);
L30:

/*           APPLY D INVERSE. */

    i__1 = k;
    c_div(&q__1, &b[k], &ap[kk]);
    b[i__1].r = q__1.r, b[i__1].i = q__1.i;
    --k;
    ik -= k;
    goto L70;
L40:

/*           2 X 2 PIVOT BLOCK. */

    ikm1 = ik - (k - 1);
    if (k == 2) {
	goto L60;
    }
    kp = (i__1 = kpvt[k], abs(i__1));
    if (kp == k - 1) {
	goto L50;
    }

/*                 INTERCHANGE. */

    i__1 = k - 1;
    temp.r = b[i__1].r, temp.i = b[i__1].i;
    i__1 = k - 1;
    i__2 = kp;
    b[i__1].r = b[i__2].r, b[i__1].i = b[i__2].i;
    i__1 = kp;
    b[i__1].r = temp.r, b[i__1].i = temp.i;
L50:

/*              APPLY THE TRANSFORMATION. */

    i__1 = k - 2;
    caxpy_(&i__1, &b[k], &ap[ik + 1], &c__1, &b[1], &c__1);
    i__1 = k - 2;
    caxpy_(&i__1, &b[k - 1], &ap[ikm1 + 1], &c__1, &b[1], &c__1);
L60:

/*           APPLY D INVERSE. */

    km1k = ik + k - 1;
    kk = ik + k;
    c_div(&q__1, &ap[kk], &ap[km1k]);
    ak.r = q__1.r, ak.i = q__1.i;
    km1km1 = ikm1 + k - 1;
    c_div(&q__1, &ap[km1km1], &ap[km1k]);
    akm1.r = q__1.r, akm1.i = q__1.i;
    c_div(&q__1, &b[k], &ap[km1k]);
    bk.r = q__1.r, bk.i = q__1.i;
    c_div(&q__1, &b[k - 1], &ap[km1k]);
    bkm1.r = q__1.r, bkm1.i = q__1.i;
    q__2.r = ak.r * akm1.r - ak.i * akm1.i, q__2.i = ak.r * akm1.i + ak.i * 
	    akm1.r;
    q__1.r = q__2.r - 1.f, q__1.i = q__2.i;
    denom.r = q__1.r, denom.i = q__1.i;
    i__1 = k;
    q__3.r = akm1.r * bk.r - akm1.i * bk.i, q__3.i = akm1.r * bk.i + akm1.i * 
	    bk.r;
    q__2.r = q__3.r - bkm1.r, q__2.i = q__3.i - bkm1.i;
    c_div(&q__1, &q__2, &denom);
    b[i__1].r = q__1.r, b[i__1].i = q__1.i;
    i__1 = k - 1;
    q__3.r = ak.r * bkm1.r - ak.i * bkm1.i, q__3.i = ak.r * bkm1.i + ak.i * 
	    bkm1.r;
    q__2.r = q__3.r - bk.r, q__2.i = q__3.i - bk.i;
    c_div(&q__1, &q__2, &denom);
    b[i__1].r = q__1.r, b[i__1].i = q__1.i;
    k += -2;
    ik = ik - (k + 1) - k;
L70:
    goto L10;
L80:

/*     LOOP FORWARD APPLYING THE TRANSFORMATIONS. */

    k = 1;
    ik = 0;
L90:
    if (k > *n) {
	goto L160;
    }
    if (kpvt[k] < 0) {
	goto L120;
    }

/*           1 X 1 PIVOT BLOCK. */

    if (k == 1) {
	goto L110;
    }

/*              APPLY THE TRANSFORMATION. */

    i__1 = k;
    i__2 = k;
    i__3 = k - 1;
    cdotu_(&q__2, &i__3, &ap[ik + 1], &c__1, &b[1], &c__1);
    q__1.r = b[i__2].r + q__2.r, q__1.i = b[i__2].i + q__2.i;
    b[i__1].r = q__1.r, b[i__1].i = q__1.i;
    kp = kpvt[k];
    if (kp == k) {
	goto L100;
    }

/*                 INTERCHANGE. */

    i__1 = k;
    temp.r = b[i__1].r, temp.i = b[i__1].i;
    i__1 = k;
    i__2 = kp;
    b[i__1].r = b[i__2].r, b[i__1].i = b[i__2].i;
    i__1 = kp;
    b[i__1].r = temp.r, b[i__1].i = temp.i;
L100:
L110:
    ik += k;
    ++k;
    goto L150;
L120:

/*           2 X 2 PIVOT BLOCK. */

    if (k == 1) {
	goto L140;
    }

/*              APPLY THE TRANSFORMATION. */

    i__1 = k;
    i__2 = k;
    i__3 = k - 1;
    cdotu_(&q__2, &i__3, &ap[ik + 1], &c__1, &b[1], &c__1);
    q__1.r = b[i__2].r + q__2.r, q__1.i = b[i__2].i + q__2.i;
    b[i__1].r = q__1.r, b[i__1].i = q__1.i;
    ikp1 = ik + k;
    i__1 = k + 1;
    i__2 = k + 1;
    i__3 = k - 1;
    cdotu_(&q__2, &i__3, &ap[ikp1 + 1], &c__1, &b[1], &c__1);
    q__1.r = b[i__2].r + q__2.r, q__1.i = b[i__2].i + q__2.i;
    b[i__1].r = q__1.r, b[i__1].i = q__1.i;
    kp = (i__1 = kpvt[k], abs(i__1));
    if (kp == k) {
	goto L130;
    }

/*                 INTERCHANGE. */

    i__1 = k;
    temp.r = b[i__1].r, temp.i = b[i__1].i;
    i__1 = k;
    i__2 = kp;
    b[i__1].r = b[i__2].r, b[i__1].i = b[i__2].i;
    i__1 = kp;
    b[i__1].r = temp.r, b[i__1].i = temp.i;
L130:
L140:
    ik = ik + k + k + 1;
    k += 2;
L150:
    goto L90;
L160:
    return 0;
} /* cspsl_ */
Example #11
0
/* Subroutine */ int chpgst_(integer *itype, char *uplo, integer *n, complex *
	ap, complex *bp, integer *info, ftnlen uplo_len)
{
    /* System generated locals */
    integer i__1, i__2, i__3, i__4;
    real r__1, r__2;
    complex q__1, q__2, q__3;

    /* Local variables */
    static integer j, k, j1, k1, jj, kk;
    static complex ct;
    static real ajj;
    static integer j1j1;
    static real akk;
    static integer k1k1;
    static real bjj, bkk;
    extern /* Subroutine */ int chpr2_(char *, integer *, complex *, complex *
	    , integer *, complex *, integer *, complex *, ftnlen);
    extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer 
	    *, complex *, integer *);
    extern logical lsame_(char *, char *, ftnlen, ftnlen);
    extern /* Subroutine */ int chpmv_(char *, integer *, complex *, complex *
	    , complex *, integer *, complex *, complex *, integer *, ftnlen), 
	    caxpy_(integer *, complex *, complex *, integer *, complex *, 
	    integer *), ctpmv_(char *, char *, char *, integer *, complex *, 
	    complex *, integer *, ftnlen, ftnlen, ftnlen);
    static logical upper;
    extern /* Subroutine */ int ctpsv_(char *, char *, char *, integer *, 
	    complex *, complex *, integer *, ftnlen, ftnlen, ftnlen), csscal_(
	    integer *, real *, complex *, integer *), xerbla_(char *, integer 
	    *, ftnlen);


/*  -- LAPACK routine (version 3.0) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
/*     Courant Institute, Argonne National Lab, and Rice University */
/*     September 30, 1994 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CHPGST reduces a complex Hermitian-definite generalized */
/*  eigenproblem to standard form, using packed storage. */

/*  If ITYPE = 1, the problem is A*x = lambda*B*x, */
/*  and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) */

/*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */
/*  B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. */

/*  B must have been previously factorized as U**H*U or L*L**H by CPPTRF. */

/*  Arguments */
/*  ========= */

/*  ITYPE   (input) INTEGER */
/*          = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); */
/*          = 2 or 3: compute U*A*U**H or L**H*A*L. */

/*  UPLO    (input) CHARACTER */
/*          = 'U':  Upper triangle of A is stored and B is factored as */
/*                  U**H*U; */
/*          = 'L':  Lower triangle of A is stored and B is factored as */
/*                  L*L**H. */

/*  N       (input) INTEGER */
/*          The order of the matrices A and B.  N >= 0. */

/*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2) */
/*          On entry, the upper or lower triangle of the Hermitian matrix */
/*          A, packed columnwise in a linear array.  The j-th column of A */
/*          is stored in the array AP as follows: */
/*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */
/*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */

/*          On exit, if INFO = 0, the transformed matrix, stored in the */
/*          same format as A. */

/*  BP      (input) COMPLEX array, dimension (N*(N+1)/2) */
/*          The triangular factor from the Cholesky factorization of B, */
/*          stored in the same format as A, as returned by CPPTRF. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --bp;
    --ap;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
    if (*itype < 1 || *itype > 3) {
	*info = -1;
    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CHPGST", &i__1, (ftnlen)6);
	return 0;
    }

    if (*itype == 1) {
	if (upper) {

/*           Compute inv(U')*A*inv(U) */

/*           J1 and JJ are the indices of A(1,j) and A(j,j) */

	    jj = 0;
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		j1 = jj + 1;
		jj += j;

/*              Compute the j-th column of the upper triangle of A */

		i__2 = jj;
		i__3 = jj;
		r__1 = ap[i__3].r;
		ap[i__2].r = r__1, ap[i__2].i = 0.f;
		i__2 = jj;
		bjj = bp[i__2].r;
		ctpsv_(uplo, "Conjugate transpose", "Non-unit", &j, &bp[1], &
			ap[j1], &c__1, (ftnlen)1, (ftnlen)19, (ftnlen)8);
		i__2 = j - 1;
		q__1.r = -1.f, q__1.i = -0.f;
		chpmv_(uplo, &i__2, &q__1, &ap[1], &bp[j1], &c__1, &c_b1, &ap[
			j1], &c__1, (ftnlen)1);
		i__2 = j - 1;
		r__1 = 1.f / bjj;
		csscal_(&i__2, &r__1, &ap[j1], &c__1);
		i__2 = jj;
		i__3 = jj;
		i__4 = j - 1;
		cdotc_(&q__3, &i__4, &ap[j1], &c__1, &bp[j1], &c__1);
		q__2.r = ap[i__3].r - q__3.r, q__2.i = ap[i__3].i - q__3.i;
		q__1.r = q__2.r / bjj, q__1.i = q__2.i / bjj;
		ap[i__2].r = q__1.r, ap[i__2].i = q__1.i;
/* L10: */
	    }
	} else {

/*           Compute inv(L)*A*inv(L') */

/*           KK and K1K1 are the indices of A(k,k) and A(k+1,k+1) */

	    kk = 1;
	    i__1 = *n;
	    for (k = 1; k <= i__1; ++k) {
		k1k1 = kk + *n - k + 1;

/*              Update the lower triangle of A(k:n,k:n) */

		i__2 = kk;
		akk = ap[i__2].r;
		i__2 = kk;
		bkk = bp[i__2].r;
/* Computing 2nd power */
		r__1 = bkk;
		akk /= r__1 * r__1;
		i__2 = kk;
		ap[i__2].r = akk, ap[i__2].i = 0.f;
		if (k < *n) {
		    i__2 = *n - k;
		    r__1 = 1.f / bkk;
		    csscal_(&i__2, &r__1, &ap[kk + 1], &c__1);
		    r__1 = akk * -.5f;
		    ct.r = r__1, ct.i = 0.f;
		    i__2 = *n - k;
		    caxpy_(&i__2, &ct, &bp[kk + 1], &c__1, &ap[kk + 1], &c__1)
			    ;
		    i__2 = *n - k;
		    q__1.r = -1.f, q__1.i = -0.f;
		    chpr2_(uplo, &i__2, &q__1, &ap[kk + 1], &c__1, &bp[kk + 1]
			    , &c__1, &ap[k1k1], (ftnlen)1);
		    i__2 = *n - k;
		    caxpy_(&i__2, &ct, &bp[kk + 1], &c__1, &ap[kk + 1], &c__1)
			    ;
		    i__2 = *n - k;
		    ctpsv_(uplo, "No transpose", "Non-unit", &i__2, &bp[k1k1],
			     &ap[kk + 1], &c__1, (ftnlen)1, (ftnlen)12, (
			    ftnlen)8);
		}
		kk = k1k1;
/* L20: */
	    }
	}
    } else {
	if (upper) {

/*           Compute U*A*U' */

/*           K1 and KK are the indices of A(1,k) and A(k,k) */

	    kk = 0;
	    i__1 = *n;
	    for (k = 1; k <= i__1; ++k) {
		k1 = kk + 1;
		kk += k;

/*              Update the upper triangle of A(1:k,1:k) */

		i__2 = kk;
		akk = ap[i__2].r;
		i__2 = kk;
		bkk = bp[i__2].r;
		i__2 = k - 1;
		ctpmv_(uplo, "No transpose", "Non-unit", &i__2, &bp[1], &ap[
			k1], &c__1, (ftnlen)1, (ftnlen)12, (ftnlen)8);
		r__1 = akk * .5f;
		ct.r = r__1, ct.i = 0.f;
		i__2 = k - 1;
		caxpy_(&i__2, &ct, &bp[k1], &c__1, &ap[k1], &c__1);
		i__2 = k - 1;
		chpr2_(uplo, &i__2, &c_b1, &ap[k1], &c__1, &bp[k1], &c__1, &
			ap[1], (ftnlen)1);
		i__2 = k - 1;
		caxpy_(&i__2, &ct, &bp[k1], &c__1, &ap[k1], &c__1);
		i__2 = k - 1;
		csscal_(&i__2, &bkk, &ap[k1], &c__1);
		i__2 = kk;
/* Computing 2nd power */
		r__2 = bkk;
		r__1 = akk * (r__2 * r__2);
		ap[i__2].r = r__1, ap[i__2].i = 0.f;
/* L30: */
	    }
	} else {

/*           Compute L'*A*L */

/*           JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1) */

	    jj = 1;
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		j1j1 = jj + *n - j + 1;

/*              Compute the j-th column of the lower triangle of A */

		i__2 = jj;
		ajj = ap[i__2].r;
		i__2 = jj;
		bjj = bp[i__2].r;
		i__2 = jj;
		r__1 = ajj * bjj;
		i__3 = *n - j;
		cdotc_(&q__2, &i__3, &ap[jj + 1], &c__1, &bp[jj + 1], &c__1);
		q__1.r = r__1 + q__2.r, q__1.i = q__2.i;
		ap[i__2].r = q__1.r, ap[i__2].i = q__1.i;
		i__2 = *n - j;
		csscal_(&i__2, &bjj, &ap[jj + 1], &c__1);
		i__2 = *n - j;
		chpmv_(uplo, &i__2, &c_b1, &ap[j1j1], &bp[jj + 1], &c__1, &
			c_b1, &ap[jj + 1], &c__1, (ftnlen)1);
		i__2 = *n - j + 1;
		ctpmv_(uplo, "Conjugate transpose", "Non-unit", &i__2, &bp[jj]
			, &ap[jj], &c__1, (ftnlen)1, (ftnlen)19, (ftnlen)8);
		jj = j1j1;
/* L40: */
	    }
	}
    }
    return 0;

/*     End of CHPGST */

} /* chpgst_ */
/* Subroutine */ int cla_gerfsx_extended__(integer *prec_type__, integer *
	trans_type__, integer *n, integer *nrhs, complex *a, integer *lda, 
	complex *af, integer *ldaf, integer *ipiv, logical *colequ, real *c__,
	 complex *b, integer *ldb, complex *y, integer *ldy, real *berr_out__,
	 integer *n_norms__, real *errs_n__, real *errs_c__, complex *res, 
	real *ayb, complex *dy, complex *y_tail__, real *rcond, integer *
	ithresh, real *rthresh, real *dz_ub__, logical *ignore_cwise__, 
	integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, y_dim1, 
	    y_offset, errs_n_dim1, errs_n_offset, errs_c_dim1, errs_c_offset, 
	    i__1, i__2, i__3, i__4;
    real r__1, r__2;
    char ch__1[1];

    /* Builtin functions */
    double r_imag(complex *);

    /* Local variables */
    real dxratmax, dzratmax;
    integer i__, j;
    extern /* Subroutine */ int cla_geamv__(integer *, integer *, integer *, 
	    real *, complex *, integer *, complex *, integer *, real *, real *
	    , integer *);
    logical incr_prec__;
    real prev_dz_z__, yk, final_dx_x__;
    extern /* Subroutine */ int cla_wwaddw__(integer *, complex *, complex *, 
	    complex *);
    real final_dz_z__, prevnormdx;
    integer cnt;
    real dyk, eps, incr_thresh__, dx_x__, dz_z__;
    extern /* Subroutine */ int cla_lin_berr__(integer *, integer *, integer *
	    , complex *, real *, real *);
    real ymin;
    extern /* Subroutine */ int blas_cgemv_x__(integer *, integer *, integer *
	    , complex *, complex *, integer *, complex *, integer *, complex *
	    , complex *, integer *, integer *);
    integer y_prec_state__;
    extern /* Subroutine */ int blas_cgemv2_x__(integer *, integer *, integer 
	    *, complex *, complex *, integer *, complex *, complex *, integer 
	    *, complex *, complex *, integer *, integer *), cgemv_(char *, 
	    integer *, integer *, complex *, complex *, integer *, complex *, 
	    integer *, complex *, complex *, integer *), ccopy_(
	    integer *, complex *, integer *, complex *, integer *);
    real dxrat, dzrat;
    extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *);
    char trans[1];
    real normx, normy;
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int cgetrs_(char *, integer *, integer *, complex 
	    *, integer *, integer *, complex *, integer *, integer *);
    real normdx;
    extern /* Character */ VOID chla_transtype__(char *, ftnlen, integer *);
    real hugeval;
    integer x_state__, z_state__;


/*     -- LAPACK routine (version 3.2.1)                                 -- */
/*     -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */
/*     -- Jason Riedy of Univ. of California Berkeley.                 -- */
/*     -- April 2009                                                   -- */

/*     -- LAPACK is a software package provided by Univ. of Tennessee, -- */
/*     -- Univ. of California Berkeley and NAG Ltd.                    -- */

/*     .. */
/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CLA_GERFSX_EXTENDED improves the computed solution to a system of */
/*  linear equations by performing extra-precise iterative refinement */
/*  and provides error bounds and backward error estimates for the solution. */
/*  This subroutine is called by CGERFSX to perform iterative refinement. */
/*  In addition to normwise error bound, the code provides maximum */
/*  componentwise error bound if possible. See comments for ERR_BNDS_NORM */
/*  and ERR_BNDS_COMP for details of the error bounds. Note that this */
/*  subroutine is only resonsible for setting the second fields of */
/*  ERR_BNDS_NORM and ERR_BNDS_COMP. */

/*  Arguments */
/*  ========= */

/*     PREC_TYPE      (input) INTEGER */
/*     Specifies the intermediate precision to be used in refinement. */
/*     The value is defined by ILAPREC(P) where P is a CHARACTER and */
/*     P    = 'S':  Single */
/*          = 'D':  Double */
/*          = 'I':  Indigenous */
/*          = 'X', 'E':  Extra */

/*     TRANS_TYPE     (input) INTEGER */
/*     Specifies the transposition operation on A. */
/*     The value is defined by ILATRANS(T) where T is a CHARACTER and */
/*     T    = 'N':  No transpose */
/*          = 'T':  Transpose */
/*          = 'C':  Conjugate transpose */

/*     N              (input) INTEGER */
/*     The number of linear equations, i.e., the order of the */
/*     matrix A.  N >= 0. */

/*     NRHS           (input) INTEGER */
/*     The number of right-hand-sides, i.e., the number of columns of the */
/*     matrix B. */

/*     A              (input) COMPLEX array, dimension (LDA,N) */
/*     On entry, the N-by-N matrix A. */

/*     LDA            (input) INTEGER */
/*     The leading dimension of the array A.  LDA >= max(1,N). */

/*     AF             (input) COMPLEX array, dimension (LDAF,N) */
/*     The factors L and U from the factorization */
/*     A = P*L*U as computed by CGETRF. */

/*     LDAF           (input) INTEGER */
/*     The leading dimension of the array AF.  LDAF >= max(1,N). */

/*     IPIV           (input) INTEGER array, dimension (N) */
/*     The pivot indices from the factorization A = P*L*U */
/*     as computed by CGETRF; row i of the matrix was interchanged */
/*     with row IPIV(i). */

/*     COLEQU         (input) LOGICAL */
/*     If .TRUE. then column equilibration was done to A before calling */
/*     this routine. This is needed to compute the solution and error */
/*     bounds correctly. */

/*     C              (input) REAL array, dimension (N) */
/*     The column scale factors for A. If COLEQU = .FALSE., C */
/*     is not accessed. If C is input, each element of C should be a power */
/*     of the radix to ensure a reliable solution and error estimates. */
/*     Scaling by powers of the radix does not cause rounding errors unless */
/*     the result underflows or overflows. Rounding errors during scaling */
/*     lead to refining with a matrix that is not equivalent to the */
/*     input matrix, producing error estimates that may not be */
/*     reliable. */

/*     B              (input) COMPLEX array, dimension (LDB,NRHS) */
/*     The right-hand-side matrix B. */

/*     LDB            (input) INTEGER */
/*     The leading dimension of the array B.  LDB >= max(1,N). */

/*     Y              (input/output) COMPLEX array, dimension (LDY,NRHS) */
/*     On entry, the solution matrix X, as computed by CGETRS. */
/*     On exit, the improved solution matrix Y. */

/*     LDY            (input) INTEGER */
/*     The leading dimension of the array Y.  LDY >= max(1,N). */

/*     BERR_OUT       (output) REAL array, dimension (NRHS) */
/*     On exit, BERR_OUT(j) contains the componentwise relative backward */
/*     error for right-hand-side j from the formula */
/*         max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */
/*     where abs(Z) is the componentwise absolute value of the matrix */
/*     or vector Z. This is computed by CLA_LIN_BERR. */

/*     N_NORMS        (input) INTEGER */
/*     Determines which error bounds to return (see ERR_BNDS_NORM */
/*     and ERR_BNDS_COMP). */
/*     If N_NORMS >= 1 return normwise error bounds. */
/*     If N_NORMS >= 2 return componentwise error bounds. */

/*     ERR_BNDS_NORM  (input/output) REAL array, dimension (NRHS, N_ERR_BNDS) */
/*     For each right-hand side, this array contains information about */
/*     various error bounds and condition numbers corresponding to the */
/*     normwise relative error, which is defined as follows: */

/*     Normwise relative error in the ith solution vector: */
/*             max_j (abs(XTRUE(j,i) - X(j,i))) */
/*            ------------------------------ */
/*                  max_j abs(X(j,i)) */

/*     The array is indexed by the type of error information as described */
/*     below. There currently are up to three pieces of information */
/*     returned. */

/*     The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */
/*     right-hand side. */

/*     The second index in ERR_BNDS_NORM(:,err) contains the following */
/*     three fields: */
/*     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
/*              reciprocal condition number is less than the threshold */
/*              sqrt(n) * slamch('Epsilon'). */

/*     err = 2 "Guaranteed" error bound: The estimated forward error, */
/*              almost certainly within a factor of 10 of the true error */
/*              so long as the next entry is greater than the threshold */
/*              sqrt(n) * slamch('Epsilon'). This error bound should only */
/*              be trusted if the previous boolean is true. */

/*     err = 3  Reciprocal condition number: Estimated normwise */
/*              reciprocal condition number.  Compared with the threshold */
/*              sqrt(n) * slamch('Epsilon') to determine if the error */
/*              estimate is "guaranteed". These reciprocal condition */
/*              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
/*              appropriately scaled matrix Z. */
/*              Let Z = S*A, where S scales each row by a power of the */
/*              radix so all absolute row sums of Z are approximately 1. */

/*     This subroutine is only responsible for setting the second field */
/*     above. */
/*     See Lapack Working Note 165 for further details and extra */
/*     cautions. */

/*     ERR_BNDS_COMP  (input/output) REAL array, dimension (NRHS, N_ERR_BNDS) */
/*     For each right-hand side, this array contains information about */
/*     various error bounds and condition numbers corresponding to the */
/*     componentwise relative error, which is defined as follows: */

/*     Componentwise relative error in the ith solution vector: */
/*                    abs(XTRUE(j,i) - X(j,i)) */
/*             max_j ---------------------- */
/*                         abs(X(j,i)) */

/*     The array is indexed by the right-hand side i (on which the */
/*     componentwise relative error depends), and the type of error */
/*     information as described below. There currently are up to three */
/*     pieces of information returned for each right-hand side. If */
/*     componentwise accuracy is not requested (PARAMS(3) = 0.0), then */
/*     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS .LT. 3, then at most */
/*     the first (:,N_ERR_BNDS) entries are returned. */

/*     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */
/*     right-hand side. */

/*     The second index in ERR_BNDS_COMP(:,err) contains the following */
/*     three fields: */
/*     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
/*              reciprocal condition number is less than the threshold */
/*              sqrt(n) * slamch('Epsilon'). */

/*     err = 2 "Guaranteed" error bound: The estimated forward error, */
/*              almost certainly within a factor of 10 of the true error */
/*              so long as the next entry is greater than the threshold */
/*              sqrt(n) * slamch('Epsilon'). This error bound should only */
/*              be trusted if the previous boolean is true. */

/*     err = 3  Reciprocal condition number: Estimated componentwise */
/*              reciprocal condition number.  Compared with the threshold */
/*              sqrt(n) * slamch('Epsilon') to determine if the error */
/*              estimate is "guaranteed". These reciprocal condition */
/*              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
/*              appropriately scaled matrix Z. */
/*              Let Z = S*(A*diag(x)), where x is the solution for the */
/*              current right-hand side and S scales each row of */
/*              A*diag(x) by a power of the radix so all absolute row */
/*              sums of Z are approximately 1. */

/*     This subroutine is only responsible for setting the second field */
/*     above. */
/*     See Lapack Working Note 165 for further details and extra */
/*     cautions. */

/*     RES            (input) COMPLEX array, dimension (N) */
/*     Workspace to hold the intermediate residual. */

/*     AYB            (input) REAL array, dimension (N) */
/*     Workspace. */

/*     DY             (input) COMPLEX array, dimension (N) */
/*     Workspace to hold the intermediate solution. */

/*     Y_TAIL         (input) COMPLEX array, dimension (N) */
/*     Workspace to hold the trailing bits of the intermediate solution. */

/*     RCOND          (input) REAL */
/*     Reciprocal scaled condition number.  This is an estimate of the */
/*     reciprocal Skeel condition number of the matrix A after */
/*     equilibration (if done).  If this is less than the machine */
/*     precision (in particular, if it is zero), the matrix is singular */
/*     to working precision.  Note that the error may still be small even */
/*     if this number is very small and the matrix appears ill- */
/*     conditioned. */

/*     ITHRESH        (input) INTEGER */
/*     The maximum number of residual computations allowed for */
/*     refinement. The default is 10. For 'aggressive' set to 100 to */
/*     permit convergence using approximate factorizations or */
/*     factorizations other than LU. If the factorization uses a */
/*     technique other than Gaussian elimination, the guarantees in */
/*     ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. */

/*     RTHRESH        (input) REAL */
/*     Determines when to stop refinement if the error estimate stops */
/*     decreasing. Refinement will stop when the next solution no longer */
/*     satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */
/*     the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */
/*     default value is 0.5. For 'aggressive' set to 0.9 to permit */
/*     convergence on extremely ill-conditioned matrices. See LAWN 165 */
/*     for more details. */

/*     DZ_UB          (input) REAL */
/*     Determines when to start considering componentwise convergence. */
/*     Componentwise convergence is only considered after each component */
/*     of the solution Y is stable, which we definte as the relative */
/*     change in each component being less than DZ_UB. The default value */
/*     is 0.25, requiring the first bit to be stable. See LAWN 165 for */
/*     more details. */

/*     IGNORE_CWISE   (input) LOGICAL */
/*     If .TRUE. then ignore componentwise convergence. Default value */
/*     is .FALSE.. */

/*     INFO           (output) INTEGER */
/*       = 0:  Successful exit. */
/*       < 0:  if INFO = -i, the ith argument to CGETRS had an illegal */
/*             value */

/*  ===================================================================== */

/*     .. Local Scalars .. */
/*     .. */
/*     .. Parameters .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Statement Functions .. */
/*     .. */
/*     .. Statement Function Definitions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    errs_c_dim1 = *nrhs;
    errs_c_offset = 1 + errs_c_dim1;
    errs_c__ -= errs_c_offset;
    errs_n_dim1 = *nrhs;
    errs_n_offset = 1 + errs_n_dim1;
    errs_n__ -= errs_n_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    af_dim1 = *ldaf;
    af_offset = 1 + af_dim1;
    af -= af_offset;
    --ipiv;
    --c__;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    y_dim1 = *ldy;
    y_offset = 1 + y_dim1;
    y -= y_offset;
    --berr_out__;
    --res;
    --ayb;
    --dy;
    --y_tail__;

    /* Function Body */
    if (*info != 0) {
	return 0;
    }
    chla_transtype__(ch__1, (ftnlen)1, trans_type__);
    *(unsigned char *)trans = *(unsigned char *)&ch__1[0];
    eps = slamch_("Epsilon");
    hugeval = slamch_("Overflow");
/*     Force HUGEVAL to Inf */
    hugeval *= hugeval;
/*     Using HUGEVAL may lead to spurious underflows. */
    incr_thresh__ = (real) (*n) * eps;

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
	y_prec_state__ = 1;
	if (y_prec_state__ == 2) {
	    i__2 = *n;
	    for (i__ = 1; i__ <= i__2; ++i__) {
		i__3 = i__;
		y_tail__[i__3].r = 0.f, y_tail__[i__3].i = 0.f;
	    }
	}
	dxrat = 0.f;
	dxratmax = 0.f;
	dzrat = 0.f;
	dzratmax = 0.f;
	final_dx_x__ = hugeval;
	final_dz_z__ = hugeval;
	prevnormdx = hugeval;
	prev_dz_z__ = hugeval;
	dz_z__ = hugeval;
	dx_x__ = hugeval;
	x_state__ = 1;
	z_state__ = 0;
	incr_prec__ = FALSE_;
	i__2 = *ithresh;
	for (cnt = 1; cnt <= i__2; ++cnt) {

/*         Compute residual RES = B_s - op(A_s) * Y, */
/*             op(A) = A, A**T, or A**H depending on TRANS (and type). */

	    ccopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
	    if (y_prec_state__ == 0) {
		cgemv_(trans, n, n, &c_b6, &a[a_offset], lda, &y[j * y_dim1 + 
			1], &c__1, &c_b8, &res[1], &c__1);
	    } else if (y_prec_state__ == 1) {
		blas_cgemv_x__(trans_type__, n, n, &c_b6, &a[a_offset], lda, &
			y[j * y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1, 
			prec_type__);
	    } else {
		blas_cgemv2_x__(trans_type__, n, n, &c_b6, &a[a_offset], lda, 
			&y[j * y_dim1 + 1], &y_tail__[1], &c__1, &c_b8, &res[
			1], &c__1, prec_type__);
	    }
/*         XXX: RES is no longer needed. */
	    ccopy_(n, &res[1], &c__1, &dy[1], &c__1);
	    cgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &dy[1], 
		    n, info);

/*         Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */

	    normx = 0.f;
	    normy = 0.f;
	    normdx = 0.f;
	    dz_z__ = 0.f;
	    ymin = hugeval;

	    i__3 = *n;
	    for (i__ = 1; i__ <= i__3; ++i__) {
		i__4 = i__ + j * y_dim1;
		yk = (r__1 = y[i__4].r, dabs(r__1)) + (r__2 = r_imag(&y[i__ + 
			j * y_dim1]), dabs(r__2));
		i__4 = i__;
		dyk = (r__1 = dy[i__4].r, dabs(r__1)) + (r__2 = r_imag(&dy[
			i__]), dabs(r__2));
		if (yk != 0.f) {
/* Computing MAX */
		    r__1 = dz_z__, r__2 = dyk / yk;
		    dz_z__ = dmax(r__1,r__2);
		} else if (dyk != 0.f) {
		    dz_z__ = hugeval;
		}
		ymin = dmin(ymin,yk);
		normy = dmax(normy,yk);
		if (*colequ) {
/* Computing MAX */
		    r__1 = normx, r__2 = yk * c__[i__];
		    normx = dmax(r__1,r__2);
/* Computing MAX */
		    r__1 = normdx, r__2 = dyk * c__[i__];
		    normdx = dmax(r__1,r__2);
		} else {
		    normx = normy;
		    normdx = dmax(normdx,dyk);
		}
	    }
	    if (normx != 0.f) {
		dx_x__ = normdx / normx;
	    } else if (normdx == 0.f) {
		dx_x__ = 0.f;
	    } else {
		dx_x__ = hugeval;
	    }
	    dxrat = normdx / prevnormdx;
	    dzrat = dz_z__ / prev_dz_z__;

/*         Check termination criteria */

	    if (! (*ignore_cwise__) && ymin * *rcond < incr_thresh__ * normy 
		    && y_prec_state__ < 2) {
		incr_prec__ = TRUE_;
	    }
	    if (x_state__ == 3 && dxrat <= *rthresh) {
		x_state__ = 1;
	    }
	    if (x_state__ == 1) {
		if (dx_x__ <= eps) {
		    x_state__ = 2;
		} else if (dxrat > *rthresh) {
		    if (y_prec_state__ != 2) {
			incr_prec__ = TRUE_;
		    } else {
			x_state__ = 3;
		    }
		} else {
		    if (dxrat > dxratmax) {
			dxratmax = dxrat;
		    }
		}
		if (x_state__ > 1) {
		    final_dx_x__ = dx_x__;
		}
	    }
	    if (z_state__ == 0 && dz_z__ <= *dz_ub__) {
		z_state__ = 1;
	    }
	    if (z_state__ == 3 && dzrat <= *rthresh) {
		z_state__ = 1;
	    }
	    if (z_state__ == 1) {
		if (dz_z__ <= eps) {
		    z_state__ = 2;
		} else if (dz_z__ > *dz_ub__) {
		    z_state__ = 0;
		    dzratmax = 0.f;
		    final_dz_z__ = hugeval;
		} else if (dzrat > *rthresh) {
		    if (y_prec_state__ != 2) {
			incr_prec__ = TRUE_;
		    } else {
			z_state__ = 3;
		    }
		} else {
		    if (dzrat > dzratmax) {
			dzratmax = dzrat;
		    }
		}
		if (z_state__ > 1) {
		    final_dz_z__ = dz_z__;
		}
	    }

/*           Exit if both normwise and componentwise stopped working, */
/*           but if componentwise is unstable, let it go at least two */
/*           iterations. */

	    if (x_state__ != 1) {
		if (*ignore_cwise__) {
		    goto L666;
		}
		if (z_state__ == 3 || z_state__ == 2) {
		    goto L666;
		}
		if (z_state__ == 0 && cnt > 1) {
		    goto L666;
		}
	    }
	    if (incr_prec__) {
		incr_prec__ = FALSE_;
		++y_prec_state__;
		i__3 = *n;
		for (i__ = 1; i__ <= i__3; ++i__) {
		    i__4 = i__;
		    y_tail__[i__4].r = 0.f, y_tail__[i__4].i = 0.f;
		}
	    }
	    prevnormdx = normdx;
	    prev_dz_z__ = dz_z__;

/*           Update soluton. */

	    if (y_prec_state__ < 2) {
		caxpy_(n, &c_b8, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1);
	    } else {
		cla_wwaddw__(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]);
	    }
	}
/*        Target of "IF (Z_STOP .AND. X_STOP)".  Sun's f77 won't EXIT. */
L666:

/*     Set final_* when cnt hits ithresh */

	if (x_state__ == 1) {
	    final_dx_x__ = dx_x__;
	}
	if (z_state__ == 1) {
	    final_dz_z__ = dz_z__;
	}

/*     Compute error bounds */

	if (*n_norms__ >= 1) {
	    errs_n__[j + (errs_n_dim1 << 1)] = final_dx_x__ / (1 - dxratmax);
	}
	if (*n_norms__ >= 2) {
	    errs_c__[j + (errs_c_dim1 << 1)] = final_dz_z__ / (1 - dzratmax);
	}

/*     Compute componentwise relative backward error from formula */
/*         max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */
/*     where abs(Z) is the componentwise absolute value of the matrix */
/*     or vector Z. */

/*        Compute residual RES = B_s - op(A_s) * Y, */
/*            op(A) = A, A**T, or A**H depending on TRANS (and type). */

	ccopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
	cgemv_(trans, n, n, &c_b6, &a[a_offset], lda, &y[j * y_dim1 + 1], &
		c__1, &c_b8, &res[1], &c__1);
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = i__ + j * b_dim1;
	    ayb[i__] = (r__1 = b[i__3].r, dabs(r__1)) + (r__2 = r_imag(&b[i__ 
		    + j * b_dim1]), dabs(r__2));
	}

/*     Compute abs(op(A_s))*abs(Y) + abs(B_s). */

	cla_geamv__(trans_type__, n, n, &c_b31, &a[a_offset], lda, &y[j * 
		y_dim1 + 1], &c__1, &c_b31, &ayb[1], &c__1);
	cla_lin_berr__(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]);

/*     End of loop for each RHS. */

    }

    return 0;
} /* cla_gerfsx_extended__ */
Example #13
0
/* Subroutine */ int ctbt03_(char *uplo, char *trans, char *diag, integer *n, 
	integer *kd, integer *nrhs, complex *ab, integer *ldab, real *scale, 
	real *cnorm, real *tscal, complex *x, integer *ldx, complex *b, 
	integer *ldb, complex *work, real *resid)
{
    /* System generated locals */
    integer ab_dim1, ab_offset, b_dim1, b_offset, x_dim1, x_offset, i__1;
    real r__1, r__2;
    complex q__1;

    /* Builtin functions */
    double c_abs(complex *);

    /* Local variables */
    static integer j;
    extern logical lsame_(char *, char *);
    static real xscal;
    extern /* Subroutine */ int ctbmv_(char *, char *, char *, integer *, 
	    integer *, complex *, integer *, complex *, integer *), ccopy_(integer *, complex *, integer *, complex *
	    , integer *), caxpy_(integer *, complex *, complex *, integer *, 
	    complex *, integer *);
    static real tnorm, xnorm;
    static integer ix;
    extern integer icamax_(integer *, complex *, integer *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer 
	    *);
    static real smlnum, eps, err;


#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]
#define x_subscr(a_1,a_2) (a_2)*x_dim1 + a_1
#define x_ref(a_1,a_2) x[x_subscr(a_1,a_2)]
#define ab_subscr(a_1,a_2) (a_2)*ab_dim1 + a_1
#define ab_ref(a_1,a_2) ab[ab_subscr(a_1,a_2)]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    CTBT03 computes the residual for the solution to a scaled triangular   
    system of equations  A*x = s*b,  A**T *x = s*b,  or  A**H *x = s*b   
    when A is a triangular band matrix.  Here A**T  denotes the transpose   
    of A, A**H denotes the conjugate transpose of A, s is a scalar, and   
    x and b are N by NRHS matrices.  The test ratio is the maximum over   
    the number of right hand sides of   
       norm(s*b - op(A)*x) / ( norm(op(A)) * norm(x) * EPS ),   
    where op(A) denotes A, A**T, or A**H, and EPS is the machine epsilon.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the matrix A is upper or lower triangular.   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    TRANS   (input) CHARACTER*1   
            Specifies the operation applied to A.   
            = 'N':  A *x = s*b     (No transpose)   
            = 'T':  A**T *x = s*b  (Transpose)   
            = 'C':  A**H *x = s*b  (Conjugate transpose)   

    DIAG    (input) CHARACTER*1   
            Specifies whether or not the matrix A is unit triangular.   
            = 'N':  Non-unit triangular   
            = 'U':  Unit triangular   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    KD      (input) INTEGER   
            The number of superdiagonals or subdiagonals of the   
            triangular band matrix A.  KD >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrices X and B.  NRHS >= 0.   

    AB      (input) COMPLEX array, dimension (LDAB,N)   
            The upper or lower triangular band matrix A, stored in the   
            first kd+1 rows of the array. The j-th column of A is stored   
            in the j-th column of the array AB as follows:   
            if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;   
            if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).   

    LDAB    (input) INTEGER   
            The leading dimension of the array AB.  LDAB >= KD+1.   

    SCALE   (input) REAL   
            The scaling factor s used in solving the triangular system.   

    CNORM   (input) REAL array, dimension (N)   
            The 1-norms of the columns of A, not counting the diagonal.   

    TSCAL   (input) REAL   
            The scaling factor used in computing the 1-norms in CNORM.   
            CNORM actually contains the column norms of TSCAL*A.   

    X       (input) COMPLEX array, dimension (LDX,NRHS)   
            The computed solution vectors for the system of linear   
            equations.   

    LDX     (input) INTEGER   
            The leading dimension of the array X.  LDX >= max(1,N).   

    B       (input) COMPLEX array, dimension (LDB,NRHS)   
            The right hand side vectors for the system of linear   
            equations.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    WORK    (workspace) COMPLEX array, dimension (N)   

    RESID   (output) REAL   
            The maximum over the number of right hand sides of   
            norm(op(A)*x - s*b) / ( norm(op(A)) * norm(x) * EPS ).   

    =====================================================================   



       Quick exit if N = 0   

       Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --cnorm;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;

    /* Function Body */
    if (*n <= 0 || *nrhs <= 0) {
	*resid = 0.f;
	return 0;
    }
    eps = slamch_("Epsilon");
    smlnum = slamch_("Safe minimum");

/*     Compute the norm of the triangular matrix A using the column   
       norms already computed by CLATBS. */

    tnorm = 0.f;
    if (lsame_(diag, "N")) {
	if (lsame_(uplo, "U")) {
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
		r__1 = tnorm, r__2 = *tscal * c_abs(&ab_ref(*kd + 1, j)) + 
			cnorm[j];
		tnorm = dmax(r__1,r__2);
/* L10: */
	    }
	} else {
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
		r__1 = tnorm, r__2 = *tscal * c_abs(&ab_ref(1, j)) + cnorm[j];
		tnorm = dmax(r__1,r__2);
/* L20: */
	    }
	}
    } else {
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
	    r__1 = tnorm, r__2 = *tscal + cnorm[j];
	    tnorm = dmax(r__1,r__2);
/* L30: */
	}
    }

/*     Compute the maximum over the number of right hand sides of   
          norm(op(A)*x - s*b) / ( norm(op(A)) * norm(x) * EPS ). */

    *resid = 0.f;
    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
	ccopy_(n, &x_ref(1, j), &c__1, &work[1], &c__1);
	ix = icamax_(n, &work[1], &c__1);
/* Computing MAX */
	r__1 = 1.f, r__2 = c_abs(&x_ref(ix, j));
	xnorm = dmax(r__1,r__2);
	xscal = 1.f / xnorm / (real) (*kd + 1);
	csscal_(n, &xscal, &work[1], &c__1);
	ctbmv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &work[1], &
		c__1);
	r__1 = -(*scale) * xscal;
	q__1.r = r__1, q__1.i = 0.f;
	caxpy_(n, &q__1, &b_ref(1, j), &c__1, &work[1], &c__1);
	ix = icamax_(n, &work[1], &c__1);
	err = *tscal * c_abs(&work[ix]);
	ix = icamax_(n, &x_ref(1, j), &c__1);
	xnorm = c_abs(&x_ref(ix, j));
	if (err * smlnum <= xnorm) {
	    if (xnorm > 0.f) {
		err /= xnorm;
	    }
	} else {
	    if (err > 0.f) {
		err = 1.f / eps;
	    }
	}
	if (err * smlnum <= tnorm) {
	    if (tnorm > 0.f) {
		err /= tnorm;
	    }
	} else {
	    if (err > 0.f) {
		err = 1.f / eps;
	    }
	}
	*resid = dmax(*resid,err);
/* L40: */
    }

    return 0;

/*     End of CTBT03 */

} /* ctbt03_ */
Example #14
0
/* Subroutine */ int clagsy_(integer *n, integer *k, real *d__, complex *a, 
	integer *lda, integer *iseed, complex *work, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, 
	    i__9;
    real r__1;
    complex q__1, q__2, q__3, q__4;

    /* Local variables */
    integer i__, j, ii, jj;
    complex wa, wb;
    real wn;
    complex tau;
    complex alpha;

/*  -- LAPACK auxiliary test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CLAGSY generates a complex symmetric matrix A, by pre- and post- */
/*  multiplying a real diagonal matrix D with a random unitary matrix: */
/*  A = U*D*U**T. The semi-bandwidth may then be reduced to k by */
/*  additional unitary transformations. */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  K       (input) INTEGER */
/*          The number of nonzero subdiagonals within the band of A. */
/*          0 <= K <= N-1. */

/*  D       (input) REAL array, dimension (N) */
/*          The diagonal elements of the diagonal matrix D. */

/*  A       (output) COMPLEX array, dimension (LDA,N) */
/*          The generated n by n symmetric matrix A (the full matrix is */
/*          stored). */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= N. */

/*  ISEED   (input/output) INTEGER array, dimension (4) */
/*          On entry, the seed of the random number generator; the array */
/*          elements must be between 0 and 4095, and ISEED(4) must be */
/*          odd. */
/*          On exit, the seed is updated. */

/*  WORK    (workspace) COMPLEX array, dimension (2*N) */

/*  INFO    (output) INTEGER */
/*          = 0: successful exit */
/*          < 0: if INFO = -i, the i-th argument had an illegal value */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input arguments */

    /* Parameter adjustments */
    --d__;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --iseed;
    --work;

    /* Function Body */
    *info = 0;
    if (*n < 0) {
	*info = -1;
    } else if (*k < 0 || *k > *n - 1) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    }
    if (*info < 0) {
	i__1 = -(*info);
	xerbla_("CLAGSY", &i__1);
	return 0;
    }

/*     initialize lower triangle of A to diagonal matrix */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *n;
	for (i__ = j + 1; i__ <= i__2; ++i__) {
	    i__3 = i__ + j * a_dim1;
	    a[i__3].r = 0.f, a[i__3].i = 0.f;
/* L10: */
	}
/* L20: */
    }
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = i__ + i__ * a_dim1;
	i__3 = i__;
	a[i__2].r = d__[i__3], a[i__2].i = 0.f;
/* L30: */
    }

/*     Generate lower triangle of symmetric matrix */

    for (i__ = *n - 1; i__ >= 1; --i__) {

/*        generate random reflection */

	i__1 = *n - i__ + 1;
	clarnv_(&c__3, &iseed[1], &i__1, &work[1]);
	i__1 = *n - i__ + 1;
	wn = scnrm2_(&i__1, &work[1], &c__1);
	r__1 = wn / c_abs(&work[1]);
	q__1.r = r__1 * work[1].r, q__1.i = r__1 * work[1].i;
	wa.r = q__1.r, wa.i = q__1.i;
	if (wn == 0.f) {
	    tau.r = 0.f, tau.i = 0.f;
	} else {
	    q__1.r = work[1].r + wa.r, q__1.i = work[1].i + wa.i;
	    wb.r = q__1.r, wb.i = q__1.i;
	    i__1 = *n - i__;
	    c_div(&q__1, &c_b2, &wb);
	    cscal_(&i__1, &q__1, &work[2], &c__1);
	    work[1].r = 1.f, work[1].i = 0.f;
	    c_div(&q__1, &wb, &wa);
	    r__1 = q__1.r;
	    tau.r = r__1, tau.i = 0.f;
	}

/*        apply random reflection to A(i:n,i:n) from the left */
/*        and the right */

/*        compute  y := tau * A * conjg(u) */

	i__1 = *n - i__ + 1;
	clacgv_(&i__1, &work[1], &c__1);
	i__1 = *n - i__ + 1;
	csymv_("Lower", &i__1, &tau, &a[i__ + i__ * a_dim1], lda, &work[1], &
		c__1, &c_b1, &work[*n + 1], &c__1);
	i__1 = *n - i__ + 1;
	clacgv_(&i__1, &work[1], &c__1);

/*        compute  v := y - 1/2 * tau * ( u, y ) * u */

	q__3.r = -.5f, q__3.i = -0.f;
	q__2.r = q__3.r * tau.r - q__3.i * tau.i, q__2.i = q__3.r * tau.i + 
		q__3.i * tau.r;
	i__1 = *n - i__ + 1;
	cdotc_(&q__4, &i__1, &work[1], &c__1, &work[*n + 1], &c__1);
	q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * q__4.i 
		+ q__2.i * q__4.r;
	alpha.r = q__1.r, alpha.i = q__1.i;
	i__1 = *n - i__ + 1;
	caxpy_(&i__1, &alpha, &work[1], &c__1, &work[*n + 1], &c__1);

/*        apply the transformation as a rank-2 update to A(i:n,i:n) */

/*        CALL CSYR2( 'Lower', N-I+1, -ONE, WORK, 1, WORK( N+1 ), 1, */
/*        $               A( I, I ), LDA ) */

	i__1 = *n;
	for (jj = i__; jj <= i__1; ++jj) {
	    i__2 = *n;
	    for (ii = jj; ii <= i__2; ++ii) {
		i__3 = ii + jj * a_dim1;
		i__4 = ii + jj * a_dim1;
		i__5 = ii - i__ + 1;
		i__6 = *n + jj - i__ + 1;
		q__3.r = work[i__5].r * work[i__6].r - work[i__5].i * work[
			i__6].i, q__3.i = work[i__5].r * work[i__6].i + work[
			i__5].i * work[i__6].r;
		q__2.r = a[i__4].r - q__3.r, q__2.i = a[i__4].i - q__3.i;
		i__7 = *n + ii - i__ + 1;
		i__8 = jj - i__ + 1;
		q__4.r = work[i__7].r * work[i__8].r - work[i__7].i * work[
			i__8].i, q__4.i = work[i__7].r * work[i__8].i + work[
			i__7].i * work[i__8].r;
		q__1.r = q__2.r - q__4.r, q__1.i = q__2.i - q__4.i;
		a[i__3].r = q__1.r, a[i__3].i = q__1.i;
/* L40: */
	    }
/* L50: */
	}
/* L60: */
    }

/*     Reduce number of subdiagonals to K */

    i__1 = *n - 1 - *k;
    for (i__ = 1; i__ <= i__1; ++i__) {

/*        generate reflection to annihilate A(k+i+1:n,i) */

	i__2 = *n - *k - i__ + 1;
	wn = scnrm2_(&i__2, &a[*k + i__ + i__ * a_dim1], &c__1);
	r__1 = wn / c_abs(&a[*k + i__ + i__ * a_dim1]);
	i__2 = *k + i__ + i__ * a_dim1;
	q__1.r = r__1 * a[i__2].r, q__1.i = r__1 * a[i__2].i;
	wa.r = q__1.r, wa.i = q__1.i;
	if (wn == 0.f) {
	    tau.r = 0.f, tau.i = 0.f;
	} else {
	    i__2 = *k + i__ + i__ * a_dim1;
	    q__1.r = a[i__2].r + wa.r, q__1.i = a[i__2].i + wa.i;
	    wb.r = q__1.r, wb.i = q__1.i;
	    i__2 = *n - *k - i__;
	    c_div(&q__1, &c_b2, &wb);
	    cscal_(&i__2, &q__1, &a[*k + i__ + 1 + i__ * a_dim1], &c__1);
	    i__2 = *k + i__ + i__ * a_dim1;
	    a[i__2].r = 1.f, a[i__2].i = 0.f;
	    c_div(&q__1, &wb, &wa);
	    r__1 = q__1.r;
	    tau.r = r__1, tau.i = 0.f;
	}

/*        apply reflection to A(k+i:n,i+1:k+i-1) from the left */

	i__2 = *n - *k - i__ + 1;
	i__3 = *k - 1;
	cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[*k + i__ + (i__ 
		+ 1) * a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &
		c_b1, &work[1], &c__1);
	i__2 = *n - *k - i__ + 1;
	i__3 = *k - 1;
	q__1.r = -tau.r, q__1.i = -tau.i;
	cgerc_(&i__2, &i__3, &q__1, &a[*k + i__ + i__ * a_dim1], &c__1, &work[
		1], &c__1, &a[*k + i__ + (i__ + 1) * a_dim1], lda);

/*        apply reflection to A(k+i:n,k+i:n) from the left and the right */

/*        compute  y := tau * A * conjg(u) */

	i__2 = *n - *k - i__ + 1;
	clacgv_(&i__2, &a[*k + i__ + i__ * a_dim1], &c__1);
	i__2 = *n - *k - i__ + 1;
	csymv_("Lower", &i__2, &tau, &a[*k + i__ + (*k + i__) * a_dim1], lda, 
		&a[*k + i__ + i__ * a_dim1], &c__1, &c_b1, &work[1], &c__1);
	i__2 = *n - *k - i__ + 1;
	clacgv_(&i__2, &a[*k + i__ + i__ * a_dim1], &c__1);

/*        compute  v := y - 1/2 * tau * ( u, y ) * u */

	q__3.r = -.5f, q__3.i = -0.f;
	q__2.r = q__3.r * tau.r - q__3.i * tau.i, q__2.i = q__3.r * tau.i + 
		q__3.i * tau.r;
	i__2 = *n - *k - i__ + 1;
	cdotc_(&q__4, &i__2, &a[*k + i__ + i__ * a_dim1], &c__1, &work[1], &
		c__1);
	q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * q__4.i 
		+ q__2.i * q__4.r;
	alpha.r = q__1.r, alpha.i = q__1.i;
	i__2 = *n - *k - i__ + 1;
	caxpy_(&i__2, &alpha, &a[*k + i__ + i__ * a_dim1], &c__1, &work[1], &
		c__1);

/*        apply symmetric rank-2 update to A(k+i:n,k+i:n) */

/*        CALL CSYR2( 'Lower', N-K-I+1, -ONE, A( K+I, I ), 1, WORK, 1, */
/*        $               A( K+I, K+I ), LDA ) */

	i__2 = *n;
	for (jj = *k + i__; jj <= i__2; ++jj) {
	    i__3 = *n;
	    for (ii = jj; ii <= i__3; ++ii) {
		i__4 = ii + jj * a_dim1;
		i__5 = ii + jj * a_dim1;
		i__6 = ii + i__ * a_dim1;
		i__7 = jj - *k - i__ + 1;
		q__3.r = a[i__6].r * work[i__7].r - a[i__6].i * work[i__7].i, 
			q__3.i = a[i__6].r * work[i__7].i + a[i__6].i * work[
			i__7].r;
		q__2.r = a[i__5].r - q__3.r, q__2.i = a[i__5].i - q__3.i;
		i__8 = ii - *k - i__ + 1;
		i__9 = jj + i__ * a_dim1;
		q__4.r = work[i__8].r * a[i__9].r - work[i__8].i * a[i__9].i, 
			q__4.i = work[i__8].r * a[i__9].i + work[i__8].i * a[
			i__9].r;
		q__1.r = q__2.r - q__4.r, q__1.i = q__2.i - q__4.i;
		a[i__4].r = q__1.r, a[i__4].i = q__1.i;
/* L70: */
	    }
/* L80: */
	}

	i__2 = *k + i__ + i__ * a_dim1;
	q__1.r = -wa.r, q__1.i = -wa.i;
	a[i__2].r = q__1.r, a[i__2].i = q__1.i;
	i__2 = *n;
	for (j = *k + i__ + 1; j <= i__2; ++j) {
	    i__3 = j + i__ * a_dim1;
	    a[i__3].r = 0.f, a[i__3].i = 0.f;
/* L90: */
	}
/* L100: */
    }

/*     Store full symmetric matrix */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *n;
	for (i__ = j + 1; i__ <= i__2; ++i__) {
	    i__3 = j + i__ * a_dim1;
	    i__4 = i__ + j * a_dim1;
	    a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i;
/* L110: */
	}
/* L120: */
    }
    return 0;

/*     End of CLAGSY */

} /* clagsy_ */
Example #15
0
/* Subroutine */ int ctrt03_(char *uplo, char *trans, char *diag, integer *n, 
	integer *nrhs, complex *a, integer *lda, real *scale, real *cnorm, 
	real *tscal, complex *x, integer *ldx, complex *b, integer *ldb, 
	complex *work, real *resid)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, i__1;
    real r__1, r__2;
    complex q__1;

    /* Local variables */
    integer j, ix;
    real eps, err;
    real xscal;
    real tnorm, xnorm;
    real smlnum;


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CTRT03 computes the residual for the solution to a scaled triangular */
/*  system of equations A*x = s*b,  A**T *x = s*b,  or  A**H *x = s*b. */
/*  Here A is a triangular matrix, A**T denotes the transpose of A, A**H */
/*  denotes the conjugate transpose of A, s is a scalar, and x and b are */
/*  N by NRHS matrices.  The test ratio is the maximum over the number of */
/*  right hand sides of */
/*     norm(s*b - op(A)*x) / ( norm(op(A)) * norm(x) * EPS ), */
/*  where op(A) denotes A, A**T, or A**H, and EPS is the machine epsilon. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the matrix A is upper or lower triangular. */
/*          = 'U':  Upper triangular */
/*          = 'L':  Lower triangular */

/*  TRANS   (input) CHARACTER*1 */
/*          Specifies the operation applied to A. */
/*          = 'N':  A *x = s*b     (No transpose) */
/*          = 'T':  A**T *x = s*b  (Transpose) */
/*          = 'C':  A**H *x = s*b  (Conjugate transpose) */

/*  DIAG    (input) CHARACTER*1 */
/*          Specifies whether or not the matrix A is unit triangular. */
/*          = 'N':  Non-unit triangular */
/*          = 'U':  Unit triangular */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  NRHS    (input) INTEGER */
/*          The number of right hand sides, i.e., the number of columns */
/*          of the matrices X and B.  NRHS >= 0. */

/*  A       (input) COMPLEX array, dimension (LDA,N) */
/*          The triangular matrix A.  If UPLO = 'U', the leading n by n */
/*          upper triangular part of the array A contains the upper */
/*          triangular matrix, and the strictly lower triangular part of */
/*          A is not referenced.  If UPLO = 'L', the leading n by n lower */
/*          triangular part of the array A contains the lower triangular */
/*          matrix, and the strictly upper triangular part of A is not */
/*          referenced.  If DIAG = 'U', the diagonal elements of A are */
/*          also not referenced and are assumed to be 1. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  SCALE   (input) REAL */
/*          The scaling factor s used in solving the triangular system. */

/*  CNORM   (input) REAL array, dimension (N) */
/*          The 1-norms of the columns of A, not counting the diagonal. */

/*  TSCAL   (input) REAL */
/*          The scaling factor used in computing the 1-norms in CNORM. */
/*          CNORM actually contains the column norms of TSCAL*A. */

/*  X       (input) COMPLEX array, dimension (LDX,NRHS) */
/*          The computed solution vectors for the system of linear */
/*          equations. */

/*  LDX     (input) INTEGER */
/*          The leading dimension of the array X.  LDX >= max(1,N). */

/*  B       (input) COMPLEX array, dimension (LDB,NRHS) */
/*          The right hand side vectors for the system of linear */
/*          equations. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= max(1,N). */

/*  WORK    (workspace) COMPLEX array, dimension (N) */

/*  RESID   (output) REAL */
/*          The maximum over the number of right hand sides of */
/*          norm(op(A)*x - s*b) / ( norm(op(A)) * norm(x) * EPS ). */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Quick exit if N = 0 */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --cnorm;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --work;

    /* Function Body */
    if (*n <= 0 || *nrhs <= 0) {
	*resid = 0.f;
	return 0;
    }
    eps = slamch_("Epsilon");
    smlnum = slamch_("Safe minimum");

/*     Compute the norm of the triangular matrix A using the column */
/*     norms already computed by CLATRS. */

    tnorm = 0.f;
    if (lsame_(diag, "N")) {
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
	    r__1 = tnorm, r__2 = *tscal * c_abs(&a[j + j * a_dim1]) + cnorm[j]
		    ;
	    tnorm = dmax(r__1,r__2);
/* L10: */
	}
    } else {
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
	    r__1 = tnorm, r__2 = *tscal + cnorm[j];
	    tnorm = dmax(r__1,r__2);
/* L20: */
	}
    }

/*     Compute the maximum over the number of right hand sides of */
/*        norm(op(A)*x - s*b) / ( norm(op(A)) * norm(x) * EPS ). */

    *resid = 0.f;
    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
	ccopy_(n, &x[j * x_dim1 + 1], &c__1, &work[1], &c__1);
	ix = icamax_(n, &work[1], &c__1);
/* Computing MAX */
	r__1 = 1.f, r__2 = c_abs(&x[ix + j * x_dim1]);
	xnorm = dmax(r__1,r__2);
	xscal = 1.f / xnorm / (real) (*n);
	csscal_(n, &xscal, &work[1], &c__1);
	ctrmv_(uplo, trans, diag, n, &a[a_offset], lda, &work[1], &c__1);
	r__1 = -(*scale) * xscal;
	q__1.r = r__1, q__1.i = 0.f;
	caxpy_(n, &q__1, &b[j * b_dim1 + 1], &c__1, &work[1], &c__1);
	ix = icamax_(n, &work[1], &c__1);
	err = *tscal * c_abs(&work[ix]);
	ix = icamax_(n, &x[j * x_dim1 + 1], &c__1);
	xnorm = c_abs(&x[ix + j * x_dim1]);
	if (err * smlnum <= xnorm) {
	    if (xnorm > 0.f) {
		err /= xnorm;
	    }
	} else {
	    if (err > 0.f) {
		err = 1.f / eps;
	    }
	}
	if (err * smlnum <= tnorm) {
	    if (tnorm > 0.f) {
		err /= tnorm;
	    }
	} else {
	    if (err > 0.f) {
		err = 1.f / eps;
	    }
	}
	*resid = dmax(*resid,err);
/* L30: */
    }

    return 0;

/*     End of CTRT03 */

} /* ctrt03_ */
Example #16
0
/* Subroutine */ int ctgsy2_(char *trans, integer *ijob, integer *m, integer *
	n, complex *a, integer *lda, complex *b, integer *ldb, complex *c__, 
	integer *ldc, complex *d__, integer *ldd, complex *e, integer *lde, 
	complex *f, integer *ldf, real *scale, real *rdsum, real *rdscal, 
	integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, d_dim1, 
	    d_offset, e_dim1, e_offset, f_dim1, f_offset, i__1, i__2, i__3, 
	    i__4;
    complex q__1, q__2, q__3, q__4, q__5, q__6;

    /* Local variables */
    integer i__, j, k;
    complex z__[4]	/* was [2][2] */, rhs[2];
    integer ierr, ipiv[2], jpiv[2];
    complex alpha;
    real scaloc;
    logical notran;

/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  CTGSY2 solves the generalized Sylvester equation */

/*              A * R - L * B = scale *   C               (1) */
/*              D * R - L * E = scale * F */

/*  using Level 1 and 2 BLAS, where R and L are unknown M-by-N matrices, */
/*  (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M, */
/*  N-by-N and M-by-N, respectively. A, B, D and E are upper triangular */
/*  (i.e., (A,D) and (B,E) in generalized Schur form). */

/*  The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output */
/*  scaling factor chosen to avoid overflow. */

/*  In matrix notation solving equation (1) corresponds to solve */
/*  Zx = scale * b, where Z is defined as */

/*         Z = [ kron(In, A)  -kron(B', Im) ]             (2) */
/*             [ kron(In, D)  -kron(E', Im) ], */

/*  Ik is the identity matrix of size k and X' is the transpose of X. */
/*  kron(X, Y) is the Kronecker product between the matrices X and Y. */

/*  If TRANS = 'C', y in the conjugate transposed system Z'y = scale*b */
/*  is solved for, which is equivalent to solve for R and L in */

/*              A' * R  + D' * L   = scale *  C           (3) */
/*              R  * B' + L  * E'  = scale * -F */

/*  This case is used to compute an estimate of Dif[(A, D), (B, E)] = */
/*  = sigma_min(Z) using reverse communicaton with CLACON. */

/*  CTGSY2 also (IJOB >= 1) contributes to the computation in CTGSYL */
/*  of an upper bound on the separation between to matrix pairs. Then */
/*  the input (A, D), (B, E) are sub-pencils of two matrix pairs in */
/*  CTGSYL. */

/*  Arguments */
/*  ========= */

/*  TRANS   (input) CHARACTER*1 */
/*          = 'N', solve the generalized Sylvester equation (1). */
/*          = 'T': solve the 'transposed' system (3). */

/*  IJOB    (input) INTEGER */
/*          Specifies what kind of functionality to be performed. */
/*          =0: solve (1) only. */
/*          =1: A contribution from this subsystem to a Frobenius */
/*              norm-based estimate of the separation between two matrix */
/*              pairs is computed. (look ahead strategy is used). */
/*          =2: A contribution from this subsystem to a Frobenius */
/*              norm-based estimate of the separation between two matrix */
/*              pairs is computed. (SGECON on sub-systems is used.) */
/*          Not referenced if TRANS = 'T'. */

/*  M       (input) INTEGER */
/*          On entry, M specifies the order of A and D, and the row */
/*          dimension of C, F, R and L. */

/*  N       (input) INTEGER */
/*          On entry, N specifies the order of B and E, and the column */
/*          dimension of C, F, R and L. */

/*  A       (input) COMPLEX array, dimension (LDA, M) */
/*          On entry, A contains an upper triangular matrix. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the matrix A. LDA >= max(1, M). */

/*  B       (input) COMPLEX array, dimension (LDB, N) */
/*          On entry, B contains an upper triangular matrix. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the matrix B. LDB >= max(1, N). */

/*  C       (input/output) COMPLEX array, dimension (LDC, N) */
/*          On entry, C contains the right-hand-side of the first matrix */
/*          equation in (1). */
/*          On exit, if IJOB = 0, C has been overwritten by the solution */
/*          R. */

/*  LDC     (input) INTEGER */
/*          The leading dimension of the matrix C. LDC >= max(1, M). */

/*  D       (input) COMPLEX array, dimension (LDD, M) */
/*          On entry, D contains an upper triangular matrix. */

/*  LDD     (input) INTEGER */
/*          The leading dimension of the matrix D. LDD >= max(1, M). */

/*  E       (input) COMPLEX array, dimension (LDE, N) */
/*          On entry, E contains an upper triangular matrix. */

/*  LDE     (input) INTEGER */
/*          The leading dimension of the matrix E. LDE >= max(1, N). */

/*  F       (input/output) COMPLEX array, dimension (LDF, N) */
/*          On entry, F contains the right-hand-side of the second matrix */
/*          equation in (1). */
/*          On exit, if IJOB = 0, F has been overwritten by the solution */
/*          L. */

/*  LDF     (input) INTEGER */
/*          The leading dimension of the matrix F. LDF >= max(1, M). */

/*  SCALE   (output) REAL */
/*          On exit, 0 <= SCALE <= 1. If 0 < SCALE < 1, the solutions */
/*          R and L (C and F on entry) will hold the solutions to a */
/*          slightly perturbed system but the input matrices A, B, D and */
/*          E have not been changed. If SCALE = 0, R and L will hold the */
/*          solutions to the homogeneous system with C = F = 0. */
/*          Normally, SCALE = 1. */

/*  RDSUM   (input/output) REAL */
/*          On entry, the sum of squares of computed contributions to */
/*          the Dif-estimate under computation by CTGSYL, where the */
/*          scaling factor RDSCAL (see below) has been factored out. */
/*          On exit, the corresponding sum of squares updated with the */
/*          contributions from the current sub-system. */
/*          If TRANS = 'T' RDSUM is not touched. */
/*          NOTE: RDSUM only makes sense when CTGSY2 is called by */
/*          CTGSYL. */

/*  RDSCAL  (input/output) REAL */
/*          On entry, scaling factor used to prevent overflow in RDSUM. */
/*          On exit, RDSCAL is updated w.r.t. the current contributions */
/*          in RDSUM. */
/*          If TRANS = 'T', RDSCAL is not touched. */
/*          NOTE: RDSCAL only makes sense when CTGSY2 is called by */
/*          CTGSYL. */

/*  INFO    (output) INTEGER */
/*          On exit, if INFO is set to */
/*            =0: Successful exit */
/*            <0: If INFO = -i, input argument number i is illegal. */
/*            >0: The matrix pairs (A, D) and (B, E) have common or very */
/*                close eigenvalues. */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Bo Kagstrom and Peter Poromaa, Department of Computing Science, */
/*     Umea University, S-901 87 Umea, Sweden. */

/*  ===================================================================== */

/*     Decode and test input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    d_dim1 = *ldd;
    d_offset = 1 + d_dim1;
    d__ -= d_offset;
    e_dim1 = *lde;
    e_offset = 1 + e_dim1;
    e -= e_offset;
    f_dim1 = *ldf;
    f_offset = 1 + f_dim1;
    f -= f_offset;

    /* Function Body */
    *info = 0;
    ierr = 0;
    notran = lsame_(trans, "N");
    if (! notran && ! lsame_(trans, "C")) {
	*info = -1;
    } else if (notran) {
	if (*ijob < 0 || *ijob > 2) {
	    *info = -2;
	}
    }
    if (*info == 0) {
	if (*m <= 0) {
	    *info = -3;
	} else if (*n <= 0) {
	    *info = -4;
	} else if (*lda < max(1,*m)) {
	    *info = -5;
	} else if (*ldb < max(1,*n)) {
	    *info = -8;
	} else if (*ldc < max(1,*m)) {
	    *info = -10;
	} else if (*ldd < max(1,*m)) {
	    *info = -12;
	} else if (*lde < max(1,*n)) {
	    *info = -14;
	} else if (*ldf < max(1,*m)) {
	    *info = -16;
	}
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CTGSY2", &i__1);
	return 0;
    }

    if (notran) {

/*        Solve (I, J) - system */
/*           A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) */
/*           D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) */

	*scale = 1.f;
	scaloc = 1.f;
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    for (i__ = *m; i__ >= 1; --i__) {

/*              Build 2 by 2 system */

		i__2 = i__ + i__ * a_dim1;
		z__[0].r = a[i__2].r, z__[0].i = a[i__2].i;
		i__2 = i__ + i__ * d_dim1;
		z__[1].r = d__[i__2].r, z__[1].i = d__[i__2].i;
		i__2 = j + j * b_dim1;
		q__1.r = -b[i__2].r, q__1.i = -b[i__2].i;
		z__[2].r = q__1.r, z__[2].i = q__1.i;
		i__2 = j + j * e_dim1;
		q__1.r = -e[i__2].r, q__1.i = -e[i__2].i;
		z__[3].r = q__1.r, z__[3].i = q__1.i;

/*              Set up right hand side(s) */

		i__2 = i__ + j * c_dim1;
		rhs[0].r = c__[i__2].r, rhs[0].i = c__[i__2].i;
		i__2 = i__ + j * f_dim1;
		rhs[1].r = f[i__2].r, rhs[1].i = f[i__2].i;

/*              Solve Z * x = RHS */

		cgetc2_(&c__2, z__, &c__2, ipiv, jpiv, &ierr);
		if (ierr > 0) {
		    *info = ierr;
		}
		if (*ijob == 0) {
		    cgesc2_(&c__2, z__, &c__2, rhs, ipiv, jpiv, &scaloc);
		    if (scaloc != 1.f) {
			i__2 = *n;
			for (k = 1; k <= i__2; ++k) {
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(m, &q__1, &c__[k * c_dim1 + 1], &c__1);
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(m, &q__1, &f[k * f_dim1 + 1], &c__1);
			}
			*scale *= scaloc;
		    }
		} else {
		    clatdf_(ijob, &c__2, z__, &c__2, rhs, rdsum, rdscal, ipiv, 
			     jpiv);
		}

/*              Unpack solution vector(s) */

		i__2 = i__ + j * c_dim1;
		c__[i__2].r = rhs[0].r, c__[i__2].i = rhs[0].i;
		i__2 = i__ + j * f_dim1;
		f[i__2].r = rhs[1].r, f[i__2].i = rhs[1].i;

/*              Substitute R(I, J) and L(I, J) into remaining equation. */

		if (i__ > 1) {
		    q__1.r = -rhs[0].r, q__1.i = -rhs[0].i;
		    alpha.r = q__1.r, alpha.i = q__1.i;
		    i__2 = i__ - 1;
		    caxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &c__[j 
			    * c_dim1 + 1], &c__1);
		    i__2 = i__ - 1;
		    caxpy_(&i__2, &alpha, &d__[i__ * d_dim1 + 1], &c__1, &f[j 
			    * f_dim1 + 1], &c__1);
		}
		if (j < *n) {
		    i__2 = *n - j;
		    caxpy_(&i__2, &rhs[1], &b[j + (j + 1) * b_dim1], ldb, &
			    c__[i__ + (j + 1) * c_dim1], ldc);
		    i__2 = *n - j;
		    caxpy_(&i__2, &rhs[1], &e[j + (j + 1) * e_dim1], lde, &f[
			    i__ + (j + 1) * f_dim1], ldf);
		}

	    }
	}
    } else {

/*        Solve transposed (I, J) - system: */
/*           A(I, I)' * R(I, J) + D(I, I)' * L(J, J) = C(I, J) */
/*           R(I, I) * B(J, J) + L(I, J) * E(J, J)   = -F(I, J) */

	*scale = 1.f;
	scaloc = 1.f;
	i__1 = *m;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    for (j = *n; j >= 1; --j) {

/*              Build 2 by 2 system Z' */

		r_cnjg(&q__1, &a[i__ + i__ * a_dim1]);
		z__[0].r = q__1.r, z__[0].i = q__1.i;
		r_cnjg(&q__2, &b[j + j * b_dim1]);
		q__1.r = -q__2.r, q__1.i = -q__2.i;
		z__[1].r = q__1.r, z__[1].i = q__1.i;
		r_cnjg(&q__1, &d__[i__ + i__ * d_dim1]);
		z__[2].r = q__1.r, z__[2].i = q__1.i;
		r_cnjg(&q__2, &e[j + j * e_dim1]);
		q__1.r = -q__2.r, q__1.i = -q__2.i;
		z__[3].r = q__1.r, z__[3].i = q__1.i;

/*              Set up right hand side(s) */

		i__2 = i__ + j * c_dim1;
		rhs[0].r = c__[i__2].r, rhs[0].i = c__[i__2].i;
		i__2 = i__ + j * f_dim1;
		rhs[1].r = f[i__2].r, rhs[1].i = f[i__2].i;

/*              Solve Z' * x = RHS */

		cgetc2_(&c__2, z__, &c__2, ipiv, jpiv, &ierr);
		if (ierr > 0) {
		    *info = ierr;
		}
		cgesc2_(&c__2, z__, &c__2, rhs, ipiv, jpiv, &scaloc);
		if (scaloc != 1.f) {
		    i__2 = *n;
		    for (k = 1; k <= i__2; ++k) {
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(m, &q__1, &c__[k * c_dim1 + 1], &c__1);
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(m, &q__1, &f[k * f_dim1 + 1], &c__1);
		    }
		    *scale *= scaloc;
		}

/*              Unpack solution vector(s) */

		i__2 = i__ + j * c_dim1;
		c__[i__2].r = rhs[0].r, c__[i__2].i = rhs[0].i;
		i__2 = i__ + j * f_dim1;
		f[i__2].r = rhs[1].r, f[i__2].i = rhs[1].i;

/*              Substitute R(I, J) and L(I, J) into remaining equation. */

		i__2 = j - 1;
		for (k = 1; k <= i__2; ++k) {
		    i__3 = i__ + k * f_dim1;
		    i__4 = i__ + k * f_dim1;
		    r_cnjg(&q__4, &b[k + j * b_dim1]);
		    q__3.r = rhs[0].r * q__4.r - rhs[0].i * q__4.i, q__3.i = 
			    rhs[0].r * q__4.i + rhs[0].i * q__4.r;
		    q__2.r = f[i__4].r + q__3.r, q__2.i = f[i__4].i + q__3.i;
		    r_cnjg(&q__6, &e[k + j * e_dim1]);
		    q__5.r = rhs[1].r * q__6.r - rhs[1].i * q__6.i, q__5.i = 
			    rhs[1].r * q__6.i + rhs[1].i * q__6.r;
		    q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i;
		    f[i__3].r = q__1.r, f[i__3].i = q__1.i;
		}
		i__2 = *m;
		for (k = i__ + 1; k <= i__2; ++k) {
		    i__3 = k + j * c_dim1;
		    i__4 = k + j * c_dim1;
		    r_cnjg(&q__4, &a[i__ + k * a_dim1]);
		    q__3.r = q__4.r * rhs[0].r - q__4.i * rhs[0].i, q__3.i = 
			    q__4.r * rhs[0].i + q__4.i * rhs[0].r;
		    q__2.r = c__[i__4].r - q__3.r, q__2.i = c__[i__4].i - 
			    q__3.i;
		    r_cnjg(&q__6, &d__[i__ + k * d_dim1]);
		    q__5.r = q__6.r * rhs[1].r - q__6.i * rhs[1].i, q__5.i = 
			    q__6.r * rhs[1].i + q__6.i * rhs[1].r;
		    q__1.r = q__2.r - q__5.r, q__1.i = q__2.i - q__5.i;
		    c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
		}

	    }
	}
    }
    return 0;

/*     End of CTGSY2 */

} /* ctgsy2_ */
Example #17
0
/* Subroutine */ int ctprfs_(char *uplo, char *trans, char *diag, integer *n, 
	integer *nrhs, complex *ap, complex *b, integer *ldb, complex *x, 
	integer *ldx, real *ferr, real *berr, complex *work, real *rwork, 
	integer *info)
{
    /* System generated locals */
    integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5;
    real r__1, r__2, r__3, r__4;
    complex q__1;

    /* Builtin functions */
    double r_imag(complex *);

    /* Local variables */
    integer i__, j, k;
    real s;
    integer kc;
    real xk;
    integer nz;
    real eps;
    integer kase;
    real safe1, safe2;
    extern logical lsame_(char *, char *);
    integer isave[3];
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
	    complex *, integer *), caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *), ctpmv_(char *, char *, char *, 
	    integer *, complex *, complex *, integer *);
    logical upper;
    extern /* Subroutine */ int ctpsv_(char *, char *, char *, integer *, 
	    complex *, complex *, integer *), clacn2_(
	    integer *, complex *, complex *, real *, integer *, integer *);
    extern doublereal slamch_(char *);
    real safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    logical notran;
    char transn[1], transt[1];
    logical nounit;
    real lstres;


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     Modified to call CLACN2 in place of CLACON, 10 Feb 03, SJH. */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CTPRFS provides error bounds and backward error estimates for the */
/*  solution to a system of linear equations with a triangular packed */
/*  coefficient matrix. */

/*  The solution matrix X must be computed by CTPTRS or some other */
/*  means before entering this routine.  CTPRFS does not do iterative */
/*  refinement because doing so cannot improve the backward error. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  A is upper triangular; */
/*          = 'L':  A is lower triangular. */

/*  TRANS   (input) CHARACTER*1 */
/*          Specifies the form of the system of equations: */
/*          = 'N':  A * X = B     (No transpose) */
/*          = 'T':  A**T * X = B  (Transpose) */
/*          = 'C':  A**H * X = B  (Conjugate transpose) */

/*  DIAG    (input) CHARACTER*1 */
/*          = 'N':  A is non-unit triangular; */
/*          = 'U':  A is unit triangular. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  NRHS    (input) INTEGER */
/*          The number of right hand sides, i.e., the number of columns */
/*          of the matrices B and X.  NRHS >= 0. */

/*  AP      (input) COMPLEX array, dimension (N*(N+1)/2) */
/*          The upper or lower triangular matrix A, packed columnwise in */
/*          a linear array.  The j-th column of A is stored in the array */
/*          AP as follows: */
/*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */
/*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */
/*          If DIAG = 'U', the diagonal elements of A are not referenced */
/*          and are assumed to be 1. */

/*  B       (input) COMPLEX array, dimension (LDB,NRHS) */
/*          The right hand side matrix B. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= max(1,N). */

/*  X       (input) COMPLEX array, dimension (LDX,NRHS) */
/*          The solution matrix X. */

/*  LDX     (input) INTEGER */
/*          The leading dimension of the array X.  LDX >= max(1,N). */

/*  FERR    (output) REAL array, dimension (NRHS) */
/*          The estimated forward error bound for each solution vector */
/*          X(j) (the j-th column of the solution matrix X). */
/*          If XTRUE is the true solution corresponding to X(j), FERR(j) */
/*          is an estimated upper bound for the magnitude of the largest */
/*          element in (X(j) - XTRUE) divided by the magnitude of the */
/*          largest element in X(j).  The estimate is as reliable as */
/*          the estimate for RCOND, and is almost always a slight */
/*          overestimate of the true error. */

/*  BERR    (output) REAL array, dimension (NRHS) */
/*          The componentwise relative backward error of each solution */
/*          vector X(j) (i.e., the smallest relative change in */
/*          any element of A or B that makes X(j) an exact solution). */

/*  WORK    (workspace) COMPLEX array, dimension (2*N) */

/*  RWORK   (workspace) REAL array, dimension (N) */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Statement Functions .. */
/*     .. */
/*     .. Statement Function definitions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --ap;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --rwork;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    notran = lsame_(trans, "N");
    nounit = lsame_(diag, "N");

    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (! notran && ! lsame_(trans, "T") && ! 
	    lsame_(trans, "C")) {
	*info = -2;
    } else if (! nounit && ! lsame_(diag, "U")) {
	*info = -3;
    } else if (*n < 0) {
	*info = -4;
    } else if (*nrhs < 0) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    } else if (*ldx < max(1,*n)) {
	*info = -10;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CTPRFS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
	i__1 = *nrhs;
	for (j = 1; j <= i__1; ++j) {
	    ferr[j] = 0.f;
	    berr[j] = 0.f;
/* L10: */
	}
	return 0;
    }

    if (notran) {
	*(unsigned char *)transn = 'N';
	*(unsigned char *)transt = 'C';
    } else {
	*(unsigned char *)transn = 'C';
	*(unsigned char *)transt = 'N';
    }

/*     NZ = maximum number of nonzero elements in each row of A, plus 1 */

    nz = *n + 1;
    eps = slamch_("Epsilon");
    safmin = slamch_("Safe minimum");
    safe1 = nz * safmin;
    safe2 = safe1 / eps;

/*     Do for each right hand side */

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {

/*        Compute residual R = B - op(A) * X, */
/*        where op(A) = A, A**T, or A**H, depending on TRANS. */

	ccopy_(n, &x[j * x_dim1 + 1], &c__1, &work[1], &c__1);
	ctpmv_(uplo, trans, diag, n, &ap[1], &work[1], &c__1);
	q__1.r = -1.f, q__1.i = -0.f;
	caxpy_(n, &q__1, &b[j * b_dim1 + 1], &c__1, &work[1], &c__1);

/*        Compute componentwise relative backward error from formula */

/*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) */

/*        where abs(Z) is the componentwise absolute value of the matrix */
/*        or vector Z.  If the i-th component of the denominator is less */
/*        than SAFE2, then SAFE1 is added to the i-th components of the */
/*        numerator and denominator before dividing. */

	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = i__ + j * b_dim1;
	    rwork[i__] = (r__1 = b[i__3].r, dabs(r__1)) + (r__2 = r_imag(&b[
		    i__ + j * b_dim1]), dabs(r__2));
/* L20: */
	}

	if (notran) {

/*           Compute abs(A)*abs(X) + abs(B). */

	    if (upper) {
		kc = 1;
		if (nounit) {
		    i__2 = *n;
		    for (k = 1; k <= i__2; ++k) {
			i__3 = k + j * x_dim1;
			xk = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&
				x[k + j * x_dim1]), dabs(r__2));
			i__3 = k;
			for (i__ = 1; i__ <= i__3; ++i__) {
			    i__4 = kc + i__ - 1;
			    rwork[i__] += ((r__1 = ap[i__4].r, dabs(r__1)) + (
				    r__2 = r_imag(&ap[kc + i__ - 1]), dabs(
				    r__2))) * xk;
/* L30: */
			}
			kc += k;
/* L40: */
		    }
		} else {
		    i__2 = *n;
		    for (k = 1; k <= i__2; ++k) {
			i__3 = k + j * x_dim1;
			xk = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&
				x[k + j * x_dim1]), dabs(r__2));
			i__3 = k - 1;
			for (i__ = 1; i__ <= i__3; ++i__) {
			    i__4 = kc + i__ - 1;
			    rwork[i__] += ((r__1 = ap[i__4].r, dabs(r__1)) + (
				    r__2 = r_imag(&ap[kc + i__ - 1]), dabs(
				    r__2))) * xk;
/* L50: */
			}
			rwork[k] += xk;
			kc += k;
/* L60: */
		    }
		}
	    } else {
		kc = 1;
		if (nounit) {
		    i__2 = *n;
		    for (k = 1; k <= i__2; ++k) {
			i__3 = k + j * x_dim1;
			xk = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&
				x[k + j * x_dim1]), dabs(r__2));
			i__3 = *n;
			for (i__ = k; i__ <= i__3; ++i__) {
			    i__4 = kc + i__ - k;
			    rwork[i__] += ((r__1 = ap[i__4].r, dabs(r__1)) + (
				    r__2 = r_imag(&ap[kc + i__ - k]), dabs(
				    r__2))) * xk;
/* L70: */
			}
			kc = kc + *n - k + 1;
/* L80: */
		    }
		} else {
		    i__2 = *n;
		    for (k = 1; k <= i__2; ++k) {
			i__3 = k + j * x_dim1;
			xk = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&
				x[k + j * x_dim1]), dabs(r__2));
			i__3 = *n;
			for (i__ = k + 1; i__ <= i__3; ++i__) {
			    i__4 = kc + i__ - k;
			    rwork[i__] += ((r__1 = ap[i__4].r, dabs(r__1)) + (
				    r__2 = r_imag(&ap[kc + i__ - k]), dabs(
				    r__2))) * xk;
/* L90: */
			}
			rwork[k] += xk;
			kc = kc + *n - k + 1;
/* L100: */
		    }
		}
	    }
	} else {

/*           Compute abs(A**H)*abs(X) + abs(B). */

	    if (upper) {
		kc = 1;
		if (nounit) {
		    i__2 = *n;
		    for (k = 1; k <= i__2; ++k) {
			s = 0.f;
			i__3 = k;
			for (i__ = 1; i__ <= i__3; ++i__) {
			    i__4 = kc + i__ - 1;
			    i__5 = i__ + j * x_dim1;
			    s += ((r__1 = ap[i__4].r, dabs(r__1)) + (r__2 = 
				    r_imag(&ap[kc + i__ - 1]), dabs(r__2))) * 
				    ((r__3 = x[i__5].r, dabs(r__3)) + (r__4 = 
				    r_imag(&x[i__ + j * x_dim1]), dabs(r__4)))
				    ;
/* L110: */
			}
			rwork[k] += s;
			kc += k;
/* L120: */
		    }
		} else {
		    i__2 = *n;
		    for (k = 1; k <= i__2; ++k) {
			i__3 = k + j * x_dim1;
			s = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&
				x[k + j * x_dim1]), dabs(r__2));
			i__3 = k - 1;
			for (i__ = 1; i__ <= i__3; ++i__) {
			    i__4 = kc + i__ - 1;
			    i__5 = i__ + j * x_dim1;
			    s += ((r__1 = ap[i__4].r, dabs(r__1)) + (r__2 = 
				    r_imag(&ap[kc + i__ - 1]), dabs(r__2))) * 
				    ((r__3 = x[i__5].r, dabs(r__3)) + (r__4 = 
				    r_imag(&x[i__ + j * x_dim1]), dabs(r__4)))
				    ;
/* L130: */
			}
			rwork[k] += s;
			kc += k;
/* L140: */
		    }
		}
	    } else {
		kc = 1;
		if (nounit) {
		    i__2 = *n;
		    for (k = 1; k <= i__2; ++k) {
			s = 0.f;
			i__3 = *n;
			for (i__ = k; i__ <= i__3; ++i__) {
			    i__4 = kc + i__ - k;
			    i__5 = i__ + j * x_dim1;
			    s += ((r__1 = ap[i__4].r, dabs(r__1)) + (r__2 = 
				    r_imag(&ap[kc + i__ - k]), dabs(r__2))) * 
				    ((r__3 = x[i__5].r, dabs(r__3)) + (r__4 = 
				    r_imag(&x[i__ + j * x_dim1]), dabs(r__4)))
				    ;
/* L150: */
			}
			rwork[k] += s;
			kc = kc + *n - k + 1;
/* L160: */
		    }
		} else {
		    i__2 = *n;
		    for (k = 1; k <= i__2; ++k) {
			i__3 = k + j * x_dim1;
			s = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&
				x[k + j * x_dim1]), dabs(r__2));
			i__3 = *n;
			for (i__ = k + 1; i__ <= i__3; ++i__) {
			    i__4 = kc + i__ - k;
			    i__5 = i__ + j * x_dim1;
			    s += ((r__1 = ap[i__4].r, dabs(r__1)) + (r__2 = 
				    r_imag(&ap[kc + i__ - k]), dabs(r__2))) * 
				    ((r__3 = x[i__5].r, dabs(r__3)) + (r__4 = 
				    r_imag(&x[i__ + j * x_dim1]), dabs(r__4)))
				    ;
/* L170: */
			}
			rwork[k] += s;
			kc = kc + *n - k + 1;
/* L180: */
		    }
		}
	    }
	}
	s = 0.f;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    if (rwork[i__] > safe2) {
/* Computing MAX */
		i__3 = i__;
		r__3 = s, r__4 = ((r__1 = work[i__3].r, dabs(r__1)) + (r__2 = 
			r_imag(&work[i__]), dabs(r__2))) / rwork[i__];
		s = dmax(r__3,r__4);
	    } else {
/* Computing MAX */
		i__3 = i__;
		r__3 = s, r__4 = ((r__1 = work[i__3].r, dabs(r__1)) + (r__2 = 
			r_imag(&work[i__]), dabs(r__2)) + safe1) / (rwork[i__]
			 + safe1);
		s = dmax(r__3,r__4);
	    }
/* L190: */
	}
	berr[j] = s;

/*        Bound error from formula */

/*        norm(X - XTRUE) / norm(X) .le. FERR = */
/*        norm( abs(inv(op(A)))* */
/*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) */

/*        where */
/*          norm(Z) is the magnitude of the largest component of Z */
/*          inv(op(A)) is the inverse of op(A) */
/*          abs(Z) is the componentwise absolute value of the matrix or */
/*             vector Z */
/*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
/*          EPS is machine epsilon */

/*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) */
/*        is incremented by SAFE1 if the i-th component of */
/*        abs(op(A))*abs(X) + abs(B) is less than SAFE2. */

/*        Use CLACN2 to estimate the infinity-norm of the matrix */
/*           inv(op(A)) * diag(W), */
/*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */

	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    if (rwork[i__] > safe2) {
		i__3 = i__;
		rwork[i__] = (r__1 = work[i__3].r, dabs(r__1)) + (r__2 = 
			r_imag(&work[i__]), dabs(r__2)) + nz * eps * rwork[
			i__];
	    } else {
		i__3 = i__;
		rwork[i__] = (r__1 = work[i__3].r, dabs(r__1)) + (r__2 = 
			r_imag(&work[i__]), dabs(r__2)) + nz * eps * rwork[
			i__] + safe1;
	    }
/* L200: */
	}

	kase = 0;
L210:
	clacn2_(n, &work[*n + 1], &work[1], &ferr[j], &kase, isave);
	if (kase != 0) {
	    if (kase == 1) {

/*              Multiply by diag(W)*inv(op(A)**H). */

		ctpsv_(uplo, transt, diag, n, &ap[1], &work[1], &c__1);
		i__2 = *n;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    i__3 = i__;
		    i__4 = i__;
		    i__5 = i__;
		    q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] 
			    * work[i__5].i;
		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
/* L220: */
		}
	    } else {

/*              Multiply by inv(op(A))*diag(W). */

		i__2 = *n;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    i__3 = i__;
		    i__4 = i__;
		    i__5 = i__;
		    q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] 
			    * work[i__5].i;
		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
/* L230: */
		}
		ctpsv_(uplo, transn, diag, n, &ap[1], &work[1], &c__1);
	    }
	    goto L210;
	}

/*        Normalize error. */

	lstres = 0.f;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    i__3 = i__ + j * x_dim1;
	    r__3 = lstres, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = 
		    r_imag(&x[i__ + j * x_dim1]), dabs(r__2));
	    lstres = dmax(r__3,r__4);
/* L240: */
	}
	if (lstres != 0.f) {
	    ferr[j] /= lstres;
	}

/* L250: */
    }

    return 0;

/*     End of CTPRFS */

} /* ctprfs_ */
Example #18
0
/* Subroutine */ int clagsy_(integer *n, integer *k, real *d, complex *a, 
	integer *lda, integer *iseed, complex *work, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, 
	    i__9;
    doublereal d__1;
    complex q__1, q__2, q__3, q__4;

    /* Builtin functions */
    double c_abs(complex *);
    void c_div(complex *, complex *, complex *);

    /* Local variables */
    static integer i, j;
    extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, 
	    complex *, integer *, complex *, integer *, complex *, integer *);
    static complex alpha;
    extern /* Subroutine */ int cscal_(integer *, complex *, complex *, 
	    integer *);
    extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer 
	    *, complex *, integer *);
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
	    , complex *, integer *, complex *, integer *, complex *, complex *
	    , integer *), caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *), csymv_(char *, integer *, 
	    complex *, complex *, integer *, complex *, integer *, complex *, 
	    complex *, integer *);
    extern real scnrm2_(integer *, complex *, integer *);
    static integer ii, jj;
    static complex wa, wb;
    extern /* Subroutine */ int clacgv_(integer *, complex *, integer *);
    static real wn;
    extern /* Subroutine */ int xerbla_(char *, integer *), clarnv_(
	    integer *, integer *, integer *, complex *);
    static complex tau;


/*  -- LAPACK auxiliary test routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CLAGSY generates a complex symmetric matrix A, by pre- and post-   
    multiplying a real diagonal matrix D with a random unitary matrix:   
    A = U*D*U**T. The semi-bandwidth may then be reduced to k by   
    additional unitary transformations.   

    Arguments   
    =========   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    K       (input) INTEGER   
            The number of nonzero subdiagonals within the band of A.   
            0 <= K <= N-1.   

    D       (input) REAL array, dimension (N)   
            The diagonal elements of the diagonal matrix D.   

    A       (output) COMPLEX array, dimension (LDA,N)   
            The generated n by n symmetric matrix A (the full matrix is   
            stored).   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= N.   

    ISEED   (input/output) INTEGER array, dimension (4)   
            On entry, the seed of the random number generator; the array 
  
            elements must be between 0 and 4095, and ISEED(4) must be   
            odd.   
            On exit, the seed is updated.   

    WORK    (workspace) COMPLEX array, dimension (2*N)   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   

    ===================================================================== 
  


       Test the input arguments   

       Parameter adjustments */
    --d;
    a_dim1 = *lda;
    a_offset = a_dim1 + 1;
    a -= a_offset;
    --iseed;
    --work;

    /* Function Body */
    *info = 0;
    if (*n < 0) {
	*info = -1;
    } else if (*k < 0 || *k > *n - 1) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    }
    if (*info < 0) {
	i__1 = -(*info);
	xerbla_("CLAGSY", &i__1);
	return 0;
    }

/*     initialize lower triangle of A to diagonal matrix */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *n;
	for (i = j + 1; i <= i__2; ++i) {
	    i__3 = i + j * a_dim1;
	    a[i__3].r = 0.f, a[i__3].i = 0.f;
/* L10: */
	}
/* L20: */
    }
    i__1 = *n;
    for (i = 1; i <= i__1; ++i) {
	i__2 = i + i * a_dim1;
	i__3 = i;
	a[i__2].r = d[i__3], a[i__2].i = 0.f;
/* L30: */
    }

/*     Generate lower triangle of symmetric matrix */

    for (i = *n - 1; i >= 1; --i) {

/*        generate random reflection */

	i__1 = *n - i + 1;
	clarnv_(&c__3, &iseed[1], &i__1, &work[1]);
	i__1 = *n - i + 1;
	wn = scnrm2_(&i__1, &work[1], &c__1);
	d__1 = wn / c_abs(&work[1]);
	q__1.r = d__1 * work[1].r, q__1.i = d__1 * work[1].i;
	wa.r = q__1.r, wa.i = q__1.i;
	if (wn == 0.f) {
	    tau.r = 0.f, tau.i = 0.f;
	} else {
	    q__1.r = work[1].r + wa.r, q__1.i = work[1].i + wa.i;
	    wb.r = q__1.r, wb.i = q__1.i;
	    i__1 = *n - i;
	    c_div(&q__1, &c_b2, &wb);
	    cscal_(&i__1, &q__1, &work[2], &c__1);
	    work[1].r = 1.f, work[1].i = 0.f;
	    c_div(&q__1, &wb, &wa);
	    d__1 = q__1.r;
	    tau.r = d__1, tau.i = 0.f;
	}

/*        apply random reflection to A(i:n,i:n) from the left   
          and the right   

          compute  y := tau * A * conjg(u) */

	i__1 = *n - i + 1;
	clacgv_(&i__1, &work[1], &c__1);
	i__1 = *n - i + 1;
	csymv_("Lower", &i__1, &tau, &a[i + i * a_dim1], lda, &work[1], &c__1,
		 &c_b1, &work[*n + 1], &c__1);
	i__1 = *n - i + 1;
	clacgv_(&i__1, &work[1], &c__1);

/*        compute  v := y - 1/2 * tau * ( u, y ) * u */

	q__3.r = -.5f, q__3.i = 0.f;
	q__2.r = q__3.r * tau.r - q__3.i * tau.i, q__2.i = q__3.r * tau.i + 
		q__3.i * tau.r;
	i__1 = *n - i + 1;
	cdotc_(&q__4, &i__1, &work[1], &c__1, &work[*n + 1], &c__1);
	q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * q__4.i 
		+ q__2.i * q__4.r;
	alpha.r = q__1.r, alpha.i = q__1.i;
	i__1 = *n - i + 1;
	caxpy_(&i__1, &alpha, &work[1], &c__1, &work[*n + 1], &c__1);

/*        apply the transformation as a rank-2 update to A(i:n,i:n)   

          CALL CSYR2( 'Lower', N-I+1, -ONE, WORK, 1, WORK( N+1 ), 1, 
  
          $               A( I, I ), LDA ) */

	i__1 = *n;
	for (jj = i; jj <= i__1; ++jj) {
	    i__2 = *n;
	    for (ii = jj; ii <= i__2; ++ii) {
		i__3 = ii + jj * a_dim1;
		i__4 = ii + jj * a_dim1;
		i__5 = ii - i + 1;
		i__6 = *n + jj - i + 1;
		q__3.r = work[i__5].r * work[i__6].r - work[i__5].i * work[
			i__6].i, q__3.i = work[i__5].r * work[i__6].i + work[
			i__5].i * work[i__6].r;
		q__2.r = a[i__4].r - q__3.r, q__2.i = a[i__4].i - q__3.i;
		i__7 = *n + ii - i + 1;
		i__8 = jj - i + 1;
		q__4.r = work[i__7].r * work[i__8].r - work[i__7].i * work[
			i__8].i, q__4.i = work[i__7].r * work[i__8].i + work[
			i__7].i * work[i__8].r;
		q__1.r = q__2.r - q__4.r, q__1.i = q__2.i - q__4.i;
		a[i__3].r = q__1.r, a[i__3].i = q__1.i;
/* L40: */
	    }
/* L50: */
	}
/* L60: */
    }

/*     Reduce number of subdiagonals to K */

    i__1 = *n - 1 - *k;
    for (i = 1; i <= i__1; ++i) {

/*        generate reflection to annihilate A(k+i+1:n,i) */

	i__2 = *n - *k - i + 1;
	wn = scnrm2_(&i__2, &a[*k + i + i * a_dim1], &c__1);
	d__1 = wn / c_abs(&a[*k + i + i * a_dim1]);
	i__2 = *k + i + i * a_dim1;
	q__1.r = d__1 * a[i__2].r, q__1.i = d__1 * a[i__2].i;
	wa.r = q__1.r, wa.i = q__1.i;
	if (wn == 0.f) {
	    tau.r = 0.f, tau.i = 0.f;
	} else {
	    i__2 = *k + i + i * a_dim1;
	    q__1.r = a[i__2].r + wa.r, q__1.i = a[i__2].i + wa.i;
	    wb.r = q__1.r, wb.i = q__1.i;
	    i__2 = *n - *k - i;
	    c_div(&q__1, &c_b2, &wb);
	    cscal_(&i__2, &q__1, &a[*k + i + 1 + i * a_dim1], &c__1);
	    i__2 = *k + i + i * a_dim1;
	    a[i__2].r = 1.f, a[i__2].i = 0.f;
	    c_div(&q__1, &wb, &wa);
	    d__1 = q__1.r;
	    tau.r = d__1, tau.i = 0.f;
	}

/*        apply reflection to A(k+i:n,i+1:k+i-1) from the left */

	i__2 = *n - *k - i + 1;
	i__3 = *k - 1;
	cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[*k + i + (i + 1)
		 * a_dim1], lda, &a[*k + i + i * a_dim1], &c__1, &c_b1, &work[
		1], &c__1);
	i__2 = *n - *k - i + 1;
	i__3 = *k - 1;
	q__1.r = -(doublereal)tau.r, q__1.i = -(doublereal)tau.i;
	cgerc_(&i__2, &i__3, &q__1, &a[*k + i + i * a_dim1], &c__1, &work[1], 
		&c__1, &a[*k + i + (i + 1) * a_dim1], lda);

/*        apply reflection to A(k+i:n,k+i:n) from the left and the rig
ht   

          compute  y := tau * A * conjg(u) */

	i__2 = *n - *k - i + 1;
	clacgv_(&i__2, &a[*k + i + i * a_dim1], &c__1);
	i__2 = *n - *k - i + 1;
	csymv_("Lower", &i__2, &tau, &a[*k + i + (*k + i) * a_dim1], lda, &a[*
		k + i + i * a_dim1], &c__1, &c_b1, &work[1], &c__1);
	i__2 = *n - *k - i + 1;
	clacgv_(&i__2, &a[*k + i + i * a_dim1], &c__1);

/*        compute  v := y - 1/2 * tau * ( u, y ) * u */

	q__3.r = -.5f, q__3.i = 0.f;
	q__2.r = q__3.r * tau.r - q__3.i * tau.i, q__2.i = q__3.r * tau.i + 
		q__3.i * tau.r;
	i__2 = *n - *k - i + 1;
	cdotc_(&q__4, &i__2, &a[*k + i + i * a_dim1], &c__1, &work[1], &c__1);
	q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * q__4.i 
		+ q__2.i * q__4.r;
	alpha.r = q__1.r, alpha.i = q__1.i;
	i__2 = *n - *k - i + 1;
	caxpy_(&i__2, &alpha, &a[*k + i + i * a_dim1], &c__1, &work[1], &c__1)
		;

/*        apply symmetric rank-2 update to A(k+i:n,k+i:n)   

          CALL CSYR2( 'Lower', N-K-I+1, -ONE, A( K+I, I ), 1, WORK, 1,
   
          $               A( K+I, K+I ), LDA ) */

	i__2 = *n;
	for (jj = *k + i; jj <= i__2; ++jj) {
	    i__3 = *n;
	    for (ii = jj; ii <= i__3; ++ii) {
		i__4 = ii + jj * a_dim1;
		i__5 = ii + jj * a_dim1;
		i__6 = ii + i * a_dim1;
		i__7 = jj - *k - i + 1;
		q__3.r = a[i__6].r * work[i__7].r - a[i__6].i * work[i__7].i, 
			q__3.i = a[i__6].r * work[i__7].i + a[i__6].i * work[
			i__7].r;
		q__2.r = a[i__5].r - q__3.r, q__2.i = a[i__5].i - q__3.i;
		i__8 = ii - *k - i + 1;
		i__9 = jj + i * a_dim1;
		q__4.r = work[i__8].r * a[i__9].r - work[i__8].i * a[i__9].i, 
			q__4.i = work[i__8].r * a[i__9].i + work[i__8].i * a[
			i__9].r;
		q__1.r = q__2.r - q__4.r, q__1.i = q__2.i - q__4.i;
		a[i__4].r = q__1.r, a[i__4].i = q__1.i;
/* L70: */
	    }
/* L80: */
	}

	i__2 = *k + i + i * a_dim1;
	q__1.r = -(doublereal)wa.r, q__1.i = -(doublereal)wa.i;
	a[i__2].r = q__1.r, a[i__2].i = q__1.i;
	i__2 = *n;
	for (j = *k + i + 1; j <= i__2; ++j) {
	    i__3 = j + i * a_dim1;
	    a[i__3].r = 0.f, a[i__3].i = 0.f;
/* L90: */
	}
/* L100: */
    }

/*     Store full symmetric matrix */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *n;
	for (i = j + 1; i <= i__2; ++i) {
	    i__3 = j + i * a_dim1;
	    i__4 = i + j * a_dim1;
	    a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i;
/* L110: */
	}
/* L120: */
    }
    return 0;

/*     End of CLAGSY */

} /* clagsy_ */
Example #19
0
/* Subroutine */ int clarz_(char *side, integer *m, integer *n, integer *l, 
	complex *v, integer *incv, complex *tau, complex *c__, integer *ldc, 
	complex *work)
{
    /* System generated locals */
    integer c_dim1, c_offset;
    complex q__1;

    /* Local variables */
    extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, 
	    complex *, integer *, complex *, integer *, complex *, integer *),
	     cgemv_(char *, integer *, integer *, complex *, complex *, 
	    integer *, complex *, integer *, complex *, complex *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int cgeru_(integer *, integer *, complex *, 
	    complex *, integer *, complex *, integer *, complex *, integer *),
	     ccopy_(integer *, complex *, integer *, complex *, integer *), 
	    caxpy_(integer *, complex *, complex *, integer *, complex *, 
	    integer *), clacgv_(integer *, complex *, integer *);


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CLARZ applies a complex elementary reflector H to a complex */
/*  M-by-N matrix C, from either the left or the right. H is represented */
/*  in the form */

/*        H = I - tau * v * v' */

/*  where tau is a complex scalar and v is a complex vector. */

/*  If tau = 0, then H is taken to be the unit matrix. */

/*  To apply H' (the conjugate transpose of H), supply conjg(tau) instead */
/*  tau. */

/*  H is a product of k elementary reflectors as returned by CTZRZF. */

/*  Arguments */
/*  ========= */

/*  SIDE    (input) CHARACTER*1 */
/*          = 'L': form  H * C */
/*          = 'R': form  C * H */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix C. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix C. */

/*  L       (input) INTEGER */
/*          The number of entries of the vector V containing */
/*          the meaningful part of the Householder vectors. */
/*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */

/*  V       (input) COMPLEX array, dimension (1+(L-1)*abs(INCV)) */
/*          The vector v in the representation of H as returned by */
/*          CTZRZF. V is not used if TAU = 0. */

/*  INCV    (input) INTEGER */
/*          The increment between elements of v. INCV <> 0. */

/*  TAU     (input) COMPLEX */
/*          The value tau in the representation of H. */

/*  C       (input/output) COMPLEX array, dimension (LDC,N) */
/*          On entry, the M-by-N matrix C. */
/*          On exit, C is overwritten by the matrix H * C if SIDE = 'L', */
/*          or C * H if SIDE = 'R'. */

/*  LDC     (input) INTEGER */
/*          The leading dimension of the array C. LDC >= max(1,M). */

/*  WORK    (workspace) COMPLEX array, dimension */
/*                         (N) if SIDE = 'L' */
/*                      or (M) if SIDE = 'R' */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    --v;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    if (lsame_(side, "L")) {

/*        Form  H * C */

	if (tau->r != 0.f || tau->i != 0.f) {

/*           w( 1:n ) = conjg( C( 1, 1:n ) ) */

	    ccopy_(n, &c__[c_offset], ldc, &work[1], &c__1);
	    clacgv_(n, &work[1], &c__1);

/*           w( 1:n ) = conjg( w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l ) ) */

	    cgemv_("Conjugate transpose", l, n, &c_b1, &c__[*m - *l + 1 + 
		    c_dim1], ldc, &v[1], incv, &c_b1, &work[1], &c__1);
	    clacgv_(n, &work[1], &c__1);

/*           C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) */

	    q__1.r = -tau->r, q__1.i = -tau->i;
	    caxpy_(n, &q__1, &work[1], &c__1, &c__[c_offset], ldc);

/*           C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... */
/*                               tau * v( 1:l ) * conjg( w( 1:n )' ) */

	    q__1.r = -tau->r, q__1.i = -tau->i;
	    cgeru_(l, n, &q__1, &v[1], incv, &work[1], &c__1, &c__[*m - *l + 
		    1 + c_dim1], ldc);
	}

    } else {

/*        Form  C * H */

	if (tau->r != 0.f || tau->i != 0.f) {

/*           w( 1:m ) = C( 1:m, 1 ) */

	    ccopy_(m, &c__[c_offset], &c__1, &work[1], &c__1);

/*           w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l ) */

	    cgemv_("No transpose", m, l, &c_b1, &c__[(*n - *l + 1) * c_dim1 + 
		    1], ldc, &v[1], incv, &c_b1, &work[1], &c__1);

/*           C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) */

	    q__1.r = -tau->r, q__1.i = -tau->i;
	    caxpy_(m, &q__1, &work[1], &c__1, &c__[c_offset], &c__1);

/*           C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... */
/*                               tau * w( 1:m ) * v( 1:l )' */

	    q__1.r = -tau->r, q__1.i = -tau->i;
	    cgerc_(m, l, &q__1, &work[1], &c__1, &v[1], incv, &c__[(*n - *l + 
		    1) * c_dim1 + 1], ldc);

	}

    }

    return 0;

/*     End of CLARZ */

} /* clarz_ */
Example #20
0
void caxpy( int n, complex *alpha, complex *x, int incx,  complex *y, int incy)
{
    caxpy_(&n, alpha, x, &incx, y, &incy);
}
Example #21
0
/* DECK CSPCO */
/* Subroutine */ int cspco_(complex *ap, integer *n, integer *kpvt, real *
	rcond, complex *z__)
{
    /* System generated locals */
    integer i__1, i__2, i__3, i__4, i__5;
    real r__1, r__2, r__3, r__4, r__5, r__6, r__7, r__8;
    complex q__1, q__2, q__3;

    /* Local variables */
    static integer i__, j, k;
    static real s;
    static complex t;
    static integer j1;
    static complex ak, bk, ek;
    static integer ij, ik, kk, kp, ks, jm1, kps;
    static complex akm1, bkm1;
    static integer ikm1, km1k, ikp1, info;
    extern /* Subroutine */ int cspfa_(complex *, integer *, integer *, 
	    integer *);
    static integer km1km1;
    static complex denom;
    static real anorm;
    extern /* Complex */ void cdotu_(complex *, integer *, complex *, integer 
	    *, complex *, integer *);
    extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *);
    static real ynorm;
    extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer 
	    *);
    extern doublereal scasum_(integer *, complex *, integer *);

/* ***BEGIN PROLOGUE  CSPCO */
/* ***PURPOSE  Factor a complex symmetric matrix stored in packed form */
/*            by elimination with symmetric pivoting and estimate the */
/*            condition number of the matrix. */
/* ***LIBRARY   SLATEC (LINPACK) */
/* ***CATEGORY  D2C1 */
/* ***TYPE      COMPLEX (SSPCO-S, DSPCO-D, CHPCO-C, CSPCO-C) */
/* ***KEYWORDS  CONDITION NUMBER, LINEAR ALGEBRA, LINPACK, */
/*             MATRIX FACTORIZATION, PACKED, SYMMETRIC */
/* ***AUTHOR  Moler, C. B., (U. of New Mexico) */
/* ***DESCRIPTION */

/*     CSPCO factors a complex symmetric matrix stored in packed */
/*     form by elimination with symmetric pivoting and estimates */
/*     the condition of the matrix. */

/*     If  RCOND  is not needed, CSPFA is slightly faster. */
/*     To solve  A*X = B , follow CSPCO by CSPSL. */
/*     To compute  INVERSE(A)*C , follow CSPCO by CSPSL. */
/*     To compute  INVERSE(A) , follow CSPCO by CSPDI. */
/*     To compute  DETERMINANT(A) , follow CSPCO by CSPDI. */

/*     On Entry */

/*        AP      COMPLEX (N*(N+1)/2) */
/*                the packed form of a symmetric matrix  A .  The */
/*                columns of the upper triangle are stored sequentially */
/*                in a one-dimensional array of length  N*(N+1)/2 . */
/*                See comments below for details. */

/*        N       INTEGER */
/*                the order of the matrix  A . */

/*     On Return */

/*        AP      a block diagonal matrix and the multipliers which */
/*                were used to obtain it stored in packed form. */
/*                The factorization can be written  A = U*D*TRANS(U) */
/*                where  U  is a product of permutation and unit */
/*                upper triangular matrices , TRANS(U) is the */
/*                transpose of  U , and  D  is block diagonal */
/*                with 1 by 1 and 2 by 2 blocks. */

/*        KVPT    INTEGER(N) */
/*                an integer vector of pivot indices. */

/*        RCOND   REAL */
/*                an estimate of the reciprocal condition of  A . */
/*                For the system  A*X = B , relative perturbations */
/*                in  A  and  B  of size  EPSILON  may cause */
/*                relative perturbations in  X  of size  EPSILON/RCOND . */
/*                If  RCOND  is so small that the logical expression */
/*                           1.0 + RCOND .EQ. 1.0 */
/*                is true, then  A  may be singular to working */
/*                precision.  In particular,  RCOND  is zero  if */
/*                exact singularity is detected or the estimate */
/*                underflows. */

/*        Z       COMPLEX(N) */
/*                a work vector whose contents are usually unimportant. */
/*                If  A  is close to a singular matrix, then  Z  is */
/*                an approximate null vector in the sense that */
/*                NORM(A*Z) = RCOND*NORM(A)*NORM(Z) . */

/*     Packed Storage */

/*          The following program segment will pack the upper */
/*          triangle of a symmetric matrix. */

/*                K = 0 */
/*                DO 20 J = 1, N */
/*                   DO 10 I = 1, J */
/*                      K = K + 1 */
/*                      AP(K) = A(I,J) */
/*             10    CONTINUE */
/*             20 CONTINUE */

/* ***REFERENCES  J. J. Dongarra, J. R. Bunch, C. B. Moler, and G. W. */
/*                 Stewart, LINPACK Users' Guide, SIAM, 1979. */
/* ***ROUTINES CALLED  CAXPY, CDOTU, CSPFA, CSSCAL, SCASUM */
/* ***REVISION HISTORY  (YYMMDD) */
/*   780814  DATE WRITTEN */
/*   890531  Changed all specific intrinsics to generic.  (WRB) */
/*   890831  Modified array declarations.  (WRB) */
/*   891107  Corrected category and modified routine equivalence */
/*           list.  (WRB) */
/*   891107  REVISION DATE from Version 3.2 */
/*   891214  Prologue converted to Version 4.0 format.  (BAB) */
/*   900326  Removed duplicate information from DESCRIPTION section. */
/*           (WRB) */
/*   920501  Reformatted the REFERENCES section.  (WRB) */
/* ***END PROLOGUE  CSPCO */


/*     FIND NORM OF A USING ONLY UPPER HALF */

/* ***FIRST EXECUTABLE STATEMENT  CSPCO */
    /* Parameter adjustments */
    --z__;
    --kpvt;
    --ap;

    /* Function Body */
    j1 = 1;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = j;
	r__1 = scasum_(&j, &ap[j1], &c__1);
	q__1.r = r__1, q__1.i = 0.f;
	z__[i__2].r = q__1.r, z__[i__2].i = q__1.i;
	ij = j1;
	j1 += j;
	jm1 = j - 1;
	if (jm1 < 1) {
	    goto L20;
	}
	i__2 = jm1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = i__;
	    i__4 = i__;
	    i__5 = ij;
	    r__3 = z__[i__4].r + ((r__1 = ap[i__5].r, dabs(r__1)) + (r__2 = 
		    r_imag(&ap[ij]), dabs(r__2)));
	    q__1.r = r__3, q__1.i = 0.f;
	    z__[i__3].r = q__1.r, z__[i__3].i = q__1.i;
	    ++ij;
/* L10: */
	}
L20:
/* L30: */
	;
    }
    anorm = 0.f;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
	i__2 = j;
	r__1 = anorm, r__2 = z__[i__2].r;
	anorm = dmax(r__1,r__2);
/* L40: */
    }

/*     FACTOR */

    cspfa_(&ap[1], n, &kpvt[1], &info);

/*     RCOND = 1/(NORM(A)*(ESTIMATE OF NORM(INVERSE(A)))) . */
/*     ESTIMATE = NORM(Z)/NORM(Y) WHERE  A*Z = Y  AND  A*Y = E . */
/*     THE COMPONENTS OF  E  ARE CHOSEN TO CAUSE MAXIMUM LOCAL */
/*     GROWTH IN THE ELEMENTS OF W  WHERE  U*D*W = E . */
/*     THE VECTORS ARE FREQUENTLY RESCALED TO AVOID OVERFLOW. */

/*     SOLVE U*D*W = E */

    ek.r = 1.f, ek.i = 0.f;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = j;
	z__[i__2].r = 0.f, z__[i__2].i = 0.f;
/* L50: */
    }
    k = *n;
    ik = *n * (*n - 1) / 2;
L60:
    if (k == 0) {
	goto L120;
    }
    kk = ik + k;
    ikm1 = ik - (k - 1);
    ks = 1;
    if (kpvt[k] < 0) {
	ks = 2;
    }
    kp = (i__1 = kpvt[k], abs(i__1));
    kps = k + 1 - ks;
    if (kp == kps) {
	goto L70;
    }
    i__1 = kps;
    t.r = z__[i__1].r, t.i = z__[i__1].i;
    i__1 = kps;
    i__2 = kp;
    z__[i__1].r = z__[i__2].r, z__[i__1].i = z__[i__2].i;
    i__1 = kp;
    z__[i__1].r = t.r, z__[i__1].i = t.i;
L70:
    i__1 = k;
    if ((r__1 = z__[i__1].r, dabs(r__1)) + (r__2 = r_imag(&z__[k]), dabs(r__2)
	    ) != 0.f) {
	r__7 = (r__3 = ek.r, dabs(r__3)) + (r__4 = r_imag(&ek), dabs(r__4));
	i__2 = k;
	i__3 = k;
	r__8 = (r__5 = z__[i__3].r, dabs(r__5)) + (r__6 = r_imag(&z__[k]), 
		dabs(r__6));
	q__2.r = z__[i__2].r / r__8, q__2.i = z__[i__2].i / r__8;
	q__1.r = r__7 * q__2.r, q__1.i = r__7 * q__2.i;
	ek.r = q__1.r, ek.i = q__1.i;
    }
    i__1 = k;
    i__2 = k;
    q__1.r = z__[i__2].r + ek.r, q__1.i = z__[i__2].i + ek.i;
    z__[i__1].r = q__1.r, z__[i__1].i = q__1.i;
    i__1 = k - ks;
    caxpy_(&i__1, &z__[k], &ap[ik + 1], &c__1, &z__[1], &c__1);
    if (ks == 1) {
	goto L80;
    }
    i__1 = k - 1;
    if ((r__1 = z__[i__1].r, dabs(r__1)) + (r__2 = r_imag(&z__[k - 1]), dabs(
	    r__2)) != 0.f) {
	r__7 = (r__3 = ek.r, dabs(r__3)) + (r__4 = r_imag(&ek), dabs(r__4));
	i__2 = k - 1;
	i__3 = k - 1;
	r__8 = (r__5 = z__[i__3].r, dabs(r__5)) + (r__6 = r_imag(&z__[k - 1]),
		 dabs(r__6));
	q__2.r = z__[i__2].r / r__8, q__2.i = z__[i__2].i / r__8;
	q__1.r = r__7 * q__2.r, q__1.i = r__7 * q__2.i;
	ek.r = q__1.r, ek.i = q__1.i;
    }
    i__1 = k - 1;
    i__2 = k - 1;
    q__1.r = z__[i__2].r + ek.r, q__1.i = z__[i__2].i + ek.i;
    z__[i__1].r = q__1.r, z__[i__1].i = q__1.i;
    i__1 = k - ks;
    caxpy_(&i__1, &z__[k - 1], &ap[ikm1 + 1], &c__1, &z__[1], &c__1);
L80:
    if (ks == 2) {
	goto L100;
    }
    i__1 = k;
    i__2 = kk;
    if ((r__1 = z__[i__1].r, dabs(r__1)) + (r__2 = r_imag(&z__[k]), dabs(r__2)
	    ) <= (r__3 = ap[i__2].r, dabs(r__3)) + (r__4 = r_imag(&ap[kk]), 
	    dabs(r__4))) {
	goto L90;
    }
    i__1 = kk;
    i__2 = k;
    s = ((r__1 = ap[i__1].r, dabs(r__1)) + (r__2 = r_imag(&ap[kk]), dabs(r__2)
	    )) / ((r__3 = z__[i__2].r, dabs(r__3)) + (r__4 = r_imag(&z__[k]), 
	    dabs(r__4)));
    csscal_(n, &s, &z__[1], &c__1);
    q__2.r = s, q__2.i = 0.f;
    q__1.r = q__2.r * ek.r - q__2.i * ek.i, q__1.i = q__2.r * ek.i + q__2.i * 
	    ek.r;
    ek.r = q__1.r, ek.i = q__1.i;
L90:
    i__1 = kk;
    if ((r__1 = ap[i__1].r, dabs(r__1)) + (r__2 = r_imag(&ap[kk]), dabs(r__2))
	     != 0.f) {
	i__2 = k;
	c_div(&q__1, &z__[k], &ap[kk]);
	z__[i__2].r = q__1.r, z__[i__2].i = q__1.i;
    }
    i__1 = kk;
    if ((r__1 = ap[i__1].r, dabs(r__1)) + (r__2 = r_imag(&ap[kk]), dabs(r__2))
	     == 0.f) {
	i__2 = k;
	z__[i__2].r = 1.f, z__[i__2].i = 0.f;
    }
    goto L110;
L100:
    km1k = ik + k - 1;
    km1km1 = ikm1 + k - 1;
    c_div(&q__1, &ap[kk], &ap[km1k]);
    ak.r = q__1.r, ak.i = q__1.i;
    c_div(&q__1, &ap[km1km1], &ap[km1k]);
    akm1.r = q__1.r, akm1.i = q__1.i;
    c_div(&q__1, &z__[k], &ap[km1k]);
    bk.r = q__1.r, bk.i = q__1.i;
    c_div(&q__1, &z__[k - 1], &ap[km1k]);
    bkm1.r = q__1.r, bkm1.i = q__1.i;
    q__2.r = ak.r * akm1.r - ak.i * akm1.i, q__2.i = ak.r * akm1.i + ak.i * 
	    akm1.r;
    q__1.r = q__2.r - 1.f, q__1.i = q__2.i;
    denom.r = q__1.r, denom.i = q__1.i;
    i__1 = k;
    q__3.r = akm1.r * bk.r - akm1.i * bk.i, q__3.i = akm1.r * bk.i + akm1.i * 
	    bk.r;
    q__2.r = q__3.r - bkm1.r, q__2.i = q__3.i - bkm1.i;
    c_div(&q__1, &q__2, &denom);
    z__[i__1].r = q__1.r, z__[i__1].i = q__1.i;
    i__1 = k - 1;
    q__3.r = ak.r * bkm1.r - ak.i * bkm1.i, q__3.i = ak.r * bkm1.i + ak.i * 
	    bkm1.r;
    q__2.r = q__3.r - bk.r, q__2.i = q__3.i - bk.i;
    c_div(&q__1, &q__2, &denom);
    z__[i__1].r = q__1.r, z__[i__1].i = q__1.i;
L110:
    k -= ks;
    ik -= k;
    if (ks == 2) {
	ik -= k + 1;
    }
    goto L60;
L120:
    s = 1.f / scasum_(n, &z__[1], &c__1);
    csscal_(n, &s, &z__[1], &c__1);

/*     SOLVE TRANS(U)*Y = W */

    k = 1;
    ik = 0;
L130:
    if (k > *n) {
	goto L160;
    }
    ks = 1;
    if (kpvt[k] < 0) {
	ks = 2;
    }
    if (k == 1) {
	goto L150;
    }
    i__1 = k;
    i__2 = k;
    i__3 = k - 1;
    cdotu_(&q__2, &i__3, &ap[ik + 1], &c__1, &z__[1], &c__1);
    q__1.r = z__[i__2].r + q__2.r, q__1.i = z__[i__2].i + q__2.i;
    z__[i__1].r = q__1.r, z__[i__1].i = q__1.i;
    ikp1 = ik + k;
    if (ks == 2) {
	i__1 = k + 1;
	i__2 = k + 1;
	i__3 = k - 1;
	cdotu_(&q__2, &i__3, &ap[ikp1 + 1], &c__1, &z__[1], &c__1);
	q__1.r = z__[i__2].r + q__2.r, q__1.i = z__[i__2].i + q__2.i;
	z__[i__1].r = q__1.r, z__[i__1].i = q__1.i;
    }
    kp = (i__1 = kpvt[k], abs(i__1));
    if (kp == k) {
	goto L140;
    }
    i__1 = k;
    t.r = z__[i__1].r, t.i = z__[i__1].i;
    i__1 = k;
    i__2 = kp;
    z__[i__1].r = z__[i__2].r, z__[i__1].i = z__[i__2].i;
    i__1 = kp;
    z__[i__1].r = t.r, z__[i__1].i = t.i;
L140:
L150:
    ik += k;
    if (ks == 2) {
	ik += k + 1;
    }
    k += ks;
    goto L130;
L160:
    s = 1.f / scasum_(n, &z__[1], &c__1);
    csscal_(n, &s, &z__[1], &c__1);

    ynorm = 1.f;

/*     SOLVE U*D*V = Y */

    k = *n;
    ik = *n * (*n - 1) / 2;
L170:
    if (k == 0) {
	goto L230;
    }
    kk = ik + k;
    ikm1 = ik - (k - 1);
    ks = 1;
    if (kpvt[k] < 0) {
	ks = 2;
    }
    if (k == ks) {
	goto L190;
    }
    kp = (i__1 = kpvt[k], abs(i__1));
    kps = k + 1 - ks;
    if (kp == kps) {
	goto L180;
    }
    i__1 = kps;
    t.r = z__[i__1].r, t.i = z__[i__1].i;
    i__1 = kps;
    i__2 = kp;
    z__[i__1].r = z__[i__2].r, z__[i__1].i = z__[i__2].i;
    i__1 = kp;
    z__[i__1].r = t.r, z__[i__1].i = t.i;
L180:
    i__1 = k - ks;
    caxpy_(&i__1, &z__[k], &ap[ik + 1], &c__1, &z__[1], &c__1);
    if (ks == 2) {
	i__1 = k - ks;
	caxpy_(&i__1, &z__[k - 1], &ap[ikm1 + 1], &c__1, &z__[1], &c__1);
    }
L190:
    if (ks == 2) {
	goto L210;
    }
    i__1 = k;
    i__2 = kk;
    if ((r__1 = z__[i__1].r, dabs(r__1)) + (r__2 = r_imag(&z__[k]), dabs(r__2)
	    ) <= (r__3 = ap[i__2].r, dabs(r__3)) + (r__4 = r_imag(&ap[kk]), 
	    dabs(r__4))) {
	goto L200;
    }
    i__1 = kk;
    i__2 = k;
    s = ((r__1 = ap[i__1].r, dabs(r__1)) + (r__2 = r_imag(&ap[kk]), dabs(r__2)
	    )) / ((r__3 = z__[i__2].r, dabs(r__3)) + (r__4 = r_imag(&z__[k]), 
	    dabs(r__4)));
    csscal_(n, &s, &z__[1], &c__1);
    ynorm = s * ynorm;
L200:
    i__1 = kk;
    if ((r__1 = ap[i__1].r, dabs(r__1)) + (r__2 = r_imag(&ap[kk]), dabs(r__2))
	     != 0.f) {
	i__2 = k;
	c_div(&q__1, &z__[k], &ap[kk]);
	z__[i__2].r = q__1.r, z__[i__2].i = q__1.i;
    }
    i__1 = kk;
    if ((r__1 = ap[i__1].r, dabs(r__1)) + (r__2 = r_imag(&ap[kk]), dabs(r__2))
	     == 0.f) {
	i__2 = k;
	z__[i__2].r = 1.f, z__[i__2].i = 0.f;
    }
    goto L220;
L210:
    km1k = ik + k - 1;
    km1km1 = ikm1 + k - 1;
    c_div(&q__1, &ap[kk], &ap[km1k]);
    ak.r = q__1.r, ak.i = q__1.i;
    c_div(&q__1, &ap[km1km1], &ap[km1k]);
    akm1.r = q__1.r, akm1.i = q__1.i;
    c_div(&q__1, &z__[k], &ap[km1k]);
    bk.r = q__1.r, bk.i = q__1.i;
    c_div(&q__1, &z__[k - 1], &ap[km1k]);
    bkm1.r = q__1.r, bkm1.i = q__1.i;
    q__2.r = ak.r * akm1.r - ak.i * akm1.i, q__2.i = ak.r * akm1.i + ak.i * 
	    akm1.r;
    q__1.r = q__2.r - 1.f, q__1.i = q__2.i;
    denom.r = q__1.r, denom.i = q__1.i;
    i__1 = k;
    q__3.r = akm1.r * bk.r - akm1.i * bk.i, q__3.i = akm1.r * bk.i + akm1.i * 
	    bk.r;
    q__2.r = q__3.r - bkm1.r, q__2.i = q__3.i - bkm1.i;
    c_div(&q__1, &q__2, &denom);
    z__[i__1].r = q__1.r, z__[i__1].i = q__1.i;
    i__1 = k - 1;
    q__3.r = ak.r * bkm1.r - ak.i * bkm1.i, q__3.i = ak.r * bkm1.i + ak.i * 
	    bkm1.r;
    q__2.r = q__3.r - bk.r, q__2.i = q__3.i - bk.i;
    c_div(&q__1, &q__2, &denom);
    z__[i__1].r = q__1.r, z__[i__1].i = q__1.i;
L220:
    k -= ks;
    ik -= k;
    if (ks == 2) {
	ik -= k + 1;
    }
    goto L170;
L230:
    s = 1.f / scasum_(n, &z__[1], &c__1);
    csscal_(n, &s, &z__[1], &c__1);
    ynorm = s * ynorm;

/*     SOLVE TRANS(U)*Z = V */

    k = 1;
    ik = 0;
L240:
    if (k > *n) {
	goto L270;
    }
    ks = 1;
    if (kpvt[k] < 0) {
	ks = 2;
    }
    if (k == 1) {
	goto L260;
    }
    i__1 = k;
    i__2 = k;
    i__3 = k - 1;
    cdotu_(&q__2, &i__3, &ap[ik + 1], &c__1, &z__[1], &c__1);
    q__1.r = z__[i__2].r + q__2.r, q__1.i = z__[i__2].i + q__2.i;
    z__[i__1].r = q__1.r, z__[i__1].i = q__1.i;
    ikp1 = ik + k;
    if (ks == 2) {
	i__1 = k + 1;
	i__2 = k + 1;
	i__3 = k - 1;
	cdotu_(&q__2, &i__3, &ap[ikp1 + 1], &c__1, &z__[1], &c__1);
	q__1.r = z__[i__2].r + q__2.r, q__1.i = z__[i__2].i + q__2.i;
	z__[i__1].r = q__1.r, z__[i__1].i = q__1.i;
    }
    kp = (i__1 = kpvt[k], abs(i__1));
    if (kp == k) {
	goto L250;
    }
    i__1 = k;
    t.r = z__[i__1].r, t.i = z__[i__1].i;
    i__1 = k;
    i__2 = kp;
    z__[i__1].r = z__[i__2].r, z__[i__1].i = z__[i__2].i;
    i__1 = kp;
    z__[i__1].r = t.r, z__[i__1].i = t.i;
L250:
L260:
    ik += k;
    if (ks == 2) {
	ik += k + 1;
    }
    k += ks;
    goto L240;
L270:
/*     MAKE ZNORM = 1.0 */
    s = 1.f / scasum_(n, &z__[1], &c__1);
    csscal_(n, &s, &z__[1], &c__1);
    ynorm = s * ynorm;

    if (anorm != 0.f) {
	*rcond = ynorm / anorm;
    }
    if (anorm == 0.f) {
	*rcond = 0.f;
    }
    return 0;
} /* cspco_ */
Example #22
0
/* Subroutine */ int clahrd_(integer *n, integer *k, integer *nb, complex *a, 
	integer *lda, complex *tau, complex *t, integer *ldt, complex *y, 
	integer *ldy)
{
    /* System generated locals */
    integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, 
	    i__3;
    complex q__1;

    /* Local variables */
    integer i__;
    complex ei;
    extern /* Subroutine */ int cscal_(integer *, complex *, complex *, 
	    integer *), cgemv_(char *, integer *, integer *, complex *, 
	    complex *, integer *, complex *, integer *, complex *, complex *, 
	    integer *), ccopy_(integer *, complex *, integer *, 
	    complex *, integer *), caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *), ctrmv_(char *, char *, char *, 
	    integer *, complex *, integer *, complex *, integer *), clarfg_(integer *, complex *, complex *, integer 
	    *, complex *), clacgv_(integer *, complex *, integer *);


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) */
/*  matrix A so that elements below the k-th subdiagonal are zero. The */
/*  reduction is performed by a unitary similarity transformation */
/*  Q' * A * Q. The routine returns the matrices V and T which determine */
/*  Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. */

/*  This is an OBSOLETE auxiliary routine. */
/*  This routine will be 'deprecated' in a  future release. */
/*  Please use the new routine CLAHR2 instead. */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The order of the matrix A. */

/*  K       (input) INTEGER */
/*          The offset for the reduction. Elements below the k-th */
/*          subdiagonal in the first NB columns are reduced to zero. */

/*  NB      (input) INTEGER */
/*          The number of columns to be reduced. */

/*  A       (input/output) COMPLEX array, dimension (LDA,N-K+1) */
/*          On entry, the n-by-(n-k+1) general matrix A. */
/*          On exit, the elements on and above the k-th subdiagonal in */
/*          the first NB columns are overwritten with the corresponding */
/*          elements of the reduced matrix; the elements below the k-th */
/*          subdiagonal, with the array TAU, represent the matrix Q as a */
/*          product of elementary reflectors. The other columns of A are */
/*          unchanged. See Further Details. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  TAU     (output) COMPLEX array, dimension (NB) */
/*          The scalar factors of the elementary reflectors. See Further */
/*          Details. */

/*  T       (output) COMPLEX array, dimension (LDT,NB) */
/*          The upper triangular matrix T. */

/*  LDT     (input) INTEGER */
/*          The leading dimension of the array T.  LDT >= NB. */

/*  Y       (output) COMPLEX array, dimension (LDY,NB) */
/*          The n-by-nb matrix Y. */

/*  LDY     (input) INTEGER */
/*          The leading dimension of the array Y. LDY >= max(1,N). */

/*  Further Details */
/*  =============== */

/*  The matrix Q is represented as a product of nb elementary reflectors */

/*     Q = H(1) H(2) . . . H(nb). */

/*  Each H(i) has the form */

/*     H(i) = I - tau * v * v' */

/*  where tau is a complex scalar, and v is a complex vector with */
/*  v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in */
/*  A(i+k+1:n,i), and tau in TAU(i). */

/*  The elements of the vectors v together form the (n-k+1)-by-nb matrix */
/*  V which is needed, with T and Y, to apply the transformation to the */
/*  unreduced part of the matrix, using an update of the form: */
/*  A := (I - V*T*V') * (A - Y*V'). */

/*  The contents of A on exit are illustrated by the following example */
/*  with n = 7, k = 3 and nb = 2: */

/*     ( a   h   a   a   a ) */
/*     ( a   h   a   a   a ) */
/*     ( a   h   a   a   a ) */
/*     ( h   h   a   a   a ) */
/*     ( v1  h   a   a   a ) */
/*     ( v1  v2  a   a   a ) */
/*     ( v1  v2  a   a   a ) */

/*  where a denotes an element of the original matrix A, h denotes a */
/*  modified element of the upper Hessenberg matrix H, and vi denotes an */
/*  element of the vector defining H(i). */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Quick return if possible */

    /* Parameter adjustments */
    --tau;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1;
    t -= t_offset;
    y_dim1 = *ldy;
    y_offset = 1 + y_dim1;
    y -= y_offset;

    /* Function Body */
    if (*n <= 1) {
	return 0;
    }

    i__1 = *nb;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if (i__ > 1) {

/*           Update A(1:n,i) */

/*           Compute i-th column of A - Y * V' */

	    i__2 = i__ - 1;
	    clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda);
	    i__2 = i__ - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &a[*k 
		    + i__ - 1 + a_dim1], lda, &c_b2, &a[i__ * a_dim1 + 1], &
		    c__1);
	    i__2 = i__ - 1;
	    clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda);

/*           Apply I - V * T' * V' to this column (call it b) from the */
/*           left, using the last column of T as workspace */

/*           Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows) */
/*                    ( V2 )             ( b2 ) */

/*           where V1 is unit lower triangular */

/*           w := V1' * b1 */

	    i__2 = i__ - 1;
	    ccopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + 
		    1], &c__1);
	    i__2 = i__ - 1;
	    ctrmv_("Lower", "Conjugate transpose", "Unit", &i__2, &a[*k + 1 + 
		    a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1);

/*           w := w + V2'*b2 */

	    i__2 = *n - *k - i__ + 1;
	    i__3 = i__ - 1;
	    cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[*k + i__ + 
		    a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b2, &
		    t[*nb * t_dim1 + 1], &c__1);

/*           w := T'*w */

	    i__2 = i__ - 1;
	    ctrmv_("Upper", "Conjugate transpose", "Non-unit", &i__2, &t[
		    t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1);

/*           b2 := b2 - V2*w */

	    i__2 = *n - *k - i__ + 1;
	    i__3 = i__ - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemv_("No transpose", &i__2, &i__3, &q__1, &a[*k + i__ + a_dim1], 
		     lda, &t[*nb * t_dim1 + 1], &c__1, &c_b2, &a[*k + i__ + 
		    i__ * a_dim1], &c__1);

/*           b1 := b1 - V1*w */

	    i__2 = i__ - 1;
	    ctrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1]
, lda, &t[*nb * t_dim1 + 1], &c__1);
	    i__2 = i__ - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    caxpy_(&i__2, &q__1, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ 
		    * a_dim1], &c__1);

	    i__2 = *k + i__ - 1 + (i__ - 1) * a_dim1;
	    a[i__2].r = ei.r, a[i__2].i = ei.i;
	}

/*        Generate the elementary reflector H(i) to annihilate */
/*        A(k+i+1:n,i) */

	i__2 = *k + i__ + i__ * a_dim1;
	ei.r = a[i__2].r, ei.i = a[i__2].i;
	i__2 = *n - *k - i__ + 1;
/* Computing MIN */
	i__3 = *k + i__ + 1;
	clarfg_(&i__2, &ei, &a[min(i__3, *n)+ i__ * a_dim1], &c__1, &tau[i__])
		;
	i__2 = *k + i__ + i__ * a_dim1;
	a[i__2].r = 1.f, a[i__2].i = 0.f;

/*        Compute  Y(1:n,i) */

	i__2 = *n - *k - i__ + 1;
	cgemv_("No transpose", n, &i__2, &c_b2, &a[(i__ + 1) * a_dim1 + 1], 
		lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b1, &y[i__ * 
		y_dim1 + 1], &c__1);
	i__2 = *n - *k - i__ + 1;
	i__3 = i__ - 1;
	cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[*k + i__ + 
		a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b1, &t[
		i__ * t_dim1 + 1], &c__1);
	i__2 = i__ - 1;
	q__1.r = -1.f, q__1.i = -0.f;
	cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &t[i__ * 
		t_dim1 + 1], &c__1, &c_b2, &y[i__ * y_dim1 + 1], &c__1);
	cscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1);

/*        Compute T(1:i,i) */

	i__2 = i__ - 1;
	i__3 = i__;
	q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
	cscal_(&i__2, &q__1, &t[i__ * t_dim1 + 1], &c__1);
	i__2 = i__ - 1;
	ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, 
		&t[i__ * t_dim1 + 1], &c__1)
		;
	i__2 = i__ + i__ * t_dim1;
	i__3 = i__;
	t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i;

/* L10: */
    }
    i__1 = *k + *nb + *nb * a_dim1;
    a[i__1].r = ei.r, a[i__1].i = ei.i;

    return 0;

/*     End of CLAHRD */

} /* clahrd_ */
Example #23
0
/* Subroutine */
int cla_syrfsx_extended_(integer *prec_type__, char *uplo, integer *n, integer *nrhs, complex *a, integer *lda, complex *af, integer *ldaf, integer *ipiv, logical *colequ, real *c__, complex *b, integer *ldb, complex *y, integer *ldy, real *berr_out__, integer * n_norms__, real *err_bnds_norm__, real *err_bnds_comp__, complex *res, real *ayb, complex *dy, complex *y_tail__, real *rcond, integer * ithresh, real *rthresh, real *dz_ub__, logical *ignore_cwise__, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, y_dim1, y_offset, err_bnds_norm_dim1, err_bnds_norm_offset, err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2, i__3, i__4;
    real r__1, r__2;
    /* Builtin functions */
    double r_imag(complex *);
    /* Local variables */
    real dxratmax, dzratmax;
    integer i__, j;
    logical incr_prec__;
    extern /* Subroutine */
    int cla_syamv_(integer *, integer *, real *, complex *, integer *, complex *, integer *, real *, real *, integer *);
    real prev_dz_z__, yk, final_dx_x__;
    extern /* Subroutine */
    int cla_wwaddw_(integer *, complex *, complex *, complex *);
    real final_dz_z__, prevnormdx;
    integer cnt;
    real dyk, eps, incr_thresh__, dx_x__, dz_z__;
    extern /* Subroutine */
    int cla_lin_berr_(integer *, integer *, integer * , complex *, real *, real *);
    real ymin;
    integer y_prec_state__;
    extern /* Subroutine */
    int blas_csymv_x_(integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *, integer *);
    integer uplo2;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */
    int blas_csymv2_x_(integer *, integer *, complex *, complex *, integer *, complex *, complex *, integer *, complex *, complex *, integer *, integer *), ccopy_(integer *, complex *, integer *, complex *, integer *);
    real dxrat, dzrat;
    extern /* Subroutine */
    int caxpy_(integer *, complex *, complex *, integer *, complex *, integer *);
    logical upper;
    extern /* Subroutine */
    int csymv_(char *, integer *, complex *, complex * , integer *, complex *, integer *, complex *, complex *, integer * );
    real normx, normy;
    extern real slamch_(char *);
    extern /* Subroutine */
    int xerbla_(char *, integer *);
    real normdx;
    extern /* Subroutine */
    int csytrs_(char *, integer *, integer *, complex *, integer *, integer *, complex *, integer *, integer *);
    real hugeval;
    extern integer ilauplo_(char *);
    integer x_state__, z_state__;
    /* -- LAPACK computational routine (version 3.4.2) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* September 2012 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Local Scalars .. */
    /* .. */
    /* .. Parameters .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Intrinsic Functions .. */
    /* .. */
    /* .. Statement Functions .. */
    /* .. */
    /* .. Statement Function Definitions .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Parameter adjustments */
    err_bnds_comp_dim1 = *nrhs;
    err_bnds_comp_offset = 1 + err_bnds_comp_dim1;
    err_bnds_comp__ -= err_bnds_comp_offset;
    err_bnds_norm_dim1 = *nrhs;
    err_bnds_norm_offset = 1 + err_bnds_norm_dim1;
    err_bnds_norm__ -= err_bnds_norm_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    af_dim1 = *ldaf;
    af_offset = 1 + af_dim1;
    af -= af_offset;
    --ipiv;
    --c__;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    y_dim1 = *ldy;
    y_offset = 1 + y_dim1;
    y -= y_offset;
    --berr_out__;
    --res;
    --ayb;
    --dy;
    --y_tail__;
    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L"))
    {
        *info = -2;
    }
    else if (*n < 0)
    {
        *info = -3;
    }
    else if (*nrhs < 0)
    {
        *info = -4;
    }
    else if (*lda < max(1,*n))
    {
        *info = -6;
    }
    else if (*ldaf < max(1,*n))
    {
        *info = -8;
    }
    else if (*ldb < max(1,*n))
    {
        *info = -13;
    }
    else if (*ldy < max(1,*n))
    {
        *info = -15;
    }
    if (*info != 0)
    {
        i__1 = -(*info);
        xerbla_("CLA_SYRFSX_EXTENDED", &i__1);
        return 0;
    }
    eps = slamch_("Epsilon");
    hugeval = slamch_("Overflow");
    /* Force HUGEVAL to Inf */
    hugeval *= hugeval;
    /* Using HUGEVAL may lead to spurious underflows. */
    incr_thresh__ = (real) (*n) * eps;
    if (lsame_(uplo, "L"))
    {
        uplo2 = ilauplo_("L");
    }
    else
    {
        uplo2 = ilauplo_("U");
    }
    i__1 = *nrhs;
    for (j = 1;
            j <= i__1;
            ++j)
    {
        y_prec_state__ = 1;
        if (y_prec_state__ == 2)
        {
            i__2 = *n;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                i__3 = i__;
                y_tail__[i__3].r = 0.f;
                y_tail__[i__3].i = 0.f; // , expr subst
            }
        }
        dxrat = 0.f;
        dxratmax = 0.f;
        dzrat = 0.f;
        dzratmax = 0.f;
        final_dx_x__ = hugeval;
        final_dz_z__ = hugeval;
        prevnormdx = hugeval;
        prev_dz_z__ = hugeval;
        dz_z__ = hugeval;
        dx_x__ = hugeval;
        x_state__ = 1;
        z_state__ = 0;
        incr_prec__ = FALSE_;
        i__2 = *ithresh;
        for (cnt = 1;
                cnt <= i__2;
                ++cnt)
        {
            /* Compute residual RES = B_s - op(A_s) * Y, */
            /* op(A) = A, A**T, or A**H depending on TRANS (and type). */
            ccopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
            if (y_prec_state__ == 0)
            {
                csymv_(uplo, n, &c_b14, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, &c_b15, &res[1], &c__1);
            }
            else if (y_prec_state__ == 1)
            {
                blas_csymv_x_(&uplo2, n, &c_b14, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, &c_b15, &res[1], &c__1, prec_type__);
            }
            else
            {
                blas_csymv2_x_(&uplo2, n, &c_b14, &a[a_offset], lda, &y[j * y_dim1 + 1], &y_tail__[1], &c__1, &c_b15, &res[1], & c__1, prec_type__);
            }
            /* XXX: RES is no longer needed. */
            ccopy_(n, &res[1], &c__1, &dy[1], &c__1);
            csytrs_(uplo, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &dy[1], n, info);
            /* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */
            normx = 0.f;
            normy = 0.f;
            normdx = 0.f;
            dz_z__ = 0.f;
            ymin = hugeval;
            i__3 = *n;
            for (i__ = 1;
                    i__ <= i__3;
                    ++i__)
            {
                i__4 = i__ + j * y_dim1;
                yk = (r__1 = y[i__4].r, f2c_abs(r__1)) + (r__2 = r_imag(&y[i__ + j * y_dim1]), f2c_abs(r__2));
                i__4 = i__;
                dyk = (r__1 = dy[i__4].r, f2c_abs(r__1)) + (r__2 = r_imag(&dy[i__] ), f2c_abs(r__2));
                if (yk != 0.f)
                {
                    /* Computing MAX */
                    r__1 = dz_z__;
                    r__2 = dyk / yk; // , expr subst
                    dz_z__ = max(r__1,r__2);
                }
                else if (dyk != 0.f)
                {
                    dz_z__ = hugeval;
                }
                ymin = min(ymin,yk);
                normy = max(normy,yk);
                if (*colequ)
                {
                    /* Computing MAX */
                    r__1 = normx;
                    r__2 = yk * c__[i__]; // , expr subst
                    normx = max(r__1,r__2);
                    /* Computing MAX */
                    r__1 = normdx;
                    r__2 = dyk * c__[i__]; // , expr subst
                    normdx = max(r__1,r__2);
                }
                else
                {
                    normx = normy;
                    normdx = max(normdx,dyk);
                }
            }
            if (normx != 0.f)
            {
                dx_x__ = normdx / normx;
            }
            else if (normdx == 0.f)
            {
                dx_x__ = 0.f;
            }
            else
            {
                dx_x__ = hugeval;
            }
            dxrat = normdx / prevnormdx;
            dzrat = dz_z__ / prev_dz_z__;
            /* Check termination criteria. */
            if (ymin * *rcond < incr_thresh__ * normy && y_prec_state__ < 2)
            {
                incr_prec__ = TRUE_;
            }
            if (x_state__ == 3 && dxrat <= *rthresh)
            {
                x_state__ = 1;
            }
            if (x_state__ == 1)
            {
                if (dx_x__ <= eps)
                {
                    x_state__ = 2;
                }
                else if (dxrat > *rthresh)
                {
                    if (y_prec_state__ != 2)
                    {
                        incr_prec__ = TRUE_;
                    }
                    else
                    {
                        x_state__ = 3;
                    }
                }
                else
                {
                    if (dxrat > dxratmax)
                    {
                        dxratmax = dxrat;
                    }
                }
                if (x_state__ > 1)
                {
                    final_dx_x__ = dx_x__;
                }
            }
            if (z_state__ == 0 && dz_z__ <= *dz_ub__)
            {
                z_state__ = 1;
            }
            if (z_state__ == 3 && dzrat <= *rthresh)
            {
                z_state__ = 1;
            }
            if (z_state__ == 1)
            {
                if (dz_z__ <= eps)
                {
                    z_state__ = 2;
                }
                else if (dz_z__ > *dz_ub__)
                {
                    z_state__ = 0;
                    dzratmax = 0.f;
                    final_dz_z__ = hugeval;
                }
                else if (dzrat > *rthresh)
                {
                    if (y_prec_state__ != 2)
                    {
                        incr_prec__ = TRUE_;
                    }
                    else
                    {
                        z_state__ = 3;
                    }
                }
                else
                {
                    if (dzrat > dzratmax)
                    {
                        dzratmax = dzrat;
                    }
                }
                if (z_state__ > 1)
                {
                    final_dz_z__ = dz_z__;
                }
            }
            if (x_state__ != 1 && (*ignore_cwise__ || z_state__ != 1))
            {
                goto L666;
            }
            if (incr_prec__)
            {
                incr_prec__ = FALSE_;
                ++y_prec_state__;
                i__3 = *n;
                for (i__ = 1;
                        i__ <= i__3;
                        ++i__)
                {
                    i__4 = i__;
                    y_tail__[i__4].r = 0.f;
                    y_tail__[i__4].i = 0.f; // , expr subst
                }
            }
            prevnormdx = normdx;
            prev_dz_z__ = dz_z__;
            /* Update soluton. */
            if (y_prec_state__ < 2)
            {
                caxpy_(n, &c_b15, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1);
            }
            else
            {
                cla_wwaddw_(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]);
            }
        }
        /* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't CALL F90_EXIT. */
L666: /* Set final_* when cnt hits ithresh. */
        if (x_state__ == 1)
        {
            final_dx_x__ = dx_x__;
        }
        if (z_state__ == 1)
        {
            final_dz_z__ = dz_z__;
        }
        /* Compute error bounds. */
        if (*n_norms__ >= 1)
        {
            err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = final_dx_x__ / ( 1 - dxratmax);
        }
        if (*n_norms__ >= 2)
        {
            err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = final_dz_z__ / ( 1 - dzratmax);
        }
        /* Compute componentwise relative backward error from formula */
        /* max(i) ( f2c_abs(R(i)) / ( f2c_abs(op(A_s))*f2c_abs(Y) + f2c_abs(B_s) )(i) ) */
        /* where f2c_abs(Z) is the componentwise absolute value of the matrix */
        /* or vector Z. */
        /* Compute residual RES = B_s - op(A_s) * Y, */
        /* op(A) = A, A**T, or A**H depending on TRANS (and type). */
        ccopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
        csymv_(uplo, n, &c_b14, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, &c_b15, &res[1], &c__1);
        i__2 = *n;
        for (i__ = 1;
                i__ <= i__2;
                ++i__)
        {
            i__3 = i__ + j * b_dim1;
            ayb[i__] = (r__1 = b[i__3].r, f2c_abs(r__1)) + (r__2 = r_imag(&b[i__ + j * b_dim1]), f2c_abs(r__2));
        }
        /* Compute f2c_abs(op(A_s))*f2c_abs(Y) + f2c_abs(B_s). */
        cla_syamv_(&uplo2, n, &c_b37, &a[a_offset], lda, &y[j * y_dim1 + 1], &c__1, &c_b37, &ayb[1], &c__1);
        cla_lin_berr_(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]);
        /* End of loop for each RHS. */
    }
    return 0;
}
Example #24
0
/*! \brief
 * <pre>
 * Purpose
 * =======
 *    ilu_cdrop_row() - Drop some small rows from the previous 
 *    supernode (L-part only).
 * </pre>
 */
int ilu_cdrop_row(
	superlu_options_t *options, /* options */
	int    first,	    /* index of the first column in the supernode */
	int    last,	    /* index of the last column in the supernode */
	double drop_tol,    /* dropping parameter */
	int    quota,	    /* maximum nonzero entries allowed */
	int    *nnzLj,	    /* in/out number of nonzeros in L(:, 1:last) */
	double *fill_tol,   /* in/out - on exit, fill_tol=-num_zero_pivots,
			     * does not change if options->ILU_MILU != SMILU1 */
	GlobalLU_t *Glu,    /* modified */
	float swork[],   /* working space
	                     * the length of swork[] should be no less than
			     * the number of rows in the supernode */
	float swork2[], /* working space with the same size as swork[],
			     * used only by the second dropping rule */
	int    lastc	    /* if lastc == 0, there is nothing after the
			     * working supernode [first:last];
			     * if lastc == 1, there is one more column after
			     * the working supernode. */ )
{
    register int i, j, k, m1;
    register int nzlc; /* number of nonzeros in column last+1 */
    register int xlusup_first, xlsub_first;
    int m, n; /* m x n is the size of the supernode */
    int r = 0; /* number of dropped rows */
    register float *temp;
    register complex *lusup = (complex *) Glu->lusup;
    register int *lsub = Glu->lsub;
    register int *xlsub = Glu->xlsub;
    register int *xlusup = Glu->xlusup;
    register float d_max = 0.0, d_min = 1.0;
    int    drop_rule = options->ILU_DropRule;
    milu_t milu = options->ILU_MILU;
    norm_t nrm = options->ILU_Norm;
    complex zero = {0.0, 0.0};
    complex one = {1.0, 0.0};
    complex none = {-1.0, 0.0};
    int i_1 = 1;
    int inc_diag; /* inc_diag = m + 1 */
    int nzp = 0;  /* number of zero pivots */
    float alpha = pow((double)(Glu->n), -1.0 / options->ILU_MILU_Dim);

    xlusup_first = xlusup[first];
    xlsub_first = xlsub[first];
    m = xlusup[first + 1] - xlusup_first;
    n = last - first + 1;
    m1 = m - 1;
    inc_diag = m + 1;
    nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0;
    temp = swork - n;

    /* Quick return if nothing to do. */
    if (m == 0 || m == n || drop_rule == NODROP)
    {
	*nnzLj += m * n;
	return 0;
    }

    /* basic dropping: ILU(tau) */
    for (i = n; i <= m1; )
    {
	/* the average abs value of ith row */
	switch (nrm)
	{
	    case ONE_NORM:
		temp[i] = scasum_(&n, &lusup[xlusup_first + i], &m) / (double)n;
		break;
	    case TWO_NORM:
		temp[i] = scnrm2_(&n, &lusup[xlusup_first + i], &m)
		    / sqrt((double)n);
		break;
	    case INF_NORM:
	    default:
		k = icamax_(&n, &lusup[xlusup_first + i], &m) - 1;
		temp[i] = c_abs1(&lusup[xlusup_first + i + m * k]);
		break;
	}

	/* drop small entries due to drop_tol */
	if (drop_rule & DROP_BASIC && temp[i] < drop_tol)
	{
	    r++;
	    /* drop the current row and move the last undropped row here */
	    if (r > 1) /* add to last row */
	    {
		/* accumulate the sum (for MILU) */
		switch (milu)
		{
		    case SMILU_1:
		    case SMILU_2:
			caxpy_(&n, &one, &lusup[xlusup_first + i], &m,
				&lusup[xlusup_first + m - 1], &m);
			break;
		    case SMILU_3:
			for (j = 0; j < n; j++)
			    lusup[xlusup_first + (m - 1) + j * m].r +=
				    c_abs1(&lusup[xlusup_first + i + j * m]);
			break;
		    case SILU:
		    default:
			break;
		}
		ccopy_(&n, &lusup[xlusup_first + m1], &m,
                       &lusup[xlusup_first + i], &m);
	    } /* if (r > 1) */
	    else /* move to last row */
	    {
		cswap_(&n, &lusup[xlusup_first + m1], &m,
			&lusup[xlusup_first + i], &m);
		if (milu == SMILU_3)
		    for (j = 0; j < n; j++) {
			lusup[xlusup_first + m1 + j * m].r =
				c_abs1(&lusup[xlusup_first + m1 + j * m]);
			lusup[xlusup_first + m1 + j * m].i = 0.0;
                    }
	    }
	    lsub[xlsub_first + i] = lsub[xlsub_first + m1];
	    m1--;
	    continue;
	} /* if dropping */
	else
	{
	    if (temp[i] > d_max) d_max = temp[i];
	    if (temp[i] < d_min) d_min = temp[i];
	}
	i++;
    } /* for */

    /* Secondary dropping: drop more rows according to the quota. */
    quota = ceil((double)quota / (double)n);
    if (drop_rule & DROP_SECONDARY && m - r > quota)
    {
	register double tol = d_max;

	/* Calculate the second dropping tolerance */
	if (quota > n)
	{
	    if (drop_rule & DROP_INTERP) /* by interpolation */
	    {
		d_max = 1.0 / d_max; d_min = 1.0 / d_min;
		tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r));
	    }
	    else /* by quick select */
	    {
		int len = m1 - n + 1;
		scopy_(&len, swork, &i_1, swork2, &i_1);
		tol = sqselect(len, swork2, quota - n);
#if 0
		register int *itemp = iwork - n;
		A = temp;
		for (i = n; i <= m1; i++) itemp[i] = i;
		qsort(iwork, m1 - n + 1, sizeof(int), _compare_);
		tol = temp[itemp[quota]];
#endif
	    }
	}

	for (i = n; i <= m1; )
	{
	    if (temp[i] <= tol)
	    {
		register int j;
		r++;
		/* drop the current row and move the last undropped row here */
		if (r > 1) /* add to last row */
		{
		    /* accumulate the sum (for MILU) */
		    switch (milu)
		    {
			case SMILU_1:
			case SMILU_2:
			    caxpy_(&n, &one, &lusup[xlusup_first + i], &m,
				    &lusup[xlusup_first + m - 1], &m);
			    break;
			case SMILU_3:
			    for (j = 0; j < n; j++)
				lusup[xlusup_first + (m - 1) + j * m].r +=
   				  c_abs1(&lusup[xlusup_first + i + j * m]);
			    break;
			case SILU:
			default:
			    break;
		    }
		    ccopy_(&n, &lusup[xlusup_first + m1], &m,
			    &lusup[xlusup_first + i], &m);
		} /* if (r > 1) */
		else /* move to last row */
		{
		    cswap_(&n, &lusup[xlusup_first + m1], &m,
			    &lusup[xlusup_first + i], &m);
		    if (milu == SMILU_3)
			for (j = 0; j < n; j++) {
			    lusup[xlusup_first + m1 + j * m].r =
				    c_abs1(&lusup[xlusup_first + m1 + j * m]);
			    lusup[xlusup_first + m1 + j * m].i = 0.0;
                        }
		}
		lsub[xlsub_first + i] = lsub[xlsub_first + m1];
		m1--;
		temp[i] = temp[m1];

		continue;
	    }
	    i++;

	} /* for */

    } /* if secondary dropping */

    for (i = n; i < m; i++) temp[i] = 0.0;

    if (r == 0)
    {
	*nnzLj += m * n;
	return 0;
    }

    /* add dropped entries to the diagnal */
    if (milu != SILU)
    {
	register int j;
	complex t;
	float omega;
	for (j = 0; j < n; j++)
	{
	    t = lusup[xlusup_first + (m - 1) + j * m];
            if (t.r == 0.0 && t.i == 0.0) continue;
            omega = SUPERLU_MIN(2.0 * (1.0 - alpha) / c_abs1(&t), 1.0);
	    cs_mult(&t, &t, omega);

 	    switch (milu)
	    {
		case SMILU_1:
		    if ( !(c_eq(&t, &none)) ) {
                        c_add(&t, &t, &one);
                        cc_mult(&lusup[xlusup_first + j * inc_diag],
			                  &lusup[xlusup_first + j * inc_diag],
                                          &t);
                    }
		    else
		    {
                        cs_mult(
                                &lusup[xlusup_first + j * inc_diag],
			        &lusup[xlusup_first + j * inc_diag],
                                *fill_tol);
#ifdef DEBUG
			printf("[1] ZERO PIVOT: FILL col %d.\n", first + j);
			fflush(stdout);
#endif
			nzp++;
		    }
		    break;
		case SMILU_2:
                    cs_mult(&lusup[xlusup_first + j * inc_diag],
                                          &lusup[xlusup_first + j * inc_diag],
                                          1.0 + c_abs1(&t));
		    break;
		case SMILU_3:
                    c_add(&t, &t, &one);
                    cc_mult(&lusup[xlusup_first + j * inc_diag],
	                              &lusup[xlusup_first + j * inc_diag],
                                      &t);
		    break;
		case SILU:
		default:
		    break;
	    }
	}
	if (nzp > 0) *fill_tol = -nzp;
    }

    /* Remove dropped entries from the memory and fix the pointers. */
    m1 = m - r;
    for (j = 1; j < n; j++)
    {
	register int tmp1, tmp2;
	tmp1 = xlusup_first + j * m1;
	tmp2 = xlusup_first + j * m;
	for (i = 0; i < m1; i++)
	    lusup[i + tmp1] = lusup[i + tmp2];
    }
    for (i = 0; i < nzlc; i++)
	lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m];
    for (i = 0; i < nzlc; i++)
	lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i];
    for (i = first + 1; i <= last + 1; i++)
    {
	xlusup[i] -= r * (i - first);
	xlsub[i] -= r;
    }
    if (lastc)
    {
	xlusup[last + 2] -= r * n;
	xlsub[last + 2] -= r;
    }

    *nnzLj += (m - r) * n;
    return r;
}
Example #25
0
doublereal ctzt01_(integer *m, integer *n, complex *a, complex *af, integer *
	lda, complex *tau, complex *work, integer *lwork)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, i__1, i__2, i__3, i__4;
    real ret_val;

    /* Local variables */
    integer i__, j;
    real norma;
    real rwork[1];


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CTZT01 returns */
/*       || A - R*Q || / ( M * eps * ||A|| ) */
/*  for an upper trapezoidal A that was factored with CTZRQF. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrices A and AF. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrices A and AF. */

/*  A       (input) COMPLEX array, dimension (LDA,N) */
/*          The original upper trapezoidal M by N matrix A. */

/*  AF      (input) COMPLEX array, dimension (LDA,N) */
/*          The output of CTZRQF for input matrix A. */
/*          The lower triangle is not referenced. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the arrays A and AF. */

/*  TAU     (input) COMPLEX array, dimension (M) */
/*          Details of the  Householder transformations as returned by */
/*          CTZRQF. */

/*  WORK    (workspace) COMPLEX array, dimension (LWORK) */

/*  LWORK   (input) INTEGER */
/*          The length of the array WORK.  LWORK >= m*n + m. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    af_dim1 = *lda;
    af_offset = 1 + af_dim1;
    af -= af_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --tau;
    --work;

    /* Function Body */
    ret_val = 0.f;

    if (*lwork < *m * *n + *m) {
	this_xerbla_("CTZT01", &c__8);
	return ret_val;
    }

/*     Quick return if possible */

    if (*m <= 0 || *n <= 0) {
	return ret_val;
    }

    norma = clange_("One-norm", m, n, &a[a_offset], lda, rwork);

/*     Copy upper triangle R */

    claset_("Full", m, n, &c_b6, &c_b6, &work[1], m);
    i__1 = *m;
    for (j = 1; j <= i__1; ++j) {
	i__2 = j;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = (j - 1) * *m + i__;
	    i__4 = i__ + j * af_dim1;
	    work[i__3].r = af[i__4].r, work[i__3].i = af[i__4].i;
/* L10: */
	}
/* L20: */
    }

/*     R = R * P(1) * ... *P(m) */

    i__1 = *m;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = *n - *m + 1;
	clatzm_("Right", &i__, &i__2, &af[i__ + (*m + 1) * af_dim1], lda, &
		tau[i__], &work[(i__ - 1) * *m + 1], &work[*m * *m + 1], m, &
		work[*m * *n + 1]);
/* L30: */
    }

/*     R = R - A */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	caxpy_(m, &c_b15, &a[i__ * a_dim1 + 1], &c__1, &work[(i__ - 1) * *m + 
		1], &c__1);
/* L40: */
    }

    ret_val = clange_("One-norm", m, n, &work[1], m, rwork);

    ret_val /= slamch_("Epsilon") * (real) max(*m,*n);
    if (norma != 0.f) {
	ret_val /= norma;
    }

    return ret_val;

/*     End of CTZT01 */

} /* ctzt01_ */
Example #26
0
/*! \brief
 *
 * <pre>
 *   Purpose   
 *   =======   
 *
 *   CGSRFS improves the computed solution to a system of linear   
 *   equations and provides error bounds and backward error estimates for 
 *   the solution.   
 *
 *   If equilibration was performed, the system becomes:
 *           (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
 *
 *   See supermatrix.h for the definition of 'SuperMatrix' structure.
 *
 *   Arguments   
 *   =========   
 *
 * trans   (input) trans_t
 *          Specifies the form of the system of equations:
 *          = NOTRANS: A * X = B  (No transpose)
 *          = TRANS:   A'* X = B  (Transpose)
 *          = CONJ:    A**H * X = B  (Conjugate transpose)
 *   
 *   A       (input) SuperMatrix*
 *           The original matrix A in the system, or the scaled A if
 *           equilibration was done. The type of A can be:
 *           Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_GE.
 *    
 *   L       (input) SuperMatrix*
 *	     The factor L from the factorization Pr*A*Pc=L*U. Use
 *           compressed row subscripts storage for supernodes, 
 *           i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
 * 
 *   U       (input) SuperMatrix*
 *           The factor U from the factorization Pr*A*Pc=L*U as computed by
 *           cgstrf(). Use column-wise storage scheme, 
 *           i.e., U has types: Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
 *
 *   perm_c  (input) int*, dimension (A->ncol)
 *	     Column permutation vector, which defines the 
 *           permutation matrix Pc; perm_c[i] = j means column i of A is 
 *           in position j in A*Pc.
 *
 *   perm_r  (input) int*, dimension (A->nrow)
 *           Row permutation vector, which defines the permutation matrix Pr;
 *           perm_r[i] = j means row i of A is in position j in Pr*A.
 *
 *   equed   (input) Specifies the form of equilibration that was done.
 *           = 'N': No equilibration.
 *           = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
 *           = 'C': Column equilibration, i.e., A was postmultiplied by
 *                  diag(C).
 *           = 'B': Both row and column equilibration, i.e., A was replaced 
 *                  by diag(R)*A*diag(C).
 *
 *   R       (input) float*, dimension (A->nrow)
 *           The row scale factors for A.
 *           If equed = 'R' or 'B', A is premultiplied by diag(R).
 *           If equed = 'N' or 'C', R is not accessed.
 * 
 *   C       (input) float*, dimension (A->ncol)
 *           The column scale factors for A.
 *           If equed = 'C' or 'B', A is postmultiplied by diag(C).
 *           If equed = 'N' or 'R', C is not accessed.
 *
 *   B       (input) SuperMatrix*
 *           B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
 *           The right hand side matrix B.
 *           if equed = 'R' or 'B', B is premultiplied by diag(R).
 *
 *   X       (input/output) SuperMatrix*
 *           X has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
 *           On entry, the solution matrix X, as computed by cgstrs().
 *           On exit, the improved solution matrix X.
 *           if *equed = 'C' or 'B', X should be premultiplied by diag(C)
 *               in order to obtain the solution to the original system.
 *
 *   FERR    (output) float*, dimension (B->ncol)   
 *           The estimated forward error bound for each solution vector   
 *           X(j) (the j-th column of the solution matrix X).   
 *           If XTRUE is the true solution corresponding to X(j), FERR(j) 
 *           is an estimated upper bound for the magnitude of the largest 
 *           element in (X(j) - XTRUE) divided by the magnitude of the   
 *           largest element in X(j).  The estimate is as reliable as   
 *           the estimate for RCOND, and is almost always a slight   
 *           overestimate of the true error.
 *
 *   BERR    (output) float*, dimension (B->ncol)   
 *           The componentwise relative backward error of each solution   
 *           vector X(j) (i.e., the smallest relative change in   
 *           any element of A or B that makes X(j) an exact solution).
 *
 *   stat     (output) SuperLUStat_t*
 *            Record the statistics on runtime and floating-point operation count.
 *            See util.h for the definition of 'SuperLUStat_t'.
 *
 *   info    (output) int*   
 *           = 0:  successful exit   
 *            < 0:  if INFO = -i, the i-th argument had an illegal value   
 *
 *    Internal Parameters   
 *    ===================   
 *
 *    ITMAX is the maximum number of steps of iterative refinement.   
 *
 * </pre>
 */
void
cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
       int *perm_c, int *perm_r, char *equed, float *R, float *C,
       SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr,
       SuperLUStat_t *stat, int *info)
{


#define ITMAX 5
    
    /* Table of constant values */
    int    ione = 1;
    complex ndone = {-1., 0.};
    complex done = {1., 0.};
    
    /* Local variables */
    NCformat *Astore;
    complex   *Aval;
    SuperMatrix Bjcol;
    DNformat *Bstore, *Xstore, *Bjcol_store;
    complex   *Bmat, *Xmat, *Bptr, *Xptr;
    int      kase;
    float   safe1, safe2;
    int      i, j, k, irow, nz, count, notran, rowequ, colequ;
    int      ldb, ldx, nrhs;
    float   s, xk, lstres, eps, safmin;
    char     transc[1];
    trans_t  transt;
    complex   *work;
    float   *rwork;
    int      *iwork;
    int      isave[3];

    extern int clacon2_(int *, complex *, complex *, float *, int *, int []);
#ifdef _CRAY
    extern int CCOPY(int *, complex *, int *, complex *, int *);
    extern int CSAXPY(int *, complex *, complex *, int *, complex *, int *);
#else
    extern int ccopy_(int *, complex *, int *, complex *, int *);
    extern int caxpy_(int *, complex *, complex *, int *, complex *, int *);
#endif

    Astore = A->Store;
    Aval   = Astore->nzval;
    Bstore = B->Store;
    Xstore = X->Store;
    Bmat   = Bstore->nzval;
    Xmat   = Xstore->nzval;
    ldb    = Bstore->lda;
    ldx    = Xstore->lda;
    nrhs   = B->ncol;
    
    /* Test the input parameters */
    *info = 0;
    notran = (trans == NOTRANS);
    if ( !notran && trans != TRANS && trans != CONJ ) *info = -1;
    else if ( A->nrow != A->ncol || A->nrow < 0 ||
	      A->Stype != SLU_NC || A->Dtype != SLU_C || A->Mtype != SLU_GE )
	*info = -2;
    else if ( L->nrow != L->ncol || L->nrow < 0 ||
 	      L->Stype != SLU_SC || L->Dtype != SLU_C || L->Mtype != SLU_TRLU )
	*info = -3;
    else if ( U->nrow != U->ncol || U->nrow < 0 ||
 	      U->Stype != SLU_NC || U->Dtype != SLU_C || U->Mtype != SLU_TRU )
	*info = -4;
    else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
 	      B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE )
        *info = -10;
    else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
 	      X->Stype != SLU_DN || X->Dtype != SLU_C || X->Mtype != SLU_GE )
	*info = -11;
    if (*info != 0) {
	i = -(*info);
	input_error("cgsrfs", &i);
	return;
    }

    /* Quick return if possible */
    if ( A->nrow == 0 || nrhs == 0) {
	for (j = 0; j < nrhs; ++j) {
	    ferr[j] = 0.;
	    berr[j] = 0.;
	}
	return;
    }

    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
    colequ = lsame_(equed, "C") || lsame_(equed, "B");
    
    /* Allocate working space */
    work = complexMalloc(2*A->nrow);
    rwork = (float *) SUPERLU_MALLOC( A->nrow * sizeof(float) );
    iwork = intMalloc(A->nrow);
    if ( !work || !rwork || !iwork ) 
        ABORT("Malloc fails for work/rwork/iwork.");
    
    if ( notran ) {
	*(unsigned char *)transc = 'N';
        transt = TRANS;
    } else {
	*(unsigned char *)transc = 'T';
	transt = NOTRANS;
    }

    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
    nz     = A->ncol + 1;
    eps    = smach("Epsilon");
    safmin = smach("Safe minimum");

    /* Set SAFE1 essentially to be the underflow threshold times the
       number of additions in each row. */
    safe1  = nz * safmin;
    safe2  = safe1 / eps;

    /* Compute the number of nonzeros in each row (or column) of A */
    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
    if ( notran ) {
	for (k = 0; k < A->ncol; ++k)
	    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 
		++iwork[Astore->rowind[i]];
    } else {
	for (k = 0; k < A->ncol; ++k)
	    iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
    }	

    /* Copy one column of RHS B into Bjcol. */
    Bjcol.Stype = B->Stype;
    Bjcol.Dtype = B->Dtype;
    Bjcol.Mtype = B->Mtype;
    Bjcol.nrow  = B->nrow;
    Bjcol.ncol  = 1;
    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
    if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
    Bjcol_store = Bjcol.Store;
    Bjcol_store->lda = ldb;
    Bjcol_store->nzval = work; /* address aliasing */
	
    /* Do for each right hand side ... */
    for (j = 0; j < nrhs; ++j) {
	count = 0;
	lstres = 3.;
	Bptr = &Bmat[j*ldb];
	Xptr = &Xmat[j*ldx];

	while (1) { /* Loop until stopping criterion is satisfied. */

	    /* Compute residual R = B - op(A) * X,   
	       where op(A) = A, A**T, or A**H, depending on TRANS. */
	    
#ifdef _CRAY
	    CCOPY(&A->nrow, Bptr, &ione, work, &ione);
#else
	    ccopy_(&A->nrow, Bptr, &ione, work, &ione);
#endif
	    sp_cgemv(transc, ndone, A, Xptr, ione, done, work, ione);

	    /* Compute componentwise relative backward error from formula 
	       max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   
	       where abs(Z) is the componentwise absolute value of the matrix
	       or vector Z.  If the i-th component of the denominator is less
	       than SAFE2, then SAFE1 is added to the i-th component of the   
	       numerator before dividing. */

	    for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
	    
	    /* Compute abs(op(A))*abs(X) + abs(B). */
	    if (notran) {
		for (k = 0; k < A->ncol; ++k) {
		    xk = c_abs1( &Xptr[k] );
		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
			rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
		}
	    } else {
		for (k = 0; k < A->ncol; ++k) {
		    s = 0.;
		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
			irow = Astore->rowind[i];
			s += c_abs1(&Aval[i]) * c_abs1(&Xptr[irow]);
		    }
		    rwork[k] += s;
		}
	    }
	    s = 0.;
	    for (i = 0; i < A->nrow; ++i) {
		if (rwork[i] > safe2) {
		    s = SUPERLU_MAX( s, c_abs1(&work[i]) / rwork[i] );
                } else if ( rwork[i] != 0.0 ) {
		    s = SUPERLU_MAX( s, (c_abs1(&work[i]) + safe1) / rwork[i] );
                }
                /* If rwork[i] is exactly 0.0, then we know the true 
                   residual also must be exactly 0.0. */
	    }
	    berr[j] = s;

	    /* Test stopping criterion. Continue iterating if   
	       1) The residual BERR(J) is larger than machine epsilon, and   
	       2) BERR(J) decreased by at least a factor of 2 during the   
	          last iteration, and   
	       3) At most ITMAX iterations tried. */

	    if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
		/* Update solution and try again. */
		cgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
		
#ifdef _CRAY
		CAXPY(&A->nrow, &done, work, &ione,
		       &Xmat[j*ldx], &ione);
#else
		caxpy_(&A->nrow, &done, work, &ione,
		       &Xmat[j*ldx], &ione);
#endif
		lstres = berr[j];
		++count;
	    } else {
		break;
	    }
        
	} /* end while */

	stat->RefineSteps = count;

	/* Bound error from formula:
	   norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   
	   ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   
          where   
            norm(Z) is the magnitude of the largest component of Z   
            inv(op(A)) is the inverse of op(A)   
            abs(Z) is the componentwise absolute value of the matrix or
	       vector Z   
            NZ is the maximum number of nonzeros in any row of A, plus 1   
            EPS is machine epsilon   

          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
          is incremented by SAFE1 if the i-th component of   
          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   

          Use CLACON2 to estimate the infinity-norm of the matrix   
             inv(op(A)) * diag(W),   
          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
	
	for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
	
	/* Compute abs(op(A))*abs(X) + abs(B). */
	if ( notran ) {
	    for (k = 0; k < A->ncol; ++k) {
		xk = c_abs1( &Xptr[k] );
		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
		    rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
	    }
	} else {
	    for (k = 0; k < A->ncol; ++k) {
		s = 0.;
		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
		    irow = Astore->rowind[i];
		    xk = c_abs1( &Xptr[irow] );
		    s += c_abs1(&Aval[i]) * xk;
		}
		rwork[k] += s;
	    }
	}
	
	for (i = 0; i < A->nrow; ++i)
	    if (rwork[i] > safe2)
		rwork[i] = c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i];
	    else
		rwork[i] = c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
	kase = 0;

	do {
	    clacon2_(&A->nrow, &work[A->nrow], work, &ferr[j], &kase, isave);
	    if (kase == 0) break;

	    if (kase == 1) {
		/* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
		if ( notran && colequ )
		    for (i = 0; i < A->ncol; ++i) {
		        cs_mult(&work[i], &work[i], C[i]);
	            }
		else if ( !notran && rowequ )
		    for (i = 0; i < A->nrow; ++i) {
		        cs_mult(&work[i], &work[i], R[i]);
                    }

		cgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info);
		
		for (i = 0; i < A->nrow; ++i) {
		    cs_mult(&work[i], &work[i], rwork[i]);
	 	}
	    } else {
		/* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
		for (i = 0; i < A->nrow; ++i) {
		    cs_mult(&work[i], &work[i], rwork[i]);
		}
		
		cgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
		
		if ( notran && colequ )
		    for (i = 0; i < A->ncol; ++i) {
		        cs_mult(&work[i], &work[i], C[i]);
		    }
		else if ( !notran && rowequ )
		    for (i = 0; i < A->ncol; ++i) {
		        cs_mult(&work[i], &work[i], R[i]);  
		    }
	    }
	    
	} while ( kase != 0 );

	/* Normalize error. */
	lstres = 0.;
 	if ( notran && colequ ) {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, C[i] * c_abs1( &Xptr[i]) );
  	} else if ( !notran && rowequ ) {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, R[i] * c_abs1( &Xptr[i]) );
	} else {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, c_abs1( &Xptr[i]) );
	}
	if ( lstres != 0. )
	    ferr[j] /= lstres;

    } /* for each RHS j ... */
    
    SUPERLU_FREE(work);
    SUPERLU_FREE(rwork);
    SUPERLU_FREE(iwork);
    SUPERLU_FREE(Bjcol.Store);

    return;

} /* cgsrfs */
Example #27
0
/* Subroutine */ int clatrs_(char *uplo, char *trans, char *diag, char *
	normin, integer *n, complex *a, integer *lda, complex *x, real *scale, 
	 real *cnorm, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    real r__1, r__2, r__3, r__4;
    complex q__1, q__2, q__3, q__4;

    /* Builtin functions */
    double r_imag(complex *);
    void r_cnjg(complex *, complex *);

    /* Local variables */
    integer i__, j;
    real xj, rec, tjj;
    integer jinc;
    real xbnd;
    integer imax;
    real tmax;
    complex tjjs;
    real xmax, grow;
    extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer 
	    *, complex *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    real tscal;
    complex uscal;
    integer jlast;
    extern /* Complex */ VOID cdotu_(complex *, integer *, complex *, integer 
	    *, complex *, integer *);
    complex csumj;
    extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *);
    logical upper;
    extern /* Subroutine */ int ctrsv_(char *, char *, char *, integer *, 
	    complex *, integer *, complex *, integer *), slabad_(real *, real *);
    extern integer icamax_(integer *, complex *, integer *);
    extern /* Complex */ VOID cladiv_(complex *, complex *, complex *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer 
	    *), xerbla_(char *, integer *);
    real bignum;
    extern integer isamax_(integer *, real *, integer *);
    extern doublereal scasum_(integer *, complex *, integer *);
    logical notran;
    integer jfirst;
    real smlnum;
    logical nounit;


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CLATRS solves one of the triangular systems */

/*     A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b, */

/*  with scaling to prevent overflow.  Here A is an upper or lower */
/*  triangular matrix, A**T denotes the transpose of A, A**H denotes the */
/*  conjugate transpose of A, x and b are n-element vectors, and s is a */
/*  scaling factor, usually less than or equal to 1, chosen so that the */
/*  components of x will be less than the overflow threshold.  If the */
/*  unscaled problem will not cause overflow, the Level 2 BLAS routine */
/*  CTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j), */
/*  then s is set to 0 and a non-trivial solution to A*x = 0 is returned. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the matrix A is upper or lower triangular. */
/*          = 'U':  Upper triangular */
/*          = 'L':  Lower triangular */

/*  TRANS   (input) CHARACTER*1 */
/*          Specifies the operation applied to A. */
/*          = 'N':  Solve A * x = s*b     (No transpose) */
/*          = 'T':  Solve A**T * x = s*b  (Transpose) */
/*          = 'C':  Solve A**H * x = s*b  (Conjugate transpose) */

/*  DIAG    (input) CHARACTER*1 */
/*          Specifies whether or not the matrix A is unit triangular. */
/*          = 'N':  Non-unit triangular */
/*          = 'U':  Unit triangular */

/*  NORMIN  (input) CHARACTER*1 */
/*          Specifies whether CNORM has been set or not. */
/*          = 'Y':  CNORM contains the column norms on entry */
/*          = 'N':  CNORM is not set on entry.  On exit, the norms will */
/*                  be computed and stored in CNORM. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  A       (input) COMPLEX array, dimension (LDA,N) */
/*          The triangular matrix A.  If UPLO = 'U', the leading n by n */
/*          upper triangular part of the array A contains the upper */
/*          triangular matrix, and the strictly lower triangular part of */
/*          A is not referenced.  If UPLO = 'L', the leading n by n lower */
/*          triangular part of the array A contains the lower triangular */
/*          matrix, and the strictly upper triangular part of A is not */
/*          referenced.  If DIAG = 'U', the diagonal elements of A are */
/*          also not referenced and are assumed to be 1. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max (1,N). */

/*  X       (input/output) COMPLEX array, dimension (N) */
/*          On entry, the right hand side b of the triangular system. */
/*          On exit, X is overwritten by the solution vector x. */

/*  SCALE   (output) REAL */
/*          The scaling factor s for the triangular system */
/*             A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b. */
/*          If SCALE = 0, the matrix A is singular or badly scaled, and */
/*          the vector x is an exact or approximate solution to A*x = 0. */

/*  CNORM   (input or output) REAL array, dimension (N) */

/*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */
/*          contains the norm of the off-diagonal part of the j-th column */
/*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal */
/*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */
/*          must be greater than or equal to the 1-norm. */

/*          If NORMIN = 'N', CNORM is an output argument and CNORM(j) */
/*          returns the 1-norm of the offdiagonal part of the j-th column */
/*          of A. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -k, the k-th argument had an illegal value */

/*  Further Details */
/*  ======= ======= */

/*  A rough bound on x is computed; if that is less than overflow, CTRSV */
/*  is called, otherwise, specific code is used which checks for possible */
/*  overflow or divide-by-zero at every operation. */

/*  A columnwise scheme is used for solving A*x = b.  The basic algorithm */
/*  if A is lower triangular is */

/*       x[1:n] := b[1:n] */
/*       for j = 1, ..., n */
/*            x(j) := x(j) / A(j,j) */
/*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] */
/*       end */

/*  Define bounds on the components of x after j iterations of the loop: */
/*     M(j) = bound on x[1:j] */
/*     G(j) = bound on x[j+1:n] */
/*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. */

/*  Then for iteration j+1 we have */
/*     M(j+1) <= G(j) / | A(j+1,j+1) | */
/*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | */
/*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) */

/*  where CNORM(j+1) is greater than or equal to the infinity-norm of */
/*  column j+1 of A, not counting the diagonal.  Hence */

/*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) */
/*                  1<=i<=j */
/*  and */

/*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) */
/*                                   1<=i< j */

/*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTRSV if the */
/*  reciprocal of the largest M(j), j=1,..,n, is larger than */
/*  max(underflow, 1/overflow). */

/*  The bound on x(j) is also used to determine when a step in the */
/*  columnwise method can be performed without fear of overflow.  If */
/*  the computed bound is greater than a large constant, x is scaled to */
/*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to */
/*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found. */

/*  Similarly, a row-wise scheme is used to solve A**T *x = b  or */
/*  A**H *x = b.  The basic algorithm for A upper triangular is */

/*       for j = 1, ..., n */
/*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) */
/*       end */

/*  We simultaneously compute two bounds */
/*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j */
/*       M(j) = bound on x(i), 1<=i<=j */

/*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we */
/*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. */
/*  Then the bound on x(j) is */

/*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | */

/*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) */
/*                      1<=i<=j */

/*  and we can safely call CTRSV if 1/M(n) and 1/G(n) are both greater */
/*  than max(underflow, 1/overflow). */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Statement Functions .. */
/*     .. */
/*     .. Statement Function definitions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --x;
    --cnorm;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    notran = lsame_(trans, "N");
    nounit = lsame_(diag, "N");

/*     Test the input parameters. */

    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (! notran && ! lsame_(trans, "T") && ! 
	    lsame_(trans, "C")) {
	*info = -2;
    } else if (! nounit && ! lsame_(diag, "U")) {
	*info = -3;
    } else if (! lsame_(normin, "Y") && ! lsame_(normin, 
	     "N")) {
	*info = -4;
    } else if (*n < 0) {
	*info = -5;
    } else if (*lda < max(1,*n)) {
	*info = -7;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CLATRS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine machine dependent parameters to control overflow. */

    smlnum = slamch_("Safe minimum");
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);
    smlnum /= slamch_("Precision");
    bignum = 1.f / smlnum;
    *scale = 1.f;

    if (lsame_(normin, "N")) {

/*        Compute the 1-norm of each column, not including the diagonal. */

	if (upper) {

/*           A is upper triangular. */

	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = j - 1;
		cnorm[j] = scasum_(&i__2, &a[j * a_dim1 + 1], &c__1);
/* L10: */
	    }
	} else {

/*           A is lower triangular. */

	    i__1 = *n - 1;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = *n - j;
		cnorm[j] = scasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1);
/* L20: */
	    }
	    cnorm[*n] = 0.f;
	}
    }

/*     Scale the column norms by TSCAL if the maximum element in CNORM is */
/*     greater than BIGNUM/2. */

    imax = isamax_(n, &cnorm[1], &c__1);
    tmax = cnorm[imax];
    if (tmax <= bignum * .5f) {
	tscal = 1.f;
    } else {
	tscal = .5f / (smlnum * tmax);
	sscal_(n, &tscal, &cnorm[1], &c__1);
    }

/*     Compute a bound on the computed solution vector to see if the */
/*     Level 2 BLAS routine CTRSV can be used. */

    xmax = 0.f;
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
	i__2 = j;
	r__3 = xmax, r__4 = (r__1 = x[i__2].r / 2.f, dabs(r__1)) + (r__2 = 
		r_imag(&x[j]) / 2.f, dabs(r__2));
	xmax = dmax(r__3,r__4);
/* L30: */
    }
    xbnd = xmax;

    if (notran) {

/*        Compute the growth in A * x = b. */

	if (upper) {
	    jfirst = *n;
	    jlast = 1;
	    jinc = -1;
	} else {
	    jfirst = 1;
	    jlast = *n;
	    jinc = 1;
	}

	if (tscal != 1.f) {
	    grow = 0.f;
	    goto L60;
	}

	if (nounit) {

/*           A is non-unit triangular. */

/*           Compute GROW = 1/G(j) and XBND = 1/M(j). */
/*           Initially, G(0) = max{x(i), i=1,...,n}. */

	    grow = .5f / dmax(xbnd,smlnum);
	    xbnd = grow;
	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Exit the loop if the growth factor is too small. */

		if (grow <= smlnum) {
		    goto L60;
		}

		i__3 = j + j * a_dim1;
		tjjs.r = a[i__3].r, tjjs.i = a[i__3].i;
		tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), 
			dabs(r__2));

		if (tjj >= smlnum) {

/*                 M(j) = G(j-1) / abs(A(j,j)) */

/* Computing MIN */
		    r__1 = xbnd, r__2 = dmin(1.f,tjj) * grow;
		    xbnd = dmin(r__1,r__2);
		} else {

/*                 M(j) could overflow, set XBND to 0. */

		    xbnd = 0.f;
		}

		if (tjj + cnorm[j] >= smlnum) {

/*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */

		    grow *= tjj / (tjj + cnorm[j]);
		} else {

/*                 G(j) could overflow, set GROW to 0. */

		    grow = 0.f;
		}
/* L40: */
	    }
	    grow = xbnd;
	} else {

/*           A is unit triangular. */

/*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */

/* Computing MIN */
	    r__1 = 1.f, r__2 = .5f / dmax(xbnd,smlnum);
	    grow = dmin(r__1,r__2);
	    i__2 = jlast;
	    i__1 = jinc;
	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

/*              Exit the loop if the growth factor is too small. */

		if (grow <= smlnum) {
		    goto L60;
		}

/*              G(j) = G(j-1)*( 1 + CNORM(j) ) */

		grow *= 1.f / (cnorm[j] + 1.f);
/* L50: */
	    }
	}
L60:

	;
    } else {

/*        Compute the growth in A**T * x = b  or  A**H * x = b. */

	if (upper) {
	    jfirst = 1;
	    jlast = *n;
	    jinc = 1;
	} else {
	    jfirst = *n;
	    jlast = 1;
	    jinc = -1;
	}

	if (tscal != 1.f) {
	    grow = 0.f;
	    goto L90;
	}

	if (nounit) {

/*           A is non-unit triangular. */

/*           Compute GROW = 1/G(j) and XBND = 1/M(j). */
/*           Initially, M(0) = max{x(i), i=1,...,n}. */

	    grow = .5f / dmax(xbnd,smlnum);
	    xbnd = grow;
	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Exit the loop if the growth factor is too small. */

		if (grow <= smlnum) {
		    goto L90;
		}

/*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */

		xj = cnorm[j] + 1.f;
/* Computing MIN */
		r__1 = grow, r__2 = xbnd / xj;
		grow = dmin(r__1,r__2);

		i__3 = j + j * a_dim1;
		tjjs.r = a[i__3].r, tjjs.i = a[i__3].i;
		tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), 
			dabs(r__2));

		if (tjj >= smlnum) {

/*                 M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */

		    if (xj > tjj) {
			xbnd *= tjj / xj;
		    }
		} else {

/*                 M(j) could overflow, set XBND to 0. */

		    xbnd = 0.f;
		}
/* L70: */
	    }
	    grow = dmin(grow,xbnd);
	} else {

/*           A is unit triangular. */

/*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */

/* Computing MIN */
	    r__1 = 1.f, r__2 = .5f / dmax(xbnd,smlnum);
	    grow = dmin(r__1,r__2);
	    i__2 = jlast;
	    i__1 = jinc;
	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

/*              Exit the loop if the growth factor is too small. */

		if (grow <= smlnum) {
		    goto L90;
		}

/*              G(j) = ( 1 + CNORM(j) )*G(j-1) */

		xj = cnorm[j] + 1.f;
		grow /= xj;
/* L80: */
	    }
	}
L90:
	;
    }

    if (grow * tscal > smlnum) {

/*        Use the Level 2 BLAS solve if the reciprocal of the bound on */
/*        elements of X is not too small. */

	ctrsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1);
    } else {

/*        Use a Level 1 BLAS solve, scaling intermediate results. */

	if (xmax > bignum * .5f) {

/*           Scale X so that its components are less than or equal to */
/*           BIGNUM in absolute value. */

	    *scale = bignum * .5f / xmax;
	    csscal_(n, scale, &x[1], &c__1);
	    xmax = bignum;
	} else {
	    xmax *= 2.f;
	}

	if (notran) {

/*           Solve A * x = b */

	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Compute x(j) = b(j) / A(j,j), scaling x if necessary. */

		i__3 = j;
		xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), 
			dabs(r__2));
		if (nounit) {
		    i__3 = j + j * a_dim1;
		    q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3].i;
		    tjjs.r = q__1.r, tjjs.i = q__1.i;
		} else {
		    tjjs.r = tscal, tjjs.i = 0.f;
		    if (tscal == 1.f) {
			goto L105;
		    }
		}
		tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs), 
			dabs(r__2));
		if (tjj > smlnum) {

/*                    abs(A(j,j)) > SMLNUM: */

		    if (tjj < 1.f) {
			if (xj > tjj * bignum) {

/*                          Scale x by 1/b(j). */

			    rec = 1.f / xj;
			    csscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }
		    i__3 = j;
		    cladiv_(&q__1, &x[j], &tjjs);
		    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
		    i__3 = j;
		    xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]
			    ), dabs(r__2));
		} else if (tjj > 0.f) {

/*                    0 < abs(A(j,j)) <= SMLNUM: */

		    if (xj > tjj * bignum) {

/*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM */
/*                       to avoid overflow when dividing by A(j,j). */

			rec = tjj * bignum / xj;
			if (cnorm[j] > 1.f) {

/*                          Scale by 1/CNORM(j) to avoid overflow when */
/*                          multiplying x(j) times column j. */

			    rec /= cnorm[j];
			}
			csscal_(n, &rec, &x[1], &c__1);
			*scale *= rec;
			xmax *= rec;
		    }
		    i__3 = j;
		    cladiv_(&q__1, &x[j], &tjjs);
		    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
		    i__3 = j;
		    xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]
			    ), dabs(r__2));
		} else {

/*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and */
/*                    scale = 0, and compute a solution to A*x = 0. */

		    i__3 = *n;
		    for (i__ = 1; i__ <= i__3; ++i__) {
			i__4 = i__;
			x[i__4].r = 0.f, x[i__4].i = 0.f;
/* L100: */
		    }
		    i__3 = j;
		    x[i__3].r = 1.f, x[i__3].i = 0.f;
		    xj = 1.f;
		    *scale = 0.f;
		    xmax = 0.f;
		}
L105:

/*              Scale x if necessary to avoid overflow when adding a */
/*              multiple of column j of A. */

		if (xj > 1.f) {
		    rec = 1.f / xj;
		    if (cnorm[j] > (bignum - xmax) * rec) {

/*                    Scale x by 1/(2*abs(x(j))). */

			rec *= .5f;
			csscal_(n, &rec, &x[1], &c__1);
			*scale *= rec;
		    }
		} else if (xj * cnorm[j] > bignum - xmax) {

/*                 Scale x by 1/2. */

		    csscal_(n, &c_b36, &x[1], &c__1);
		    *scale *= .5f;
		}

		if (upper) {
		    if (j > 1) {

/*                    Compute the update */
/*                       x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) */

			i__3 = j - 1;
			i__4 = j;
			q__2.r = -x[i__4].r, q__2.i = -x[i__4].i;
			q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i;
			caxpy_(&i__3, &q__1, &a[j * a_dim1 + 1], &c__1, &x[1], 
				 &c__1);
			i__3 = j - 1;
			i__ = icamax_(&i__3, &x[1], &c__1);
			i__3 = i__;
			xmax = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = 
				r_imag(&x[i__]), dabs(r__2));
		    }
		} else {
		    if (j < *n) {

/*                    Compute the update */
/*                       x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) */

			i__3 = *n - j;
			i__4 = j;
			q__2.r = -x[i__4].r, q__2.i = -x[i__4].i;
			q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i;
			caxpy_(&i__3, &q__1, &a[j + 1 + j * a_dim1], &c__1, &
				x[j + 1], &c__1);
			i__3 = *n - j;
			i__ = j + icamax_(&i__3, &x[j + 1], &c__1);
			i__3 = i__;
			xmax = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = 
				r_imag(&x[i__]), dabs(r__2));
		    }
		}
/* L110: */
	    }

	} else if (lsame_(trans, "T")) {

/*           Solve A**T * x = b */

	    i__2 = jlast;
	    i__1 = jinc;
	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

/*              Compute x(j) = b(j) - sum A(k,j)*x(k). */
/*                                    k<>j */

		i__3 = j;
		xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), 
			dabs(r__2));
		uscal.r = tscal, uscal.i = 0.f;
		rec = 1.f / dmax(xmax,1.f);
		if (cnorm[j] > (bignum - xj) * rec) {

/*                 If x(j) could overflow, scale x by 1/(2*XMAX). */

		    rec *= .5f;
		    if (nounit) {
			i__3 = j + j * a_dim1;
			q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3]
				.i;
			tjjs.r = q__1.r, tjjs.i = q__1.i;
		    } else {
			tjjs.r = tscal, tjjs.i = 0.f;
		    }
		    tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs),
			     dabs(r__2));
		    if (tjj > 1.f) {

/*                       Divide by A(j,j) when scaling x if A(j,j) > 1. */

/* Computing MIN */
			r__1 = 1.f, r__2 = rec * tjj;
			rec = dmin(r__1,r__2);
			cladiv_(&q__1, &uscal, &tjjs);
			uscal.r = q__1.r, uscal.i = q__1.i;
		    }
		    if (rec < 1.f) {
			csscal_(n, &rec, &x[1], &c__1);
			*scale *= rec;
			xmax *= rec;
		    }
		}

		csumj.r = 0.f, csumj.i = 0.f;
		if (uscal.r == 1.f && uscal.i == 0.f) {

/*                 If the scaling needed for A in the dot product is 1, */
/*                 call CDOTU to perform the dot product. */

		    if (upper) {
			i__3 = j - 1;
			cdotu_(&q__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], 
				 &c__1);
			csumj.r = q__1.r, csumj.i = q__1.i;
		    } else if (j < *n) {
			i__3 = *n - j;
			cdotu_(&q__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, &
				x[j + 1], &c__1);
			csumj.r = q__1.r, csumj.i = q__1.i;
		    }
		} else {

/*                 Otherwise, use in-line code for the dot product. */

		    if (upper) {
			i__3 = j - 1;
			for (i__ = 1; i__ <= i__3; ++i__) {
			    i__4 = i__ + j * a_dim1;
			    q__3.r = a[i__4].r * uscal.r - a[i__4].i * 
				    uscal.i, q__3.i = a[i__4].r * uscal.i + a[
				    i__4].i * uscal.r;
			    i__5 = i__;
			    q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i, 
				    q__2.i = q__3.r * x[i__5].i + q__3.i * x[
				    i__5].r;
			    q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + 
				    q__2.i;
			    csumj.r = q__1.r, csumj.i = q__1.i;
/* L120: */
			}
		    } else if (j < *n) {
			i__3 = *n;
			for (i__ = j + 1; i__ <= i__3; ++i__) {
			    i__4 = i__ + j * a_dim1;
			    q__3.r = a[i__4].r * uscal.r - a[i__4].i * 
				    uscal.i, q__3.i = a[i__4].r * uscal.i + a[
				    i__4].i * uscal.r;
			    i__5 = i__;
			    q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i, 
				    q__2.i = q__3.r * x[i__5].i + q__3.i * x[
				    i__5].r;
			    q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + 
				    q__2.i;
			    csumj.r = q__1.r, csumj.i = q__1.i;
/* L130: */
			}
		    }
		}

		q__1.r = tscal, q__1.i = 0.f;
		if (uscal.r == q__1.r && uscal.i == q__1.i) {

/*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) */
/*                 was not used to scale the dotproduct. */

		    i__3 = j;
		    i__4 = j;
		    q__1.r = x[i__4].r - csumj.r, q__1.i = x[i__4].i - 
			    csumj.i;
		    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
		    i__3 = j;
		    xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]
			    ), dabs(r__2));
		    if (nounit) {
			i__3 = j + j * a_dim1;
			q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3]
				.i;
			tjjs.r = q__1.r, tjjs.i = q__1.i;
		    } else {
			tjjs.r = tscal, tjjs.i = 0.f;
			if (tscal == 1.f) {
			    goto L145;
			}
		    }

/*                    Compute x(j) = x(j) / A(j,j), scaling if necessary. */

		    tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs),
			     dabs(r__2));
		    if (tjj > smlnum) {

/*                       abs(A(j,j)) > SMLNUM: */

			if (tjj < 1.f) {
			    if (xj > tjj * bignum) {

/*                             Scale X by 1/abs(x(j)). */

				rec = 1.f / xj;
				csscal_(n, &rec, &x[1], &c__1);
				*scale *= rec;
				xmax *= rec;
			    }
			}
			i__3 = j;
			cladiv_(&q__1, &x[j], &tjjs);
			x[i__3].r = q__1.r, x[i__3].i = q__1.i;
		    } else if (tjj > 0.f) {

/*                       0 < abs(A(j,j)) <= SMLNUM: */

			if (xj > tjj * bignum) {

/*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */

			    rec = tjj * bignum / xj;
			    csscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
			i__3 = j;
			cladiv_(&q__1, &x[j], &tjjs);
			x[i__3].r = q__1.r, x[i__3].i = q__1.i;
		    } else {

/*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and */
/*                       scale = 0 and compute a solution to A**T *x = 0. */

			i__3 = *n;
			for (i__ = 1; i__ <= i__3; ++i__) {
			    i__4 = i__;
			    x[i__4].r = 0.f, x[i__4].i = 0.f;
/* L140: */
			}
			i__3 = j;
			x[i__3].r = 1.f, x[i__3].i = 0.f;
			*scale = 0.f;
			xmax = 0.f;
		    }
L145:
		    ;
		} else {

/*                 Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot */
/*                 product has already been divided by 1/A(j,j). */

		    i__3 = j;
		    cladiv_(&q__2, &x[j], &tjjs);
		    q__1.r = q__2.r - csumj.r, q__1.i = q__2.i - csumj.i;
		    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
		}
/* Computing MAX */
		i__3 = j;
		r__3 = xmax, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = 
			r_imag(&x[j]), dabs(r__2));
		xmax = dmax(r__3,r__4);
/* L150: */
	    }

	} else {

/*           Solve A**H * x = b */

	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Compute x(j) = b(j) - sum A(k,j)*x(k). */
/*                                    k<>j */

		i__3 = j;
		xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]), 
			dabs(r__2));
		uscal.r = tscal, uscal.i = 0.f;
		rec = 1.f / dmax(xmax,1.f);
		if (cnorm[j] > (bignum - xj) * rec) {

/*                 If x(j) could overflow, scale x by 1/(2*XMAX). */

		    rec *= .5f;
		    if (nounit) {
			r_cnjg(&q__2, &a[j + j * a_dim1]);
			q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i;
			tjjs.r = q__1.r, tjjs.i = q__1.i;
		    } else {
			tjjs.r = tscal, tjjs.i = 0.f;
		    }
		    tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs),
			     dabs(r__2));
		    if (tjj > 1.f) {

/*                       Divide by A(j,j) when scaling x if A(j,j) > 1. */

/* Computing MIN */
			r__1 = 1.f, r__2 = rec * tjj;
			rec = dmin(r__1,r__2);
			cladiv_(&q__1, &uscal, &tjjs);
			uscal.r = q__1.r, uscal.i = q__1.i;
		    }
		    if (rec < 1.f) {
			csscal_(n, &rec, &x[1], &c__1);
			*scale *= rec;
			xmax *= rec;
		    }
		}

		csumj.r = 0.f, csumj.i = 0.f;
		if (uscal.r == 1.f && uscal.i == 0.f) {

/*                 If the scaling needed for A in the dot product is 1, */
/*                 call CDOTC to perform the dot product. */

		    if (upper) {
			i__3 = j - 1;
			cdotc_(&q__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1], 
				 &c__1);
			csumj.r = q__1.r, csumj.i = q__1.i;
		    } else if (j < *n) {
			i__3 = *n - j;
			cdotc_(&q__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, &
				x[j + 1], &c__1);
			csumj.r = q__1.r, csumj.i = q__1.i;
		    }
		} else {

/*                 Otherwise, use in-line code for the dot product. */

		    if (upper) {
			i__3 = j - 1;
			for (i__ = 1; i__ <= i__3; ++i__) {
			    r_cnjg(&q__4, &a[i__ + j * a_dim1]);
			    q__3.r = q__4.r * uscal.r - q__4.i * uscal.i, 
				    q__3.i = q__4.r * uscal.i + q__4.i * 
				    uscal.r;
			    i__4 = i__;
			    q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, 
				    q__2.i = q__3.r * x[i__4].i + q__3.i * x[
				    i__4].r;
			    q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + 
				    q__2.i;
			    csumj.r = q__1.r, csumj.i = q__1.i;
/* L160: */
			}
		    } else if (j < *n) {
			i__3 = *n;
			for (i__ = j + 1; i__ <= i__3; ++i__) {
			    r_cnjg(&q__4, &a[i__ + j * a_dim1]);
			    q__3.r = q__4.r * uscal.r - q__4.i * uscal.i, 
				    q__3.i = q__4.r * uscal.i + q__4.i * 
				    uscal.r;
			    i__4 = i__;
			    q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, 
				    q__2.i = q__3.r * x[i__4].i + q__3.i * x[
				    i__4].r;
			    q__1.r = csumj.r + q__2.r, q__1.i = csumj.i + 
				    q__2.i;
			    csumj.r = q__1.r, csumj.i = q__1.i;
/* L170: */
			}
		    }
		}

		q__1.r = tscal, q__1.i = 0.f;
		if (uscal.r == q__1.r && uscal.i == q__1.i) {

/*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) */
/*                 was not used to scale the dotproduct. */

		    i__3 = j;
		    i__4 = j;
		    q__1.r = x[i__4].r - csumj.r, q__1.i = x[i__4].i - 
			    csumj.i;
		    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
		    i__3 = j;
		    xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]
			    ), dabs(r__2));
		    if (nounit) {
			r_cnjg(&q__2, &a[j + j * a_dim1]);
			q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i;
			tjjs.r = q__1.r, tjjs.i = q__1.i;
		    } else {
			tjjs.r = tscal, tjjs.i = 0.f;
			if (tscal == 1.f) {
			    goto L185;
			}
		    }

/*                    Compute x(j) = x(j) / A(j,j), scaling if necessary. */

		    tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs),
			     dabs(r__2));
		    if (tjj > smlnum) {

/*                       abs(A(j,j)) > SMLNUM: */

			if (tjj < 1.f) {
			    if (xj > tjj * bignum) {

/*                             Scale X by 1/abs(x(j)). */

				rec = 1.f / xj;
				csscal_(n, &rec, &x[1], &c__1);
				*scale *= rec;
				xmax *= rec;
			    }
			}
			i__3 = j;
			cladiv_(&q__1, &x[j], &tjjs);
			x[i__3].r = q__1.r, x[i__3].i = q__1.i;
		    } else if (tjj > 0.f) {

/*                       0 < abs(A(j,j)) <= SMLNUM: */

			if (xj > tjj * bignum) {

/*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */

			    rec = tjj * bignum / xj;
			    csscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
			i__3 = j;
			cladiv_(&q__1, &x[j], &tjjs);
			x[i__3].r = q__1.r, x[i__3].i = q__1.i;
		    } else {

/*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and */
/*                       scale = 0 and compute a solution to A**H *x = 0. */

			i__3 = *n;
			for (i__ = 1; i__ <= i__3; ++i__) {
			    i__4 = i__;
			    x[i__4].r = 0.f, x[i__4].i = 0.f;
/* L180: */
			}
			i__3 = j;
			x[i__3].r = 1.f, x[i__3].i = 0.f;
			*scale = 0.f;
			xmax = 0.f;
		    }
L185:
		    ;
		} else {

/*                 Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot */
/*                 product has already been divided by 1/A(j,j). */

		    i__3 = j;
		    cladiv_(&q__2, &x[j], &tjjs);
		    q__1.r = q__2.r - csumj.r, q__1.i = q__2.i - csumj.i;
		    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
		}
/* Computing MAX */
		i__3 = j;
		r__3 = xmax, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = 
			r_imag(&x[j]), dabs(r__2));
		xmax = dmax(r__3,r__4);
/* L190: */
	    }
	}
	*scale /= tscal;
    }

/*     Scale the column norms by 1/TSCAL for return. */

    if (tscal != 1.f) {
	r__1 = 1.f / tscal;
	sscal_(n, &r__1, &cnorm[1], &c__1);
    }

    return 0;

/*     End of CLATRS */

} /* clatrs_ */
Example #28
0
 int chegs2_(int *itype, char *uplo, int *n, complex *
	a, int *lda, complex *b, int *ldb, int *info)
{
    /* System generated locals */
    int a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
    float r__1, r__2;
    complex q__1;

    /* Local variables */
    int k;
    complex ct;
    float akk, bkk;
    extern  int cher2_(char *, int *, complex *, complex *
, int *, complex *, int *, complex *, int *);
    extern int lsame_(char *, char *);
    extern  int caxpy_(int *, complex *, complex *, 
	    int *, complex *, int *);
    int upper;
    extern  int ctrmv_(char *, char *, char *, int *, 
	    complex *, int *, complex *, int *), ctrsv_(char *, char *, char *, int *, complex *, 
	    int *, complex *, int *), clacgv_(
	    int *, complex *, int *), csscal_(int *, float *, 
	    complex *, int *), xerbla_(char *, int *);


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CHEGS2 reduces a complex Hermitian-definite generalized */
/*  eigenproblem to standard form. */

/*  If ITYPE = 1, the problem is A*x = lambda*B*x, */
/*  and A is overwritten by inv(U')*A*inv(U) or inv(L)*A*inv(L') */

/*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */
/*  B*A*x = lambda*x, and A is overwritten by U*A*U` or L'*A*L. */

/*  B must have been previously factorized as U'*U or L*L' by CPOTRF. */

/*  Arguments */
/*  ========= */

/*  ITYPE   (input) INTEGER */
/*          = 1: compute inv(U')*A*inv(U) or inv(L)*A*inv(L'); */
/*          = 2 or 3: compute U*A*U' or L'*A*L. */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the upper or lower triangular part of the */
/*          Hermitian matrix A is stored, and how B has been factorized. */
/*          = 'U':  Upper triangular */
/*          = 'L':  Lower triangular */

/*  N       (input) INTEGER */
/*          The order of the matrices A and B.  N >= 0. */

/*  A       (input/output) COMPLEX array, dimension (LDA,N) */
/*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
/*          n by n upper triangular part of A contains the upper */
/*          triangular part of the matrix A, and the strictly lower */
/*          triangular part of A is not referenced.  If UPLO = 'L', the */
/*          leading n by n lower triangular part of A contains the lower */
/*          triangular part of the matrix A, and the strictly upper */
/*          triangular part of A is not referenced. */

/*          On exit, if INFO = 0, the transformed matrix, stored in the */
/*          same format as A. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= MAX(1,N). */

/*  B       (input) COMPLEX array, dimension (LDB,N) */
/*          The triangular factor from the Cholesky factorization of B, */
/*          as returned by CPOTRF. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= MAX(1,N). */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (*itype < 1 || *itype > 3) {
	*info = -1;
    } else if (! upper && ! lsame_(uplo, "L")) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*lda < MAX(1,*n)) {
	*info = -5;
    } else if (*ldb < MAX(1,*n)) {
	*info = -7;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CHEGS2", &i__1);
	return 0;
    }

    if (*itype == 1) {
	if (upper) {

/*           Compute inv(U')*A*inv(U) */

	    i__1 = *n;
	    for (k = 1; k <= i__1; ++k) {

/*              Update the upper triangle of A(k:n,k:n) */

		i__2 = k + k * a_dim1;
		akk = a[i__2].r;
		i__2 = k + k * b_dim1;
		bkk = b[i__2].r;
/* Computing 2nd power */
		r__1 = bkk;
		akk /= r__1 * r__1;
		i__2 = k + k * a_dim1;
		a[i__2].r = akk, a[i__2].i = 0.f;
		if (k < *n) {
		    i__2 = *n - k;
		    r__1 = 1.f / bkk;
		    csscal_(&i__2, &r__1, &a[k + (k + 1) * a_dim1], lda);
		    r__1 = akk * -.5f;
		    ct.r = r__1, ct.i = 0.f;
		    i__2 = *n - k;
		    clacgv_(&i__2, &a[k + (k + 1) * a_dim1], lda);
		    i__2 = *n - k;
		    clacgv_(&i__2, &b[k + (k + 1) * b_dim1], ldb);
		    i__2 = *n - k;
		    caxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + (
			    k + 1) * a_dim1], lda);
		    i__2 = *n - k;
		    q__1.r = -1.f, q__1.i = -0.f;
		    cher2_(uplo, &i__2, &q__1, &a[k + (k + 1) * a_dim1], lda, 
			    &b[k + (k + 1) * b_dim1], ldb, &a[k + 1 + (k + 1) 
			    * a_dim1], lda);
		    i__2 = *n - k;
		    caxpy_(&i__2, &ct, &b[k + (k + 1) * b_dim1], ldb, &a[k + (
			    k + 1) * a_dim1], lda);
		    i__2 = *n - k;
		    clacgv_(&i__2, &b[k + (k + 1) * b_dim1], ldb);
		    i__2 = *n - k;
		    ctrsv_(uplo, "Conjugate transpose", "Non-unit", &i__2, &b[
			    k + 1 + (k + 1) * b_dim1], ldb, &a[k + (k + 1) * 
			    a_dim1], lda);
		    i__2 = *n - k;
		    clacgv_(&i__2, &a[k + (k + 1) * a_dim1], lda);
		}
/* L10: */
	    }
	} else {

/*           Compute inv(L)*A*inv(L') */

	    i__1 = *n;
	    for (k = 1; k <= i__1; ++k) {

/*              Update the lower triangle of A(k:n,k:n) */

		i__2 = k + k * a_dim1;
		akk = a[i__2].r;
		i__2 = k + k * b_dim1;
		bkk = b[i__2].r;
/* Computing 2nd power */
		r__1 = bkk;
		akk /= r__1 * r__1;
		i__2 = k + k * a_dim1;
		a[i__2].r = akk, a[i__2].i = 0.f;
		if (k < *n) {
		    i__2 = *n - k;
		    r__1 = 1.f / bkk;
		    csscal_(&i__2, &r__1, &a[k + 1 + k * a_dim1], &c__1);
		    r__1 = akk * -.5f;
		    ct.r = r__1, ct.i = 0.f;
		    i__2 = *n - k;
		    caxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + 
			    1 + k * a_dim1], &c__1);
		    i__2 = *n - k;
		    q__1.r = -1.f, q__1.i = -0.f;
		    cher2_(uplo, &i__2, &q__1, &a[k + 1 + k * a_dim1], &c__1, 
			    &b[k + 1 + k * b_dim1], &c__1, &a[k + 1 + (k + 1) 
			    * a_dim1], lda);
		    i__2 = *n - k;
		    caxpy_(&i__2, &ct, &b[k + 1 + k * b_dim1], &c__1, &a[k + 
			    1 + k * a_dim1], &c__1);
		    i__2 = *n - k;
		    ctrsv_(uplo, "No transpose", "Non-unit", &i__2, &b[k + 1 
			    + (k + 1) * b_dim1], ldb, &a[k + 1 + k * a_dim1], 
			    &c__1);
		}
/* L20: */
	    }
	}
    } else {
	if (upper) {

/*           Compute U*A*U' */

	    i__1 = *n;
	    for (k = 1; k <= i__1; ++k) {

/*              Update the upper triangle of A(1:k,1:k) */

		i__2 = k + k * a_dim1;
		akk = a[i__2].r;
		i__2 = k + k * b_dim1;
		bkk = b[i__2].r;
		i__2 = k - 1;
		ctrmv_(uplo, "No transpose", "Non-unit", &i__2, &b[b_offset], 
			ldb, &a[k * a_dim1 + 1], &c__1);
		r__1 = akk * .5f;
		ct.r = r__1, ct.i = 0.f;
		i__2 = k - 1;
		caxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + 
			1], &c__1);
		i__2 = k - 1;
		cher2_(uplo, &i__2, &c_b1, &a[k * a_dim1 + 1], &c__1, &b[k * 
			b_dim1 + 1], &c__1, &a[a_offset], lda);
		i__2 = k - 1;
		caxpy_(&i__2, &ct, &b[k * b_dim1 + 1], &c__1, &a[k * a_dim1 + 
			1], &c__1);
		i__2 = k - 1;
		csscal_(&i__2, &bkk, &a[k * a_dim1 + 1], &c__1);
		i__2 = k + k * a_dim1;
/* Computing 2nd power */
		r__2 = bkk;
		r__1 = akk * (r__2 * r__2);
		a[i__2].r = r__1, a[i__2].i = 0.f;
/* L30: */
	    }
	} else {

/*           Compute L'*A*L */

	    i__1 = *n;
	    for (k = 1; k <= i__1; ++k) {

/*              Update the lower triangle of A(1:k,1:k) */

		i__2 = k + k * a_dim1;
		akk = a[i__2].r;
		i__2 = k + k * b_dim1;
		bkk = b[i__2].r;
		i__2 = k - 1;
		clacgv_(&i__2, &a[k + a_dim1], lda);
		i__2 = k - 1;
		ctrmv_(uplo, "Conjugate transpose", "Non-unit", &i__2, &b[
			b_offset], ldb, &a[k + a_dim1], lda);
		r__1 = akk * .5f;
		ct.r = r__1, ct.i = 0.f;
		i__2 = k - 1;
		clacgv_(&i__2, &b[k + b_dim1], ldb);
		i__2 = k - 1;
		caxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda);
		i__2 = k - 1;
		cher2_(uplo, &i__2, &c_b1, &a[k + a_dim1], lda, &b[k + b_dim1]
, ldb, &a[a_offset], lda);
		i__2 = k - 1;
		caxpy_(&i__2, &ct, &b[k + b_dim1], ldb, &a[k + a_dim1], lda);
		i__2 = k - 1;
		clacgv_(&i__2, &b[k + b_dim1], ldb);
		i__2 = k - 1;
		csscal_(&i__2, &bkk, &a[k + a_dim1], lda);
		i__2 = k - 1;
		clacgv_(&i__2, &a[k + a_dim1], lda);
		i__2 = k + k * a_dim1;
/* Computing 2nd power */
		r__2 = bkk;
		r__1 = akk * (r__2 * r__2);
		a[i__2].r = r__1, a[i__2].i = 0.f;
/* L40: */
	    }
	}
    }
    return 0;

/*     End of CHEGS2 */

} /* chegs2_ */
Example #29
0
/* Subroutine */ int ctbt02_(char *uplo, char *trans, char *diag, integer *n, 
	integer *kd, integer *nrhs, complex *ab, integer *ldab, complex *x, 
	integer *ldx, complex *b, integer *ldb, complex *work, real *rwork, 
	real *resid)
{
    /* System generated locals */
    integer ab_dim1, ab_offset, b_dim1, b_offset, x_dim1, x_offset, i__1;
    real r__1, r__2;

    /* Local variables */
    integer j;
    real eps;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int ctbmv_(char *, char *, char *, integer *, 
	    integer *, complex *, integer *, complex *, integer *);
    real anorm, bnorm;
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
	    complex *, integer *), caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *);
    real xnorm;
    extern doublereal clantb_(char *, char *, char *, integer *, integer *, 
	    complex *, integer *, real *), slamch_(
	    char *), scasum_(integer *, complex *, integer *);


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CTBT02 computes the residual for the computed solution to a */
/*  triangular system of linear equations  A*x = b,  A**T *x = b,  or */
/*  A**H *x = b  when A is a triangular band matrix.  Here A**T denotes */
/*  the transpose of A, A**H denotes the conjugate transpose of A, and */
/*  x and b are N by NRHS matrices.  The test ratio is the maximum over */
/*  the number of right hand sides of */
/*     norm(b - op(A)*x) / ( norm(op(A)) * norm(x) * EPS ), */
/*  where op(A) denotes A, A**T, or A**H, and EPS is the machine epsilon. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the matrix A is upper or lower triangular. */
/*          = 'U':  Upper triangular */
/*          = 'L':  Lower triangular */

/*  TRANS   (input) CHARACTER*1 */
/*          Specifies the operation applied to A. */
/*          = 'N':  A *x = b     (No transpose) */
/*          = 'T':  A**T *x = b  (Transpose) */
/*          = 'C':  A**H *x = b  (Conjugate transpose) */

/*  DIAG    (input) CHARACTER*1 */
/*          Specifies whether or not the matrix A is unit triangular. */
/*          = 'N':  Non-unit triangular */
/*          = 'U':  Unit triangular */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  KD      (input) INTEGER */
/*          The number of superdiagonals or subdiagonals of the */
/*          triangular band matrix A.  KD >= 0. */

/*  NRHS    (input) INTEGER */
/*          The number of right hand sides, i.e., the number of columns */
/*          of the matrices X and B.  NRHS >= 0. */

/*  AB      (input) COMPLEX array, dimension (LDA,N) */
/*          The upper or lower triangular band matrix A, stored in the */
/*          first kd+1 rows of the array. The j-th column of A is stored */
/*          in the j-th column of the array AB as follows: */
/*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */
/*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd). */

/*  LDAB    (input) INTEGER */
/*          The leading dimension of the array AB.  LDAB >= max(1,KD+1). */

/*  X       (input) COMPLEX array, dimension (LDX,NRHS) */
/*          The computed solution vectors for the system of linear */
/*          equations. */

/*  LDX     (input) INTEGER */
/*          The leading dimension of the array X.  LDX >= max(1,N). */

/*  B       (input) COMPLEX array, dimension (LDB,NRHS) */
/*          The right hand side vectors for the system of linear */
/*          equations. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= max(1,N). */

/*  WORK    (workspace) COMPLEX array, dimension (N) */

/*  RWORK   (workspace) REAL array, dimension (N) */

/*  RESID   (output) REAL */
/*          The maximum over the number of right hand sides of */
/*          norm(op(A)*x - b) / ( norm(op(A)) * norm(x) * EPS ). */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Quick exit if N = 0 or NRHS = 0 */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1;
    ab -= ab_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --work;
    --rwork;

    /* Function Body */
    if (*n <= 0 || *nrhs <= 0) {
	*resid = 0.f;
	return 0;
    }

/*     Compute the 1-norm of A or A'. */

    if (lsame_(trans, "N")) {
	anorm = clantb_("1", uplo, diag, n, kd, &ab[ab_offset], ldab, &rwork[
		1]);
    } else {
	anorm = clantb_("I", uplo, diag, n, kd, &ab[ab_offset], ldab, &rwork[
		1]);
    }

/*     Exit with RESID = 1/EPS if ANORM = 0. */

    eps = slamch_("Epsilon");
    if (anorm <= 0.f) {
	*resid = 1.f / eps;
	return 0;
    }

/*     Compute the maximum over the number of right hand sides of */
/*        norm(op(A)*x - b) / ( norm(op(A)) * norm(x) * EPS ). */

    *resid = 0.f;
    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
	ccopy_(n, &x[j * x_dim1 + 1], &c__1, &work[1], &c__1);
	ctbmv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &work[1], &
		c__1);
	caxpy_(n, &c_b12, &b[j * b_dim1 + 1], &c__1, &work[1], &c__1);
	bnorm = scasum_(n, &work[1], &c__1);
	xnorm = scasum_(n, &x[j * x_dim1 + 1], &c__1);
	if (xnorm <= 0.f) {
	    *resid = 1.f / eps;
	} else {
/* Computing MAX */
	    r__1 = *resid, r__2 = bnorm / anorm / xnorm / eps;
	    *resid = dmax(r__1,r__2);
	}
/* L10: */
    }

    return 0;

/*     End of CTBT02 */

} /* ctbt02_ */
Example #30
0
/*<       subroutine csvdc(x,ldx,n,p,s,e,u,ldu,v,ldv,work,job,info) >*/
/* Subroutine */ int csvdc_(complex *x, integer *ldx, integer *n, integer *p, 
        complex *s, complex *e, complex *u, integer *ldu, complex *v, integer 
        *ldv, complex *work, integer *job, integer *info)
{
    /* System generated locals */
    integer x_dim1, x_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2, 
            i__3, i__4;
    real r__1, r__2, r__3, r__4;
    complex q__1, q__2, q__3;

    /* Builtin functions */
    double r_imag(complex *), c_abs(complex *);
    void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *);
    double sqrt(doublereal);

    /* Local variables */
    real b, c__, f, g;
    integer i__, j, k, l=0, m;
    complex r__, t;
    real t1, el;
    integer kk;
    real cs;
    integer ll, mm, ls=0;
    real sl;
    integer lu;
    real sm, sn;
    integer lm1, mm1, lp1, mp1, nct, ncu, lls, nrt;
    real emm1, smm1;
    integer kase, jobu, iter;
    real test;
    integer nctp1, nrtp1;
    extern /* Subroutine */ int cscal_(integer *, complex *, complex *, 
            integer *);
    real scale;
    extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer 
            *, complex *, integer *);
    real shift;
    extern /* Subroutine */ int cswap_(integer *, complex *, integer *, 
            complex *, integer *);
    integer maxit;
    extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, 
            integer *, complex *, integer *), csrot_(integer *, complex *, 
            integer *, complex *, integer *, real *, real *);
    logical wantu, wantv;
    extern /* Subroutine */ int srotg_(real *, real *, real *, real *);
    real ztest;
    extern doublereal scnrm2_(integer *, complex *, integer *);

/*<       integer ldx,n,p,ldu,ldv,job,info >*/
/*<       complex x(ldx,1),s(1),e(1),u(ldu,1),v(ldv,1),work(1) >*/


/*     csvdc is a subroutine to reduce a complex nxp matrix x by */
/*     unitary transformations u and v to diagonal form.  the */
/*     diagonal elements s(i) are the singular values of x.  the */
/*     columns of u are the corresponding left singular vectors, */
/*     and the columns of v the right singular vectors. */

/*     on entry */

/*         x         complex(ldx,p), where ldx.ge.n. */
/*                   x contains the matrix whose singular value */
/*                   decomposition is to be computed.  x is */
/*                   destroyed by csvdc. */

/*         ldx       integer. */
/*                   ldx is the leading dimension of the array x. */

/*         n         integer. */
/*                   n is the number of rows of the matrix x. */

/*         p         integer. */
/*                   p is the number of columns of the matrix x. */

/*         ldu       integer. */
/*                   ldu is the leading dimension of the array u */
/*                   (see below). */

/*         ldv       integer. */
/*                   ldv is the leading dimension of the array v */
/*                   (see below). */

/*         work      complex(n). */
/*                   work is a scratch array. */

/*         job       integer. */
/*                   job controls the computation of the singular */
/*                   vectors.  it has the decimal expansion ab */
/*                   with the following meaning */

/*                        a.eq.0    do not compute the left singular */
/*                                  vectors. */
/*                        a.eq.1    return the n left singular vectors */
/*                                  in u. */
/*                        a.ge.2    returns the first min(n,p) */
/*                                  left singular vectors in u. */
/*                        b.eq.0    do not compute the right singular */
/*                                  vectors. */
/*                        b.eq.1    return the right singular vectors */
/*                                  in v. */

/*     on return */

/*         s         complex(mm), where mm=min(n+1,p). */
/*                   the first min(n,p) entries of s contain the */
/*                   singular values of x arranged in descending */
/*                   order of magnitude. */

/*         e         complex(p). */
/*                   e ordinarily contains zeros.  however see the */
/*                   discussion of info for exceptions. */

/*         u         complex(ldu,k), where ldu.ge.n.  if joba.eq.1 then */
/*                                   k.eq.n, if joba.ge.2 then */
/*                                   k.eq.min(n,p). */
/*                   u contains the matrix of left singular vectors. */
/*                   u is not referenced if joba.eq.0.  if n.le.p */
/*                   or if joba.gt.2, then u may be identified with x */
/*                   in the subroutine call. */

/*         v         complex(ldv,p), where ldv.ge.p. */
/*                   v contains the matrix of right singular vectors. */
/*                   v is not referenced if jobb.eq.0.  if p.le.n, */
/*                   then v may be identified whth x in the */
/*                   subroutine call. */

/*         info      integer. */
/*                   the singular values (and their corresponding */
/*                   singular vectors) s(info+1),s(info+2),...,s(m) */
/*                   are correct (here m=min(n,p)).  thus if */
/*                   info.eq.0, all the singular values and their */
/*                   vectors are correct.  in any event, the matrix */
/*                   b = ctrans(u)*x*v is the bidiagonal matrix */
/*                   with the elements of s on its diagonal and the */
/*                   elements of e on its super-diagonal (ctrans(u) */
/*                   is the conjugate-transpose of u).  thus the */
/*                   singular values of x and b are the same. */

/*     linpack. this version dated 03/19/79 . */
/*              correction to shift calculation made 2/85. */
/*     g.w. stewart, university of maryland, argonne national lab. */

/*     csvdc uses the following functions and subprograms. */

/*     external csrot */
/*     blas caxpy,cdotc,cscal,cswap,scnrm2,srotg */
/*     fortran abs,aimag,amax1,cabs,cmplx */
/*     fortran conjg,max0,min0,mod,real,sqrt */

/*     internal variables */

/*<    >*/
/*<       complex cdotc,t,r >*/
/*<    >*/
/*<       logical wantu,wantv >*/

/*<       complex csign,zdum,zdum1,zdum2 >*/
/*<       real cabs1 >*/
/*<       cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum)) >*/
/*<       csign(zdum1,zdum2) = cabs(zdum1)*(zdum2/cabs(zdum2)) >*/

/*     set the maximum number of iterations. */

/*<       maxit = 1000 >*/
    /* Parameter adjustments */
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    --s;
    --e;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1;
    u -= u_offset;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    --work;

    /* Function Body */
    maxit = 1000;

/*     determine what is to be computed. */

/*<       wantu = .false. >*/
    wantu = FALSE_;
/*<       wantv = .false. >*/
    wantv = FALSE_;
/*<       jobu = mod(job,100)/10 >*/
    jobu = *job % 100 / 10;
/*<       ncu = n >*/
    ncu = *n;
/*<       if (jobu .gt. 1) ncu = min0(n,p) >*/
    if (jobu > 1) {
        ncu = min(*n,*p);
    }
/*<       if (jobu .ne. 0) wantu = .true. >*/
    if (jobu != 0) {
        wantu = TRUE_;
    }
/*<       if (mod(job,10) .ne. 0) wantv = .true. >*/
    if (*job % 10 != 0) {
        wantv = TRUE_;
    }

/*     reduce x to bidiagonal form, storing the diagonal elements */
/*     in s and the super-diagonal elements in e. */

/*<       info = 0 >*/
    *info = 0;
/*<       nct = min0(n-1,p) >*/
/* Computing MIN */
    i__1 = *n - 1;
    nct = min(i__1,*p);
/*<       nrt = max0(0,min0(p-2,n)) >*/
/* Computing MAX */
/* Computing MIN */
    i__3 = *p - 2;
    i__1 = 0, i__2 = min(i__3,*n);
    nrt = max(i__1,i__2);
/*<       lu = max0(nct,nrt) >*/
    lu = max(nct,nrt);
/*<       if (lu .lt. 1) go to 170 >*/
    if (lu < 1) {
        goto L170;
    }
/*<       do 160 l = 1, lu >*/
    i__1 = lu;
    for (l = 1; l <= i__1; ++l) {
/*<          lp1 = l + 1 >*/
        lp1 = l + 1;
/*<          if (l .gt. nct) go to 20 >*/
        if (l > nct) {
            goto L20;
        }

/*           compute the transformation for the l-th column and */
/*           place the l-th diagonal in s(l). */

/*<             s(l) = cmplx(scnrm2(n-l+1,x(l,l),1),0.0e0) >*/
        i__2 = l;
        i__3 = *n - l + 1;
        r__1 = scnrm2_(&i__3, &x[l + l * x_dim1], &c__1);
        q__1.r = r__1, q__1.i = (float)0.;
        s[i__2].r = q__1.r, s[i__2].i = q__1.i;
/*<             if (cabs1(s(l)) .eq. 0.0e0) go to 10 >*/
        i__2 = l;
        if ((r__1 = s[i__2].r, dabs(r__1)) + (r__2 = r_imag(&s[l]), dabs(r__2)
                ) == (float)0.) {
            goto L10;
        }
/*<                if (cabs1(x(l,l)) .ne. 0.0e0) s(l) = csign(s(l),x(l,l)) >*/
        i__2 = l + l * x_dim1;
        if ((r__1 = x[i__2].r, dabs(r__1)) + (r__2 = r_imag(&x[l + l * x_dim1]
                ), dabs(r__2)) != (float)0.) {
            i__3 = l;
            r__3 = c_abs(&s[l]);
            i__4 = l + l * x_dim1;
            r__4 = c_abs(&x[l + l * x_dim1]);
            q__2.r = x[i__4].r / r__4, q__2.i = x[i__4].i / r__4;
            q__1.r = r__3 * q__2.r, q__1.i = r__3 * q__2.i;
            s[i__3].r = q__1.r, s[i__3].i = q__1.i;
        }
/*<                call cscal(n-l+1,1.0e0/s(l),x(l,l),1) >*/
        i__2 = *n - l + 1;
        c_div(&q__1, &c_b8, &s[l]);
        cscal_(&i__2, &q__1, &x[l + l * x_dim1], &c__1);
/*<                x(l,l) = (1.0e0,0.0e0) + x(l,l) >*/
        i__2 = l + l * x_dim1;
        i__3 = l + l * x_dim1;
        q__1.r = x[i__3].r + (float)1., q__1.i = x[i__3].i + (float)0.;
        x[i__2].r = q__1.r, x[i__2].i = q__1.i;
/*<    10       continue >*/
L10:
/*<             s(l) = -s(l) >*/
        i__2 = l;
        i__3 = l;
        q__1.r = -s[i__3].r, q__1.i = -s[i__3].i;
        s[i__2].r = q__1.r, s[i__2].i = q__1.i;
/*<    20    continue >*/
L20:
/*<          if (p .lt. lp1) go to 50 >*/
        if (*p < lp1) {
            goto L50;
        }
/*<          do 40 j = lp1, p >*/
        i__2 = *p;
        for (j = lp1; j <= i__2; ++j) {
/*<             if (l .gt. nct) go to 30 >*/
            if (l > nct) {
                goto L30;
            }
/*<             if (cabs1(s(l)) .eq. 0.0e0) go to 30 >*/
            i__3 = l;
            if ((r__1 = s[i__3].r, dabs(r__1)) + (r__2 = r_imag(&s[l]), dabs(
                    r__2)) == (float)0.) {
                goto L30;
            }

/*              apply the transformation. */

/*<                t = -cdotc(n-l+1,x(l,l),1,x(l,j),1)/x(l,l) >*/
            i__3 = *n - l + 1;
            cdotc_(&q__3, &i__3, &x[l + l * x_dim1], &c__1, &x[l + j * x_dim1]
                    , &c__1);
            q__2.r = -q__3.r, q__2.i = -q__3.i;
            c_div(&q__1, &q__2, &x[l + l * x_dim1]);
            t.r = q__1.r, t.i = q__1.i;
/*<                call caxpy(n-l+1,t,x(l,l),1,x(l,j),1) >*/
            i__3 = *n - l + 1;
            caxpy_(&i__3, &t, &x[l + l * x_dim1], &c__1, &x[l + j * x_dim1], &
                    c__1);
/*<    30       continue >*/
L30:

/*           place the l-th row of x into  e for the */
/*           subsequent calculation of the row transformation. */

/*<             e(j) = conjg(x(l,j)) >*/
            i__3 = j;
            r_cnjg(&q__1, &x[l + j * x_dim1]);
            e[i__3].r = q__1.r, e[i__3].i = q__1.i;
/*<    40    continue >*/
/* L40: */
        }
/*<    50    continue >*/
L50:
/*<          if (.not.wantu .or. l .gt. nct) go to 70 >*/
        if (! wantu || l > nct) {
            goto L70;
        }

/*           place the transformation in u for subsequent back */
/*           multiplication. */

/*<             do 60 i = l, n >*/
        i__2 = *n;
        for (i__ = l; i__ <= i__2; ++i__) {
/*<                u(i,l) = x(i,l) >*/
            i__3 = i__ + l * u_dim1;
            i__4 = i__ + l * x_dim1;
            u[i__3].r = x[i__4].r, u[i__3].i = x[i__4].i;
/*<    60       continue >*/
/* L60: */
        }
/*<    70    continue >*/
L70:
/*<          if (l .gt. nrt) go to 150 >*/
        if (l > nrt) {
            goto L150;
        }

/*           compute the l-th row transformation and place the */
/*           l-th super-diagonal in e(l). */

/*<             e(l) = cmplx(scnrm2(p-l,e(lp1),1),0.0e0) >*/
        i__2 = l;
        i__3 = *p - l;
        r__1 = scnrm2_(&i__3, &e[lp1], &c__1);
        q__1.r = r__1, q__1.i = (float)0.;
        e[i__2].r = q__1.r, e[i__2].i = q__1.i;
/*<             if (cabs1(e(l)) .eq. 0.0e0) go to 80 >*/
        i__2 = l;
        if ((r__1 = e[i__2].r, dabs(r__1)) + (r__2 = r_imag(&e[l]), dabs(r__2)
                ) == (float)0.) {
            goto L80;
        }
/*<                if (cabs1(e(lp1)) .ne. 0.0e0) e(l) = csign(e(l),e(lp1)) >*/
        i__2 = lp1;
        if ((r__1 = e[i__2].r, dabs(r__1)) + (r__2 = r_imag(&e[lp1]), dabs(
                r__2)) != (float)0.) {
            i__3 = l;
            r__3 = c_abs(&e[l]);
            i__4 = lp1;
            r__4 = c_abs(&e[lp1]);
            q__2.r = e[i__4].r / r__4, q__2.i = e[i__4].i / r__4;
            q__1.r = r__3 * q__2.r, q__1.i = r__3 * q__2.i;
            e[i__3].r = q__1.r, e[i__3].i = q__1.i;
        }
/*<                call cscal(p-l,1.0e0/e(l),e(lp1),1) >*/
        i__2 = *p - l;
        c_div(&q__1, &c_b8, &e[l]);
        cscal_(&i__2, &q__1, &e[lp1], &c__1);
/*<                e(lp1) = (1.0e0,0.0e0) + e(lp1) >*/
        i__2 = lp1;
        i__3 = lp1;
        q__1.r = e[i__3].r + (float)1., q__1.i = e[i__3].i + (float)0.;
        e[i__2].r = q__1.r, e[i__2].i = q__1.i;
/*<    80       continue >*/
L80:
/*<             e(l) = -conjg(e(l)) >*/
        i__2 = l;
        r_cnjg(&q__2, &e[l]);
        q__1.r = -q__2.r, q__1.i = -q__2.i;
        e[i__2].r = q__1.r, e[i__2].i = q__1.i;
/*<             if (lp1 .gt. n .or. cabs1(e(l)) .eq. 0.0e0) go to 120 >*/
        i__2 = l;
        if (lp1 > *n || (r__1 = e[i__2].r, dabs(r__1)) + (r__2 = r_imag(&e[l])
                , dabs(r__2)) == (float)0.) {
            goto L120;
        }

/*              apply the transformation. */

/*<                do 90 i = lp1, n >*/
        i__2 = *n;
        for (i__ = lp1; i__ <= i__2; ++i__) {
/*<                   work(i) = (0.0e0,0.0e0) >*/
            i__3 = i__;
            work[i__3].r = (float)0., work[i__3].i = (float)0.;
/*<    90          continue >*/
/* L90: */
        }
/*<                do 100 j = lp1, p >*/
        i__2 = *p;
        for (j = lp1; j <= i__2; ++j) {
/*<                   call caxpy(n-l,e(j),x(lp1,j),1,work(lp1),1) >*/
            i__3 = *n - l;
            caxpy_(&i__3, &e[j], &x[lp1 + j * x_dim1], &c__1, &work[lp1], &
                    c__1);
/*<   100          continue >*/
/* L100: */
        }
/*<                do 110 j = lp1, p >*/
        i__2 = *p;
        for (j = lp1; j <= i__2; ++j) {
/*<    >*/
            i__3 = *n - l;
            i__4 = j;
            q__3.r = -e[i__4].r, q__3.i = -e[i__4].i;
            c_div(&q__2, &q__3, &e[lp1]);
            r_cnjg(&q__1, &q__2);
            caxpy_(&i__3, &q__1, &work[lp1], &c__1, &x[lp1 + j * x_dim1], &
                    c__1);
/*<   110          continue >*/
/* L110: */
        }
/*<   120       continue >*/
L120:
/*<             if (.not.wantv) go to 140 >*/
        if (! wantv) {
            goto L140;
        }

/*              place the transformation in v for subsequent */
/*              back multiplication. */

/*<                do 130 i = lp1, p >*/
        i__2 = *p;
        for (i__ = lp1; i__ <= i__2; ++i__) {
/*<                   v(i,l) = e(i) >*/
            i__3 = i__ + l * v_dim1;
            i__4 = i__;
            v[i__3].r = e[i__4].r, v[i__3].i = e[i__4].i;
/*<   130          continue >*/
/* L130: */
        }
/*<   140       continue >*/
L140:
/*<   150    continue >*/
L150:
/*<   160 continue >*/
/* L160: */
        ;
    }
/*<   170 continue >*/
L170:

/*     set up the final bidiagonal matrix or order m. */

/*<       m = min0(p,n+1) >*/
/* Computing MIN */
    i__1 = *p, i__2 = *n + 1;
    m = min(i__1,i__2);
/*<       nctp1 = nct + 1 >*/
    nctp1 = nct + 1;
/*<       nrtp1 = nrt + 1 >*/
    nrtp1 = nrt + 1;
/*<       if (nct .lt. p) s(nctp1) = x(nctp1,nctp1) >*/
    if (nct < *p) {
        i__1 = nctp1;
        i__2 = nctp1 + nctp1 * x_dim1;
        s[i__1].r = x[i__2].r, s[i__1].i = x[i__2].i;
    }
/*<       if (n .lt. m) s(m) = (0.0e0,0.0e0) >*/
    if (*n < m) {
        i__1 = m;
        s[i__1].r = (float)0., s[i__1].i = (float)0.;
    }
/*<       if (nrtp1 .lt. m) e(nrtp1) = x(nrtp1,m) >*/
    if (nrtp1 < m) {
        i__1 = nrtp1;
        i__2 = nrtp1 + m * x_dim1;
        e[i__1].r = x[i__2].r, e[i__1].i = x[i__2].i;
    }
/*<       e(m) = (0.0e0,0.0e0) >*/
    i__1 = m;
    e[i__1].r = (float)0., e[i__1].i = (float)0.;

/*     if required, generate u. */

/*<       if (.not.wantu) go to 300 >*/
    if (! wantu) {
        goto L300;
    }
/*<          if (ncu .lt. nctp1) go to 200 >*/
    if (ncu < nctp1) {
        goto L200;
    }
/*<          do 190 j = nctp1, ncu >*/
    i__1 = ncu;
    for (j = nctp1; j <= i__1; ++j) {
/*<             do 180 i = 1, n >*/
        i__2 = *n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/*<                u(i,j) = (0.0e0,0.0e0) >*/
            i__3 = i__ + j * u_dim1;
            u[i__3].r = (float)0., u[i__3].i = (float)0.;
/*<   180       continue >*/
/* L180: */
        }
/*<             u(j,j) = (1.0e0,0.0e0) >*/
        i__2 = j + j * u_dim1;
        u[i__2].r = (float)1., u[i__2].i = (float)0.;
/*<   190    continue >*/
/* L190: */
    }
/*<   200    continue >*/
L200:
/*<          if (nct .lt. 1) go to 290 >*/
    if (nct < 1) {
        goto L290;
    }
/*<          do 280 ll = 1, nct >*/
    i__1 = nct;
    for (ll = 1; ll <= i__1; ++ll) {
/*<             l = nct - ll + 1 >*/
        l = nct - ll + 1;
/*<             if (cabs1(s(l)) .eq. 0.0e0) go to 250 >*/
        i__2 = l;
        if ((r__1 = s[i__2].r, dabs(r__1)) + (r__2 = r_imag(&s[l]), dabs(r__2)
                ) == (float)0.) {
            goto L250;
        }
/*<                lp1 = l + 1 >*/
        lp1 = l + 1;
/*<                if (ncu .lt. lp1) go to 220 >*/
        if (ncu < lp1) {
            goto L220;
        }
/*<                do 210 j = lp1, ncu >*/
        i__2 = ncu;
        for (j = lp1; j <= i__2; ++j) {
/*<                   t = -cdotc(n-l+1,u(l,l),1,u(l,j),1)/u(l,l) >*/
            i__3 = *n - l + 1;
            cdotc_(&q__3, &i__3, &u[l + l * u_dim1], &c__1, &u[l + j * u_dim1]
                    , &c__1);
            q__2.r = -q__3.r, q__2.i = -q__3.i;
            c_div(&q__1, &q__2, &u[l + l * u_dim1]);
            t.r = q__1.r, t.i = q__1.i;
/*<                   call caxpy(n-l+1,t,u(l,l),1,u(l,j),1) >*/
            i__3 = *n - l + 1;
            caxpy_(&i__3, &t, &u[l + l * u_dim1], &c__1, &u[l + j * u_dim1], &
                    c__1);
/*<   210          continue >*/
/* L210: */
        }
/*<   220          continue >*/
L220:
/*<                call cscal(n-l+1,(-1.0e0,0.0e0),u(l,l),1) >*/
        i__2 = *n - l + 1;
        cscal_(&i__2, &c_b53, &u[l + l * u_dim1], &c__1);
/*<                u(l,l) = (1.0e0,0.0e0) + u(l,l) >*/
        i__2 = l + l * u_dim1;
        i__3 = l + l * u_dim1;
        q__1.r = u[i__3].r + (float)1., q__1.i = u[i__3].i + (float)0.;
        u[i__2].r = q__1.r, u[i__2].i = q__1.i;
/*<                lm1 = l - 1 >*/
        lm1 = l - 1;
/*<                if (lm1 .lt. 1) go to 240 >*/
        if (lm1 < 1) {
            goto L240;
        }
/*<                do 230 i = 1, lm1 >*/
        i__2 = lm1;
        for (i__ = 1; i__ <= i__2; ++i__) {
/*<                   u(i,l) = (0.0e0,0.0e0) >*/
            i__3 = i__ + l * u_dim1;
            u[i__3].r = (float)0., u[i__3].i = (float)0.;
/*<   230          continue >*/
/* L230: */
        }
/*<   240          continue >*/
L240:
/*<             go to 270 >*/
        goto L270;
/*<   250       continue >*/
L250:
/*<                do 260 i = 1, n >*/
        i__2 = *n;
        for (i__ = 1; i__ <= i__2; ++i__) {
/*<                   u(i,l) = (0.0e0,0.0e0) >*/
            i__3 = i__ + l * u_dim1;
            u[i__3].r = (float)0., u[i__3].i = (float)0.;
/*<   260          continue >*/
/* L260: */
        }
/*<                u(l,l) = (1.0e0,0.0e0) >*/
        i__2 = l + l * u_dim1;
        u[i__2].r = (float)1., u[i__2].i = (float)0.;
/*<   270       continue >*/
L270:
/*<   280    continue >*/
/* L280: */
        ;
    }
/*<   290    continue >*/
L290:
/*<   300 continue >*/
L300:

/*     if it is required, generate v. */

/*<       if (.not.wantv) go to 350 >*/
    if (! wantv) {
        goto L350;
    }
/*<          do 340 ll = 1, p >*/
    i__1 = *p;
    for (ll = 1; ll <= i__1; ++ll) {
/*<             l = p - ll + 1 >*/
        l = *p - ll + 1;
/*<             lp1 = l + 1 >*/
        lp1 = l + 1;
/*<             if (l .gt. nrt) go to 320 >*/
        if (l > nrt) {
            goto L320;
        }
/*<             if (cabs1(e(l)) .eq. 0.0e0) go to 320 >*/
        i__2 = l;
        if ((r__1 = e[i__2].r, dabs(r__1)) + (r__2 = r_imag(&e[l]), dabs(r__2)
                ) == (float)0.) {
            goto L320;
        }
/*<                do 310 j = lp1, p >*/
        i__2 = *p;
        for (j = lp1; j <= i__2; ++j) {
/*<                   t = -cdotc(p-l,v(lp1,l),1,v(lp1,j),1)/v(lp1,l) >*/
            i__3 = *p - l;
            cdotc_(&q__3, &i__3, &v[lp1 + l * v_dim1], &c__1, &v[lp1 + j * 
                    v_dim1], &c__1);
            q__2.r = -q__3.r, q__2.i = -q__3.i;
            c_div(&q__1, &q__2, &v[lp1 + l * v_dim1]);
            t.r = q__1.r, t.i = q__1.i;
/*<                   call caxpy(p-l,t,v(lp1,l),1,v(lp1,j),1) >*/
            i__3 = *p - l;
            caxpy_(&i__3, &t, &v[lp1 + l * v_dim1], &c__1, &v[lp1 + j * 
                    v_dim1], &c__1);
/*<   310          continue >*/
/* L310: */
        }
/*<   320       continue >*/
L320:
/*<             do 330 i = 1, p >*/
        i__2 = *p;
        for (i__ = 1; i__ <= i__2; ++i__) {
/*<                v(i,l) = (0.0e0,0.0e0) >*/
            i__3 = i__ + l * v_dim1;
            v[i__3].r = (float)0., v[i__3].i = (float)0.;
/*<   330       continue >*/
/* L330: */
        }
/*<             v(l,l) = (1.0e0,0.0e0) >*/
        i__2 = l + l * v_dim1;
        v[i__2].r = (float)1., v[i__2].i = (float)0.;
/*<   340    continue >*/
/* L340: */
    }
/*<   350 continue >*/
L350:

/*     transform s and e so that they are real. */

/*<       do 380 i = 1, m >*/
    i__1 = m;
    for (i__ = 1; i__ <= i__1; ++i__) {
/*<          if (cabs1(s(i)) .eq. 0.0e0) go to 360 >*/
        i__2 = i__;
        if ((r__1 = s[i__2].r, dabs(r__1)) + (r__2 = r_imag(&s[i__]), dabs(
                r__2)) == (float)0.) {
            goto L360;
        }
/*<             t = cmplx(cabs(s(i)),0.0e0) >*/
        r__1 = c_abs(&s[i__]);
        q__1.r = r__1, q__1.i = (float)0.;
        t.r = q__1.r, t.i = q__1.i;
/*<             r = s(i)/t >*/
        c_div(&q__1, &s[i__], &t);
        r__.r = q__1.r, r__.i = q__1.i;
/*<             s(i) = t >*/
        i__2 = i__;
        s[i__2].r = t.r, s[i__2].i = t.i;
/*<             if (i .lt. m) e(i) = e(i)/r >*/
        if (i__ < m) {
            i__2 = i__;
            c_div(&q__1, &e[i__], &r__);
            e[i__2].r = q__1.r, e[i__2].i = q__1.i;
        }
/*<             if (wantu) call cscal(n,r,u(1,i),1) >*/
        if (wantu) {
            cscal_(n, &r__, &u[i__ * u_dim1 + 1], &c__1);
        }
/*<   360    continue >*/
L360:
/*     ...exit */
/*<          if (i .eq. m) go to 390 >*/
        if (i__ == m) {
            goto L390;
        }
/*<          if (cabs1(e(i)) .eq. 0.0e0) go to 370 >*/
        i__2 = i__;
        if ((r__1 = e[i__2].r, dabs(r__1)) + (r__2 = r_imag(&e[i__]), dabs(
                r__2)) == (float)0.) {
            goto L370;
        }
/*<             t = cmplx(cabs(e(i)),0.0e0) >*/
        r__1 = c_abs(&e[i__]);
        q__1.r = r__1, q__1.i = (float)0.;
        t.r = q__1.r, t.i = q__1.i;
/*<             r = t/e(i) >*/
        c_div(&q__1, &t, &e[i__]);
        r__.r = q__1.r, r__.i = q__1.i;
/*<             e(i) = t >*/
        i__2 = i__;
        e[i__2].r = t.r, e[i__2].i = t.i;
/*<             s(i+1) = s(i+1)*r >*/
        i__2 = i__ + 1;
        i__3 = i__ + 1;
        q__1.r = s[i__3].r * r__.r - s[i__3].i * r__.i, q__1.i = s[i__3].r * 
                r__.i + s[i__3].i * r__.r;
        s[i__2].r = q__1.r, s[i__2].i = q__1.i;
/*<             if (wantv) call cscal(p,r,v(1,i+1),1) >*/
        if (wantv) {
            cscal_(p, &r__, &v[(i__ + 1) * v_dim1 + 1], &c__1);
        }
/*<   370    continue >*/
L370:
/*<   380 continue >*/
/* L380: */
        ;
    }
/*<   390 continue >*/
L390:

/*     main iteration loop for the singular values. */

/*<       mm = m >*/
    mm = m;
/*<       iter = 0 >*/
    iter = 0;
/*<   400 continue >*/
L400:

/*        quit if all the singular values have been found. */

/*     ...exit */
/*<          if (m .eq. 0) go to 660 >*/
    if (m == 0) {
        goto L660;
    }

/*        if too many iterations have been performed, set */
/*        flag and return. */

/*<          if (iter .lt. maxit) go to 410 >*/
    if (iter < maxit) {
        goto L410;
    }
/*<             info = m >*/
    *info = m;
/*     ......exit */
/*<             go to 660 >*/
    goto L660;
/*<   410    continue >*/
L410:

/*        this section of the program inspects for */
/*        negligible elements in the s and e arrays.  on */
/*        completion the variables kase and l are set as follows. */

/*           kase = 1     if s(m) and e(l-1) are negligible and l.lt.m */
/*           kase = 2     if s(l) is negligible and l.lt.m */
/*           kase = 3     if e(l-1) is negligible, l.lt.m, and */
/*                        s(l), ..., s(m) are not negligible (qr step). */
/*           kase = 4     if e(m-1) is negligible (convergence). */

/*<          do 430 ll = 1, m >*/
    i__1 = m;
    for (ll = 1; ll <= i__1; ++ll) {
/*<             l = m - ll >*/
        l = m - ll;
/*        ...exit */
/*<             if (l .eq. 0) go to 440 >*/
        if (l == 0) {
            goto L440;
        }
/*<             test = cabs(s(l)) + cabs(s(l+1)) >*/
        test = c_abs(&s[l]) + c_abs(&s[l + 1]);
/*<             ztest = test + cabs(e(l)) >*/
        ztest = test + c_abs(&e[l]);
/*<             if (ztest .ne. test) go to 420 >*/
        if (ztest != test) {
            goto L420;
        }
/*<                e(l) = (0.0e0,0.0e0) >*/
        i__2 = l;
        e[i__2].r = (float)0., e[i__2].i = (float)0.;
/*        ......exit */
/*<                go to 440 >*/
        goto L440;
/*<   420       continue >*/
L420:
/*<   430    continue >*/
/* L430: */
        ;
    }
/*<   440    continue >*/
L440:
/*<          if (l .ne. m - 1) go to 450 >*/
    if (l != m - 1) {
        goto L450;
    }
/*<             kase = 4 >*/
    kase = 4;
/*<          go to 520 >*/
    goto L520;
/*<   450    continue >*/
L450:
/*<             lp1 = l + 1 >*/
    lp1 = l + 1;
/*<             mp1 = m + 1 >*/
    mp1 = m + 1;
/*<             do 470 lls = lp1, mp1 >*/
    i__1 = mp1;
    for (lls = lp1; lls <= i__1; ++lls) {
/*<                ls = m - lls + lp1 >*/
        ls = m - lls + lp1;
/*           ...exit */
/*<                if (ls .eq. l) go to 480 >*/
        if (ls == l) {
            goto L480;
        }
/*<                test = 0.0e0 >*/
        test = (float)0.;
/*<                if (ls .ne. m) test = test + cabs(e(ls)) >*/
        if (ls != m) {
            test += c_abs(&e[ls]);
        }
/*<                if (ls .ne. l + 1) test = test + cabs(e(ls-1)) >*/
        if (ls != l + 1) {
            test += c_abs(&e[ls - 1]);
        }
/*<                ztest = test + cabs(s(ls)) >*/
        ztest = test + c_abs(&s[ls]);
/*<                if (ztest .ne. test) go to 460 >*/
        if (ztest != test) {
            goto L460;
        }
/*<                   s(ls) = (0.0e0,0.0e0) >*/
        i__2 = ls;
        s[i__2].r = (float)0., s[i__2].i = (float)0.;
/*           ......exit */
/*<                   go to 480 >*/
        goto L480;
/*<   460          continue >*/
L460:
/*<   470       continue >*/
/* L470: */
        ;
    }
/*<   480       continue >*/
L480:
/*<             if (ls .ne. l) go to 490 >*/
    if (ls != l) {
        goto L490;
    }
/*<                kase = 3 >*/
    kase = 3;
/*<             go to 510 >*/
    goto L510;
/*<   490       continue >*/
L490:
/*<             if (ls .ne. m) go to 500 >*/
    if (ls != m) {
        goto L500;
    }
/*<                kase = 1 >*/
    kase = 1;
/*<             go to 510 >*/
    goto L510;
/*<   500       continue >*/
L500:
/*<                kase = 2 >*/
    kase = 2;
/*<                l = ls >*/
    l = ls;
/*<   510       continue >*/
L510:
/*<   520    continue >*/
L520:
/*<          l = l + 1 >*/
    ++l;

/*        perform the task indicated by kase. */

/*<          go to (530, 560, 580, 610), kase >*/
    switch (kase) {
        case 1:  goto L530;
        case 2:  goto L560;
        case 3:  goto L580;
        case 4:  goto L610;
    }

/*        deflate negligible s(m). */

/*<   530    continue >*/
L530:
/*<             mm1 = m - 1 >*/
    mm1 = m - 1;
/*<             f = real(e(m-1)) >*/
    i__1 = m - 1;
    f = e[i__1].r;
/*<             e(m-1) = (0.0e0,0.0e0) >*/
    i__1 = m - 1;
    e[i__1].r = (float)0., e[i__1].i = (float)0.;
/*<             do 550 kk = l, mm1 >*/
    i__1 = mm1;
    for (kk = l; kk <= i__1; ++kk) {
/*<                k = mm1 - kk + l >*/
        k = mm1 - kk + l;
/*<                t1 = real(s(k)) >*/
        i__2 = k;
        t1 = s[i__2].r;
/*<                call srotg(t1,f,cs,sn) >*/
        srotg_(&t1, &f, &cs, &sn);
/*<                s(k) = cmplx(t1,0.0e0) >*/
        i__2 = k;
        q__1.r = t1, q__1.i = (float)0.;
        s[i__2].r = q__1.r, s[i__2].i = q__1.i;
/*<                if (k .eq. l) go to 540 >*/
        if (k == l) {
            goto L540;
        }
/*<                   f = -sn*real(e(k-1)) >*/
        i__2 = k - 1;
        f = -sn * e[i__2].r;
/*<                   e(k-1) = cs*e(k-1) >*/
        i__2 = k - 1;
        i__3 = k - 1;
        q__1.r = cs * e[i__3].r, q__1.i = cs * e[i__3].i;
        e[i__2].r = q__1.r, e[i__2].i = q__1.i;
/*<   540          continue >*/
L540:
/*<                if (wantv) call csrot(p,v(1,k),1,v(1,m),1,cs,sn) >*/
        if (wantv) {
            csrot_(p, &v[k * v_dim1 + 1], &c__1, &v[m * v_dim1 + 1], &c__1, &
                    cs, &sn);
        }
/*<   550       continue >*/
/* L550: */
    }
/*<          go to 650 >*/
    goto L650;

/*        split at negligible s(l). */

/*<   560    continue >*/
L560:
/*<             f = real(e(l-1)) >*/
    i__1 = l - 1;
    f = e[i__1].r;
/*<             e(l-1) = (0.0e0,0.0e0) >*/
    i__1 = l - 1;
    e[i__1].r = (float)0., e[i__1].i = (float)0.;
/*<             do 570 k = l, m >*/
    i__1 = m;
    for (k = l; k <= i__1; ++k) {
/*<                t1 = real(s(k)) >*/
        i__2 = k;
        t1 = s[i__2].r;
/*<                call srotg(t1,f,cs,sn) >*/
        srotg_(&t1, &f, &cs, &sn);
/*<                s(k) = cmplx(t1,0.0e0) >*/
        i__2 = k;
        q__1.r = t1, q__1.i = (float)0.;
        s[i__2].r = q__1.r, s[i__2].i = q__1.i;
/*<                f = -sn*real(e(k)) >*/
        i__2 = k;
        f = -sn * e[i__2].r;
/*<                e(k) = cs*e(k) >*/
        i__2 = k;
        i__3 = k;
        q__1.r = cs * e[i__3].r, q__1.i = cs * e[i__3].i;
        e[i__2].r = q__1.r, e[i__2].i = q__1.i;
/*<                if (wantu) call csrot(n,u(1,k),1,u(1,l-1),1,cs,sn) >*/
        if (wantu) {
            csrot_(n, &u[k * u_dim1 + 1], &c__1, &u[(l - 1) * u_dim1 + 1], &
                    c__1, &cs, &sn);
        }
/*<   570       continue >*/
/* L570: */
    }
/*<          go to 650 >*/
    goto L650;

/*        perform one qr step. */

/*<   580    continue >*/
L580:

/*           calculate the shift. */

/*<    >*/
/* Computing MAX */
    r__1 = c_abs(&s[m]), r__2 = c_abs(&s[m - 1]), r__1 = max(r__1,r__2), r__2 
            = c_abs(&e[m - 1]), r__1 = max(r__1,r__2), r__2 = c_abs(&s[l]), 
            r__1 = max(r__1,r__2), r__2 = c_abs(&e[l]);
    scale = dmax(r__1,r__2);
/*<             sm = real(s(m))/scale >*/
    i__1 = m;
    sm = s[i__1].r / scale;
/*<             smm1 = real(s(m-1))/scale >*/
    i__1 = m - 1;
    smm1 = s[i__1].r / scale;
/*<             emm1 = real(e(m-1))/scale >*/
    i__1 = m - 1;
    emm1 = e[i__1].r / scale;
/*<             sl = real(s(l))/scale >*/
    i__1 = l;
    sl = s[i__1].r / scale;
/*<             el = real(e(l))/scale >*/
    i__1 = l;
    el = e[i__1].r / scale;
/*<             b = ((smm1 + sm)*(smm1 - sm) + emm1**2)/2.0e0 >*/
/* Computing 2nd power */
    r__1 = emm1;
    b = ((smm1 + sm) * (smm1 - sm) + r__1 * r__1) / (float)2.;
/*<             c = (sm*emm1)**2 >*/
/* Computing 2nd power */
    r__1 = sm * emm1;
    c__ = r__1 * r__1;
/*<             shift = 0.0e0 >*/
    shift = (float)0.;
/*<             if (b .eq. 0.0e0 .and. c .eq. 0.0e0) go to 590 >*/
    if (b == (float)0. && c__ == (float)0.) {
        goto L590;
    }
/*<                shift = sqrt(b**2+c) >*/
/* Computing 2nd power */
    r__1 = b;
    shift = sqrt(r__1 * r__1 + c__);
/*<                if (b .lt. 0.0e0) shift = -shift >*/
    if (b < (float)0.) {
        shift = -shift;
    }
/*<                shift = c/(b + shift) >*/
    shift = c__ / (b + shift);
/*<   590       continue >*/
L590:
/*<             f = (sl + sm)*(sl - sm) + shift >*/
    f = (sl + sm) * (sl - sm) + shift;
/*<             g = sl*el >*/
    g = sl * el;

/*           chase zeros. */

/*<             mm1 = m - 1 >*/
    mm1 = m - 1;
/*<             do 600 k = l, mm1 >*/
    i__1 = mm1;
    for (k = l; k <= i__1; ++k) {
/*<                call srotg(f,g,cs,sn) >*/
        srotg_(&f, &g, &cs, &sn);
/*<                if (k .ne. l) e(k-1) = cmplx(f,0.0e0) >*/
        if (k != l) {
            i__2 = k - 1;
            q__1.r = f, q__1.i = (float)0.;
            e[i__2].r = q__1.r, e[i__2].i = q__1.i;
        }
/*<                f = cs*real(s(k)) + sn*real(e(k)) >*/
        i__2 = k;
        i__3 = k;
        f = cs * s[i__2].r + sn * e[i__3].r;
/*<                e(k) = cs*e(k) - sn*s(k) >*/
        i__2 = k;
        i__3 = k;
        q__2.r = cs * e[i__3].r, q__2.i = cs * e[i__3].i;
        i__4 = k;
        q__3.r = sn * s[i__4].r, q__3.i = sn * s[i__4].i;
        q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i;
        e[i__2].r = q__1.r, e[i__2].i = q__1.i;
/*<                g = sn*real(s(k+1)) >*/
        i__2 = k + 1;
        g = sn * s[i__2].r;
/*<                s(k+1) = cs*s(k+1) >*/
        i__2 = k + 1;
        i__3 = k + 1;
        q__1.r = cs * s[i__3].r, q__1.i = cs * s[i__3].i;
        s[i__2].r = q__1.r, s[i__2].i = q__1.i;
/*<                if (wantv) call csrot(p,v(1,k),1,v(1,k+1),1,cs,sn) >*/
        if (wantv) {
            csrot_(p, &v[k * v_dim1 + 1], &c__1, &v[(k + 1) * v_dim1 + 1], &
                    c__1, &cs, &sn);
        }
/*<                call srotg(f,g,cs,sn) >*/
        srotg_(&f, &g, &cs, &sn);
/*<                s(k) = cmplx(f,0.0e0) >*/
        i__2 = k;
        q__1.r = f, q__1.i = (float)0.;
        s[i__2].r = q__1.r, s[i__2].i = q__1.i;
/*<                f = cs*real(e(k)) + sn*real(s(k+1)) >*/
        i__2 = k;
        i__3 = k + 1;
        f = cs * e[i__2].r + sn * s[i__3].r;
/*<                s(k+1) = -sn*e(k) + cs*s(k+1) >*/
        i__2 = k + 1;
        r__1 = -sn;
        i__3 = k;
        q__2.r = r__1 * e[i__3].r, q__2.i = r__1 * e[i__3].i;
        i__4 = k + 1;
        q__3.r = cs * s[i__4].r, q__3.i = cs * s[i__4].i;
        q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
        s[i__2].r = q__1.r, s[i__2].i = q__1.i;
/*<                g = sn*real(e(k+1)) >*/
        i__2 = k + 1;
        g = sn * e[i__2].r;
/*<                e(k+1) = cs*e(k+1) >*/
        i__2 = k + 1;
        i__3 = k + 1;
        q__1.r = cs * e[i__3].r, q__1.i = cs * e[i__3].i;
        e[i__2].r = q__1.r, e[i__2].i = q__1.i;
/*<    >*/
        if (wantu && k < *n) {
            csrot_(n, &u[k * u_dim1 + 1], &c__1, &u[(k + 1) * u_dim1 + 1], &
                    c__1, &cs, &sn);
        }
/*<   600       continue >*/
/* L600: */
    }
/*<             e(m-1) = cmplx(f,0.0e0) >*/
    i__1 = m - 1;
    q__1.r = f, q__1.i = (float)0.;
    e[i__1].r = q__1.r, e[i__1].i = q__1.i;
/*<             iter = iter + 1 >*/
    ++iter;
/*<          go to 650 >*/
    goto L650;

/*        convergence. */

/*<   610    continue >*/
L610:

/*           make the singular value  positive */

/*<             if (real(s(l)) .ge. 0.0e0) go to 620 >*/
    i__1 = l;
    if (s[i__1].r >= (float)0.) {
        goto L620;
    }
/*<                s(l) = -s(l) >*/
    i__1 = l;
    i__2 = l;
    q__1.r = -s[i__2].r, q__1.i = -s[i__2].i;
    s[i__1].r = q__1.r, s[i__1].i = q__1.i;
/*<                if (wantv) call cscal(p,(-1.0e0,0.0e0),v(1,l),1) >*/
    if (wantv) {
        cscal_(p, &c_b53, &v[l * v_dim1 + 1], &c__1);
    }
/*<   620       continue >*/
L620:

/*           order the singular value. */

/*<   630       if (l .eq. mm) go to 640 >*/
L630:
    if (l == mm) {
        goto L640;
    }
/*           ...exit */
/*<                if (real(s(l)) .ge. real(s(l+1))) go to 640 >*/
    i__1 = l;
    i__2 = l + 1;
    if (s[i__1].r >= s[i__2].r) {
        goto L640;
    }
/*<                t = s(l) >*/
    i__1 = l;
    t.r = s[i__1].r, t.i = s[i__1].i;
/*<                s(l) = s(l+1) >*/
    i__1 = l;
    i__2 = l + 1;
    s[i__1].r = s[i__2].r, s[i__1].i = s[i__2].i;
/*<                s(l+1) = t >*/
    i__1 = l + 1;
    s[i__1].r = t.r, s[i__1].i = t.i;
/*<    >*/
    if (wantv && l < *p) {
        cswap_(p, &v[l * v_dim1 + 1], &c__1, &v[(l + 1) * v_dim1 + 1], &c__1);
    }
/*<    >*/
    if (wantu && l < *n) {
        cswap_(n, &u[l * u_dim1 + 1], &c__1, &u[(l + 1) * u_dim1 + 1], &c__1);
    }
/*<                l = l + 1 >*/
    ++l;
/*<             go to 630 >*/
    goto L630;
/*<   640       continue >*/
L640:
/*<             iter = 0 >*/
    iter = 0;
/*<             m = m - 1 >*/
    --m;
/*<   650    continue >*/
L650:
/*<       go to 400 >*/
    goto L400;
/*<   660 continue >*/
L660:
/*<       return >*/
    return 0;
/*<       end >*/
} /* csvdc_ */