Esempio n. 1
0
int main(void)
{
	
     int i, N=4, inca=1, incb=1;
     float *a, *b, *bb, alpha=0.5;
     a=sf_floatalloc(N);
     b=sf_floatalloc(N);
     bb=sf_floatalloc(N);

     a[0]=1; a[1]=2; a[2]=3; a[3]=4; 
     b[0]=4; b[1]=3; b[2]=2; b[3]=1; 
     bb[0]=4; bb[1]=3; bb[2]=2; bb[3]=1; 
  
     for(i=0;i<N;i++)
     	sf_warning("a[%d]=%f",i,a[i]);
     for(i=0;i<N;i++)
     	sf_warning("b[%d]=%f",i,b[i]);

     cblas_saxpy(N, 0.5 , a, 1, b, 1);   

     for(i=0;i<N;i++)
     	sf_warning("(0.5*a+b)[%d]=%f",i,b[i]);

     saxpy_(&N, &alpha, a, &inca, bb, &incb);
 
     for(i=0;i<N;i++)
     	sf_warning("(0.5*a+bb)[%d]=%f",i,bb[i]);

    exit(0);
}
Esempio n. 2
0
void THBlas_(axpy)(long n, real a, real *x, long incx, real *y, long incy)
{
  if(n == 1)
  {
    incx = 1;
    incy = 1;
  }

#if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
  if( (n <= INT_MAX) && (incx <= INT_MAX) && (incy <= INT_MAX) )
  {
    int i_n = (int)n;
    int i_incx = (int)incx;
    int i_incy = (int)incy;

#if defined(TH_REAL_IS_DOUBLE)
    daxpy_(&i_n, &a, x, &i_incx, y, &i_incy);
#else
    saxpy_(&i_n, &a, x, &i_incx, y, &i_incy);
#endif
    return;
  }
#endif
  {
    long i;
    for(i = 0; i < n; i++)
      y[i*incy] += a*x[i*incx];
  }
}
Esempio n. 3
0
int
f2c_saxpy(integer* N,
          real* alpha,
          real* X, integer* incX,
          real* Y, integer* incY)
{
    saxpy_(N, alpha, X, incX, Y, incY);
    return 0;
}
Esempio n. 4
0
INLINE void do_saxpy(float *x, float *y, int iterations, int *limit)
{
  REGISTER int i = 0;
  extern int saxpy_();

  for (;i<iterations;i++)
    {
      saxpy_(limit,&falpha,x,&stride,y,&stride);
    }
}
Esempio n. 5
0
/* Subroutine */
int slapll_(integer *n, real *x, integer *incx, real *y, integer *incy, real *ssmin)
{
    /* System generated locals */
    integer i__1;
    /* Local variables */
    real c__, a11, a12, a22, tau;
    extern real sdot_(integer *, real *, integer *, real *, integer *);
    extern /* Subroutine */
    int slas2_(real *, real *, real *, real *, real *) ;
    real ssmax;
    extern /* Subroutine */
    int saxpy_(integer *, real *, real *, integer *, real *, integer *), slarfg_(integer *, real *, real *, integer *, real *);
    /* -- LAPACK auxiliary routine (version 3.4.2) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* September 2012 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Quick return if possible */
    /* Parameter adjustments */
    --y;
    --x;
    /* Function Body */
    if (*n <= 1)
    {
        *ssmin = 0.f;
        return 0;
    }
    /* Compute the QR factorization of the N-by-2 matrix ( X Y ) */
    slarfg_(n, &x[1], &x[*incx + 1], incx, &tau);
    a11 = x[1];
    x[1] = 1.f;
    c__ = -tau * sdot_(n, &x[1], incx, &y[1], incy);
    saxpy_(n, &c__, &x[1], incx, &y[1], incy);
    i__1 = *n - 1;
    slarfg_(&i__1, &y[*incy + 1], &y[(*incy << 1) + 1], incy, &tau);
    a12 = y[1];
    a22 = y[*incy + 1];
    /* Compute the SVD of 2-by-2 Upper triangular matrix. */
    slas2_(&a11, &a12, &a22, ssmin, &ssmax);
    return 0;
    /* End of SLAPLL */
}
Esempio n. 6
0
/* Subroutine */ int slatps_(char *uplo, char *trans, char *diag, char *
	normin, integer *n, real *ap, real *x, real *scale, real *cnorm, 
	integer *info)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1992   


    Purpose   
    =======   

    SLATPS solves one of the triangular systems   

       A *x = s*b  or  A'*x = s*b   

    with scaling to prevent overflow, where A is an upper or lower   
    triangular matrix stored in packed form.  Here A' denotes the   
    transpose of A, x and b are n-element vectors, and s is a scaling   
    factor, usually less than or equal to 1, chosen so that the   
    components of x will be less than the overflow threshold.  If the   
    unscaled problem will not cause overflow, the Level 2 BLAS routine   
    STPSV is called. If the matrix A is singular (A(j,j) = 0 for some j),   
    then s is set to 0 and a non-trivial solution to A*x = 0 is returned.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the matrix A is upper or lower triangular.   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    TRANS   (input) CHARACTER*1   
            Specifies the operation applied to A.   
            = 'N':  Solve A * x = s*b  (No transpose)   
            = 'T':  Solve A'* x = s*b  (Transpose)   
            = 'C':  Solve A'* x = s*b  (Conjugate transpose = Transpose)   

    DIAG    (input) CHARACTER*1   
            Specifies whether or not the matrix A is unit triangular.   
            = 'N':  Non-unit triangular   
            = 'U':  Unit triangular   

    NORMIN  (input) CHARACTER*1   
            Specifies whether CNORM has been set or not.   
            = 'Y':  CNORM contains the column norms on entry   
            = 'N':  CNORM is not set on entry.  On exit, the norms will   
                    be computed and stored in CNORM.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    AP      (input) REAL array, dimension (N*(N+1)/2)   
            The upper or lower triangular matrix A, packed columnwise in   
            a linear array.  The j-th column of A is stored in the array   
            AP as follows:   
            if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;   
            if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.   

    X       (input/output) REAL array, dimension (N)   
            On entry, the right hand side b of the triangular system.   
            On exit, X is overwritten by the solution vector x.   

    SCALE   (output) REAL   
            The scaling factor s for the triangular system   
               A * x = s*b  or  A'* x = s*b.   
            If SCALE = 0, the matrix A is singular or badly scaled, and   
            the vector x is an exact or approximate solution to A*x = 0.   

    CNORM   (input or output) REAL array, dimension (N)   

            If NORMIN = 'Y', CNORM is an input argument and CNORM(j)   
            contains the norm of the off-diagonal part of the j-th column   
            of A.  If TRANS = 'N', CNORM(j) must be greater than or equal   
            to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)   
            must be greater than or equal to the 1-norm.   

            If NORMIN = 'N', CNORM is an output argument and CNORM(j)   
            returns the 1-norm of the offdiagonal part of the j-th column   
            of A.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -k, the k-th argument had an illegal value   

    Further Details   
    ======= =======   

    A rough bound on x is computed; if that is less than overflow, STPSV   
    is called, otherwise, specific code is used which checks for possible   
    overflow or divide-by-zero at every operation.   

    A columnwise scheme is used for solving A*x = b.  The basic algorithm   
    if A is lower triangular is   

         x[1:n] := b[1:n]   
         for j = 1, ..., n   
              x(j) := x(j) / A(j,j)   
              x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]   
         end   

    Define bounds on the components of x after j iterations of the loop:   
       M(j) = bound on x[1:j]   
       G(j) = bound on x[j+1:n]   
    Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.   

    Then for iteration j+1 we have   
       M(j+1) <= G(j) / | A(j+1,j+1) |   
       G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |   
              <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )   

    where CNORM(j+1) is greater than or equal to the infinity-norm of   
    column j+1 of A, not counting the diagonal.  Hence   

       G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )   
                    1<=i<=j   
    and   

       |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )   
                                     1<=i< j   

    Since |x(j)| <= M(j), we use the Level 2 BLAS routine STPSV if the   
    reciprocal of the largest M(j), j=1,..,n, is larger than   
    max(underflow, 1/overflow).   

    The bound on x(j) is also used to determine when a step in the   
    columnwise method can be performed without fear of overflow.  If   
    the computed bound is greater than a large constant, x is scaled to   
    prevent overflow, but if the bound overflows, x is set to 0, x(j) to   
    1, and scale to 0, and a non-trivial solution to A*x = 0 is found.   

    Similarly, a row-wise scheme is used to solve A'*x = b.  The basic   
    algorithm for A upper triangular is   

         for j = 1, ..., n   
              x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)   
         end   

    We simultaneously compute two bounds   
         G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j   
         M(j) = bound on x(i), 1<=i<=j   

    The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we   
    add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.   
    Then the bound on x(j) is   

         M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |   

              <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )   
                        1<=i<=j   

    and we can safely call STPSV if 1/M(n) and 1/G(n) are both greater   
    than max(underflow, 1/overflow).   

    =====================================================================   


       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static real c_b36 = .5f;
    
    /* System generated locals */
    integer i__1, i__2, i__3;
    real r__1, r__2, r__3;
    /* Local variables */
    static integer jinc, jlen;
    static real xbnd;
    static integer imax;
    static real tmax, tjjs;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    static real xmax, grow, sumj;
    static integer i__, j;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static real tscal, uscal;
    static integer jlast;
    extern doublereal sasum_(integer *, real *, integer *);
    static logical upper;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *), stpsv_(char *, char *, char *, integer *, 
	    real *, real *, integer *);
    static integer ip;
    static real xj;
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static real bignum;
    extern integer isamax_(integer *, real *, integer *);
    static logical notran;
    static integer jfirst;
    static real smlnum;
    static logical nounit;
    static real rec, tjj;


    --cnorm;
    --x;
    --ap;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    notran = lsame_(trans, "N");
    nounit = lsame_(diag, "N");

/*     Test the input parameters. */

    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (! notran && ! lsame_(trans, "T") && ! 
	    lsame_(trans, "C")) {
	*info = -2;
    } else if (! nounit && ! lsame_(diag, "U")) {
	*info = -3;
    } else if (! lsame_(normin, "Y") && ! lsame_(normin,
	     "N")) {
	*info = -4;
    } else if (*n < 0) {
	*info = -5;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLATPS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine machine dependent parameters to control overflow. */

    smlnum = slamch_("Safe minimum") / slamch_("Precision");
    bignum = 1.f / smlnum;
    *scale = 1.f;

    if (lsame_(normin, "N")) {

/*        Compute the 1-norm of each column, not including the diagonal. */

	if (upper) {

/*           A is upper triangular. */

	    ip = 1;
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = j - 1;
		cnorm[j] = sasum_(&i__2, &ap[ip], &c__1);
		ip += j;
/* L10: */
	    }
	} else {

/*           A is lower triangular. */

	    ip = 1;
	    i__1 = *n - 1;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = *n - j;
		cnorm[j] = sasum_(&i__2, &ap[ip + 1], &c__1);
		ip = ip + *n - j + 1;
/* L20: */
	    }
	    cnorm[*n] = 0.f;
	}
    }

/*     Scale the column norms by TSCAL if the maximum element in CNORM is   
       greater than BIGNUM. */

    imax = isamax_(n, &cnorm[1], &c__1);
    tmax = cnorm[imax];
    if (tmax <= bignum) {
	tscal = 1.f;
    } else {
	tscal = 1.f / (smlnum * tmax);
	sscal_(n, &tscal, &cnorm[1], &c__1);
    }

/*     Compute a bound on the computed solution vector to see if the   
       Level 2 BLAS routine STPSV can be used. */

    j = isamax_(n, &x[1], &c__1);
    xmax = (r__1 = x[j], dabs(r__1));
    xbnd = xmax;
    if (notran) {

/*        Compute the growth in A * x = b. */

	if (upper) {
	    jfirst = *n;
	    jlast = 1;
	    jinc = -1;
	} else {
	    jfirst = 1;
	    jlast = *n;
	    jinc = 1;
	}

	if (tscal != 1.f) {
	    grow = 0.f;
	    goto L50;
	}

	if (nounit) {

/*           A is non-unit triangular.   

             Compute GROW = 1/G(j) and XBND = 1/M(j).   
             Initially, G(0) = max{x(i), i=1,...,n}. */

	    grow = 1.f / dmax(xbnd,smlnum);
	    xbnd = grow;
	    ip = jfirst * (jfirst + 1) / 2;
	    jlen = *n;
	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Exit the loop if the growth factor is too small. */

		if (grow <= smlnum) {
		    goto L50;
		}

/*              M(j) = G(j-1) / abs(A(j,j)) */

		tjj = (r__1 = ap[ip], dabs(r__1));
/* Computing MIN */
		r__1 = xbnd, r__2 = dmin(1.f,tjj) * grow;
		xbnd = dmin(r__1,r__2);
		if (tjj + cnorm[j] >= smlnum) {

/*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */

		    grow *= tjj / (tjj + cnorm[j]);
		} else {

/*                 G(j) could overflow, set GROW to 0. */

		    grow = 0.f;
		}
		ip += jinc * jlen;
		--jlen;
/* L30: */
	    }
	    grow = xbnd;
	} else {

/*           A is unit triangular.   

             Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.   

   Computing MIN */
	    r__1 = 1.f, r__2 = 1.f / dmax(xbnd,smlnum);
	    grow = dmin(r__1,r__2);
	    i__2 = jlast;
	    i__1 = jinc;
	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

/*              Exit the loop if the growth factor is too small. */

		if (grow <= smlnum) {
		    goto L50;
		}

/*              G(j) = G(j-1)*( 1 + CNORM(j) ) */

		grow *= 1.f / (cnorm[j] + 1.f);
/* L40: */
	    }
	}
L50:

	;
    } else {

/*        Compute the growth in A' * x = b. */

	if (upper) {
	    jfirst = 1;
	    jlast = *n;
	    jinc = 1;
	} else {
	    jfirst = *n;
	    jlast = 1;
	    jinc = -1;
	}

	if (tscal != 1.f) {
	    grow = 0.f;
	    goto L80;
	}

	if (nounit) {

/*           A is non-unit triangular.   

             Compute GROW = 1/G(j) and XBND = 1/M(j).   
             Initially, M(0) = max{x(i), i=1,...,n}. */

	    grow = 1.f / dmax(xbnd,smlnum);
	    xbnd = grow;
	    ip = jfirst * (jfirst + 1) / 2;
	    jlen = 1;
	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Exit the loop if the growth factor is too small. */

		if (grow <= smlnum) {
		    goto L80;
		}

/*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */

		xj = cnorm[j] + 1.f;
/* Computing MIN */
		r__1 = grow, r__2 = xbnd / xj;
		grow = dmin(r__1,r__2);

/*              M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */

		tjj = (r__1 = ap[ip], dabs(r__1));
		if (xj > tjj) {
		    xbnd *= tjj / xj;
		}
		++jlen;
		ip += jinc * jlen;
/* L60: */
	    }
	    grow = dmin(grow,xbnd);
	} else {

/*           A is unit triangular.   

             Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.   

   Computing MIN */
	    r__1 = 1.f, r__2 = 1.f / dmax(xbnd,smlnum);
	    grow = dmin(r__1,r__2);
	    i__2 = jlast;
	    i__1 = jinc;
	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

/*              Exit the loop if the growth factor is too small. */

		if (grow <= smlnum) {
		    goto L80;
		}

/*              G(j) = ( 1 + CNORM(j) )*G(j-1) */

		xj = cnorm[j] + 1.f;
		grow /= xj;
/* L70: */
	    }
	}
L80:
	;
    }

    if (grow * tscal > smlnum) {

/*        Use the Level 2 BLAS solve if the reciprocal of the bound on   
          elements of X is not too small. */

	stpsv_(uplo, trans, diag, n, &ap[1], &x[1], &c__1);
    } else {

/*        Use a Level 1 BLAS solve, scaling intermediate results. */

	if (xmax > bignum) {

/*           Scale X so that its components are less than or equal to   
             BIGNUM in absolute value. */

	    *scale = bignum / xmax;
	    sscal_(n, scale, &x[1], &c__1);
	    xmax = bignum;
	}

	if (notran) {

/*           Solve A * x = b */

	    ip = jfirst * (jfirst + 1) / 2;
	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Compute x(j) = b(j) / A(j,j), scaling x if necessary. */

		xj = (r__1 = x[j], dabs(r__1));
		if (nounit) {
		    tjjs = ap[ip] * tscal;
		} else {
		    tjjs = tscal;
		    if (tscal == 1.f) {
			goto L95;
		    }
		}
		tjj = dabs(tjjs);
		if (tjj > smlnum) {

/*                    abs(A(j,j)) > SMLNUM: */

		    if (tjj < 1.f) {
			if (xj > tjj * bignum) {

/*                          Scale x by 1/b(j). */

			    rec = 1.f / xj;
			    sscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }
		    x[j] /= tjjs;
		    xj = (r__1 = x[j], dabs(r__1));
		} else if (tjj > 0.f) {

/*                    0 < abs(A(j,j)) <= SMLNUM: */

		    if (xj > tjj * bignum) {

/*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM   
                         to avoid overflow when dividing by A(j,j). */

			rec = tjj * bignum / xj;
			if (cnorm[j] > 1.f) {

/*                          Scale by 1/CNORM(j) to avoid overflow when   
                            multiplying x(j) times column j. */

			    rec /= cnorm[j];
			}
			sscal_(n, &rec, &x[1], &c__1);
			*scale *= rec;
			xmax *= rec;
		    }
		    x[j] /= tjjs;
		    xj = (r__1 = x[j], dabs(r__1));
		} else {

/*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and   
                      scale = 0, and compute a solution to A*x = 0. */

		    i__3 = *n;
		    for (i__ = 1; i__ <= i__3; ++i__) {
			x[i__] = 0.f;
/* L90: */
		    }
		    x[j] = 1.f;
		    xj = 1.f;
		    *scale = 0.f;
		    xmax = 0.f;
		}
L95:

/*              Scale x if necessary to avoid overflow when adding a   
                multiple of column j of A. */

		if (xj > 1.f) {
		    rec = 1.f / xj;
		    if (cnorm[j] > (bignum - xmax) * rec) {

/*                    Scale x by 1/(2*abs(x(j))). */

			rec *= .5f;
			sscal_(n, &rec, &x[1], &c__1);
			*scale *= rec;
		    }
		} else if (xj * cnorm[j] > bignum - xmax) {

/*                 Scale x by 1/2. */

		    sscal_(n, &c_b36, &x[1], &c__1);
		    *scale *= .5f;
		}

		if (upper) {
		    if (j > 1) {

/*                    Compute the update   
                         x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) */

			i__3 = j - 1;
			r__1 = -x[j] * tscal;
			saxpy_(&i__3, &r__1, &ap[ip - j + 1], &c__1, &x[1], &
				c__1);
			i__3 = j - 1;
			i__ = isamax_(&i__3, &x[1], &c__1);
			xmax = (r__1 = x[i__], dabs(r__1));
		    }
		    ip -= j;
		} else {
		    if (j < *n) {

/*                    Compute the update   
                         x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) */

			i__3 = *n - j;
			r__1 = -x[j] * tscal;
			saxpy_(&i__3, &r__1, &ap[ip + 1], &c__1, &x[j + 1], &
				c__1);
			i__3 = *n - j;
			i__ = j + isamax_(&i__3, &x[j + 1], &c__1);
			xmax = (r__1 = x[i__], dabs(r__1));
		    }
		    ip = ip + *n - j + 1;
		}
/* L100: */
	    }

	} else {

/*           Solve A' * x = b */

	    ip = jfirst * (jfirst + 1) / 2;
	    jlen = 1;
	    i__2 = jlast;
	    i__1 = jinc;
	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

/*              Compute x(j) = b(j) - sum A(k,j)*x(k).   
                                      k<>j */

		xj = (r__1 = x[j], dabs(r__1));
		uscal = tscal;
		rec = 1.f / dmax(xmax,1.f);
		if (cnorm[j] > (bignum - xj) * rec) {

/*                 If x(j) could overflow, scale x by 1/(2*XMAX). */

		    rec *= .5f;
		    if (nounit) {
			tjjs = ap[ip] * tscal;
		    } else {
			tjjs = tscal;
		    }
		    tjj = dabs(tjjs);
		    if (tjj > 1.f) {

/*                       Divide by A(j,j) when scaling x if A(j,j) > 1.   

   Computing MIN */
			r__1 = 1.f, r__2 = rec * tjj;
			rec = dmin(r__1,r__2);
			uscal /= tjjs;
		    }
		    if (rec < 1.f) {
			sscal_(n, &rec, &x[1], &c__1);
			*scale *= rec;
			xmax *= rec;
		    }
		}

		sumj = 0.f;
		if (uscal == 1.f) {

/*                 If the scaling needed for A in the dot product is 1,   
                   call SDOT to perform the dot product. */

		    if (upper) {
			i__3 = j - 1;
			sumj = sdot_(&i__3, &ap[ip - j + 1], &c__1, &x[1], &
				c__1);
		    } else if (j < *n) {
			i__3 = *n - j;
			sumj = sdot_(&i__3, &ap[ip + 1], &c__1, &x[j + 1], &
				c__1);
		    }
		} else {

/*                 Otherwise, use in-line code for the dot product. */

		    if (upper) {
			i__3 = j - 1;
			for (i__ = 1; i__ <= i__3; ++i__) {
			    sumj += ap[ip - j + i__] * uscal * x[i__];
/* L110: */
			}
		    } else if (j < *n) {
			i__3 = *n - j;
			for (i__ = 1; i__ <= i__3; ++i__) {
			    sumj += ap[ip + i__] * uscal * x[j + i__];
/* L120: */
			}
		    }
		}

		if (uscal == tscal) {

/*                 Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j)   
                   was not used to scale the dotproduct. */

		    x[j] -= sumj;
		    xj = (r__1 = x[j], dabs(r__1));
		    if (nounit) {

/*                    Compute x(j) = x(j) / A(j,j), scaling if necessary. */

			tjjs = ap[ip] * tscal;
		    } else {
			tjjs = tscal;
			if (tscal == 1.f) {
			    goto L135;
			}
		    }
		    tjj = dabs(tjjs);
		    if (tjj > smlnum) {

/*                       abs(A(j,j)) > SMLNUM: */

			if (tjj < 1.f) {
			    if (xj > tjj * bignum) {

/*                             Scale X by 1/abs(x(j)). */

				rec = 1.f / xj;
				sscal_(n, &rec, &x[1], &c__1);
				*scale *= rec;
				xmax *= rec;
			    }
			}
			x[j] /= tjjs;
		    } else if (tjj > 0.f) {

/*                       0 < abs(A(j,j)) <= SMLNUM: */

			if (xj > tjj * bignum) {

/*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */

			    rec = tjj * bignum / xj;
			    sscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
			x[j] /= tjjs;
		    } else {

/*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and   
                         scale = 0, and compute a solution to A'*x = 0. */

			i__3 = *n;
			for (i__ = 1; i__ <= i__3; ++i__) {
			    x[i__] = 0.f;
/* L130: */
			}
			x[j] = 1.f;
			*scale = 0.f;
			xmax = 0.f;
		    }
L135:
		    ;
		} else {

/*                 Compute x(j) := x(j) / A(j,j)  - sumj if the dot   
                   product has already been divided by 1/A(j,j). */

		    x[j] = x[j] / tjjs - sumj;
		}
/* Computing MAX */
		r__2 = xmax, r__3 = (r__1 = x[j], dabs(r__1));
		xmax = dmax(r__2,r__3);
		++jlen;
		ip += jinc * jlen;
/* L140: */
	    }
	}
	*scale /= tscal;
    }

/*     Scale the column norms by 1/TSCAL for return. */

    if (tscal != 1.f) {
	r__1 = 1.f / tscal;
	sscal_(n, &r__1, &cnorm[1], &c__1);
    }

    return 0;

/*     End of SLATPS */

} /* slatps_ */
Esempio n. 7
0
 int slarz_(char *side, int *m, int *n, int *l, 
	float *v, int *incv, float *tau, float *c__, int *ldc, float *
	work)
{
    /* System generated locals */
    int c_dim1, c_offset;
    float r__1;

    /* Local variables */
    extern  int sger_(int *, int *, float *, float *, 
	    int *, float *, int *, float *, int *);
    extern int lsame_(char *, char *);
    extern  int sgemv_(char *, int *, int *, float *, 
	    float *, int *, float *, int *, float *, float *, int *), scopy_(int *, float *, int *, float *, int *), 
	    saxpy_(int *, float *, float *, int *, float *, int *);


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLARZ applies a float elementary reflector H to a float M-by-N */
/*  matrix C, from either the left or the right. H is represented in the */
/*  form */

/*        H = I - tau * v * v' */

/*  where tau is a float scalar and v is a float vector. */

/*  If tau = 0, then H is taken to be the unit matrix. */


/*  H is a product of k elementary reflectors as returned by STZRZF. */

/*  Arguments */
/*  ========= */

/*  SIDE    (input) CHARACTER*1 */
/*          = 'L': form  H * C */
/*          = 'R': form  C * H */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix C. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix C. */

/*  L       (input) INTEGER */
/*          The number of entries of the vector V containing */
/*          the meaningful part of the Householder vectors. */
/*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */

/*  V       (input) REAL array, dimension (1+(L-1)*ABS(INCV)) */
/*          The vector v in the representation of H as returned by */
/*          STZRZF. V is not used if TAU = 0. */

/*  INCV    (input) INTEGER */
/*          The increment between elements of v. INCV <> 0. */

/*  TAU     (input) REAL */
/*          The value tau in the representation of H. */

/*  C       (input/output) REAL array, dimension (LDC,N) */
/*          On entry, the M-by-N matrix C. */
/*          On exit, C is overwritten by the matrix H * C if SIDE = 'L', */
/*          or C * H if SIDE = 'R'. */

/*  LDC     (input) INTEGER */
/*          The leading dimension of the array C. LDC >= MAX(1,M). */

/*  WORK    (workspace) REAL array, dimension */
/*                         (N) if SIDE = 'L' */
/*                      or (M) if SIDE = 'R' */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    --v;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    if (lsame_(side, "L")) {

/*        Form  H * C */

	if (*tau != 0.f) {

/*           w( 1:n ) = C( 1, 1:n ) */

	    scopy_(n, &c__[c_offset], ldc, &work[1], &c__1);

/*           w( 1:n ) = w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l ) */

	    sgemv_("Transpose", l, n, &c_b5, &c__[*m - *l + 1 + c_dim1], ldc, 
		    &v[1], incv, &c_b5, &work[1], &c__1);

/*           C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) */

	    r__1 = -(*tau);
	    saxpy_(n, &r__1, &work[1], &c__1, &c__[c_offset], ldc);

/*           C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... */
/*                               tau * v( 1:l ) * w( 1:n )' */

	    r__1 = -(*tau);
	    sger_(l, n, &r__1, &v[1], incv, &work[1], &c__1, &c__[*m - *l + 1 
		    + c_dim1], ldc);
	}

    } else {

/*        Form  C * H */

	if (*tau != 0.f) {

/*           w( 1:m ) = C( 1:m, 1 ) */

	    scopy_(m, &c__[c_offset], &c__1, &work[1], &c__1);

/*           w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l ) */

	    sgemv_("No transpose", m, l, &c_b5, &c__[(*n - *l + 1) * c_dim1 + 
		    1], ldc, &v[1], incv, &c_b5, &work[1], &c__1);

/*           C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) */

	    r__1 = -(*tau);
	    saxpy_(m, &r__1, &work[1], &c__1, &c__[c_offset], &c__1);

/*           C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... */
/*                               tau * w( 1:m ) * v( 1:l )' */

	    r__1 = -(*tau);
	    sger_(m, l, &r__1, &work[1], &c__1, &v[1], incv, &c__[(*n - *l + 
		    1) * c_dim1 + 1], ldc);

	}

    }

    return 0;

/*     End of SLARZ */

} /* slarz_ */
Esempio n. 8
0
/* Subroutine */ int ssytd2_(char *uplo, integer *n, real *a, integer *lda, 
	real *d, real *e, real *tau, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1992   


    Purpose   
    =======   

    SSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal   
    form T by an orthogonal similarity transformation: Q' * A * Q = T.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the upper or lower triangular part of the   
            symmetric matrix A is stored:   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading 
  
            n-by-n upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading n-by-n lower triangular part of A contains the lower 
  
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   
            On exit, if UPLO = 'U', the diagonal and first superdiagonal 
  
            of A are overwritten by the corresponding elements of the   
            tridiagonal matrix T, and the elements above the first   
            superdiagonal, with the array TAU, represent the orthogonal   
            matrix Q as a product of elementary reflectors; if UPLO   
            = 'L', the diagonal and first subdiagonal of A are over-   
            written by the corresponding elements of the tridiagonal   
            matrix T, and the elements below the first subdiagonal, with 
  
            the array TAU, represent the orthogonal matrix Q as a product 
  
            of elementary reflectors. See Further Details.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    D       (output) REAL array, dimension (N)   
            The diagonal elements of the tridiagonal matrix T:   
            D(i) = A(i,i).   

    E       (output) REAL array, dimension (N-1)   
            The off-diagonal elements of the tridiagonal matrix T:   
            E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. 
  

    TAU     (output) REAL array, dimension (N-1)   
            The scalar factors of the elementary reflectors (see Further 
  
            Details).   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    Further Details   
    ===============   

    If UPLO = 'U', the matrix Q is represented as a product of elementary 
  
    reflectors   

       Q = H(n-1) . . . H(2) H(1).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a real scalar, and v is a real vector with   
    v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in   
    A(1:i-1,i+1), and tau in TAU(i).   

    If UPLO = 'L', the matrix Q is represented as a product of elementary 
  
    reflectors   

       Q = H(1) H(2) . . . H(n-1).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a real scalar, and v is a real vector with   
    v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), 
  
    and tau in TAU(i).   

    The contents of A on exit are illustrated by the following examples   
    with n = 5:   

    if UPLO = 'U':                       if UPLO = 'L':   

      (  d   e   v2  v3  v4 )              (  d                  )   
      (      d   e   v3  v4 )              (  e   d              )   
      (          d   e   v4 )              (  v1  e   d          )   
      (              d   e  )              (  v1  v2  e   d      )   
      (                  d  )              (  v1  v2  v3  e   d  )   

    where d and e denote diagonal and off-diagonal elements of T, and vi 
  
    denotes an element of the vector defining H(i).   

    ===================================================================== 
  


       Test the input parameters   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    static real c_b8 = 0.f;
    static real c_b14 = -1.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    /* Local variables */
    static real taui;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    static integer i;
    extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, integer *);
    static real alpha;
    extern logical lsame_(char *, char *);
    static logical upper;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *), ssymv_(char *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, real *, integer *), 
	    xerbla_(char *, integer *), slarfg_(integer *, real *, 
	    real *, integer *, real *);



#define D(I) d[(I)-1]
#define E(I) e[(I)-1]
#define TAU(I) tau[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SSYTD2", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n <= 0) {
	return 0;
    }

    if (upper) {

/*        Reduce the upper triangle of A */

	for (i = *n - 1; i >= 1; --i) {

/*           Generate elementary reflector H(i) = I - tau * v * v'
   
             to annihilate A(1:i-1,i+1) */

	    slarfg_(&i, &A(i,i+1), &A(1,i+1), &
		    c__1, &taui);
	    E(i) = A(i,i+1);

	    if (taui != 0.f) {

/*              Apply H(i) from both sides to A(1:i,1:i) */

		A(i,i+1) = 1.f;

/*              Compute  x := tau * A * v  storing x in TAU(1:
i) */

		ssymv_(uplo, &i, &taui, &A(1,1), lda, &A(1,i+1), &c__1, &c_b8, &TAU(1), &c__1);

/*              Compute  w := x - 1/2 * tau * (x'*v) * v */

		alpha = taui * -.5f * sdot_(&i, &TAU(1), &c__1, &A(1,i+1), &c__1);
		saxpy_(&i, &alpha, &A(1,i+1), &c__1, &TAU(1), &
			c__1);

/*              Apply the transformation as a rank-2 update: 
  
                   A := A - v * w' - w * v' */

		ssyr2_(uplo, &i, &c_b14, &A(1,i+1), &c__1, &
			TAU(1), &c__1, &A(1,1), lda);

		A(i,i+1) = E(i);
	    }
	    D(i + 1) = A(i+1,i+1);
	    TAU(i) = taui;
/* L10: */
	}
	D(1) = A(1,1);
    } else {

/*        Reduce the lower triangle of A */

	i__1 = *n - 1;
	for (i = 1; i <= *n-1; ++i) {

/*           Generate elementary reflector H(i) = I - tau * v * v'
   
             to annihilate A(i+2:n,i) */

	    i__2 = *n - i;
/* Computing MIN */
	    i__3 = i + 2;
	    slarfg_(&i__2, &A(i+1,i), &A(min(i+2,*n),i), &c__1, &taui);
	    E(i) = A(i+1,i);

	    if (taui != 0.f) {

/*              Apply H(i) from both sides to A(i+1:n,i+1:n) 
*/

		A(i+1,i) = 1.f;

/*              Compute  x := tau * A * v  storing y in TAU(i:
n-1) */

		i__2 = *n - i;
		ssymv_(uplo, &i__2, &taui, &A(i+1,i+1), lda, 
			&A(i+1,i), &c__1, &c_b8, &TAU(i), &c__1);

/*              Compute  w := x - 1/2 * tau * (x'*v) * v */

		i__2 = *n - i;
		alpha = taui * -.5f * sdot_(&i__2, &TAU(i), &c__1, &A(i+1,i), &c__1);
		i__2 = *n - i;
		saxpy_(&i__2, &alpha, &A(i+1,i), &c__1, &TAU(i), 
			&c__1);

/*              Apply the transformation as a rank-2 update: 
  
                   A := A - v * w' - w * v' */

		i__2 = *n - i;
		ssyr2_(uplo, &i__2, &c_b14, &A(i+1,i), &c__1, &
			TAU(i), &c__1, &A(i+1,i+1), lda);

		A(i+1,i) = E(i);
	    }
	    D(i) = A(i,i);
	    TAU(i) = taui;
/* L20: */
	}
	D(*n) = A(*n,*n);
    }

    return 0;

/*     End of SSYTD2 */

} /* ssytd2_ */
Esempio n. 9
0
/* Subroutine */ int sstein_(integer *n, real *d, real *e, integer *m, real *
	w, integer *iblock, integer *isplit, real *z, integer *ldz, real *
	work, integer *iwork, integer *ifail, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SSTEIN computes the eigenvectors of a real symmetric tridiagonal   
    matrix T corresponding to specified eigenvalues, using inverse   
    iteration.   

    The maximum number of iterations allowed for each eigenvector is   
    specified by an internal parameter MAXITS (currently set to 5).   

    Arguments   
    =========   

    N       (input) INTEGER   
            The order of the matrix.  N >= 0.   

    D       (input) REAL array, dimension (N)   
            The n diagonal elements of the tridiagonal matrix T.   

    E       (input) REAL array, dimension (N)   
            The (n-1) subdiagonal elements of the tridiagonal matrix   
            T, in elements 1 to N-1.  E(N) need not be set.   

    M       (input) INTEGER   
            The number of eigenvectors to be found.  0 <= M <= N.   

    W       (input) REAL array, dimension (N)   
            The first M elements of W contain the eigenvalues for   
            which eigenvectors are to be computed.  The eigenvalues   
            should be grouped by split-off block and ordered from   
            smallest to largest within the block.  ( The output array   
            W from SSTEBZ with ORDER = 'B' is expected here. )   

    IBLOCK  (input) INTEGER array, dimension (N)   
            The submatrix indices associated with the corresponding   
            eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to   
            the first submatrix from the top, =2 if W(i) belongs to   
            the second submatrix, etc.  ( The output array IBLOCK   
            from SSTEBZ is expected here. )   

    ISPLIT  (input) INTEGER array, dimension (N)   
            The splitting points, at which T breaks up into submatrices. 
  
            The first submatrix consists of rows/columns 1 to   
            ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1   
            through ISPLIT( 2 ), etc.   
            ( The output array ISPLIT from SSTEBZ is expected here. )   

    Z       (output) REAL array, dimension (LDZ, M)   
            The computed eigenvectors.  The eigenvector associated   
            with the eigenvalue W(i) is stored in the i-th column of   
            Z.  Any vector which fails to converge is set to its current 
  
            iterate after MAXITS iterations.   

    LDZ     (input) INTEGER   
            The leading dimension of the array Z.  LDZ >= max(1,N).   

    WORK    (workspace) REAL array, dimension (5*N)   

    IWORK   (workspace) INTEGER array, dimension (N)   

    IFAIL   (output) INTEGER array, dimension (M)   
            On normal exit, all elements of IFAIL are zero.   
            If one or more eigenvectors fail to converge after   
            MAXITS iterations, then their indices are stored in   
            array IFAIL.   

    INFO    (output) INTEGER   
            = 0: successful exit.   
            < 0: if INFO = -i, the i-th argument had an illegal value   
            > 0: if INFO = i, then i eigenvectors failed to converge   
                 in MAXITS iterations.  Their indices are stored in   
                 array IFAIL.   

    Internal Parameters   
    ===================   

    MAXITS  INTEGER, default = 5   
            The maximum number of iterations performed.   

    EXTRA   INTEGER, default = 2   
            The number of iterations performed after norm growth   
            criterion is satisfied, should be at least 1.   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__2 = 2;
    static integer c__1 = 1;
    static integer c_n1 = -1;
    
    /* System generated locals */
    integer z_dim1, z_offset, i__1, i__2, i__3;
    real r__1, r__2, r__3, r__4, r__5;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static integer jblk, nblk, jmax;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *), 
	    snrm2_(integer *, real *, integer *);
    static integer i, j, iseed[4], gpind, iinfo;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static integer b1;
    extern doublereal sasum_(integer *, real *, integer *);
    static integer j1;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    static real ortol;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *);
    static integer indrv1, indrv2, indrv3, indrv4, indrv5, bn;
    static real xj;
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *), slagtf_(
	    integer *, real *, real *, real *, real *, real *, real *, 
	    integer *, integer *);
    static integer nrmchk;
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */ int slagts_(integer *, integer *, real *, real *, 
	    real *, real *, integer *, real *, real *, integer *);
    static integer blksiz;
    static real onenrm, pertol;
    extern /* Subroutine */ int slarnv_(integer *, integer *, integer *, real 
	    *);
    static real stpcrt, scl, eps, ctr, sep, nrm, tol;
    static integer its;
    static real xjm, eps1;



#define ISEED(I) iseed[(I)]
#define D(I) d[(I)-1]
#define E(I) e[(I)-1]
#define W(I) w[(I)-1]
#define IBLOCK(I) iblock[(I)-1]
#define ISPLIT(I) isplit[(I)-1]
#define WORK(I) work[(I)-1]
#define IWORK(I) iwork[(I)-1]
#define IFAIL(I) ifail[(I)-1]

#define Z(I,J) z[(I)-1 + ((J)-1)* ( *ldz)]

    *info = 0;
    i__1 = *m;
    for (i = 1; i <= *m; ++i) {
	IFAIL(i) = 0;
/* L10: */
    }

    if (*n < 0) {
	*info = -1;
    } else if (*m < 0 || *m > *n) {
	*info = -4;
    } else if (*ldz < max(1,*n)) {
	*info = -9;
    } else {
	i__1 = *m;
	for (j = 2; j <= *m; ++j) {
	    if (IBLOCK(j) < IBLOCK(j - 1)) {
		*info = -6;
		goto L30;
	    }
	    if (IBLOCK(j) == IBLOCK(j - 1) && W(j) < W(j - 1)) {
		*info = -5;
		goto L30;
	    }
/* L20: */
	}
L30:
	;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SSTEIN", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *m == 0) {
	return 0;
    } else if (*n == 1) {
	Z(1,1) = 1.f;
	return 0;
    }

/*     Get machine constants. */

    eps = slamch_("Precision");

/*     Initialize seed for random number generator SLARNV. */

    for (i = 1; i <= 4; ++i) {
	ISEED(i - 1) = 1;
/* L40: */
    }

/*     Initialize pointers. */

    indrv1 = 0;
    indrv2 = indrv1 + *n;
    indrv3 = indrv2 + *n;
    indrv4 = indrv3 + *n;
    indrv5 = indrv4 + *n;

/*     Compute eigenvectors of matrix blocks. */

    j1 = 1;
    i__1 = IBLOCK(*m);
    for (nblk = 1; nblk <= IBLOCK(*m); ++nblk) {

/*        Find starting and ending indices of block nblk. */

	if (nblk == 1) {
	    b1 = 1;
	} else {
	    b1 = ISPLIT(nblk - 1) + 1;
	}
	bn = ISPLIT(nblk);
	blksiz = bn - b1 + 1;
	if (blksiz == 1) {
	    goto L60;
	}
	gpind = b1;

/*        Compute reorthogonalization criterion and stopping criterion
. */

	onenrm = (r__1 = D(b1), dabs(r__1)) + (r__2 = E(b1), dabs(r__2));
/* Computing MAX */
	r__3 = onenrm, r__4 = (r__1 = D(bn), dabs(r__1)) + (r__2 = E(bn - 1), 
		dabs(r__2));
	onenrm = dmax(r__3,r__4);
	i__2 = bn - 1;
	for (i = b1 + 1; i <= bn-1; ++i) {
/* Computing MAX */
	    r__4 = onenrm, r__5 = (r__1 = D(i), dabs(r__1)) + (r__2 = E(i - 1)
		    , dabs(r__2)) + (r__3 = E(i), dabs(r__3));
	    onenrm = dmax(r__4,r__5);
/* L50: */
	}
	ortol = onenrm * .001f;

	stpcrt = sqrt(.1f / blksiz);

/*        Loop through eigenvalues of block nblk. */

L60:
	jblk = 0;
	i__2 = *m;
	for (j = j1; j <= *m; ++j) {
	    if (IBLOCK(j) != nblk) {
		j1 = j;
		goto L160;
	    }
	    ++jblk;
	    xj = W(j);

/*           Skip all the work if the block size is one. */

	    if (blksiz == 1) {
		WORK(indrv1 + 1) = 1.f;
		goto L120;
	    }

/*           If eigenvalues j and j-1 are too close, add a relativ
ely   
             small perturbation. */

	    if (jblk > 1) {
		eps1 = (r__1 = eps * xj, dabs(r__1));
		pertol = eps1 * 10.f;
		sep = xj - xjm;
		if (sep < pertol) {
		    xj = xjm + pertol;
		}
	    }

	    its = 0;
	    nrmchk = 0;

/*           Get random starting vector. */

	    slarnv_(&c__2, iseed, &blksiz, &WORK(indrv1 + 1));

/*           Copy the matrix T so it won't be destroyed in factori
zation. */

	    scopy_(&blksiz, &D(b1), &c__1, &WORK(indrv4 + 1), &c__1);
	    i__3 = blksiz - 1;
	    scopy_(&i__3, &E(b1), &c__1, &WORK(indrv2 + 2), &c__1);
	    i__3 = blksiz - 1;
	    scopy_(&i__3, &E(b1), &c__1, &WORK(indrv3 + 1), &c__1);

/*           Compute LU factors with partial pivoting  ( PT = LU )
 */

	    tol = 0.f;
	    slagtf_(&blksiz, &WORK(indrv4 + 1), &xj, &WORK(indrv2 + 2), &WORK(
		    indrv3 + 1), &tol, &WORK(indrv5 + 1), &IWORK(1), &iinfo);

/*           Update iteration count. */

L70:
	    ++its;
	    if (its > 5) {
		goto L100;
	    }

/*           Normalize and scale the righthand side vector Pb.   

   Computing MAX */
	    r__2 = eps, r__3 = (r__1 = WORK(indrv4 + blksiz), dabs(r__1));
	    scl = blksiz * onenrm * dmax(r__2,r__3) / sasum_(&blksiz, &WORK(
		    indrv1 + 1), &c__1);
	    sscal_(&blksiz, &scl, &WORK(indrv1 + 1), &c__1);

/*           Solve the system LU = Pb. */

	    slagts_(&c_n1, &blksiz, &WORK(indrv4 + 1), &WORK(indrv2 + 2), &
		    WORK(indrv3 + 1), &WORK(indrv5 + 1), &IWORK(1), &WORK(
		    indrv1 + 1), &tol, &iinfo);

/*           Reorthogonalize by modified Gram-Schmidt if eigenvalu
es are   
             close enough. */

	    if (jblk == 1) {
		goto L90;
	    }
	    if ((r__1 = xj - xjm, dabs(r__1)) > ortol) {
		gpind = j;
	    }
	    if (gpind != j) {
		i__3 = j - 1;
		for (i = gpind; i <= j-1; ++i) {
		    ctr = -(doublereal)sdot_(&blksiz, &WORK(indrv1 + 1), &
			    c__1, &Z(b1,i), &c__1);
		    saxpy_(&blksiz, &ctr, &Z(b1,i), &c__1, &WORK(
			    indrv1 + 1), &c__1);
/* L80: */
		}
	    }

/*           Check the infinity norm of the iterate. */

L90:
	    jmax = isamax_(&blksiz, &WORK(indrv1 + 1), &c__1);
	    nrm = (r__1 = WORK(indrv1 + jmax), dabs(r__1));

/*           Continue for additional iterations after norm reaches
   
             stopping criterion. */

	    if (nrm < stpcrt) {
		goto L70;
	    }
	    ++nrmchk;
	    if (nrmchk < 3) {
		goto L70;
	    }

	    goto L110;

/*           If stopping criterion was not satisfied, update info 
and   
             store eigenvector number in array ifail. */

L100:
	    ++(*info);
	    IFAIL(*info) = j;

/*           Accept iterate as jth eigenvector. */

L110:
	    scl = 1.f / snrm2_(&blksiz, &WORK(indrv1 + 1), &c__1);
	    jmax = isamax_(&blksiz, &WORK(indrv1 + 1), &c__1);
	    if (WORK(indrv1 + jmax) < 0.f) {
		scl = -(doublereal)scl;
	    }
	    sscal_(&blksiz, &scl, &WORK(indrv1 + 1), &c__1);
L120:
	    i__3 = *n;
	    for (i = 1; i <= *n; ++i) {
		Z(i,j) = 0.f;
/* L130: */
	    }
	    i__3 = blksiz;
	    for (i = 1; i <= blksiz; ++i) {
		Z(b1+i-1,j) = WORK(indrv1 + i);
/* L140: */
	    }

/*           Save the shift to check eigenvalue spacing at next   
             iteration. */

	    xjm = xj;

/* L150: */
	}
L160:
	;
    }

    return 0;

/*     End of SSTEIN */

} /* sstein_ */
Esempio n. 10
0
/* Subroutine */ int stbt03_(char *uplo, char *trans, char *diag, integer *n,
                             integer *kd, integer *nrhs, real *ab, integer *ldab, real *scale,
                             real *cnorm, real *tscal, real *x, integer *ldx, real *b, integer *
                             ldb, real *work, real *resid)
{
    /* System generated locals */
    integer ab_dim1, ab_offset, b_dim1, b_offset, x_dim1, x_offset, i__1;
    real r__1, r__2, r__3;

    /* Local variables */
    static integer j;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static real xscal;
    extern /* Subroutine */ int stbmv_(char *, char *, char *, integer *,
                                       integer *, real *, integer *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *);
    static real tnorm, xnorm;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *,
                                       real *, integer *), slabad_(real *, real *);
    static integer ix;
    extern doublereal slamch_(char *);
    static real bignum;
    extern integer isamax_(integer *, real *, integer *);
    static real smlnum, eps, err;


#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
#define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1]
#define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1]


    /*  -- LAPACK test routine (version 3.0) --
           Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
           Courant Institute, Argonne National Lab, and Rice University
           February 29, 1992


        Purpose
        =======

        STBT03 computes the residual for the solution to a scaled triangular
        system of equations  A*x = s*b  or  A'*x = s*b  when A is a
        triangular band matrix. Here A' is the transpose of A, s is a scalar,
        and x and b are N by NRHS matrices.  The test ratio is the maximum
        over the number of right hand sides of
           norm(s*b - op(A)*x) / ( norm(op(A)) * norm(x) * EPS ),
        where op(A) denotes A or A' and EPS is the machine epsilon.

        Arguments
        =========

        UPLO    (input) CHARACTER*1
                Specifies whether the matrix A is upper or lower triangular.
                = 'U':  Upper triangular
                = 'L':  Lower triangular

        TRANS   (input) CHARACTER*1
                Specifies the operation applied to A.
                = 'N':  A *x = b  (No transpose)
                = 'T':  A'*x = b  (Transpose)
                = 'C':  A'*x = b  (Conjugate transpose = Transpose)

        DIAG    (input) CHARACTER*1
                Specifies whether or not the matrix A is unit triangular.
                = 'N':  Non-unit triangular
                = 'U':  Unit triangular

        N       (input) INTEGER
                The order of the matrix A.  N >= 0.

        KD      (input) INTEGER
                The number of superdiagonals or subdiagonals of the
                triangular band matrix A.  KD >= 0.

        NRHS    (input) INTEGER
                The number of right hand sides, i.e., the number of columns
                of the matrices X and B.  NRHS >= 0.

        AB      (input) REAL array, dimension (LDAB,N)
                The upper or lower triangular band matrix A, stored in the
                first kd+1 rows of the array. The j-th column of A is stored
                in the j-th column of the array AB as follows:
                if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
                if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).

        LDAB    (input) INTEGER
                The leading dimension of the array AB.  LDAB >= KD+1.

        SCALE   (input) REAL
                The scaling factor s used in solving the triangular system.

        CNORM   (input) REAL array, dimension (N)
                The 1-norms of the columns of A, not counting the diagonal.

        TSCAL   (input) REAL
                The scaling factor used in computing the 1-norms in CNORM.
                CNORM actually contains the column norms of TSCAL*A.

        X       (input) REAL array, dimension (LDX,NRHS)
                The computed solution vectors for the system of linear
                equations.

        LDX     (input) INTEGER
                The leading dimension of the array X.  LDX >= max(1,N).

        B       (input) REAL array, dimension (LDB,NRHS)
                The right hand side vectors for the system of linear
                equations.

        LDB     (input) INTEGER
                The leading dimension of the array B.  LDB >= max(1,N).

        WORK    (workspace) REAL array, dimension (N)

        RESID   (output) REAL
                The maximum over the number of right hand sides of
                norm(op(A)*x - s*b) / ( norm(op(A)) * norm(x) * EPS ).

        =====================================================================


           Quick exit if N = 0

           Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;
    --cnorm;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --work;

    /* Function Body */
    if (*n <= 0 || *nrhs <= 0) {
        *resid = 0.f;
        return 0;
    }
    eps = slamch_("Epsilon");
    smlnum = slamch_("Safe minimum");
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);

    /*     Compute the norm of the triangular matrix A using the column
           norms already computed by SLATBS. */

    tnorm = 0.f;
    if (lsame_(diag, "N")) {
        if (lsame_(uplo, "U")) {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                /* Computing MAX */
                r__2 = tnorm, r__3 = *tscal * (r__1 = ab_ref(*kd + 1, j),
                                               dabs(r__1)) + cnorm[j];
                tnorm = dmax(r__2,r__3);
                /* L10: */
            }
        } else {
            i__1 = *n;
            for (j = 1; j <= i__1; ++j) {
                /* Computing MAX */
                r__2 = tnorm, r__3 = *tscal * (r__1 = ab_ref(1, j), dabs(r__1)
                                              ) + cnorm[j];
                tnorm = dmax(r__2,r__3);
                /* L20: */
            }
        }
    } else {
        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            /* Computing MAX */
            r__1 = tnorm, r__2 = *tscal + cnorm[j];
            tnorm = dmax(r__1,r__2);
            /* L30: */
        }
    }

    /*     Compute the maximum over the number of right hand sides of
              norm(op(A)*x - s*b) / ( norm(op(A)) * norm(x) * EPS ). */

    *resid = 0.f;
    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
        scopy_(n, &x_ref(1, j), &c__1, &work[1], &c__1);
        ix = isamax_(n, &work[1], &c__1);
        /* Computing MAX */
        r__2 = 1.f, r__3 = (r__1 = x_ref(ix, j), dabs(r__1));
        xnorm = dmax(r__2,r__3);
        xscal = 1.f / xnorm / (real) (*kd + 1);
        sscal_(n, &xscal, &work[1], &c__1);
        stbmv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &work[1], &
               c__1);
        r__1 = -(*scale) * xscal;
        saxpy_(n, &r__1, &b_ref(1, j), &c__1, &work[1], &c__1);
        ix = isamax_(n, &work[1], &c__1);
        err = *tscal * (r__1 = work[ix], dabs(r__1));
        ix = isamax_(n, &x_ref(1, j), &c__1);
        xnorm = (r__1 = x_ref(ix, j), dabs(r__1));
        if (err * smlnum <= xnorm) {
            if (xnorm > 0.f) {
                err /= xnorm;
            }
        } else {
            if (err > 0.f) {
                err = 1.f / eps;
            }
        }
        if (err * smlnum <= tnorm) {
            if (tnorm > 0.f) {
                err /= tnorm;
            }
        } else {
            if (err > 0.f) {
                err = 1.f / eps;
            }
        }
        *resid = dmax(*resid,err);
        /* L40: */
    }

    return 0;

    /*     End of STBT03 */

} /* stbt03_ */
Esempio n. 11
0
/* Subroutine */
int slatrs_(char *uplo, char *trans, char *diag, char * normin, integer *n, real *a, integer *lda, real *x, real *scale, real *cnorm, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    real r__1, r__2, r__3;
    /* Local variables */
    integer i__, j;
    real xj, rec, tjj;
    integer jinc;
    real xbnd;
    integer imax;
    real tmax, tjjs;
    extern real sdot_(integer *, real *, integer *, real *, integer *);
    real xmax, grow, sumj;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */
    int sscal_(integer *, real *, real *, integer *);
    real tscal, uscal;
    integer jlast;
    extern real sasum_(integer *, real *, integer *);
    logical upper;
    extern /* Subroutine */
    int saxpy_(integer *, real *, real *, integer *, real *, integer *), strsv_(char *, char *, char *, integer *, real *, integer *, real *, integer *);
    extern real slamch_(char *);
    extern /* Subroutine */
    int xerbla_(char *, integer *);
    real bignum;
    extern integer isamax_(integer *, real *, integer *);
    logical notran;
    integer jfirst;
    real smlnum;
    logical nounit;
    /* -- LAPACK auxiliary routine (version 3.4.2) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* September 2012 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Intrinsic Functions .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --x;
    --cnorm;
    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    notran = lsame_(trans, "N");
    nounit = lsame_(diag, "N");
    /* Test the input parameters. */
    if (! upper && ! lsame_(uplo, "L"))
    {
        *info = -1;
    }
    else if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, "C"))
    {
        *info = -2;
    }
    else if (! nounit && ! lsame_(diag, "U"))
    {
        *info = -3;
    }
    else if (! lsame_(normin, "Y") && ! lsame_(normin, "N"))
    {
        *info = -4;
    }
    else if (*n < 0)
    {
        *info = -5;
    }
    else if (*lda < max(1,*n))
    {
        *info = -7;
    }
    if (*info != 0)
    {
        i__1 = -(*info);
        xerbla_("SLATRS", &i__1);
        return 0;
    }
    /* Quick return if possible */
    if (*n == 0)
    {
        return 0;
    }
    /* Determine machine dependent parameters to control overflow. */
    smlnum = slamch_("Safe minimum") / slamch_("Precision");
    bignum = 1.f / smlnum;
    *scale = 1.f;
    if (lsame_(normin, "N"))
    {
        /* Compute the 1-norm of each column, not including the diagonal. */
        if (upper)
        {
            /* A is upper triangular. */
            i__1 = *n;
            for (j = 1;
                    j <= i__1;
                    ++j)
            {
                i__2 = j - 1;
                cnorm[j] = sasum_(&i__2, &a[j * a_dim1 + 1], &c__1);
                /* L10: */
            }
        }
        else
        {
            /* A is lower triangular. */
            i__1 = *n - 1;
            for (j = 1;
                    j <= i__1;
                    ++j)
            {
                i__2 = *n - j;
                cnorm[j] = sasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1);
                /* L20: */
            }
            cnorm[*n] = 0.f;
        }
    }
    /* Scale the column norms by TSCAL if the maximum element in CNORM is */
    /* greater than BIGNUM. */
    imax = isamax_(n, &cnorm[1], &c__1);
    tmax = cnorm[imax];
    if (tmax <= bignum)
    {
        tscal = 1.f;
    }
    else
    {
        tscal = 1.f / (smlnum * tmax);
        sscal_(n, &tscal, &cnorm[1], &c__1);
    }
    /* Compute a bound on the computed solution vector to see if the */
    /* Level 2 BLAS routine STRSV can be used. */
    j = isamax_(n, &x[1], &c__1);
    xmax = (r__1 = x[j], f2c_abs(r__1));
    xbnd = xmax;
    if (notran)
    {
        /* Compute the growth in A * x = b. */
        if (upper)
        {
            jfirst = *n;
            jlast = 1;
            jinc = -1;
        }
        else
        {
            jfirst = 1;
            jlast = *n;
            jinc = 1;
        }
        if (tscal != 1.f)
        {
            grow = 0.f;
            goto L50;
        }
        if (nounit)
        {
            /* A is non-unit triangular. */
            /* Compute GROW = 1/G(j) and XBND = 1/M(j). */
            /* Initially, G(0) = max{
            x(i), i=1,...,n}
            . */
            grow = 1.f / max(xbnd,smlnum);
            xbnd = grow;
            i__1 = jlast;
            i__2 = jinc;
            for (j = jfirst;
                    i__2 < 0 ? j >= i__1 : j <= i__1;
                    j += i__2)
            {
                /* Exit the loop if the growth factor is too small. */
                if (grow <= smlnum)
                {
                    goto L50;
                }
                /* M(j) = G(j-1) / f2c_abs(A(j,j)) */
                tjj = (r__1 = a[j + j * a_dim1], f2c_abs(r__1));
                /* Computing MIN */
                r__1 = xbnd;
                r__2 = min(1.f,tjj) * grow; // , expr subst
                xbnd = min(r__1,r__2);
                if (tjj + cnorm[j] >= smlnum)
                {
                    /* G(j) = G(j-1)*( 1 + CNORM(j) / f2c_abs(A(j,j)) ) */
                    grow *= tjj / (tjj + cnorm[j]);
                }
                else
                {
                    /* G(j) could overflow, set GROW to 0. */
                    grow = 0.f;
                }
                /* L30: */
            }
            grow = xbnd;
        }
        else
        {
            /* A is unit triangular. */
            /* Compute GROW = 1/G(j), where G(0) = max{
            x(i), i=1,...,n}
            . */
            /* Computing MIN */
            r__1 = 1.f;
            r__2 = 1.f / max(xbnd,smlnum); // , expr subst
            grow = min(r__1,r__2);
            i__2 = jlast;
            i__1 = jinc;
            for (j = jfirst;
                    i__1 < 0 ? j >= i__2 : j <= i__2;
                    j += i__1)
            {
                /* Exit the loop if the growth factor is too small. */
                if (grow <= smlnum)
                {
                    goto L50;
                }
                /* G(j) = G(j-1)*( 1 + CNORM(j) ) */
                grow *= 1.f / (cnorm[j] + 1.f);
                /* L40: */
            }
        }
L50:
        ;
    }
    else
    {
        /* Compute the growth in A**T * x = b. */
        if (upper)
        {
            jfirst = 1;
            jlast = *n;
            jinc = 1;
        }
        else
        {
            jfirst = *n;
            jlast = 1;
            jinc = -1;
        }
        if (tscal != 1.f)
        {
            grow = 0.f;
            goto L80;
        }
        if (nounit)
        {
            /* A is non-unit triangular. */
            /* Compute GROW = 1/G(j) and XBND = 1/M(j). */
            /* Initially, M(0) = max{
            x(i), i=1,...,n}
            . */
            grow = 1.f / max(xbnd,smlnum);
            xbnd = grow;
            i__1 = jlast;
            i__2 = jinc;
            for (j = jfirst;
                    i__2 < 0 ? j >= i__1 : j <= i__1;
                    j += i__2)
            {
                /* Exit the loop if the growth factor is too small. */
                if (grow <= smlnum)
                {
                    goto L80;
                }
                /* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */
                xj = cnorm[j] + 1.f;
                /* Computing MIN */
                r__1 = grow;
                r__2 = xbnd / xj; // , expr subst
                grow = min(r__1,r__2);
                /* M(j) = M(j-1)*( 1 + CNORM(j) ) / f2c_abs(A(j,j)) */
                tjj = (r__1 = a[j + j * a_dim1], f2c_abs(r__1));
                if (xj > tjj)
                {
                    xbnd *= tjj / xj;
                }
                /* L60: */
            }
            grow = min(grow,xbnd);
        }
        else
        {
            /* A is unit triangular. */
            /* Compute GROW = 1/G(j), where G(0) = max{
            x(i), i=1,...,n}
            . */
            /* Computing MIN */
            r__1 = 1.f;
            r__2 = 1.f / max(xbnd,smlnum); // , expr subst
            grow = min(r__1,r__2);
            i__2 = jlast;
            i__1 = jinc;
            for (j = jfirst;
                    i__1 < 0 ? j >= i__2 : j <= i__2;
                    j += i__1)
            {
                /* Exit the loop if the growth factor is too small. */
                if (grow <= smlnum)
                {
                    goto L80;
                }
                /* G(j) = ( 1 + CNORM(j) )*G(j-1) */
                xj = cnorm[j] + 1.f;
                grow /= xj;
                /* L70: */
            }
        }
L80:
        ;
    }
    if (grow * tscal > smlnum)
    {
        /* Use the Level 2 BLAS solve if the reciprocal of the bound on */
        /* elements of X is not too small. */
        strsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1);
    }
    else
    {
        /* Use a Level 1 BLAS solve, scaling intermediate results. */
        if (xmax > bignum)
        {
            /* Scale X so that its components are less than or equal to */
            /* BIGNUM in absolute value. */
            *scale = bignum / xmax;
            sscal_(n, scale, &x[1], &c__1);
            xmax = bignum;
        }
        if (notran)
        {
            /* Solve A * x = b */
            i__1 = jlast;
            i__2 = jinc;
            for (j = jfirst;
                    i__2 < 0 ? j >= i__1 : j <= i__1;
                    j += i__2)
            {
                /* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */
                xj = (r__1 = x[j], f2c_abs(r__1));
                if (nounit)
                {
                    tjjs = a[j + j * a_dim1] * tscal;
                }
                else
                {
                    tjjs = tscal;
                    if (tscal == 1.f)
                    {
                        goto L95;
                    }
                }
                tjj = f2c_abs(tjjs);
                if (tjj > smlnum)
                {
                    /* f2c_abs(A(j,j)) > SMLNUM: */
                    if (tjj < 1.f)
                    {
                        if (xj > tjj * bignum)
                        {
                            /* Scale x by 1/b(j). */
                            rec = 1.f / xj;
                            sscal_(n, &rec, &x[1], &c__1);
                            *scale *= rec;
                            xmax *= rec;
                        }
                    }
                    x[j] /= tjjs;
                    xj = (r__1 = x[j], f2c_abs(r__1));
                }
                else if (tjj > 0.f)
                {
                    /* 0 < f2c_abs(A(j,j)) <= SMLNUM: */
                    if (xj > tjj * bignum)
                    {
                        /* Scale x by (1/f2c_abs(x(j)))*f2c_abs(A(j,j))*BIGNUM */
                        /* to avoid overflow when dividing by A(j,j). */
                        rec = tjj * bignum / xj;
                        if (cnorm[j] > 1.f)
                        {
                            /* Scale by 1/CNORM(j) to avoid overflow when */
                            /* multiplying x(j) times column j. */
                            rec /= cnorm[j];
                        }
                        sscal_(n, &rec, &x[1], &c__1);
                        *scale *= rec;
                        xmax *= rec;
                    }
                    x[j] /= tjjs;
                    xj = (r__1 = x[j], f2c_abs(r__1));
                }
                else
                {
                    /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */
                    /* scale = 0, and compute a solution to A*x = 0. */
                    i__3 = *n;
                    for (i__ = 1;
                            i__ <= i__3;
                            ++i__)
                    {
                        x[i__] = 0.f;
                        /* L90: */
                    }
                    x[j] = 1.f;
                    xj = 1.f;
                    *scale = 0.f;
                    xmax = 0.f;
                }
L95: /* Scale x if necessary to avoid overflow when adding a */
                /* multiple of column j of A. */
                if (xj > 1.f)
                {
                    rec = 1.f / xj;
                    if (cnorm[j] > (bignum - xmax) * rec)
                    {
                        /* Scale x by 1/(2*f2c_abs(x(j))). */
                        rec *= .5f;
                        sscal_(n, &rec, &x[1], &c__1);
                        *scale *= rec;
                    }
                }
                else if (xj * cnorm[j] > bignum - xmax)
                {
                    /* Scale x by 1/2. */
                    sscal_(n, &c_b36, &x[1], &c__1);
                    *scale *= .5f;
                }
                if (upper)
                {
                    if (j > 1)
                    {
                        /* Compute the update */
                        /* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) */
                        i__3 = j - 1;
                        r__1 = -x[j] * tscal;
                        saxpy_(&i__3, &r__1, &a[j * a_dim1 + 1], &c__1, &x[1], &c__1);
                        i__3 = j - 1;
                        i__ = isamax_(&i__3, &x[1], &c__1);
                        xmax = (r__1 = x[i__], f2c_abs(r__1));
                    }
                }
                else
                {
                    if (j < *n)
                    {
                        /* Compute the update */
                        /* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) */
                        i__3 = *n - j;
                        r__1 = -x[j] * tscal;
                        saxpy_(&i__3, &r__1, &a[j + 1 + j * a_dim1], &c__1, & x[j + 1], &c__1);
                        i__3 = *n - j;
                        i__ = j + isamax_(&i__3, &x[j + 1], &c__1);
                        xmax = (r__1 = x[i__], f2c_abs(r__1));
                    }
                }
                /* L100: */
            }
        }
        else
        {
            /* Solve A**T * x = b */
            i__2 = jlast;
            i__1 = jinc;
            for (j = jfirst;
                    i__1 < 0 ? j >= i__2 : j <= i__2;
                    j += i__1)
            {
                /* Compute x(j) = b(j) - sum A(k,j)*x(k). */
                /* k<>j */
                xj = (r__1 = x[j], f2c_abs(r__1));
                uscal = tscal;
                rec = 1.f / max(xmax,1.f);
                if (cnorm[j] > (bignum - xj) * rec)
                {
                    /* If x(j) could overflow, scale x by 1/(2*XMAX). */
                    rec *= .5f;
                    if (nounit)
                    {
                        tjjs = a[j + j * a_dim1] * tscal;
                    }
                    else
                    {
                        tjjs = tscal;
                    }
                    tjj = f2c_abs(tjjs);
                    if (tjj > 1.f)
                    {
                        /* Divide by A(j,j) when scaling x if A(j,j) > 1. */
                        /* Computing MIN */
                        r__1 = 1.f;
                        r__2 = rec * tjj; // , expr subst
                        rec = min(r__1,r__2);
                        uscal /= tjjs;
                    }
                    if (rec < 1.f)
                    {
                        sscal_(n, &rec, &x[1], &c__1);
                        *scale *= rec;
                        xmax *= rec;
                    }
                }
                sumj = 0.f;
                if (uscal == 1.f)
                {
                    /* If the scaling needed for A in the dot product is 1, */
                    /* call SDOT to perform the dot product. */
                    if (upper)
                    {
                        i__3 = j - 1;
                        sumj = sdot_(&i__3, &a[j * a_dim1 + 1], &c__1, &x[1], &c__1);
                    }
                    else if (j < *n)
                    {
                        i__3 = *n - j;
                        sumj = sdot_(&i__3, &a[j + 1 + j * a_dim1], &c__1, &x[ j + 1], &c__1);
                    }
                }
                else
                {
                    /* Otherwise, use in-line code for the dot product. */
                    if (upper)
                    {
                        i__3 = j - 1;
                        for (i__ = 1;
                                i__ <= i__3;
                                ++i__)
                        {
                            sumj += a[i__ + j * a_dim1] * uscal * x[i__];
                            /* L110: */
                        }
                    }
                    else if (j < *n)
                    {
                        i__3 = *n;
                        for (i__ = j + 1;
                                i__ <= i__3;
                                ++i__)
                        {
                            sumj += a[i__ + j * a_dim1] * uscal * x[i__];
                            /* L120: */
                        }
                    }
                }
                if (uscal == tscal)
                {
                    /* Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) */
                    /* was not used to scale the dotproduct. */
                    x[j] -= sumj;
                    xj = (r__1 = x[j], f2c_abs(r__1));
                    if (nounit)
                    {
                        tjjs = a[j + j * a_dim1] * tscal;
                    }
                    else
                    {
                        tjjs = tscal;
                        if (tscal == 1.f)
                        {
                            goto L135;
                        }
                    }
                    /* Compute x(j) = x(j) / A(j,j), scaling if necessary. */
                    tjj = f2c_abs(tjjs);
                    if (tjj > smlnum)
                    {
                        /* f2c_abs(A(j,j)) > SMLNUM: */
                        if (tjj < 1.f)
                        {
                            if (xj > tjj * bignum)
                            {
                                /* Scale X by 1/f2c_abs(x(j)). */
                                rec = 1.f / xj;
                                sscal_(n, &rec, &x[1], &c__1);
                                *scale *= rec;
                                xmax *= rec;
                            }
                        }
                        x[j] /= tjjs;
                    }
                    else if (tjj > 0.f)
                    {
                        /* 0 < f2c_abs(A(j,j)) <= SMLNUM: */
                        if (xj > tjj * bignum)
                        {
                            /* Scale x by (1/f2c_abs(x(j)))*f2c_abs(A(j,j))*BIGNUM. */
                            rec = tjj * bignum / xj;
                            sscal_(n, &rec, &x[1], &c__1);
                            *scale *= rec;
                            xmax *= rec;
                        }
                        x[j] /= tjjs;
                    }
                    else
                    {
                        /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */
                        /* scale = 0, and compute a solution to A**T*x = 0. */
                        i__3 = *n;
                        for (i__ = 1;
                                i__ <= i__3;
                                ++i__)
                        {
                            x[i__] = 0.f;
                            /* L130: */
                        }
                        x[j] = 1.f;
                        *scale = 0.f;
                        xmax = 0.f;
                    }
L135:
                    ;
                }
                else
                {
                    /* Compute x(j) := x(j) / A(j,j) - sumj if the dot */
                    /* product has already been divided by 1/A(j,j). */
                    x[j] = x[j] / tjjs - sumj;
                }
                /* Computing MAX */
                r__2 = xmax;
                r__3 = (r__1 = x[j], f2c_abs(r__1)); // , expr subst
                xmax = max(r__2,r__3);
                /* L140: */
            }
        }
        *scale /= tscal;
    }
    /* Scale the column norms by 1/TSCAL for return. */
    if (tscal != 1.f)
    {
        r__1 = 1.f / tscal;
        sscal_(n, &r__1, &cnorm[1], &c__1);
    }
    return 0;
    /* End of SLATRS */
}
Esempio n. 12
0
void saxpy( int n, float alpha, float *x, int incx,  float *y, int incy)
{
    saxpy_(&n, &alpha, x, &incx, y, &incy);
}
Esempio n. 13
0
/* Subroutine */ int sptrfs_(integer *n, integer *nrhs, real *d__, real *e, 
	real *df, real *ef, real *b, integer *ldb, real *x, integer *ldx, 
	real *ferr, real *berr, real *work, integer *info)
{
    /* System generated locals */
    integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2;
    real r__1, r__2, r__3;

    /* Local variables */
    integer i__, j;
    real s, bi, cx, dx, ex;
    integer ix, nz;
    real eps, safe1, safe2;
    integer count;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *);
    extern doublereal slamch_(char *);
    real safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer isamax_(integer *, real *, integer *);
    real lstres;
    extern /* Subroutine */ int spttrs_(integer *, integer *, real *, real *, 
	    real *, integer *, integer *);


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SPTRFS improves the computed solution to a system of linear */
/*  equations when the coefficient matrix is symmetric positive definite */
/*  and tridiagonal, and provides error bounds and backward error */
/*  estimates for the solution. */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  NRHS    (input) INTEGER */
/*          The number of right hand sides, i.e., the number of columns */
/*          of the matrix B.  NRHS >= 0. */

/*  D       (input) REAL array, dimension (N) */
/*          The n diagonal elements of the tridiagonal matrix A. */

/*  E       (input) REAL array, dimension (N-1) */
/*          The (n-1) subdiagonal elements of the tridiagonal matrix A. */

/*  DF      (input) REAL array, dimension (N) */
/*          The n diagonal elements of the diagonal matrix D from the */
/*          factorization computed by SPTTRF. */

/*  EF      (input) REAL array, dimension (N-1) */
/*          The (n-1) subdiagonal elements of the unit bidiagonal factor */
/*          L from the factorization computed by SPTTRF. */

/*  B       (input) REAL array, dimension (LDB,NRHS) */
/*          The right hand side matrix B. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= max(1,N). */

/*  X       (input/output) REAL array, dimension (LDX,NRHS) */
/*          On entry, the solution matrix X, as computed by SPTTRS. */
/*          On exit, the improved solution matrix X. */

/*  LDX     (input) INTEGER */
/*          The leading dimension of the array X.  LDX >= max(1,N). */

/*  FERR    (output) REAL array, dimension (NRHS) */
/*          The forward error bound for each solution vector */
/*          X(j) (the j-th column of the solution matrix X). */
/*          If XTRUE is the true solution corresponding to X(j), FERR(j) */
/*          is an estimated upper bound for the magnitude of the largest */
/*          element in (X(j) - XTRUE) divided by the magnitude of the */
/*          largest element in X(j). */

/*  BERR    (output) REAL array, dimension (NRHS) */
/*          The componentwise relative backward error of each solution */
/*          vector X(j) (i.e., the smallest relative change in */
/*          any element of A or B that makes X(j) an exact solution). */

/*  WORK    (workspace) REAL array, dimension (2*N) */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  Internal Parameters */
/*  =================== */

/*  ITMAX is the maximum number of steps of iterative refinement. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --d__;
    --e;
    --df;
    --ef;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;

    /* Function Body */
    *info = 0;
    if (*n < 0) {
	*info = -1;
    } else if (*nrhs < 0) {
	*info = -2;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    } else if (*ldx < max(1,*n)) {
	*info = -10;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SPTRFS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
	i__1 = *nrhs;
	for (j = 1; j <= i__1; ++j) {
	    ferr[j] = 0.f;
	    berr[j] = 0.f;
/* L10: */
	}
	return 0;
    }

/*     NZ = maximum number of nonzero elements in each row of A, plus 1 */

    nz = 4;
    eps = slamch_("Epsilon");
    safmin = slamch_("Safe minimum");
    safe1 = nz * safmin;
    safe2 = safe1 / eps;

/*     Do for each right hand side */

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {

	count = 1;
	lstres = 3.f;
L20:

/*        Loop until stopping criterion is satisfied. */

/*        Compute residual R = B - A * X.  Also compute */
/*        abs(A)*abs(x) + abs(b) for use in the backward error bound. */

	if (*n == 1) {
	    bi = b[j * b_dim1 + 1];
	    dx = d__[1] * x[j * x_dim1 + 1];
	    work[*n + 1] = bi - dx;
	    work[1] = dabs(bi) + dabs(dx);
	} else {
	    bi = b[j * b_dim1 + 1];
	    dx = d__[1] * x[j * x_dim1 + 1];
	    ex = e[1] * x[j * x_dim1 + 2];
	    work[*n + 1] = bi - dx - ex;
	    work[1] = dabs(bi) + dabs(dx) + dabs(ex);
	    i__2 = *n - 1;
	    for (i__ = 2; i__ <= i__2; ++i__) {
		bi = b[i__ + j * b_dim1];
		cx = e[i__ - 1] * x[i__ - 1 + j * x_dim1];
		dx = d__[i__] * x[i__ + j * x_dim1];
		ex = e[i__] * x[i__ + 1 + j * x_dim1];
		work[*n + i__] = bi - cx - dx - ex;
		work[i__] = dabs(bi) + dabs(cx) + dabs(dx) + dabs(ex);
/* L30: */
	    }
	    bi = b[*n + j * b_dim1];
	    cx = e[*n - 1] * x[*n - 1 + j * x_dim1];
	    dx = d__[*n] * x[*n + j * x_dim1];
	    work[*n + *n] = bi - cx - dx;
	    work[*n] = dabs(bi) + dabs(cx) + dabs(dx);
	}

/*        Compute componentwise relative backward error from formula */

/*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */

/*        where abs(Z) is the componentwise absolute value of the matrix */
/*        or vector Z.  If the i-th component of the denominator is less */
/*        than SAFE2, then SAFE1 is added to the i-th components of the */
/*        numerator and denominator before dividing. */

	s = 0.f;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    if (work[i__] > safe2) {
/* Computing MAX */
		r__2 = s, r__3 = (r__1 = work[*n + i__], dabs(r__1)) / work[
			i__];
		s = dmax(r__2,r__3);
	    } else {
/* Computing MAX */
		r__2 = s, r__3 = ((r__1 = work[*n + i__], dabs(r__1)) + safe1)
			 / (work[i__] + safe1);
		s = dmax(r__2,r__3);
	    }
/* L40: */
	}
	berr[j] = s;

/*        Test stopping criterion. Continue iterating if */
/*           1) The residual BERR(J) is larger than machine epsilon, and */
/*           2) BERR(J) decreased by at least a factor of 2 during the */
/*              last iteration, and */
/*           3) At most ITMAX iterations tried. */

	if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) {

/*           Update solution and try again. */

	    spttrs_(n, &c__1, &df[1], &ef[1], &work[*n + 1], n, info);
	    saxpy_(n, &c_b11, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1)
		    ;
	    lstres = berr[j];
	    ++count;
	    goto L20;
	}

/*        Bound error from formula */

/*        norm(X - XTRUE) / norm(X) .le. FERR = */
/*        norm( abs(inv(A))* */
/*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */

/*        where */
/*          norm(Z) is the magnitude of the largest component of Z */
/*          inv(A) is the inverse of A */
/*          abs(Z) is the componentwise absolute value of the matrix or */
/*             vector Z */
/*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
/*          EPS is machine epsilon */

/*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */
/*        is incremented by SAFE1 if the i-th component of */
/*        abs(A)*abs(X) + abs(B) is less than SAFE2. */

	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    if (work[i__] > safe2) {
		work[i__] = (r__1 = work[*n + i__], dabs(r__1)) + nz * eps * 
			work[i__];
	    } else {
		work[i__] = (r__1 = work[*n + i__], dabs(r__1)) + nz * eps * 
			work[i__] + safe1;
	    }
/* L50: */
	}
	ix = isamax_(n, &work[1], &c__1);
	ferr[j] = work[ix];

/*        Estimate the norm of inv(A). */

/*        Solve M(A) * x = e, where M(A) = (m(i,j)) is given by */

/*           m(i,j) =  abs(A(i,j)), i = j, */
/*           m(i,j) = -abs(A(i,j)), i .ne. j, */

/*        and e = [ 1, 1, ..., 1 ]'.  Note M(A) = M(L)*D*M(L)'. */

/*        Solve M(L) * x = e. */

	work[1] = 1.f;
	i__2 = *n;
	for (i__ = 2; i__ <= i__2; ++i__) {
	    work[i__] = work[i__ - 1] * (r__1 = ef[i__ - 1], dabs(r__1)) + 
		    1.f;
/* L60: */
	}

/*        Solve D * M(L)' * x = b. */

	work[*n] /= df[*n];
	for (i__ = *n - 1; i__ >= 1; --i__) {
	    work[i__] = work[i__] / df[i__] + work[i__ + 1] * (r__1 = ef[i__],
		     dabs(r__1));
/* L70: */
	}

/*        Compute norm(inv(A)) = max(x(i)), 1<=i<=n. */

	ix = isamax_(n, &work[1], &c__1);
	ferr[j] *= (r__1 = work[ix], dabs(r__1));

/*        Normalize error. */

	lstres = 0.f;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    r__2 = lstres, r__3 = (r__1 = x[i__ + j * x_dim1], dabs(r__1));
	    lstres = dmax(r__2,r__3);
/* L80: */
	}
	if (lstres != 0.f) {
	    ferr[j] /= lstres;
	}

/* L90: */
    }

    return 0;

/*     End of SPTRFS */

} /* sptrfs_ */
Esempio n. 14
0
/* Subroutine */ int sgtt01_(integer *n, real *dl, real *d__, real *du, real *
	dlf, real *df, real *duf, real *du2, integer *ipiv, real *work, 
	integer *ldwork, real *rwork, real *resid)
{
    /* System generated locals */
    integer work_dim1, work_offset, i__1, i__2;

    /* Local variables */
    integer i__, j;
    real li;
    integer ip;
    real eps, anorm;
    integer lastj;


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SGTT01 reconstructs a tridiagonal matrix A from its LU factorization */
/*  and computes the residual */
/*     norm(L*U - A) / ( norm(A) * EPS ), */
/*  where EPS is the machine epsilon. */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGTER */
/*          The order of the matrix A.  N >= 0. */

/*  DL      (input) REAL array, dimension (N-1) */
/*          The (n-1) sub-diagonal elements of A. */

/*  D       (input) REAL array, dimension (N) */
/*          The diagonal elements of A. */

/*  DU      (input) REAL array, dimension (N-1) */
/*          The (n-1) super-diagonal elements of A. */

/*  DLF     (input) REAL array, dimension (N-1) */
/*          The (n-1) multipliers that define the matrix L from the */
/*          LU factorization of A. */

/*  DF      (input) REAL array, dimension (N) */
/*          The n diagonal elements of the upper triangular matrix U from */
/*          the LU factorization of A. */

/*  DUF     (input) REAL array, dimension (N-1) */
/*          The (n-1) elements of the first super-diagonal of U. */

/*  DU2F    (input) REAL array, dimension (N-2) */
/*          The (n-2) elements of the second super-diagonal of U. */

/*  IPIV    (input) INTEGER array, dimension (N) */
/*          The pivot indices; for 1 <= i <= n, row i of the matrix was */
/*          interchanged with row IPIV(i).  IPIV(i) will always be either */
/*          i or i+1; IPIV(i) = i indicates a row interchange was not */
/*          required. */

/*  WORK    (workspace) REAL array, dimension (LDWORK,N) */

/*  LDWORK  (input) INTEGER */
/*          The leading dimension of the array WORK.  LDWORK >= max(1,N). */

/*  RWORK   (workspace) REAL array, dimension (N) */

/*  RESID   (output) REAL */
/*          The scaled residual:  norm(L*U - A) / (norm(A) * EPS) */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Quick return if possible */

    /* Parameter adjustments */
    --dl;
    --d__;
    --du;
    --dlf;
    --df;
    --duf;
    --du2;
    --ipiv;
    work_dim1 = *ldwork;
    work_offset = 1 + work_dim1;
    work -= work_offset;
    --rwork;

    /* Function Body */
    if (*n <= 0) {
	*resid = 0.f;
	return 0;
    }

    eps = slamch_("Epsilon");

/*     Copy the matrix U to WORK. */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    work[i__ + j * work_dim1] = 0.f;
/* L10: */
	}
/* L20: */
    }
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if (i__ == 1) {
	    work[i__ + i__ * work_dim1] = df[i__];
	    if (*n >= 2) {
		work[i__ + (i__ + 1) * work_dim1] = duf[i__];
	    }
	    if (*n >= 3) {
		work[i__ + (i__ + 2) * work_dim1] = du2[i__];
	    }
	} else if (i__ == *n) {
	    work[i__ + i__ * work_dim1] = df[i__];
	} else {
	    work[i__ + i__ * work_dim1] = df[i__];
	    work[i__ + (i__ + 1) * work_dim1] = duf[i__];
	    if (i__ < *n - 1) {
		work[i__ + (i__ + 2) * work_dim1] = du2[i__];
	    }
	}
/* L30: */
    }

/*     Multiply on the left by L. */

    lastj = *n;
    for (i__ = *n - 1; i__ >= 1; --i__) {
	li = dlf[i__];
	i__1 = lastj - i__ + 1;
	saxpy_(&i__1, &li, &work[i__ + i__ * work_dim1], ldwork, &work[i__ + 
		1 + i__ * work_dim1], ldwork);
	ip = ipiv[i__];
	if (ip == i__) {
/* Computing MIN */
	    i__1 = i__ + 2;
	    lastj = min(i__1,*n);
	} else {
	    i__1 = lastj - i__ + 1;
	    sswap_(&i__1, &work[i__ + i__ * work_dim1], ldwork, &work[i__ + 1 
		    + i__ * work_dim1], ldwork);
	}
/* L40: */
    }

/*     Subtract the matrix A. */

    work[work_dim1 + 1] -= d__[1];
    if (*n > 1) {
	work[(work_dim1 << 1) + 1] -= du[1];
	work[*n + (*n - 1) * work_dim1] -= dl[*n - 1];
	work[*n + *n * work_dim1] -= d__[*n];
	i__1 = *n - 1;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    work[i__ + (i__ - 1) * work_dim1] -= dl[i__ - 1];
	    work[i__ + i__ * work_dim1] -= d__[i__];
	    work[i__ + (i__ + 1) * work_dim1] -= du[i__];
/* L50: */
	}
    }

/*     Compute the 1-norm of the tridiagonal matrix A. */

    anorm = slangt_("1", n, &dl[1], &d__[1], &du[1]);

/*     Compute the 1-norm of WORK, which is only guaranteed to be */
/*     upper Hessenberg. */

    *resid = slanhs_("1", n, &work[work_offset], ldwork, &rwork[1])
	    ;

/*     Compute norm(L*U - A) / (norm(A) * EPS) */

    if (anorm <= 0.f) {
	if (*resid != 0.f) {
	    *resid = 1.f / eps;
	}
    } else {
	*resid = *resid / anorm / eps;
    }

    return 0;

/*     End of SGTT01 */

} /* sgtt01_ */
Esempio n. 15
0
/* Subroutine */ int ssapps_(integer *n, integer *kev, integer *np, real *
                             shift, real *v, integer *ldv, real *h__, integer *ldh, real *resid,
                             real *q, integer *ldq, real *workd)
{
    /* Initialized data */

    static logical first = TRUE_;

    /* System generated locals */
    integer h_dim1, h_offset, q_dim1, q_offset, v_dim1, v_offset, i__1, i__2,
            i__3, i__4;
    real r__1, r__2;

    /* Local variables */
    static real c__, f, g;
    static integer i__, j;
    static real r__, s, a1, a2, a3, a4, t0, t1;
    static integer jj;
    static real big;
    static integer iend, itop;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
           sgemv_(char *, integer *, integer *, real *, real *, integer *,
                  real *, integer *, real *, real *, integer *, ftnlen), scopy_(
               integer *, real *, integer *, real *, integer *), saxpy_(integer *
                       , real *, real *, integer *, real *, integer *), ivout_(integer *,
                               integer *, integer *, integer *, char *, ftnlen), svout_(integer
                                       *, integer *, real *, integer *, char *, ftnlen);
    extern doublereal slamch_(char *, ftnlen);
    extern /* Subroutine */ int arscnd_(real *);
    static real epsmch;
    static integer istart, kplusp, msglvl;
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *,
                                        integer *, real *, integer *, ftnlen), slartg_(real *, real *,
                                                real *, real *, real *), slaset_(char *, integer *, integer *,
                                                        real *, real *, real *, integer *, ftnlen);


    /*     %----------------------------------------------------% */
    /*     | Include files for debugging and timing information | */
    /*     %----------------------------------------------------% */


    /* \SCCS Information: @(#) */
    /* FILE: debug.h   SID: 2.3   DATE OF SID: 11/16/95   RELEASE: 2 */

    /*     %---------------------------------% */
    /*     | See debug.doc for documentation | */
    /*     %---------------------------------% */

    /*     %------------------% */
    /*     | Scalar Arguments | */
    /*     %------------------% */

    /*     %--------------------------------% */
    /*     | See stat.doc for documentation | */
    /*     %--------------------------------% */

    /* \SCCS Information: @(#) */
    /* FILE: stat.h   SID: 2.2   DATE OF SID: 11/16/95   RELEASE: 2 */



    /*     %-----------------% */
    /*     | Array Arguments | */
    /*     %-----------------% */


    /*     %------------% */
    /*     | Parameters | */
    /*     %------------% */


    /*     %---------------% */
    /*     | Local Scalars | */
    /*     %---------------% */



    /*     %----------------------% */
    /*     | External Subroutines | */
    /*     %----------------------% */


    /*     %--------------------% */
    /*     | External Functions | */
    /*     %--------------------% */


    /*     %----------------------% */
    /*     | Intrinsics Functions | */
    /*     %----------------------% */


    /*     %----------------% */
    /*     | Data statments | */
    /*     %----------------% */

    /* Parameter adjustments */
    --workd;
    --resid;
    --shift;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    h_dim1 = *ldh;
    h_offset = 1 + h_dim1;
    h__ -= h_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;

    /* Function Body */

    /*     %-----------------------% */
    /*     | Executable Statements | */
    /*     %-----------------------% */

    if (first) {
        epsmch = slamch_("Epsilon-Machine", (ftnlen)15);
        first = FALSE_;
    }
    itop = 1;

    /*     %-------------------------------% */
    /*     | Initialize timing statistics  | */
    /*     | & message level for debugging | */
    /*     %-------------------------------% */

    arscnd_(&t0);
    msglvl = debug_1.msapps;

    kplusp = *kev + *np;

    /*     %----------------------------------------------% */
    /*     | Initialize Q to the identity matrix of order | */
    /*     | kplusp used to accumulate the rotations.     | */
    /*     %----------------------------------------------% */

    slaset_("All", &kplusp, &kplusp, &c_b4, &c_b5, &q[q_offset], ldq, (ftnlen)
            3);

    /*     %----------------------------------------------% */
    /*     | Quick return if there are no shifts to apply | */
    /*     %----------------------------------------------% */

    if (*np == 0) {
        goto L9000;
    }

    /*     %----------------------------------------------------------% */
    /*     | Apply the np shifts implicitly. Apply each shift to the  | */
    /*     | whole matrix and not just to the submatrix from which it | */
    /*     | comes.                                                   | */
    /*     %----------------------------------------------------------% */

    i__1 = *np;
    for (jj = 1; jj <= i__1; ++jj) {

        istart = itop;

        /*        %----------------------------------------------------------% */
        /*        | Check for splitting and deflation. Currently we consider | */
        /*        | an off-diagonal element h(i+1,1) negligible if           | */
        /*        |         h(i+1,1) .le. epsmch*( |h(i,2)| + |h(i+1,2)| )   | */
        /*        | for i=1:KEV+NP-1.                                        | */
        /*        | If above condition tests true then we set h(i+1,1) = 0.  | */
        /*        | Note that h(1:KEV+NP,1) are assumed to be non negative.  | */
        /*        %----------------------------------------------------------% */

L20:

        /*        %------------------------------------------------% */
        /*        | The following loop exits early if we encounter | */
        /*        | a negligible off diagonal element.             | */
        /*        %------------------------------------------------% */

        i__2 = kplusp - 1;
        for (i__ = istart; i__ <= i__2; ++i__) {
            big = (r__1 = h__[i__ + (h_dim1 << 1)], dabs(r__1)) + (r__2 = h__[
                        i__ + 1 + (h_dim1 << 1)], dabs(r__2));
            if (h__[i__ + 1 + h_dim1] <= epsmch * big) {
                if (msglvl > 0) {
                    ivout_(&debug_1.logfil, &c__1, &i__, &debug_1.ndigit,
                           "_sapps: deflation at row/column no.", (ftnlen)35)
                    ;
                    ivout_(&debug_1.logfil, &c__1, &jj, &debug_1.ndigit,
                           "_sapps: occured before shift number.", (ftnlen)
                           36);
                    svout_(&debug_1.logfil, &c__1, &h__[i__ + 1 + h_dim1], &
                           debug_1.ndigit, "_sapps: the corresponding off d"
                           "iagonal element", (ftnlen)46);
                }
                h__[i__ + 1 + h_dim1] = 0.f;
                iend = i__;
                goto L40;
            }
            /* L30: */
        }
        iend = kplusp;
L40:

        if (istart < iend) {

            /*           %--------------------------------------------------------% */
            /*           | Construct the plane rotation G'(istart,istart+1,theta) | */
            /*           | that attempts to drive h(istart+1,1) to zero.          | */
            /*           %--------------------------------------------------------% */

            f = h__[istart + (h_dim1 << 1)] - shift[jj];
            g = h__[istart + 1 + h_dim1];
            slartg_(&f, &g, &c__, &s, &r__);

            /*            %-------------------------------------------------------% */
            /*            | Apply rotation to the left and right of H;            | */
            /*            | H <- G' * H * G,  where G = G(istart,istart+1,theta). | */
            /*            | This will create a "bulge".                           | */
            /*            %-------------------------------------------------------% */

            a1 = c__ * h__[istart + (h_dim1 << 1)] + s * h__[istart + 1 +
                    h_dim1];
            a2 = c__ * h__[istart + 1 + h_dim1] + s * h__[istart + 1 + (
                        h_dim1 << 1)];
            a4 = c__ * h__[istart + 1 + (h_dim1 << 1)] - s * h__[istart + 1 +
                    h_dim1];
            a3 = c__ * h__[istart + 1 + h_dim1] - s * h__[istart + (h_dim1 <<
                    1)];
            h__[istart + (h_dim1 << 1)] = c__ * a1 + s * a2;
            h__[istart + 1 + (h_dim1 << 1)] = c__ * a4 - s * a3;
            h__[istart + 1 + h_dim1] = c__ * a3 + s * a4;

            /*            %----------------------------------------------------% */
            /*            | Accumulate the rotation in the matrix Q;  Q <- Q*G | */
            /*            %----------------------------------------------------% */

            /* Computing MIN */
            i__3 = istart + jj;
            i__2 = min(i__3,kplusp);
            for (j = 1; j <= i__2; ++j) {
                a1 = c__ * q[j + istart * q_dim1] + s * q[j + (istart + 1) *
                        q_dim1];
                q[j + (istart + 1) * q_dim1] = -s * q[j + istart * q_dim1] +
                                               c__ * q[j + (istart + 1) * q_dim1];
                q[j + istart * q_dim1] = a1;
                /* L60: */
            }


            /*            %----------------------------------------------% */
            /*            | The following loop chases the bulge created. | */
            /*            | Note that the previous rotation may also be  | */
            /*            | done within the following loop. But it is    | */
            /*            | kept separate to make the distinction among  | */
            /*            | the bulge chasing sweeps and the first plane | */
            /*            | rotation designed to drive h(istart+1,1) to  | */
            /*            | zero.                                        | */
            /*            %----------------------------------------------% */

            i__2 = iend - 1;
            for (i__ = istart + 1; i__ <= i__2; ++i__) {

                /*               %----------------------------------------------% */
                /*               | Construct the plane rotation G'(i,i+1,theta) | */
                /*               | that zeros the i-th bulge that was created   | */
                /*               | by G(i-1,i,theta). g represents the bulge.   | */
                /*               %----------------------------------------------% */

                f = h__[i__ + h_dim1];
                g = s * h__[i__ + 1 + h_dim1];

                /*               %----------------------------------% */
                /*               | Final update with G(i-1,i,theta) | */
                /*               %----------------------------------% */

                h__[i__ + 1 + h_dim1] = c__ * h__[i__ + 1 + h_dim1];
                slartg_(&f, &g, &c__, &s, &r__);

                /*               %-------------------------------------------% */
                /*               | The following ensures that h(1:iend-1,1), | */
                /*               | the first iend-2 off diagonal of elements | */
                /*               | H, remain non negative.                   | */
                /*               %-------------------------------------------% */

                if (r__ < 0.f) {
                    r__ = -r__;
                    c__ = -c__;
                    s = -s;
                }

                /*               %--------------------------------------------% */
                /*               | Apply rotation to the left and right of H; | */
                /*               | H <- G * H * G',  where G = G(i,i+1,theta) | */
                /*               %--------------------------------------------% */

                h__[i__ + h_dim1] = r__;

                a1 = c__ * h__[i__ + (h_dim1 << 1)] + s * h__[i__ + 1 +
                        h_dim1];
                a2 = c__ * h__[i__ + 1 + h_dim1] + s * h__[i__ + 1 + (h_dim1
                        << 1)];
                a3 = c__ * h__[i__ + 1 + h_dim1] - s * h__[i__ + (h_dim1 << 1)
                                                          ];
                a4 = c__ * h__[i__ + 1 + (h_dim1 << 1)] - s * h__[i__ + 1 +
                        h_dim1];

                h__[i__ + (h_dim1 << 1)] = c__ * a1 + s * a2;
                h__[i__ + 1 + (h_dim1 << 1)] = c__ * a4 - s * a3;
                h__[i__ + 1 + h_dim1] = c__ * a3 + s * a4;

                /*               %----------------------------------------------------% */
                /*               | Accumulate the rotation in the matrix Q;  Q <- Q*G | */
                /*               %----------------------------------------------------% */

                /* Computing MIN */
                i__4 = i__ + jj;
                i__3 = min(i__4,kplusp);
                for (j = 1; j <= i__3; ++j) {
                    a1 = c__ * q[j + i__ * q_dim1] + s * q[j + (i__ + 1) *
                                                           q_dim1];
                    q[j + (i__ + 1) * q_dim1] = -s * q[j + i__ * q_dim1] +
                                                c__ * q[j + (i__ + 1) * q_dim1];
                    q[j + i__ * q_dim1] = a1;
                    /* L50: */
                }

                /* L70: */
            }

        }

        /*        %--------------------------% */
        /*        | Update the block pointer | */
        /*        %--------------------------% */

        istart = iend + 1;

        /*        %------------------------------------------% */
        /*        | Make sure that h(iend,1) is non-negative | */
        /*        | If not then set h(iend,1) <-- -h(iend,1) | */
        /*        | and negate the last column of Q.         | */
        /*        | We have effectively carried out a        | */
        /*        | similarity on transformation H           | */
        /*        %------------------------------------------% */

        if (h__[iend + h_dim1] < 0.f) {
            h__[iend + h_dim1] = -h__[iend + h_dim1];
            sscal_(&kplusp, &c_b20, &q[iend * q_dim1 + 1], &c__1);
        }

        /*        %--------------------------------------------------------% */
        /*        | Apply the same shift to the next block if there is any | */
        /*        %--------------------------------------------------------% */

        if (iend < kplusp) {
            goto L20;
        }

        /*        %-----------------------------------------------------% */
        /*        | Check if we can increase the the start of the block | */
        /*        %-----------------------------------------------------% */

        i__2 = kplusp - 1;
        for (i__ = itop; i__ <= i__2; ++i__) {
            if (h__[i__ + 1 + h_dim1] > 0.f) {
                goto L90;
            }
            ++itop;
            /* L80: */
        }

        /*        %-----------------------------------% */
        /*        | Finished applying the jj-th shift | */
        /*        %-----------------------------------% */

L90:
        ;
    }

    /*     %------------------------------------------% */
    /*     | All shifts have been applied. Check for  | */
    /*     | more possible deflation that might occur | */
    /*     | after the last shift is applied.         | */
    /*     %------------------------------------------% */

    i__1 = kplusp - 1;
    for (i__ = itop; i__ <= i__1; ++i__) {
        big = (r__1 = h__[i__ + (h_dim1 << 1)], dabs(r__1)) + (r__2 = h__[i__
                + 1 + (h_dim1 << 1)], dabs(r__2));
        if (h__[i__ + 1 + h_dim1] <= epsmch * big) {
            if (msglvl > 0) {
                ivout_(&debug_1.logfil, &c__1, &i__, &debug_1.ndigit, "_sapp"
                       "s: deflation at row/column no.", (ftnlen)35);
                svout_(&debug_1.logfil, &c__1, &h__[i__ + 1 + h_dim1], &
                       debug_1.ndigit, "_sapps: the corresponding off diago"
                       "nal element", (ftnlen)46);
            }
            h__[i__ + 1 + h_dim1] = 0.f;
        }
        /* L100: */
    }

    /*     %-------------------------------------------------% */
    /*     | Compute the (kev+1)-st column of (V*Q) and      | */
    /*     | temporarily store the result in WORKD(N+1:2*N). | */
    /*     | This is not necessary if h(kev+1,1) = 0.         | */
    /*     %-------------------------------------------------% */

    if (h__[*kev + 1 + h_dim1] > 0.f) {
        sgemv_("N", n, &kplusp, &c_b5, &v[v_offset], ldv, &q[(*kev + 1) *
                q_dim1 + 1], &c__1, &c_b4, &workd[*n + 1], &c__1, (ftnlen)1);
    }

    /*     %-------------------------------------------------------% */
    /*     | Compute column 1 to kev of (V*Q) in backward order    | */
    /*     | taking advantage that Q is an upper triangular matrix | */
    /*     | with lower bandwidth np.                              | */
    /*     | Place results in v(:,kplusp-kev:kplusp) temporarily.  | */
    /*     %-------------------------------------------------------% */

    i__1 = *kev;
    for (i__ = 1; i__ <= i__1; ++i__) {
        i__2 = kplusp - i__ + 1;
        sgemv_("N", n, &i__2, &c_b5, &v[v_offset], ldv, &q[(*kev - i__ + 1) *
                q_dim1 + 1], &c__1, &c_b4, &workd[1], &c__1, (ftnlen)1);
        scopy_(n, &workd[1], &c__1, &v[(kplusp - i__ + 1) * v_dim1 + 1], &
               c__1);
        /* L130: */
    }

    /*     %-------------------------------------------------% */
    /*     |  Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). | */
    /*     %-------------------------------------------------% */

    slacpy_("All", n, kev, &v[(*np + 1) * v_dim1 + 1], ldv, &v[v_offset], ldv,
            (ftnlen)3);

    /*     %--------------------------------------------% */
    /*     | Copy the (kev+1)-st column of (V*Q) in the | */
    /*     | appropriate place if h(kev+1,1) .ne. zero. | */
    /*     %--------------------------------------------% */

    if (h__[*kev + 1 + h_dim1] > 0.f) {
        scopy_(n, &workd[*n + 1], &c__1, &v[(*kev + 1) * v_dim1 + 1], &c__1);
    }

    /*     %-------------------------------------% */
    /*     | Update the residual vector:         | */
    /*     |    r <- sigmak*r + betak*v(:,kev+1) | */
    /*     | where                               | */
    /*     |    sigmak = (e_{kev+p}'*Q)*e_{kev}  | */
    /*     |    betak = e_{kev+1}'*H*e_{kev}     | */
    /*     %-------------------------------------% */

    sscal_(n, &q[kplusp + *kev * q_dim1], &resid[1], &c__1);
    if (h__[*kev + 1 + h_dim1] > 0.f) {
        saxpy_(n, &h__[*kev + 1 + h_dim1], &v[(*kev + 1) * v_dim1 + 1], &c__1,
               &resid[1], &c__1);
    }

    if (msglvl > 1) {
        svout_(&debug_1.logfil, &c__1, &q[kplusp + *kev * q_dim1], &
               debug_1.ndigit, "_sapps: sigmak of the updated residual vect"
               "or", (ftnlen)45);
        svout_(&debug_1.logfil, &c__1, &h__[*kev + 1 + h_dim1], &
               debug_1.ndigit, "_sapps: betak of the updated residual vector"
               , (ftnlen)44);
        svout_(&debug_1.logfil, kev, &h__[(h_dim1 << 1) + 1], &debug_1.ndigit,
               "_sapps: updated main diagonal of H for next iteration", (
                   ftnlen)53);
        if (*kev > 1) {
            i__1 = *kev - 1;
            svout_(&debug_1.logfil, &i__1, &h__[h_dim1 + 2], &debug_1.ndigit,
                   "_sapps: updated sub diagonal of H for next iteration", (
                       ftnlen)52);
        }
    }

    arscnd_(&t1);
    timing_1.tsapps += t1 - t0;

L9000:
    return 0;

    /*     %---------------% */
    /*     | End of ssapps | */
    /*     %---------------% */

} /* ssapps_ */
Esempio n. 16
0
/* Subroutine */ int sgerfs_(char *trans, integer *n, integer *nrhs, real *a, 
	integer *lda, real *af, integer *ldaf, integer *ipiv, real *b, 
	integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *
	work, integer *iwork, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SGERFS improves the computed solution to a system of linear   
    equations and provides error bounds and backward error estimates for   
    the solution.   

    Arguments   
    =========   

    TRANS   (input) CHARACTER*1   
            Specifies the form of the system of equations:   
            = 'N':  A * X = B     (No transpose)   
            = 'T':  A**T * X = B  (Transpose)   
            = 'C':  A**H * X = B  (Conjugate transpose = Transpose)   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrices B and X.  NRHS >= 0.   

    A       (input) REAL array, dimension (LDA,N)   
            The original N-by-N matrix A.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    AF      (input) REAL array, dimension (LDAF,N)   
            The factors L and U from the factorization A = P*L*U   
            as computed by SGETRF.   

    LDAF    (input) INTEGER   
            The leading dimension of the array AF.  LDAF >= max(1,N).   

    IPIV    (input) INTEGER array, dimension (N)   
            The pivot indices from SGETRF; for 1<=i<=N, row i of the   
            matrix was interchanged with row IPIV(i).   

    B       (input) REAL array, dimension (LDB,NRHS)   
            The right hand side matrix B.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    X       (input/output) REAL array, dimension (LDX,NRHS)   
            On entry, the solution matrix X, as computed by SGETRS.   
            On exit, the improved solution matrix X.   

    LDX     (input) INTEGER   
            The leading dimension of the array X.  LDX >= max(1,N).   

    FERR    (output) REAL array, dimension (NRHS)   
            The estimated forward error bound for each solution vector   
            X(j) (the j-th column of the solution matrix X).   
            If XTRUE is the true solution corresponding to X(j), FERR(j)   
            is an estimated upper bound for the magnitude of the largest   
            element in (X(j) - XTRUE) divided by the magnitude of the   
            largest element in X(j).  The estimate is as reliable as   
            the estimate for RCOND, and is almost always a slight   
            overestimate of the true error.   

    BERR    (output) REAL array, dimension (NRHS)   
            The componentwise relative backward error of each solution   
            vector X(j) (i.e., the smallest relative change in   
            any element of A or B that makes X(j) an exact solution).   

    WORK    (workspace) REAL array, dimension (3*N)   

    IWORK   (workspace) INTEGER array, dimension (N)   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    Internal Parameters   
    ===================   

    ITMAX is the maximum number of steps of iterative refinement.   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static real c_b15 = -1.f;
    static real c_b17 = 1.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
	    x_offset, i__1, i__2, i__3;
    real r__1, r__2, r__3;
    /* Local variables */
    static integer kase;
    static real safe1, safe2;
    static integer i__, j, k;
    static real s;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *);
    static integer count;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *), saxpy_(integer *, real *, real *, integer *, real *, 
	    integer *);
    static real xk;
    extern doublereal slamch_(char *);
    static integer nz;
    static real safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *), slacon_(
	    integer *, real *, real *, integer *, real *, integer *);
    static logical notran;
    extern /* Subroutine */ int sgetrs_(char *, integer *, integer *, real *, 
	    integer *, integer *, real *, integer *, integer *);
    static char transt[1];
    static real lstres, eps;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
#define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    af_dim1 = *ldaf;
    af_offset = 1 + af_dim1 * 1;
    af -= af_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;
    notran = lsame_(trans, "N");
    if (! notran && ! lsame_(trans, "T") && ! lsame_(
	    trans, "C")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldaf < max(1,*n)) {
	*info = -7;
    } else if (*ldb < max(1,*n)) {
	*info = -10;
    } else if (*ldx < max(1,*n)) {
	*info = -12;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SGERFS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
	i__1 = *nrhs;
	for (j = 1; j <= i__1; ++j) {
	    ferr[j] = 0.f;
	    berr[j] = 0.f;
/* L10: */
	}
	return 0;
    }

    if (notran) {
	*(unsigned char *)transt = 'T';
    } else {
	*(unsigned char *)transt = 'N';
    }

/*     NZ = maximum number of nonzero elements in each row of A, plus 1 */

    nz = *n + 1;
    eps = slamch_("Epsilon");
    safmin = slamch_("Safe minimum");
    safe1 = nz * safmin;
    safe2 = safe1 / eps;

/*     Do for each right hand side */

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {

	count = 1;
	lstres = 3.f;
L20:

/*        Loop until stopping criterion is satisfied.   

          Compute residual R = B - op(A) * X,   
          where op(A) = A, A**T, or A**H, depending on TRANS. */

	scopy_(n, &b_ref(1, j), &c__1, &work[*n + 1], &c__1);
	sgemv_(trans, n, n, &c_b15, &a[a_offset], lda, &x_ref(1, j), &c__1, &
		c_b17, &work[*n + 1], &c__1);

/*        Compute componentwise relative backward error from formula   

          max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   

          where abs(Z) is the componentwise absolute value of the matrix   
          or vector Z.  If the i-th component of the denominator is less   
          than SAFE2, then SAFE1 is added to the i-th components of the   
          numerator and denominator before dividing. */

	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    work[i__] = (r__1 = b_ref(i__, j), dabs(r__1));
/* L30: */
	}

/*        Compute abs(op(A))*abs(X) + abs(B). */

	if (notran) {
	    i__2 = *n;
	    for (k = 1; k <= i__2; ++k) {
		xk = (r__1 = x_ref(k, j), dabs(r__1));
		i__3 = *n;
		for (i__ = 1; i__ <= i__3; ++i__) {
		    work[i__] += (r__1 = a_ref(i__, k), dabs(r__1)) * xk;
/* L40: */
		}
/* L50: */
	    }
	} else {
	    i__2 = *n;
	    for (k = 1; k <= i__2; ++k) {
		s = 0.f;
		i__3 = *n;
		for (i__ = 1; i__ <= i__3; ++i__) {
		    s += (r__1 = a_ref(i__, k), dabs(r__1)) * (r__2 = x_ref(
			    i__, j), dabs(r__2));
/* L60: */
		}
		work[k] += s;
/* L70: */
	    }
	}
	s = 0.f;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    if (work[i__] > safe2) {
/* Computing MAX */
		r__2 = s, r__3 = (r__1 = work[*n + i__], dabs(r__1)) / work[
			i__];
		s = dmax(r__2,r__3);
	    } else {
/* Computing MAX */
		r__2 = s, r__3 = ((r__1 = work[*n + i__], dabs(r__1)) + safe1)
			 / (work[i__] + safe1);
		s = dmax(r__2,r__3);
	    }
/* L80: */
	}
	berr[j] = s;

/*        Test stopping criterion. Continue iterating if   
             1) The residual BERR(J) is larger than machine epsilon, and   
             2) BERR(J) decreased by at least a factor of 2 during the   
                last iteration, and   
             3) At most ITMAX iterations tried. */

	if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) {

/*           Update solution and try again. */

	    sgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[*n 
		    + 1], n, info);
	    saxpy_(n, &c_b17, &work[*n + 1], &c__1, &x_ref(1, j), &c__1);
	    lstres = berr[j];
	    ++count;
	    goto L20;
	}

/*        Bound error from formula   

          norm(X - XTRUE) / norm(X) .le. FERR =   
          norm( abs(inv(op(A)))*   
             ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   

          where   
            norm(Z) is the magnitude of the largest component of Z   
            inv(op(A)) is the inverse of op(A)   
            abs(Z) is the componentwise absolute value of the matrix or   
               vector Z   
            NZ is the maximum number of nonzeros in any row of A, plus 1   
            EPS is machine epsilon   

          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
          is incremented by SAFE1 if the i-th component of   
          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   

          Use SLACON to estimate the infinity-norm of the matrix   
             inv(op(A)) * diag(W),   
          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */

	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    if (work[i__] > safe2) {
		work[i__] = (r__1 = work[*n + i__], dabs(r__1)) + nz * eps * 
			work[i__];
	    } else {
		work[i__] = (r__1 = work[*n + i__], dabs(r__1)) + nz * eps * 
			work[i__] + safe1;
	    }
/* L90: */
	}

	kase = 0;
L100:
	slacon_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], &
		kase);
	if (kase != 0) {
	    if (kase == 1) {

/*              Multiply by diag(W)*inv(op(A)**T). */

		sgetrs_(transt, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &
			work[*n + 1], n, info);
		i__2 = *n;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    work[*n + i__] = work[i__] * work[*n + i__];
/* L110: */
		}
	    } else {

/*              Multiply by inv(op(A))*diag(W). */

		i__2 = *n;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    work[*n + i__] = work[i__] * work[*n + i__];
/* L120: */
		}
		sgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &
			work[*n + 1], n, info);
	    }
	    goto L100;
	}

/*        Normalize error. */

	lstres = 0.f;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    r__2 = lstres, r__3 = (r__1 = x_ref(i__, j), dabs(r__1));
	    lstres = dmax(r__2,r__3);
/* L130: */
	}
	if (lstres != 0.f) {
	    ferr[j] /= lstres;
	}

/* L140: */
    }

    return 0;

/*     End of SGERFS */

} /* sgerfs_ */
Esempio n. 17
0
 int cggbal_(char *job, int *n, complex *a, int *lda, 
	complex *b, int *ldb, int *ilo, int *ihi, float *lscale, 
	float *rscale, float *work, int *info)
{
    /* System generated locals */
    int a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
    float r__1, r__2, r__3;

    /* Builtin functions */
    double r_lg10(float *), r_imag(complex *), c_abs(complex *), r_sign(float *,
	     float *), pow_ri(float *, int *);

    /* Local variables */
    int i__, j, k, l, m;
    float t;
    int jc;
    float ta, tb, tc;
    int ir;
    float ew;
    int it, nr, ip1, jp1, lm1;
    float cab, rab, ewc, cor, sum;
    int nrp2, icab, lcab;
    float beta, coef;
    int irab, lrab;
    float basl, cmax;
    extern double sdot_(int *, float *, int *, float *, int *);
    float coef2, coef5, gamma, alpha;
    extern int lsame_(char *, char *);
    extern  int sscal_(int *, float *, float *, int *);
    float sfmin;
    extern  int cswap_(int *, complex *, int *, 
	    complex *, int *);
    float sfmax;
    int iflow, kount;
    extern  int saxpy_(int *, float *, float *, int *, 
	    float *, int *);
    float pgamma;
    extern int icamax_(int *, complex *, int *);
    extern double slamch_(char *);
    extern  int csscal_(int *, float *, complex *, int 
	    *), xerbla_(char *, int *);
    int lsfmin, lsfmax;


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CGGBAL balances a pair of general complex matrices (A,B).  This */
/*  involves, first, permuting A and B by similarity transformations to */
/*  isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N */
/*  elements on the diagonal; and second, applying a diagonal similarity */
/*  transformation to rows and columns ILO to IHI to make the rows */
/*  and columns as close in norm as possible. Both steps are optional. */

/*  Balancing may reduce the 1-norm of the matrices, and improve the */
/*  accuracy of the computed eigenvalues and/or eigenvectors in the */
/*  generalized eigenvalue problem A*x = lambda*B*x. */

/*  Arguments */
/*  ========= */

/*  JOB     (input) CHARACTER*1 */
/*          Specifies the operations to be performed on A and B: */
/*          = 'N':  none:  simply set ILO = 1, IHI = N, LSCALE(I) = 1.0 */
/*                  and RSCALE(I) = 1.0 for i=1,...,N; */
/*          = 'P':  permute only; */
/*          = 'S':  scale only; */
/*          = 'B':  both permute and scale. */

/*  N       (input) INTEGER */
/*          The order of the matrices A and B.  N >= 0. */

/*  A       (input/output) COMPLEX array, dimension (LDA,N) */
/*          On entry, the input matrix A. */
/*          On exit, A is overwritten by the balanced matrix. */
/*          If JOB = 'N', A is not referenced. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A. LDA >= MAX(1,N). */

/*  B       (input/output) COMPLEX array, dimension (LDB,N) */
/*          On entry, the input matrix B. */
/*          On exit, B is overwritten by the balanced matrix. */
/*          If JOB = 'N', B is not referenced. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B. LDB >= MAX(1,N). */

/*  ILO     (output) INTEGER */
/*  IHI     (output) INTEGER */
/*          ILO and IHI are set to ints such that on exit */
/*          A(i,j) = 0 and B(i,j) = 0 if i > j and */
/*          j = 1,...,ILO-1 or i = IHI+1,...,N. */
/*          If JOB = 'N' or 'S', ILO = 1 and IHI = N. */

/*  LSCALE  (output) REAL array, dimension (N) */
/*          Details of the permutations and scaling factors applied */
/*          to the left side of A and B.  If P(j) is the index of the */
/*          row interchanged with row j, and D(j) is the scaling factor */
/*          applied to row j, then */
/*            LSCALE(j) = P(j)    for J = 1,...,ILO-1 */
/*                      = D(j)    for J = ILO,...,IHI */
/*                      = P(j)    for J = IHI+1,...,N. */
/*          The order in which the interchanges are made is N to IHI+1, */
/*          then 1 to ILO-1. */

/*  RSCALE  (output) REAL array, dimension (N) */
/*          Details of the permutations and scaling factors applied */
/*          to the right side of A and B.  If P(j) is the index of the */
/*          column interchanged with column j, and D(j) is the scaling */
/*          factor applied to column j, then */
/*            RSCALE(j) = P(j)    for J = 1,...,ILO-1 */
/*                      = D(j)    for J = ILO,...,IHI */
/*                      = P(j)    for J = IHI+1,...,N. */
/*          The order in which the interchanges are made is N to IHI+1, */
/*          then 1 to ILO-1. */

/*  WORK    (workspace) REAL array, dimension (lwork) */
/*          lwork must be at least MAX(1,6*N) when JOB = 'S' or 'B', and */
/*          at least 1 when JOB = 'N' or 'P'. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  Further Details */
/*  =============== */

/*  See R.C. WARD, Balancing the generalized eigenvalue problem, */
/*                 SIAM J. Sci. Stat. Comp. 2 (1981), 141-152. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Statement Functions .. */
/*     .. */
/*     .. Statement Function definitions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --lscale;
    --rscale;
    --work;

    /* Function Body */
    *info = 0;
    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S") 
	    && ! lsame_(job, "B")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < MAX(1,*n)) {
	*info = -4;
    } else if (*ldb < MAX(1,*n)) {
	*info = -6;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CGGBAL", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	*ilo = 1;
	*ihi = *n;
	return 0;
    }

    if (*n == 1) {
	*ilo = 1;
	*ihi = *n;
	lscale[1] = 1.f;
	rscale[1] = 1.f;
	return 0;
    }

    if (lsame_(job, "N")) {
	*ilo = 1;
	*ihi = *n;
	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    lscale[i__] = 1.f;
	    rscale[i__] = 1.f;
/* L10: */
	}
	return 0;
    }

    k = 1;
    l = *n;
    if (lsame_(job, "S")) {
	goto L190;
    }

    goto L30;

/*     Permute the matrices A and B to isolate the eigenvalues. */

/*     Find row with one nonzero in columns 1 through L */

L20:
    l = lm1;
    if (l != 1) {
	goto L30;
    }

    rscale[1] = 1.f;
    lscale[1] = 1.f;
    goto L190;

L30:
    lm1 = l - 1;
    for (i__ = l; i__ >= 1; --i__) {
	i__1 = lm1;
	for (j = 1; j <= i__1; ++j) {
	    jp1 = j + 1;
	    i__2 = i__ + j * a_dim1;
	    i__3 = i__ + j * b_dim1;
	    if (a[i__2].r != 0.f || a[i__2].i != 0.f || (b[i__3].r != 0.f || 
		    b[i__3].i != 0.f)) {
		goto L50;
	    }
/* L40: */
	}
	j = l;
	goto L70;

L50:
	i__1 = l;
	for (j = jp1; j <= i__1; ++j) {
	    i__2 = i__ + j * a_dim1;
	    i__3 = i__ + j * b_dim1;
	    if (a[i__2].r != 0.f || a[i__2].i != 0.f || (b[i__3].r != 0.f || 
		    b[i__3].i != 0.f)) {
		goto L80;
	    }
/* L60: */
	}
	j = jp1 - 1;

L70:
	m = l;
	iflow = 1;
	goto L160;
L80:
	;
    }
    goto L100;

/*     Find column with one nonzero in rows K through N */

L90:
    ++k;

L100:
    i__1 = l;
    for (j = k; j <= i__1; ++j) {
	i__2 = lm1;
	for (i__ = k; i__ <= i__2; ++i__) {
	    ip1 = i__ + 1;
	    i__3 = i__ + j * a_dim1;
	    i__4 = i__ + j * b_dim1;
	    if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != 0.f || 
		    b[i__4].i != 0.f)) {
		goto L120;
	    }
/* L110: */
	}
	i__ = l;
	goto L140;
L120:
	i__2 = l;
	for (i__ = ip1; i__ <= i__2; ++i__) {
	    i__3 = i__ + j * a_dim1;
	    i__4 = i__ + j * b_dim1;
	    if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != 0.f || 
		    b[i__4].i != 0.f)) {
		goto L150;
	    }
/* L130: */
	}
	i__ = ip1 - 1;
L140:
	m = k;
	iflow = 2;
	goto L160;
L150:
	;
    }
    goto L190;

/*     Permute rows M and I */

L160:
    lscale[m] = (float) i__;
    if (i__ == m) {
	goto L170;
    }
    i__1 = *n - k + 1;
    cswap_(&i__1, &a[i__ + k * a_dim1], lda, &a[m + k * a_dim1], lda);
    i__1 = *n - k + 1;
    cswap_(&i__1, &b[i__ + k * b_dim1], ldb, &b[m + k * b_dim1], ldb);

/*     Permute columns M and J */

L170:
    rscale[m] = (float) j;
    if (j == m) {
	goto L180;
    }
    cswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1);
    cswap_(&l, &b[j * b_dim1 + 1], &c__1, &b[m * b_dim1 + 1], &c__1);

L180:
    switch (iflow) {
	case 1:  goto L20;
	case 2:  goto L90;
    }

L190:
    *ilo = k;
    *ihi = l;

    if (lsame_(job, "P")) {
	i__1 = *ihi;
	for (i__ = *ilo; i__ <= i__1; ++i__) {
	    lscale[i__] = 1.f;
	    rscale[i__] = 1.f;
/* L195: */
	}
	return 0;
    }

    if (*ilo == *ihi) {
	return 0;
    }

/*     Balance the submatrix in rows ILO to IHI. */

    nr = *ihi - *ilo + 1;
    i__1 = *ihi;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
	rscale[i__] = 0.f;
	lscale[i__] = 0.f;

	work[i__] = 0.f;
	work[i__ + *n] = 0.f;
	work[i__ + (*n << 1)] = 0.f;
	work[i__ + *n * 3] = 0.f;
	work[i__ + (*n << 2)] = 0.f;
	work[i__ + *n * 5] = 0.f;
/* L200: */
    }

/*     Compute right side vector in resulting linear equations */

    basl = r_lg10(&c_b36);
    i__1 = *ihi;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
	i__2 = *ihi;
	for (j = *ilo; j <= i__2; ++j) {
	    i__3 = i__ + j * a_dim1;
	    if (a[i__3].r == 0.f && a[i__3].i == 0.f) {
		ta = 0.f;
		goto L210;
	    }
	    i__3 = i__ + j * a_dim1;
	    r__3 = (r__1 = a[i__3].r, ABS(r__1)) + (r__2 = r_imag(&a[i__ + j 
		    * a_dim1]), ABS(r__2));
	    ta = r_lg10(&r__3) / basl;

L210:
	    i__3 = i__ + j * b_dim1;
	    if (b[i__3].r == 0.f && b[i__3].i == 0.f) {
		tb = 0.f;
		goto L220;
	    }
	    i__3 = i__ + j * b_dim1;
	    r__3 = (r__1 = b[i__3].r, ABS(r__1)) + (r__2 = r_imag(&b[i__ + j 
		    * b_dim1]), ABS(r__2));
	    tb = r_lg10(&r__3) / basl;

L220:
	    work[i__ + (*n << 2)] = work[i__ + (*n << 2)] - ta - tb;
	    work[j + *n * 5] = work[j + *n * 5] - ta - tb;
/* L230: */
	}
/* L240: */
    }

    coef = 1.f / (float) (nr << 1);
    coef2 = coef * coef;
    coef5 = coef2 * .5f;
    nrp2 = nr + 2;
    beta = 0.f;
    it = 1;

/*     Start generalized conjugate gradient iteration */

L250:

    gamma = sdot_(&nr, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + (*n << 2)]
, &c__1) + sdot_(&nr, &work[*ilo + *n * 5], &c__1, &work[*ilo + *
	    n * 5], &c__1);

    ew = 0.f;
    ewc = 0.f;
    i__1 = *ihi;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
	ew += work[i__ + (*n << 2)];
	ewc += work[i__ + *n * 5];
/* L260: */
    }

/* Computing 2nd power */
    r__1 = ew;
/* Computing 2nd power */
    r__2 = ewc;
/* Computing 2nd power */
    r__3 = ew - ewc;
    gamma = coef * gamma - coef2 * (r__1 * r__1 + r__2 * r__2) - coef5 * (
	    r__3 * r__3);
    if (gamma == 0.f) {
	goto L350;
    }
    if (it != 1) {
	beta = gamma / pgamma;
    }
    t = coef5 * (ewc - ew * 3.f);
    tc = coef5 * (ew - ewc * 3.f);

    sscal_(&nr, &beta, &work[*ilo], &c__1);
    sscal_(&nr, &beta, &work[*ilo + *n], &c__1);

    saxpy_(&nr, &coef, &work[*ilo + (*n << 2)], &c__1, &work[*ilo + *n], &
	    c__1);
    saxpy_(&nr, &coef, &work[*ilo + *n * 5], &c__1, &work[*ilo], &c__1);

    i__1 = *ihi;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
	work[i__] += tc;
	work[i__ + *n] += t;
/* L270: */
    }

/*     Apply matrix to vector */

    i__1 = *ihi;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
	kount = 0;
	sum = 0.f;
	i__2 = *ihi;
	for (j = *ilo; j <= i__2; ++j) {
	    i__3 = i__ + j * a_dim1;
	    if (a[i__3].r == 0.f && a[i__3].i == 0.f) {
		goto L280;
	    }
	    ++kount;
	    sum += work[j];
L280:
	    i__3 = i__ + j * b_dim1;
	    if (b[i__3].r == 0.f && b[i__3].i == 0.f) {
		goto L290;
	    }
	    ++kount;
	    sum += work[j];
L290:
	    ;
	}
	work[i__ + (*n << 1)] = (float) kount * work[i__ + *n] + sum;
/* L300: */
    }

    i__1 = *ihi;
    for (j = *ilo; j <= i__1; ++j) {
	kount = 0;
	sum = 0.f;
	i__2 = *ihi;
	for (i__ = *ilo; i__ <= i__2; ++i__) {
	    i__3 = i__ + j * a_dim1;
	    if (a[i__3].r == 0.f && a[i__3].i == 0.f) {
		goto L310;
	    }
	    ++kount;
	    sum += work[i__ + *n];
L310:
	    i__3 = i__ + j * b_dim1;
	    if (b[i__3].r == 0.f && b[i__3].i == 0.f) {
		goto L320;
	    }
	    ++kount;
	    sum += work[i__ + *n];
L320:
	    ;
	}
	work[j + *n * 3] = (float) kount * work[j] + sum;
/* L330: */
    }

    sum = sdot_(&nr, &work[*ilo + *n], &c__1, &work[*ilo + (*n << 1)], &c__1) 
	    + sdot_(&nr, &work[*ilo], &c__1, &work[*ilo + *n * 3], &c__1);
    alpha = gamma / sum;

/*     Determine correction to current iteration */

    cmax = 0.f;
    i__1 = *ihi;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
	cor = alpha * work[i__ + *n];
	if (ABS(cor) > cmax) {
	    cmax = ABS(cor);
	}
	lscale[i__] += cor;
	cor = alpha * work[i__];
	if (ABS(cor) > cmax) {
	    cmax = ABS(cor);
	}
	rscale[i__] += cor;
/* L340: */
    }
    if (cmax < .5f) {
	goto L350;
    }

    r__1 = -alpha;
    saxpy_(&nr, &r__1, &work[*ilo + (*n << 1)], &c__1, &work[*ilo + (*n << 2)]
, &c__1);
    r__1 = -alpha;
    saxpy_(&nr, &r__1, &work[*ilo + *n * 3], &c__1, &work[*ilo + *n * 5], &
	    c__1);

    pgamma = gamma;
    ++it;
    if (it <= nrp2) {
	goto L250;
    }

/*     End generalized conjugate gradient iteration */

L350:
    sfmin = slamch_("S");
    sfmax = 1.f / sfmin;
    lsfmin = (int) (r_lg10(&sfmin) / basl + 1.f);
    lsfmax = (int) (r_lg10(&sfmax) / basl);
    i__1 = *ihi;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
	i__2 = *n - *ilo + 1;
	irab = icamax_(&i__2, &a[i__ + *ilo * a_dim1], lda);
	rab = c_abs(&a[i__ + (irab + *ilo - 1) * a_dim1]);
	i__2 = *n - *ilo + 1;
	irab = icamax_(&i__2, &b[i__ + *ilo * b_dim1], ldb);
/* Computing MAX */
	r__1 = rab, r__2 = c_abs(&b[i__ + (irab + *ilo - 1) * b_dim1]);
	rab = MAX(r__1,r__2);
	r__1 = rab + sfmin;
	lrab = (int) (r_lg10(&r__1) / basl + 1.f);
	ir = lscale[i__] + r_sign(&c_b72, &lscale[i__]);
/* Computing MIN */
	i__2 = MAX(ir,lsfmin), i__2 = MIN(i__2,lsfmax), i__3 = lsfmax - lrab;
	ir = MIN(i__2,i__3);
	lscale[i__] = pow_ri(&c_b36, &ir);
	icab = icamax_(ihi, &a[i__ * a_dim1 + 1], &c__1);
	cab = c_abs(&a[icab + i__ * a_dim1]);
	icab = icamax_(ihi, &b[i__ * b_dim1 + 1], &c__1);
/* Computing MAX */
	r__1 = cab, r__2 = c_abs(&b[icab + i__ * b_dim1]);
	cab = MAX(r__1,r__2);
	r__1 = cab + sfmin;
	lcab = (int) (r_lg10(&r__1) / basl + 1.f);
	jc = rscale[i__] + r_sign(&c_b72, &rscale[i__]);
/* Computing MIN */
	i__2 = MAX(jc,lsfmin), i__2 = MIN(i__2,lsfmax), i__3 = lsfmax - lcab;
	jc = MIN(i__2,i__3);
	rscale[i__] = pow_ri(&c_b36, &jc);
/* L360: */
    }

/*     Row scaling of matrices A and B */

    i__1 = *ihi;
    for (i__ = *ilo; i__ <= i__1; ++i__) {
	i__2 = *n - *ilo + 1;
	csscal_(&i__2, &lscale[i__], &a[i__ + *ilo * a_dim1], lda);
	i__2 = *n - *ilo + 1;
	csscal_(&i__2, &lscale[i__], &b[i__ + *ilo * b_dim1], ldb);
/* L370: */
    }

/*     Column scaling of matrices A and B */

    i__1 = *ihi;
    for (j = *ilo; j <= i__1; ++j) {
	csscal_(ihi, &rscale[j], &a[j * a_dim1 + 1], &c__1);
	csscal_(ihi, &rscale[j], &b[j * b_dim1 + 1], &c__1);
/* L380: */
    }

    return 0;

/*     End of CGGBAL */

} /* cggbal_ */
Esempio n. 18
0
/* Subroutine */ int stzrqf_(integer *m, integer *n, real *a, integer *lda, 
	real *tau, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    real r__1;

    /* Local variables */
    integer i__, k, m1;
    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, integer *), sgemv_(char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
, real *, real *, integer *), scopy_(integer *, real *, 
	    integer *, real *, integer *), saxpy_(integer *, real *, real *, 
	    integer *, real *, integer *), xerbla_(char *, integer *),
	     slarfg_(integer *, real *, real *, integer *, real *);


/*  -- LAPACK routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  This routine is deprecated and has been replaced by routine STZRZF. */

/*  STZRQF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A */
/*  to upper triangular form by means of orthogonal transformations. */

/*  The upper trapezoidal matrix A is factored as */

/*     A = ( R  0 ) * Z, */

/*  where Z is an N-by-N orthogonal matrix and R is an M-by-M upper */
/*  triangular matrix. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A.  M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A.  N >= M. */

/*  A       (input/output) REAL array, dimension (LDA,N) */
/*          On entry, the leading M-by-N upper trapezoidal part of the */
/*          array A must contain the matrix to be factorized. */
/*          On exit, the leading M-by-M upper triangular part of A */
/*          contains the upper triangular matrix R, and elements M+1 to */
/*          N of the first M rows of A, with the array TAU, represent the */
/*          orthogonal matrix Z as a product of M elementary reflectors. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,M). */

/*  TAU     (output) REAL array, dimension (M) */
/*          The scalar factors of the elementary reflectors. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  Further Details */
/*  =============== */

/*  The factorization is obtained by Householder's method.  The kth */
/*  transformation matrix, Z( k ), which is used to introduce zeros into */
/*  the ( m - k + 1 )th row of A, is given in the form */

/*     Z( k ) = ( I     0   ), */
/*              ( 0  T( k ) ) */

/*  where */

/*     T( k ) = I - tau*u( k )*u( k )',   u( k ) = (   1    ), */
/*                                                 (   0    ) */
/*                                                 ( z( k ) ) */

/*  tau is a scalar and z( k ) is an ( n - m ) element vector. */
/*  tau and z( k ) are chosen to annihilate the elements of the kth row */
/*  of X. */

/*  The scalar tau is returned in the kth element of TAU and the vector */
/*  u( k ) in the kth row of A, such that the elements of z( k ) are */
/*  in  a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in */
/*  the upper triangular part of A. */

/*  Z is given by */

/*     Z =  Z( 1 ) * Z( 2 ) * ... * Z( m ). */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --tau;

    /* Function Body */
    *info = 0;
    if (*m < 0) {
	*info = -1;
    } else if (*n < *m) {
	*info = -2;
    } else if (*lda < max(1,*m)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("STZRQF", &i__1);
	return 0;
    }

/*     Perform the factorization. */

    if (*m == 0) {
	return 0;
    }
    if (*m == *n) {
	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    tau[i__] = 0.f;
/* L10: */
	}
    } else {
/* Computing MIN */
	i__1 = *m + 1;
	m1 = min(i__1,*n);
	for (k = *m; k >= 1; --k) {

/*           Use a Householder reflection to zero the kth row of A. */
/*           First set up the reflection. */

	    i__1 = *n - *m + 1;
	    slarfg_(&i__1, &a[k + k * a_dim1], &a[k + m1 * a_dim1], lda, &tau[
		    k]);

	    if (tau[k] != 0.f && k > 1) {

/*              We now perform the operation  A := A*P( k ). */

/*              Use the first ( k - 1 ) elements of TAU to store  a( k ), */
/*              where  a( k ) consists of the first ( k - 1 ) elements of */
/*              the  kth column  of  A.  Also  let  B  denote  the  first */
/*              ( k - 1 ) rows of the last ( n - m ) columns of A. */

		i__1 = k - 1;
		scopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &tau[1], &c__1);

/*              Form   w = a( k ) + B*z( k )  in TAU. */

		i__1 = k - 1;
		i__2 = *n - *m;
		sgemv_("No transpose", &i__1, &i__2, &c_b8, &a[m1 * a_dim1 + 
			1], lda, &a[k + m1 * a_dim1], lda, &c_b8, &tau[1], &
			c__1);

/*              Now form  a( k ) := a( k ) - tau*w */
/*              and       B      := B      - tau*w*z( k )'. */

		i__1 = k - 1;
		r__1 = -tau[k];
		saxpy_(&i__1, &r__1, &tau[1], &c__1, &a[k * a_dim1 + 1], &
			c__1);
		i__1 = k - 1;
		i__2 = *n - *m;
		r__1 = -tau[k];
		sger_(&i__1, &i__2, &r__1, &tau[1], &c__1, &a[k + m1 * a_dim1]
, lda, &a[m1 * a_dim1 + 1], lda);
	    }
/* L20: */
	}
    }

    return 0;

/*     End of STZRQF */

} /* stzrqf_ */
Esempio n. 19
0
/* Subroutine */ int spbrfs_(char *uplo, integer *n, integer *kd, integer *
	nrhs, real *ab, integer *ldab, real *afb, integer *ldafb, real *b, 
	integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *
	work, integer *iwork, integer *info)
{
    /* System generated locals */
    integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, 
	    x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5;
    real r__1, r__2, r__3;

    /* Local variables */
    integer i__, j, k, l;
    real s, xk;
    integer nz;
    real eps;
    integer kase;
    real safe1, safe2;
    integer isave[3], count;
    logical upper;
    real safmin;
    real lstres;

/*  -- LAPACK routine (version 3.2) -- */
/*     November 2006 */

/*     Modified to call SLACN2 in place of SLACON, 7 Feb 03, SJH. */

/*  Purpose */
/*  ======= */

/*  SPBRFS improves the computed solution to a system of linear */
/*  equations when the coefficient matrix is symmetric positive definite */
/*  and banded, and provides error bounds and backward error estimates */
/*  for the solution. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  Upper triangle of A is stored; */
/*          = 'L':  Lower triangle of A is stored. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  KD      (input) INTEGER */
/*          The number of superdiagonals of the matrix A if UPLO = 'U', */
/*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0. */

/*  NRHS    (input) INTEGER */
/*          The number of right hand sides, i.e., the number of columns */
/*          of the matrices B and X.  NRHS >= 0. */

/*  AB      (input) REAL array, dimension (LDAB,N) */
/*          The upper or lower triangle of the symmetric band matrix A, */
/*          stored in the first KD+1 rows of the array.  The j-th column */
/*          of A is stored in the j-th column of the array AB as follows: */
/*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */
/*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd). */

/*  LDAB    (input) INTEGER */
/*          The leading dimension of the array AB.  LDAB >= KD+1. */

/*  AFB     (input) REAL array, dimension (LDAFB,N) */
/*          The triangular factor U or L from the Cholesky factorization */
/*          A = U**T*U or A = L*L**T of the band matrix A as computed by */
/*          SPBTRF, in the same storage format as A (see AB). */

/*  LDAFB   (input) INTEGER */
/*          The leading dimension of the array AFB.  LDAFB >= KD+1. */

/*  B       (input) REAL array, dimension (LDB,NRHS) */
/*          The right hand side matrix B. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= max(1,N). */

/*  X       (input/output) REAL array, dimension (LDX,NRHS) */
/*          On entry, the solution matrix X, as computed by SPBTRS. */
/*          On exit, the improved solution matrix X. */

/*  LDX     (input) INTEGER */
/*          The leading dimension of the array X.  LDX >= max(1,N). */

/*  FERR    (output) REAL array, dimension (NRHS) */
/*          The estimated forward error bound for each solution vector */
/*          X(j) (the j-th column of the solution matrix X). */
/*          If XTRUE is the true solution corresponding to X(j), FERR(j) */
/*          is an estimated upper bound for the magnitude of the largest */
/*          element in (X(j) - XTRUE) divided by the magnitude of the */
/*          largest element in X(j).  The estimate is as reliable as */
/*          the estimate for RCOND, and is almost always a slight */
/*          overestimate of the true error. */

/*  BERR    (output) REAL array, dimension (NRHS) */
/*          The componentwise relative backward error of each solution */
/*          vector X(j) (i.e., the smallest relative change in */
/*          any element of A or B that makes X(j) an exact solution). */

/*  WORK    (workspace) REAL array, dimension (3*N) */

/*  IWORK   (workspace) INTEGER array, dimension (N) */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  Internal Parameters */
/*  =================== */

/*  ITMAX is the maximum number of steps of iterative refinement. */

/*  ===================================================================== */

/*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1;
    ab -= ab_offset;
    afb_dim1 = *ldafb;
    afb_offset = 1 + afb_dim1;
    afb -= afb_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*kd < 0) {
	*info = -3;
    } else if (*nrhs < 0) {
	*info = -4;
    } else if (*ldab < *kd + 1) {
	*info = -6;
    } else if (*ldafb < *kd + 1) {
	*info = -8;
    } else if (*ldb < max(1,*n)) {
	*info = -10;
    } else if (*ldx < max(1,*n)) {
	*info = -12;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SPBRFS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
	i__1 = *nrhs;
	for (j = 1; j <= i__1; ++j) {
	    ferr[j] = 0.f;
	    berr[j] = 0.f;
	}
	return 0;
    }

/*     NZ = maximum number of nonzero elements in each row of A, plus 1 */

/* Computing MIN */
    i__1 = *n + 1, i__2 = (*kd << 1) + 2;
    nz = min(i__1,i__2);
    eps = slamch_("Epsilon");
    safmin = slamch_("Safe minimum");
    safe1 = nz * safmin;
    safe2 = safe1 / eps;

/*     Do for each right hand side */

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {

	count = 1;
	lstres = 3.f;
L20:

/*        Loop until stopping criterion is satisfied. */

/*        Compute residual R = B - A * X */

	scopy_(n, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1);
	ssbmv_(uplo, n, kd, &c_b12, &ab[ab_offset], ldab, &x[j * x_dim1 + 1], 
		&c__1, &c_b14, &work[*n + 1], &c__1);

/*        Compute componentwise relative backward error from formula */

/*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) */

/*        where abs(Z) is the componentwise absolute value of the matrix */
/*        or vector Z.  If the i-th component of the denominator is less */
/*        than SAFE2, then SAFE1 is added to the i-th components of the */
/*        numerator and denominator before dividing. */

	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    work[i__] = (r__1 = b[i__ + j * b_dim1], dabs(r__1));
	}

/*        Compute abs(A)*abs(X) + abs(B). */

	if (upper) {
	    i__2 = *n;
	    for (k = 1; k <= i__2; ++k) {
		s = 0.f;
		xk = (r__1 = x[k + j * x_dim1], dabs(r__1));
		l = *kd + 1 - k;
/* Computing MAX */
		i__3 = 1, i__4 = k - *kd;
		i__5 = k - 1;
		for (i__ = max(i__3,i__4); i__ <= i__5; ++i__) {
		    work[i__] += (r__1 = ab[l + i__ + k * ab_dim1], dabs(r__1)
			    ) * xk;
		    s += (r__1 = ab[l + i__ + k * ab_dim1], dabs(r__1)) * (
			    r__2 = x[i__ + j * x_dim1], dabs(r__2));
		}
		work[k] = work[k] + (r__1 = ab[*kd + 1 + k * ab_dim1], dabs(
			r__1)) * xk + s;
	    }
	} else {
	    i__2 = *n;
	    for (k = 1; k <= i__2; ++k) {
		s = 0.f;
		xk = (r__1 = x[k + j * x_dim1], dabs(r__1));
		work[k] += (r__1 = ab[k * ab_dim1 + 1], dabs(r__1)) * xk;
		l = 1 - k;
/* Computing MIN */
		i__3 = *n, i__4 = k + *kd;
		i__5 = min(i__3,i__4);
		for (i__ = k + 1; i__ <= i__5; ++i__) {
		    work[i__] += (r__1 = ab[l + i__ + k * ab_dim1], dabs(r__1)
			    ) * xk;
		    s += (r__1 = ab[l + i__ + k * ab_dim1], dabs(r__1)) * (
			    r__2 = x[i__ + j * x_dim1], dabs(r__2));
		}
		work[k] += s;
	    }
	}
	s = 0.f;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    if (work[i__] > safe2) {
/* Computing MAX */
		r__2 = s, r__3 = (r__1 = work[*n + i__], dabs(r__1)) / work[
			i__];
		s = dmax(r__2,r__3);
	    } else {
/* Computing MAX */
		r__2 = s, r__3 = ((r__1 = work[*n + i__], dabs(r__1)) + safe1)
			 / (work[i__] + safe1);
		s = dmax(r__2,r__3);
	    }
	}
	berr[j] = s;

/*        Test stopping criterion. Continue iterating if */
/*           1) The residual BERR(J) is larger than machine epsilon, and */
/*           2) BERR(J) decreased by at least a factor of 2 during the */
/*              last iteration, and */
/*           3) At most ITMAX iterations tried. */

	if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) {

/*           Update solution and try again. */

	    spbtrs_(uplo, n, kd, &c__1, &afb[afb_offset], ldafb, &work[*n + 1]
, n, info);
	    saxpy_(n, &c_b14, &work[*n + 1], &c__1, &x[j * x_dim1 + 1], &c__1)
		    ;
	    lstres = berr[j];
	    ++count;
	    goto L20;
	}

/*        Bound error from formula */

/*        norm(X - XTRUE) / norm(X) .le. FERR = */
/*        norm( abs(inv(A))* */
/*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) */

/*        where */
/*          norm(Z) is the magnitude of the largest component of Z */
/*          inv(A) is the inverse of A */
/*          abs(Z) is the componentwise absolute value of the matrix or */
/*             vector Z */
/*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
/*          EPS is machine epsilon */

/*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) */
/*        is incremented by SAFE1 if the i-th component of */
/*        abs(A)*abs(X) + abs(B) is less than SAFE2. */

/*        Use SLACN2 to estimate the infinity-norm of the matrix */
/*           inv(A) * diag(W), */
/*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */

	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    if (work[i__] > safe2) {
		work[i__] = (r__1 = work[*n + i__], dabs(r__1)) + nz * eps * 
			work[i__];
	    } else {
		work[i__] = (r__1 = work[*n + i__], dabs(r__1)) + nz * eps * 
			work[i__] + safe1;
	    }
	}

	kase = 0;
L100:
	slacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], &
		kase, isave);
	if (kase != 0) {
	    if (kase == 1) {

/*              Multiply by diag(W)*inv(A'). */

		spbtrs_(uplo, n, kd, &c__1, &afb[afb_offset], ldafb, &work[*n 
			+ 1], n, info);
		i__2 = *n;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    work[*n + i__] *= work[i__];
		}
	    } else if (kase == 2) {

/*              Multiply by inv(A)*diag(W). */

		i__2 = *n;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    work[*n + i__] *= work[i__];
		}
		spbtrs_(uplo, n, kd, &c__1, &afb[afb_offset], ldafb, &work[*n 
			+ 1], n, info);
	    }
	    goto L100;
	}

/*        Normalize error. */

	lstres = 0.f;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    r__2 = lstres, r__3 = (r__1 = x[i__ + j * x_dim1], dabs(r__1));
	    lstres = dmax(r__2,r__3);
	}
	if (lstres != 0.f) {
	    ferr[j] /= lstres;
	}

    }

    return 0;

/*     End of SPBRFS */

} /* spbrfs_ */
Esempio n. 20
0
int sfgmr(int n,
     void (*smatvec) (float, float[], float, float[]),
     void (*spsolve) (int, float[], float[]),
     float *rhs, float *sol, double tol, int im, int *itmax, FILE * fits)
{
/*----------------------------------------------------------------------
|                 *** Preconditioned FGMRES ***
+-----------------------------------------------------------------------
| This is a simple version of the ARMS preconditioned FGMRES algorithm.
+-----------------------------------------------------------------------
| Y. S. Dec. 2000. -- Apr. 2008
+-----------------------------------------------------------------------
| on entry:
|----------
|
| rhs     = real vector of length n containing the right hand side.
| sol     = real vector of length n containing an initial guess to the
|           solution on input.
| tol     = tolerance for stopping iteration
| im      = Krylov subspace dimension
| (itmax) = max number of iterations allowed.
| fits    = NULL: no output
|        != NULL: file handle to output " resid vs time and its"
|
| on return:
|----------
| fgmr      int =  0 --> successful return.
|           int =  1 --> convergence not achieved in itmax iterations.
| sol     = contains an approximate solution (upon successful return).
| itmax   = has changed. It now contains the number of steps required
|           to converge --
+-----------------------------------------------------------------------
| internal work arrays:
|----------
| vv      = work array of length [im+1][n] (used to store the Arnoldi
|           basis)
| hh      = work array of length [im][im+1] (Householder matrix)
| z       = work array of length [im][n] to store preconditioned vectors
+-----------------------------------------------------------------------
| subroutines called :
| matvec - matrix-vector multiplication operation
| psolve - (right) preconditionning operation
|	   psolve can be a NULL pointer (GMRES without preconditioner)
+---------------------------------------------------------------------*/

    int maxits = *itmax;
    int i, i1, ii, j, k, k1, its, retval, i_1 = 1, i_2 = 2;
    float beta, eps1 = 0.0, t, t0, gam;
    float **hh, *c, *s, *rs;
    float **vv, **z, tt;
    float zero = 0.0;
    float one = 1.0;

    its = 0;
    vv = (float **)SUPERLU_MALLOC((im + 1) * sizeof(float *));
    for (i = 0; i <= im; i++) vv[i] = floatMalloc(n);
    z = (float **)SUPERLU_MALLOC(im * sizeof(float *));
    hh = (float **)SUPERLU_MALLOC(im * sizeof(float *));
    for (i = 0; i < im; i++)
    {
	hh[i] = floatMalloc(i + 2);
	z[i] = floatMalloc(n);
    }
    c = floatMalloc(im);
    s = floatMalloc(im);
    rs = floatMalloc(im + 1);

    /*---- outer loop starts here ----*/
    do
    {
	/*---- compute initial residual vector ----*/
	smatvec(one, sol, zero, vv[0]);
	for (j = 0; j < n; j++)
	    vv[0][j] = rhs[j] - vv[0][j];	/* vv[0]= initial residual */
	beta = snrm2_(&n, vv[0], &i_1);

	/*---- print info if fits != null ----*/
	if (fits != NULL && its == 0)
	    fprintf(fits, "%8d   %10.2e\n", its, beta);
	/*if ( beta <= tol * dnrm2_(&n, rhs, &i_1) )*/
	if ( !(beta > tol * snrm2_(&n, rhs, &i_1)) )
	    break;
	t = 1.0 / beta;

	/*---- normalize: vv[0] = vv[0] / beta ----*/
	for (j = 0; j < n; j++)
	    vv[0][j] = vv[0][j] * t;
	if (its == 0)
	    eps1 = tol * beta;

	/*---- initialize 1-st term of rhs of hessenberg system ----*/
	rs[0] = beta;
	for (i = 0; i < im; i++)
	{
	    its++;
	    i1 = i + 1;

	    /*------------------------------------------------------------
	    |  (Right) Preconditioning Operation   z_{j} = M^{-1} v_{j}
	    +-----------------------------------------------------------*/
	    if (spsolve)
		spsolve(n, z[i], vv[i]);
	    else
		scopy_(&n, vv[i], &i_1, z[i], &i_1);

	    /*---- matvec operation w = A z_{j} = A M^{-1} v_{j} ----*/
	    smatvec(one, z[i], zero, vv[i1]);

	    /*------------------------------------------------------------
	    |     modified gram - schmidt...
	    |     h_{i,j} = (w,v_{i})
	    |     w  = w - h_{i,j} v_{i}
	    +------------------------------------------------------------*/
	    t0 = snrm2_(&n, vv[i1], &i_1);
	    for (j = 0; j <= i; j++)
	    {
		float negt;
		tt = sdot_(&n, vv[j], &i_1, vv[i1], &i_1);
		hh[i][j] = tt;
		negt = -tt;
		saxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
	    }

	    /*---- h_{j+1,j} = ||w||_{2} ----*/
	    t = snrm2_(&n, vv[i1], &i_1);
	    while (t < 0.5 * t0)
	    {
		t0 = t;
		for (j = 0; j <= i; j++)
		{
		    float negt;
		    tt = sdot_(&n, vv[j], &i_1, vv[i1], &i_1);
		    hh[i][j] += tt;
		    negt = -tt;
		    saxpy_(&n, &negt, vv[j], &i_1, vv[i1], &i_1);
		}
		t = snrm2_(&n, vv[i1], &i_1);
	    }

	    hh[i][i1] = t;

	    if (t != 0.0)
	    {
		/*---- v_{j+1} = w / h_{j+1,j} ----*/
		t = 1.0 / t;
		for (k = 0; k < n; k++)
		    vv[i1][k] = vv[i1][k] * t;
	    }
	    /*---------------------------------------------------
	    |     done with modified gram schimdt and arnoldi step
	    |     now  update factorization of hh
	    +--------------------------------------------------*/

	    /*--------------------------------------------------------
	    |   perform previous transformations  on i-th column of h
	    +-------------------------------------------------------*/
	    for (k = 1; k <= i; k++)
	    {
		k1 = k - 1;
		tt = hh[i][k1];
		hh[i][k1] = c[k1] * tt + s[k1] * hh[i][k];
		hh[i][k] = -s[k1] * tt + c[k1] * hh[i][k];
	    }

	    gam = sqrt(pow(hh[i][i], 2) + pow(hh[i][i1], 2));

	    /*---------------------------------------------------
	    |     if gamma is zero then any small value will do
	    |     affect only residual estimate
	    +--------------------------------------------------*/
	    /* if (gam == 0.0) gam = epsmac; */

	    /*---- get next plane rotation ---*/
	    if (gam == 0.0)
	    {
		c[i] = one;
		s[i] = zero;
	    }
            else
	    {
		c[i] = hh[i][i] / gam;
		s[i] = hh[i][i1] / gam;
	    }

	    rs[i1] = -s[i] * rs[i];
	    rs[i] = c[i] * rs[i];

	    /*----------------------------------------------------
	    |   determine residual norm and test for convergence
	    +---------------------------------------------------*/
	    hh[i][i] = c[i] * hh[i][i] + s[i] * hh[i][i1];
	    beta = fabs(rs[i1]);
	    if (fits != NULL)
		fprintf(fits, "%8d   %10.2e\n", its, beta);
	    if (beta <= eps1 || its >= maxits)
		break;
	}

	if (i == im) i--;

	/*---- now compute solution. 1st, solve upper triangular system ----*/
	rs[i] = rs[i] / hh[i][i];

	for (ii = 1; ii <= i; ii++)
	{
	    k = i - ii;
	    k1 = k + 1;
	    tt = rs[k];
	    for (j = k1; j <= i; j++)
		tt = tt - hh[j][k] * rs[j];
	    rs[k] = tt / hh[k][k];
	}

	/*---- linear combination of v[i]'s to get sol. ----*/
	for (j = 0; j <= i; j++)
	{
	    tt = rs[j];
	    for (k = 0; k < n; k++)
		sol[k] += tt * z[j][k];
	}

	/* calculate the residual and output */
	smatvec(one, sol, zero, vv[0]);
	for (j = 0; j < n; j++)
	    vv[0][j] = rhs[j] - vv[0][j];	/* vv[0]= initial residual */

	/*---- print info if fits != null ----*/
	beta = snrm2_(&n, vv[0], &i_1);

	/*---- restart outer loop if needed ----*/
	/*if (beta >= eps1 / tol)*/
	if ( !(beta < eps1 / tol) )
	{
	    its = maxits + 10;
	    break;
	}
	if (beta <= eps1)
	    break;
    } while(its < maxits);

    retval = (its >= maxits);
    for (i = 0; i <= im; i++)
	SUPERLU_FREE(vv[i]);
    SUPERLU_FREE(vv);
    for (i = 0; i < im; i++)
    {
	SUPERLU_FREE(hh[i]);
	SUPERLU_FREE(z[i]);
    }
    SUPERLU_FREE(hh);
    SUPERLU_FREE(z);
    SUPERLU_FREE(c);
    SUPERLU_FREE(s);
    SUPERLU_FREE(rs);

    *itmax = its;

    return retval;
} /*----end of fgmr ----*/
Esempio n. 21
0
/* Subroutine */ int slatdf_(integer *ijob, integer *n, real *z__, integer *
	ldz, real *rhs, real *rdsum, real *rdscal, integer *ipiv, integer *
	jpiv)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    SLATDF uses the LU factorization of the n-by-n matrix Z computed by   
    SGETC2 and computes a contribution to the reciprocal Dif-estimate   
    by solving Z * x = b for x, and choosing the r.h.s. b such that   
    the norm of x is as large as possible. On entry RHS = b holds the   
    contribution from earlier solved sub-systems, and on return RHS = x.   

    The factorization of Z returned by SGETC2 has the form Z = P*L*U*Q,   
    where P and Q are permutation matrices. L is lower triangular with   
    unit diagonal elements and U is upper triangular.   

    Arguments   
    =========   

    IJOB    (input) INTEGER   
            IJOB = 2: First compute an approximative null-vector e   
                of Z using SGECON, e is normalized and solve for   
                Zx = +-e - f with the sign giving the greater value   
                of 2-norm(x). About 5 times as expensive as Default.   
            IJOB .ne. 2: Local look ahead strategy where all entries of   
                the r.h.s. b is choosen as either +1 or -1 (Default).   

    N       (input) INTEGER   
            The number of columns of the matrix Z.   

    Z       (input) REAL array, dimension (LDZ, N)   
            On entry, the LU part of the factorization of the n-by-n   
            matrix Z computed by SGETC2:  Z = P * L * U * Q   

    LDZ     (input) INTEGER   
            The leading dimension of the array Z.  LDA >= max(1, N).   

    RHS     (input/output) REAL array, dimension N.   
            On entry, RHS contains contributions from other subsystems.   
            On exit, RHS contains the solution of the subsystem with   
            entries acoording to the value of IJOB (see above).   

    RDSUM   (input/output) REAL   
            On entry, the sum of squares of computed contributions to   
            the Dif-estimate under computation by STGSYL, where the   
            scaling factor RDSCAL (see below) has been factored out.   
            On exit, the corresponding sum of squares updated with the   
            contributions from the current sub-system.   
            If TRANS = 'T' RDSUM is not touched.   
            NOTE: RDSUM only makes sense when STGSY2 is called by STGSYL.   

    RDSCAL  (input/output) REAL   
            On entry, scaling factor used to prevent overflow in RDSUM.   
            On exit, RDSCAL is updated w.r.t. the current contributions   
            in RDSUM.   
            If TRANS = 'T', RDSCAL is not touched.   
            NOTE: RDSCAL only makes sense when STGSY2 is called by   
                  STGSYL.   

    IPIV    (input) INTEGER array, dimension (N).   
            The pivot indices; for 1 <= i <= N, row i of the   
            matrix has been interchanged with row IPIV(i).   

    JPIV    (input) INTEGER array, dimension (N).   
            The pivot indices; for 1 <= j <= N, column j of the   
            matrix has been interchanged with column JPIV(j).   

    Further Details   
    ===============   

    Based on contributions by   
       Bo Kagstrom and Peter Poromaa, Department of Computing Science,   
       Umea University, S-901 87 Umea, Sweden.   

    This routine is a further developed implementation of algorithm   
    BSOLVE in [1] using complete pivoting in the LU factorization.   

    [1] Bo Kagstrom and Lars Westin,   
        Generalized Schur Methods with Condition Estimators for   
        Solving the Generalized Sylvester Equation, IEEE Transactions   
        on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751.   

    [2] Peter Poromaa,   
        On Efficient and Robust Estimators for the Separation   
        between two Regular Matrix Pairs with Applications in   
        Condition Estimation. Report IMINF-95.05, Departement of   
        Computing Science, Umea University, S-901 87 Umea, Sweden, 1995.   

    =====================================================================   


       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static real c_b23 = 1.f;
    static real c_b37 = -1.f;
    
    /* System generated locals */
    integer z_dim1, z_offset, i__1, i__2;
    real r__1;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static integer info;
    static real temp;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    static real work[32];
    static integer i__, j, k;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static real pmone;
    extern doublereal sasum_(integer *, real *, integer *);
    static real sminu;
    static integer iwork[8];
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *), saxpy_(integer *, real *, real *, integer *, real *, 
	    integer *);
    static real splus;
    extern /* Subroutine */ int sgesc2_(integer *, real *, integer *, real *, 
	    integer *, integer *, real *);
    static real bm, bp, xm[8], xp[8];
    extern /* Subroutine */ int sgecon_(char *, integer *, real *, integer *, 
	    real *, real *, real *, integer *, integer *), slassq_(
	    integer *, real *, integer *, real *, real *), slaswp_(integer *, 
	    real *, integer *, integer *, integer *, integer *, integer *);
#define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]


    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --rhs;
    --ipiv;
    --jpiv;

    /* Function Body */
    if (*ijob != 2) {

/*        Apply permutations IPIV to RHS */

	i__1 = *n - 1;
	slaswp_(&c__1, &rhs[1], ldz, &c__1, &i__1, &ipiv[1], &c__1);

/*        Solve for L-part choosing RHS either to +1 or -1. */

	pmone = -1.f;

	i__1 = *n - 1;
	for (j = 1; j <= i__1; ++j) {
	    bp = rhs[j] + 1.f;
	    bm = rhs[j] - 1.f;
	    splus = 1.f;

/*           Look-ahead for L-part RHS(1:N-1) = + or -1, SPLUS and   
             SMIN computed more efficiently than in BSOLVE [1]. */

	    i__2 = *n - j;
	    splus += sdot_(&i__2, &z___ref(j + 1, j), &c__1, &z___ref(j + 1, 
		    j), &c__1);
	    i__2 = *n - j;
	    sminu = sdot_(&i__2, &z___ref(j + 1, j), &c__1, &rhs[j + 1], &
		    c__1);
	    splus *= rhs[j];
	    if (splus > sminu) {
		rhs[j] = bp;
	    } else if (sminu > splus) {
		rhs[j] = bm;
	    } else {

/*              In this case the updating sums are equal and we can   
                choose RHS(J) +1 or -1. The first time this happens   
                we choose -1, thereafter +1. This is a simple way to   
                get good estimates of matrices like Byers well-known   
                example (see [1]). (Not done in BSOLVE.) */

		rhs[j] += pmone;
		pmone = 1.f;
	    }

/*           Compute the remaining r.h.s. */

	    temp = -rhs[j];
	    i__2 = *n - j;
	    saxpy_(&i__2, &temp, &z___ref(j + 1, j), &c__1, &rhs[j + 1], &
		    c__1);

/* L10: */
	}

/*        Solve for U-part, look-ahead for RHS(N) = +-1. This is not done   
          in BSOLVE and will hopefully give us a better estimate because   
          any ill-conditioning of the original matrix is transfered to U   
          and not to L. U(N, N) is an approximation to sigma_min(LU). */

	i__1 = *n - 1;
	scopy_(&i__1, &rhs[1], &c__1, xp, &c__1);
	xp[*n - 1] = rhs[*n] + 1.f;
	rhs[*n] += -1.f;
	splus = 0.f;
	sminu = 0.f;
	for (i__ = *n; i__ >= 1; --i__) {
	    temp = 1.f / z___ref(i__, i__);
	    xp[i__ - 1] *= temp;
	    rhs[i__] *= temp;
	    i__1 = *n;
	    for (k = i__ + 1; k <= i__1; ++k) {
		xp[i__ - 1] -= xp[k - 1] * (z___ref(i__, k) * temp);
		rhs[i__] -= rhs[k] * (z___ref(i__, k) * temp);
/* L20: */
	    }
	    splus += (r__1 = xp[i__ - 1], dabs(r__1));
	    sminu += (r__1 = rhs[i__], dabs(r__1));
/* L30: */
	}
	if (splus > sminu) {
	    scopy_(n, xp, &c__1, &rhs[1], &c__1);
	}

/*        Apply the permutations JPIV to the computed solution (RHS) */

	i__1 = *n - 1;
	slaswp_(&c__1, &rhs[1], ldz, &c__1, &i__1, &jpiv[1], &c_n1);

/*        Compute the sum of squares */

	slassq_(n, &rhs[1], &c__1, rdscal, rdsum);

    } else {

/*        IJOB = 2, Compute approximate nullvector XM of Z */

	sgecon_("I", n, &z__[z_offset], ldz, &c_b23, &temp, work, iwork, &
		info);
	scopy_(n, &work[*n], &c__1, xm, &c__1);

/*        Compute RHS */

	i__1 = *n - 1;
	slaswp_(&c__1, xm, ldz, &c__1, &i__1, &ipiv[1], &c_n1);
	temp = 1.f / sqrt(sdot_(n, xm, &c__1, xm, &c__1));
	sscal_(n, &temp, xm, &c__1);
	scopy_(n, xm, &c__1, xp, &c__1);
	saxpy_(n, &c_b23, &rhs[1], &c__1, xp, &c__1);
	saxpy_(n, &c_b37, xm, &c__1, &rhs[1], &c__1);
	sgesc2_(n, &z__[z_offset], ldz, &rhs[1], &ipiv[1], &jpiv[1], &temp);
	sgesc2_(n, &z__[z_offset], ldz, xp, &ipiv[1], &jpiv[1], &temp);
	if (sasum_(n, xp, &c__1) > sasum_(n, &rhs[1], &c__1)) {
	    scopy_(n, xp, &c__1, &rhs[1], &c__1);
	}

/*        Compute the sum of squares */

	slassq_(n, &rhs[1], &c__1, rdscal, rdsum);

    }

    return 0;

/*     End of SLATDF */

} /* slatdf_ */
Esempio n. 22
0
/* Subroutine */ int slahr2_(integer *n, integer *k, integer *nb, real *a,
                             integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy)
{
    /* System generated locals */
    integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2,
            i__3;
    real r__1;

    /* Local variables */
    integer i__;
    real ei;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
           sgemm_(char *, char *, integer *, integer *, integer *, real *,
                  real *, integer *, real *, integer *, real *, real *, integer *), sgemv_(char *, integer *, integer *, real *,
                          real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *),
           strmm_(char *, char *, char *, char *, integer *, integer *, real
                  *, real *, integer *, real *, integer *), saxpy_(integer *, real *, real *, integer *, real *,
                          integer *), strmv_(char *, char *, char *, integer *, real *,
                                  integer *, real *, integer *), slarfg_(
               integer *, real *, real *, integer *, real *), slacpy_(char *,
                       integer *, integer *, real *, integer *, real *, integer *);


    /*  -- LAPACK auxiliary routine (version 3.2) -- */
    /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
    /*     November 2006 */

    /*     .. Scalar Arguments .. */
    /*     .. */
    /*     .. Array Arguments .. */
    /*     .. */

    /*  Purpose */
    /*  ======= */

    /*  SLAHR2 reduces the first NB columns of A real general n-BY-(n-k+1) */
    /*  matrix A so that elements below the k-th subdiagonal are zero. The */
    /*  reduction is performed by an orthogonal similarity transformation */
    /*  Q' * A * Q. The routine returns the matrices V and T which determine */
    /*  Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. */

    /*  This is an auxiliary routine called by SGEHRD. */

    /*  Arguments */
    /*  ========= */

    /*  N       (input) INTEGER */
    /*          The order of the matrix A. */

    /*  K       (input) INTEGER */
    /*          The offset for the reduction. Elements below the k-th */
    /*          subdiagonal in the first NB columns are reduced to zero. */
    /*          K < N. */

    /*  NB      (input) INTEGER */
    /*          The number of columns to be reduced. */

    /*  A       (input/output) REAL array, dimension (LDA,N-K+1) */
    /*          On entry, the n-by-(n-k+1) general matrix A. */
    /*          On exit, the elements on and above the k-th subdiagonal in */
    /*          the first NB columns are overwritten with the corresponding */
    /*          elements of the reduced matrix; the elements below the k-th */
    /*          subdiagonal, with the array TAU, represent the matrix Q as a */
    /*          product of elementary reflectors. The other columns of A are */
    /*          unchanged. See Further Details. */

    /*  LDA     (input) INTEGER */
    /*          The leading dimension of the array A.  LDA >= max(1,N). */

    /*  TAU     (output) REAL array, dimension (NB) */
    /*          The scalar factors of the elementary reflectors. See Further */
    /*          Details. */

    /*  T       (output) REAL array, dimension (LDT,NB) */
    /*          The upper triangular matrix T. */

    /*  LDT     (input) INTEGER */
    /*          The leading dimension of the array T.  LDT >= NB. */

    /*  Y       (output) REAL array, dimension (LDY,NB) */
    /*          The n-by-nb matrix Y. */

    /*  LDY     (input) INTEGER */
    /*          The leading dimension of the array Y. LDY >= N. */

    /*  Further Details */
    /*  =============== */

    /*  The matrix Q is represented as a product of nb elementary reflectors */

    /*     Q = H(1) H(2) . . . H(nb). */

    /*  Each H(i) has the form */

    /*     H(i) = I - tau * v * v' */

    /*  where tau is a real scalar, and v is a real vector with */
    /*  v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in */
    /*  A(i+k+1:n,i), and tau in TAU(i). */

    /*  The elements of the vectors v together form the (n-k+1)-by-nb matrix */
    /*  V which is needed, with T and Y, to apply the transformation to the */
    /*  unreduced part of the matrix, using an update of the form: */
    /*  A := (I - V*T*V') * (A - Y*V'). */

    /*  The contents of A on exit are illustrated by the following example */
    /*  with n = 7, k = 3 and nb = 2: */

    /*     ( a   a   a   a   a ) */
    /*     ( a   a   a   a   a ) */
    /*     ( a   a   a   a   a ) */
    /*     ( h   h   a   a   a ) */
    /*     ( v1  h   a   a   a ) */
    /*     ( v1  v2  a   a   a ) */
    /*     ( v1  v2  a   a   a ) */

    /*  where a denotes an element of the original matrix A, h denotes a */
    /*  modified element of the upper Hessenberg matrix H, and vi denotes an */
    /*  element of the vector defining H(i). */

    /*  This file is a slight modification of LAPACK-3.0's SLAHRD */
    /*  incorporating improvements proposed by Quintana-Orti and Van de */
    /*  Gejin. Note that the entries of A(1:K,2:NB) differ from those */
    /*  returned by the original LAPACK routine. This function is */
    /*  not backward compatible with LAPACK3.0. */

    /*  ===================================================================== */

    /*     .. Parameters .. */
    /*     .. */
    /*     .. Local Scalars .. */
    /*     .. */
    /*     .. External Subroutines .. */
    /*     .. */
    /*     .. Intrinsic Functions .. */
    /*     .. */
    /*     .. Executable Statements .. */

    /*     Quick return if possible */

    /* Parameter adjustments */
    --tau;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1;
    t -= t_offset;
    y_dim1 = *ldy;
    y_offset = 1 + y_dim1;
    y -= y_offset;

    /* Function Body */
    if (*n <= 1) {
        return 0;
    }

    i__1 = *nb;
    for (i__ = 1; i__ <= i__1; ++i__) {
        if (i__ > 1) {

            /*           Update A(K+1:N,I) */

            /*           Update I-th column of A - Y * V' */

            i__2 = *n - *k;
            i__3 = i__ - 1;
            sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b4, &y[*k + 1 + y_dim1],
                   ldy, &a[*k + i__ - 1 + a_dim1], lda, &c_b5, &a[*k + 1 +
                           i__ * a_dim1], &c__1);

            /*           Apply I - V * T' * V' to this column (call it b) from the */
            /*           left, using the last column of T as workspace */

            /*           Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows) */
            /*                    ( V2 )             ( b2 ) */

            /*           where V1 is unit lower triangular */

            /*           w := V1' * b1 */

            i__2 = i__ - 1;
            scopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 +
                    1], &c__1);
            i__2 = i__ - 1;
            strmv_("Lower", "Transpose", "UNIT", &i__2, &a[*k + 1 + a_dim1],
                   lda, &t[*nb * t_dim1 + 1], &c__1);

            /*           w := w + V2'*b2 */

            i__2 = *n - *k - i__ + 1;
            i__3 = i__ - 1;
            sgemv_("Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1],
                   lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b5, &t[*nb *
                           t_dim1 + 1], &c__1);

            /*           w := T'*w */

            i__2 = i__ - 1;
            strmv_("Upper", "Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt,
                   &t[*nb * t_dim1 + 1], &c__1);

            /*           b2 := b2 - V2*w */

            i__2 = *n - *k - i__ + 1;
            i__3 = i__ - 1;
            sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b4, &a[*k + i__ + a_dim1],
                   lda, &t[*nb * t_dim1 + 1], &c__1, &c_b5, &a[*k + i__ +
                           i__ * a_dim1], &c__1);

            /*           b1 := b1 - V1*w */

            i__2 = i__ - 1;
            strmv_("Lower", "NO TRANSPOSE", "UNIT", &i__2, &a[*k + 1 + a_dim1]
                   , lda, &t[*nb * t_dim1 + 1], &c__1);
            i__2 = i__ - 1;
            saxpy_(&i__2, &c_b4, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__
                    * a_dim1], &c__1);

            a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei;
        }

        /*        Generate the elementary reflector H(I) to annihilate */
        /*        A(K+I+1:N,I) */

        i__2 = *n - *k - i__ + 1;
        /* Computing MIN */
        i__3 = *k + i__ + 1;
        slarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3, *n)+ i__ *
                a_dim1], &c__1, &tau[i__]);
        ei = a[*k + i__ + i__ * a_dim1];
        a[*k + i__ + i__ * a_dim1] = 1.f;

        /*        Compute  Y(K+1:N,I) */

        i__2 = *n - *k;
        i__3 = *n - *k - i__ + 1;
        sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b5, &a[*k + 1 + (i__ + 1) *
                a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &y[*
                        k + 1 + i__ * y_dim1], &c__1);
        i__2 = *n - *k - i__ + 1;
        i__3 = i__ - 1;
        sgemv_("Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], lda, &
               a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &t[i__ * t_dim1 +
                       1], &c__1);
        i__2 = *n - *k;
        i__3 = i__ - 1;
        sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b4, &y[*k + 1 + y_dim1], ldy,
               &t[i__ * t_dim1 + 1], &c__1, &c_b5, &y[*k + 1 + i__ * y_dim1],
               &c__1);
        i__2 = *n - *k;
        sscal_(&i__2, &tau[i__], &y[*k + 1 + i__ * y_dim1], &c__1);

        /*        Compute T(1:I,I) */

        i__2 = i__ - 1;
        r__1 = -tau[i__];
        sscal_(&i__2, &r__1, &t[i__ * t_dim1 + 1], &c__1);
        i__2 = i__ - 1;
        strmv_("Upper", "No Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt,
               &t[i__ * t_dim1 + 1], &c__1)
        ;
        t[i__ + i__ * t_dim1] = tau[i__];

        /* L10: */
    }
    a[*k + *nb + *nb * a_dim1] = ei;

    /*     Compute Y(1:K,1:NB) */

    slacpy_("ALL", k, nb, &a[(a_dim1 << 1) + 1], lda, &y[y_offset], ldy);
    strmm_("RIGHT", "Lower", "NO TRANSPOSE", "UNIT", k, nb, &c_b5, &a[*k + 1
            + a_dim1], lda, &y[y_offset], ldy);
    if (*n > *k + *nb) {
        i__1 = *n - *k - *nb;
        sgemm_("NO TRANSPOSE", "NO TRANSPOSE", k, nb, &i__1, &c_b5, &a[(*nb +
                2) * a_dim1 + 1], lda, &a[*k + 1 + *nb + a_dim1], lda, &c_b5,
               &y[y_offset], ldy);
    }
    strmm_("RIGHT", "Upper", "NO TRANSPOSE", "NON-UNIT", k, nb, &c_b5, &t[
               t_offset], ldt, &y[y_offset], ldy);

    return 0;

    /*     End of SLAHR2 */

} /* slahr2_ */
Esempio n. 23
0
/* Subroutine */ int slaqtr_(logical *ltran, logical *lreal, integer *n, real 
	*t, integer *ldt, real *b, real *w, real *scale, real *x, real *work, 
	integer *info)
{
    /* System generated locals */
    integer t_dim1, t_offset, i__1, i__2;
    real r__1, r__2, r__3, r__4, r__5, r__6;

    /* Local variables */
    real d__[4]	/* was [2][2] */;
    integer i__, j, k;
    real v[4]	/* was [2][2] */, z__;
    integer j1, j2, n1, n2;
    real si, xj, sr, rec, eps, tjj, tmp;
    integer ierr;
    real smin;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    real xmax;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    integer jnext;
    extern doublereal sasum_(integer *, real *, integer *);
    real sminw, xnorm;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *), slaln2_(logical *, integer *, integer *, real 
	    *, real *, real *, integer *, real *, real *, real *, integer *, 
	    real *, real *, real *, integer *, real *, real *, integer *);
    real scaloc;
    extern doublereal slamch_(char *), slange_(char *, integer *, 
	    integer *, real *, integer *, real *);
    real bignum;
    extern integer isamax_(integer *, real *, integer *);
    extern /* Subroutine */ int sladiv_(real *, real *, real *, real *, real *
, real *);
    logical notran;
    real smlnum;


/*  -- LAPACK auxiliary routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLAQTR solves the real quasi-triangular system */

/*               op(T)*p = scale*c,               if LREAL = .TRUE. */

/*  or the complex quasi-triangular systems */

/*             op(T + iB)*(p+iq) = scale*(c+id),  if LREAL = .FALSE. */

/*  in real arithmetic, where T is upper quasi-triangular. */
/*  If LREAL = .FALSE., then the first diagonal block of T must be */
/*  1 by 1, B is the specially structured matrix */

/*                 B = [ b(1) b(2) ... b(n) ] */
/*                     [       w            ] */
/*                     [           w        ] */
/*                     [              .     ] */
/*                     [                 w  ] */

/*  op(A) = A or A', A' denotes the conjugate transpose of */
/*  matrix A. */

/*  On input, X = [ c ].  On output, X = [ p ]. */
/*                [ d ]                  [ q ] */

/*  This subroutine is designed for the condition number estimation */
/*  in routine STRSNA. */

/*  Arguments */
/*  ========= */

/*  LTRAN   (input) LOGICAL */
/*          On entry, LTRAN specifies the option of conjugate transpose: */
/*             = .FALSE.,    op(T+i*B) = T+i*B, */
/*             = .TRUE.,     op(T+i*B) = (T+i*B)'. */

/*  LREAL   (input) LOGICAL */
/*          On entry, LREAL specifies the input matrix structure: */
/*             = .FALSE.,    the input is complex */
/*             = .TRUE.,     the input is real */

/*  N       (input) INTEGER */
/*          On entry, N specifies the order of T+i*B. N >= 0. */

/*  T       (input) REAL array, dimension (LDT,N) */
/*          On entry, T contains a matrix in Schur canonical form. */
/*          If LREAL = .FALSE., then the first diagonal block of T must */
/*          be 1 by 1. */

/*  LDT     (input) INTEGER */
/*          The leading dimension of the matrix T. LDT >= max(1,N). */

/*  B       (input) REAL array, dimension (N) */
/*          On entry, B contains the elements to form the matrix */
/*          B as described above. */
/*          If LREAL = .TRUE., B is not referenced. */

/*  W       (input) REAL */
/*          On entry, W is the diagonal element of the matrix B. */
/*          If LREAL = .TRUE., W is not referenced. */

/*  SCALE   (output) REAL */
/*          On exit, SCALE is the scale factor. */

/*  X       (input/output) REAL array, dimension (2*N) */
/*          On entry, X contains the right hand side of the system. */
/*          On exit, X is overwritten by the solution. */

/*  WORK    (workspace) REAL array, dimension (N) */

/*  INFO    (output) INTEGER */
/*          On exit, INFO is set to */
/*             0: successful exit. */
/*               1: the some diagonal 1 by 1 block has been perturbed by */
/*                  a small number SMIN to keep nonsingularity. */
/*               2: the some diagonal 2 by 2 block has been perturbed by */
/*                  a small number in SLALN2 to keep nonsingularity. */
/*          NOTE: In the interests of speed, this routine does not */
/*                check the inputs for errors. */

/* ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Do not test the input parameters for errors */

    /* Parameter adjustments */
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1;
    t -= t_offset;
    --b;
    --x;
    --work;

    /* Function Body */
    notran = ! (*ltran);
    *info = 0;

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Set constants to control overflow */

    eps = slamch_("P");
    smlnum = slamch_("S") / eps;
    bignum = 1.f / smlnum;

    xnorm = slange_("M", n, n, &t[t_offset], ldt, d__);
    if (! (*lreal)) {
/* Computing MAX */
	r__1 = xnorm, r__2 = dabs(*w), r__1 = max(r__1,r__2), r__2 = slange_(
		"M", n, &c__1, &b[1], n, d__);
	xnorm = dmax(r__1,r__2);
    }
/* Computing MAX */
    r__1 = smlnum, r__2 = eps * xnorm;
    smin = dmax(r__1,r__2);

/*     Compute 1-norm of each column of strictly upper triangular */
/*     part of T to control overflow in triangular solver. */

    work[1] = 0.f;
    i__1 = *n;
    for (j = 2; j <= i__1; ++j) {
	i__2 = j - 1;
	work[j] = sasum_(&i__2, &t[j * t_dim1 + 1], &c__1);
/* L10: */
    }

    if (! (*lreal)) {
	i__1 = *n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    work[i__] += (r__1 = b[i__], dabs(r__1));
/* L20: */
	}
    }

    n2 = *n << 1;
    n1 = *n;
    if (! (*lreal)) {
	n1 = n2;
    }
    k = isamax_(&n1, &x[1], &c__1);
    xmax = (r__1 = x[k], dabs(r__1));
    *scale = 1.f;

    if (xmax > bignum) {
	*scale = bignum / xmax;
	sscal_(&n1, scale, &x[1], &c__1);
	xmax = bignum;
    }

    if (*lreal) {

	if (notran) {

/*           Solve T*p = scale*c */

	    jnext = *n;
	    for (j = *n; j >= 1; --j) {
		if (j > jnext) {
		    goto L30;
		}
		j1 = j;
		j2 = j;
		jnext = j - 1;
		if (j > 1) {
		    if (t[j + (j - 1) * t_dim1] != 0.f) {
			j1 = j - 1;
			jnext = j - 2;
		    }
		}

		if (j1 == j2) {

/*                 Meet 1 by 1 diagonal block */

/*                 Scale to avoid overflow when computing */
/*                     x(j) = b(j)/T(j,j) */

		    xj = (r__1 = x[j1], dabs(r__1));
		    tjj = (r__1 = t[j1 + j1 * t_dim1], dabs(r__1));
		    tmp = t[j1 + j1 * t_dim1];
		    if (tjj < smin) {
			tmp = smin;
			tjj = smin;
			*info = 1;
		    }

		    if (xj == 0.f) {
			goto L30;
		    }

		    if (tjj < 1.f) {
			if (xj > bignum * tjj) {
			    rec = 1.f / xj;
			    sscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }
		    x[j1] /= tmp;
		    xj = (r__1 = x[j1], dabs(r__1));

/*                 Scale x if necessary to avoid overflow when adding a */
/*                 multiple of column j1 of T. */

		    if (xj > 1.f) {
			rec = 1.f / xj;
			if (work[j1] > (bignum - xmax) * rec) {
			    sscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			}
		    }
		    if (j1 > 1) {
			i__1 = j1 - 1;
			r__1 = -x[j1];
			saxpy_(&i__1, &r__1, &t[j1 * t_dim1 + 1], &c__1, &x[1]
, &c__1);
			i__1 = j1 - 1;
			k = isamax_(&i__1, &x[1], &c__1);
			xmax = (r__1 = x[k], dabs(r__1));
		    }

		} else {

/*                 Meet 2 by 2 diagonal block */

/*                 Call 2 by 2 linear system solve, to take */
/*                 care of possible overflow by scaling factor. */

		    d__[0] = x[j1];
		    d__[1] = x[j2];
		    slaln2_(&c_false, &c__2, &c__1, &smin, &c_b21, &t[j1 + j1 
			    * t_dim1], ldt, &c_b21, &c_b21, d__, &c__2, &
			    c_b25, &c_b25, v, &c__2, &scaloc, &xnorm, &ierr);
		    if (ierr != 0) {
			*info = 2;
		    }

		    if (scaloc != 1.f) {
			sscal_(n, &scaloc, &x[1], &c__1);
			*scale *= scaloc;
		    }
		    x[j1] = v[0];
		    x[j2] = v[1];

/*                 Scale V(1,1) (= X(J1)) and/or V(2,1) (=X(J2)) */
/*                 to avoid overflow in updating right-hand side. */

/* Computing MAX */
		    r__1 = dabs(v[0]), r__2 = dabs(v[1]);
		    xj = dmax(r__1,r__2);
		    if (xj > 1.f) {
			rec = 1.f / xj;
/* Computing MAX */
			r__1 = work[j1], r__2 = work[j2];
			if (dmax(r__1,r__2) > (bignum - xmax) * rec) {
			    sscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			}
		    }

/*                 Update right-hand side */

		    if (j1 > 1) {
			i__1 = j1 - 1;
			r__1 = -x[j1];
			saxpy_(&i__1, &r__1, &t[j1 * t_dim1 + 1], &c__1, &x[1]
, &c__1);
			i__1 = j1 - 1;
			r__1 = -x[j2];
			saxpy_(&i__1, &r__1, &t[j2 * t_dim1 + 1], &c__1, &x[1]
, &c__1);
			i__1 = j1 - 1;
			k = isamax_(&i__1, &x[1], &c__1);
			xmax = (r__1 = x[k], dabs(r__1));
		    }

		}

L30:
		;
	    }

	} else {

/*           Solve T'*p = scale*c */

	    jnext = 1;
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		if (j < jnext) {
		    goto L40;
		}
		j1 = j;
		j2 = j;
		jnext = j + 1;
		if (j < *n) {
		    if (t[j + 1 + j * t_dim1] != 0.f) {
			j2 = j + 1;
			jnext = j + 2;
		    }
		}

		if (j1 == j2) {

/*                 1 by 1 diagonal block */

/*                 Scale if necessary to avoid overflow in forming the */
/*                 right-hand side element by inner product. */

		    xj = (r__1 = x[j1], dabs(r__1));
		    if (xmax > 1.f) {
			rec = 1.f / xmax;
			if (work[j1] > (bignum - xj) * rec) {
			    sscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }

		    i__2 = j1 - 1;
		    x[j1] -= sdot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, &x[1], &
			    c__1);

		    xj = (r__1 = x[j1], dabs(r__1));
		    tjj = (r__1 = t[j1 + j1 * t_dim1], dabs(r__1));
		    tmp = t[j1 + j1 * t_dim1];
		    if (tjj < smin) {
			tmp = smin;
			tjj = smin;
			*info = 1;
		    }

		    if (tjj < 1.f) {
			if (xj > bignum * tjj) {
			    rec = 1.f / xj;
			    sscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }
		    x[j1] /= tmp;
/* Computing MAX */
		    r__2 = xmax, r__3 = (r__1 = x[j1], dabs(r__1));
		    xmax = dmax(r__2,r__3);

		} else {

/*                 2 by 2 diagonal block */

/*                 Scale if necessary to avoid overflow in forming the */
/*                 right-hand side elements by inner product. */

/* Computing MAX */
		    r__3 = (r__1 = x[j1], dabs(r__1)), r__4 = (r__2 = x[j2], 
			    dabs(r__2));
		    xj = dmax(r__3,r__4);
		    if (xmax > 1.f) {
			rec = 1.f / xmax;
/* Computing MAX */
			r__1 = work[j2], r__2 = work[j1];
			if (dmax(r__1,r__2) > (bignum - xj) * rec) {
			    sscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }

		    i__2 = j1 - 1;
		    d__[0] = x[j1] - sdot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, 
			    &x[1], &c__1);
		    i__2 = j1 - 1;
		    d__[1] = x[j2] - sdot_(&i__2, &t[j2 * t_dim1 + 1], &c__1, 
			    &x[1], &c__1);

		    slaln2_(&c_true, &c__2, &c__1, &smin, &c_b21, &t[j1 + j1 *
			     t_dim1], ldt, &c_b21, &c_b21, d__, &c__2, &c_b25, 
			     &c_b25, v, &c__2, &scaloc, &xnorm, &ierr);
		    if (ierr != 0) {
			*info = 2;
		    }

		    if (scaloc != 1.f) {
			sscal_(n, &scaloc, &x[1], &c__1);
			*scale *= scaloc;
		    }
		    x[j1] = v[0];
		    x[j2] = v[1];
/* Computing MAX */
		    r__3 = (r__1 = x[j1], dabs(r__1)), r__4 = (r__2 = x[j2], 
			    dabs(r__2)), r__3 = max(r__3,r__4);
		    xmax = dmax(r__3,xmax);

		}
L40:
		;
	    }
	}

    } else {

/* Computing MAX */
	r__1 = eps * dabs(*w);
	sminw = dmax(r__1,smin);
	if (notran) {

/*           Solve (T + iB)*(p+iq) = c+id */

	    jnext = *n;
	    for (j = *n; j >= 1; --j) {
		if (j > jnext) {
		    goto L70;
		}
		j1 = j;
		j2 = j;
		jnext = j - 1;
		if (j > 1) {
		    if (t[j + (j - 1) * t_dim1] != 0.f) {
			j1 = j - 1;
			jnext = j - 2;
		    }
		}

		if (j1 == j2) {

/*                 1 by 1 diagonal block */

/*                 Scale if necessary to avoid overflow in division */

		    z__ = *w;
		    if (j1 == 1) {
			z__ = b[1];
		    }
		    xj = (r__1 = x[j1], dabs(r__1)) + (r__2 = x[*n + j1], 
			    dabs(r__2));
		    tjj = (r__1 = t[j1 + j1 * t_dim1], dabs(r__1)) + dabs(z__)
			    ;
		    tmp = t[j1 + j1 * t_dim1];
		    if (tjj < sminw) {
			tmp = sminw;
			tjj = sminw;
			*info = 1;
		    }

		    if (xj == 0.f) {
			goto L70;
		    }

		    if (tjj < 1.f) {
			if (xj > bignum * tjj) {
			    rec = 1.f / xj;
			    sscal_(&n2, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }
		    sladiv_(&x[j1], &x[*n + j1], &tmp, &z__, &sr, &si);
		    x[j1] = sr;
		    x[*n + j1] = si;
		    xj = (r__1 = x[j1], dabs(r__1)) + (r__2 = x[*n + j1], 
			    dabs(r__2));

/*                 Scale x if necessary to avoid overflow when adding a */
/*                 multiple of column j1 of T. */

		    if (xj > 1.f) {
			rec = 1.f / xj;
			if (work[j1] > (bignum - xmax) * rec) {
			    sscal_(&n2, &rec, &x[1], &c__1);
			    *scale *= rec;
			}
		    }

		    if (j1 > 1) {
			i__1 = j1 - 1;
			r__1 = -x[j1];
			saxpy_(&i__1, &r__1, &t[j1 * t_dim1 + 1], &c__1, &x[1]
, &c__1);
			i__1 = j1 - 1;
			r__1 = -x[*n + j1];
			saxpy_(&i__1, &r__1, &t[j1 * t_dim1 + 1], &c__1, &x[*
				n + 1], &c__1);

			x[1] += b[j1] * x[*n + j1];
			x[*n + 1] -= b[j1] * x[j1];

			xmax = 0.f;
			i__1 = j1 - 1;
			for (k = 1; k <= i__1; ++k) {
/* Computing MAX */
			    r__3 = xmax, r__4 = (r__1 = x[k], dabs(r__1)) + (
				    r__2 = x[k + *n], dabs(r__2));
			    xmax = dmax(r__3,r__4);
/* L50: */
			}
		    }

		} else {

/*                 Meet 2 by 2 diagonal block */

		    d__[0] = x[j1];
		    d__[1] = x[j2];
		    d__[2] = x[*n + j1];
		    d__[3] = x[*n + j2];
		    r__1 = -(*w);
		    slaln2_(&c_false, &c__2, &c__2, &sminw, &c_b21, &t[j1 + 
			    j1 * t_dim1], ldt, &c_b21, &c_b21, d__, &c__2, &
			    c_b25, &r__1, v, &c__2, &scaloc, &xnorm, &ierr);
		    if (ierr != 0) {
			*info = 2;
		    }

		    if (scaloc != 1.f) {
			i__1 = *n << 1;
			sscal_(&i__1, &scaloc, &x[1], &c__1);
			*scale = scaloc * *scale;
		    }
		    x[j1] = v[0];
		    x[j2] = v[1];
		    x[*n + j1] = v[2];
		    x[*n + j2] = v[3];

/*                 Scale X(J1), .... to avoid overflow in */
/*                 updating right hand side. */

/* Computing MAX */
		    r__1 = dabs(v[0]) + dabs(v[2]), r__2 = dabs(v[1]) + dabs(
			    v[3]);
		    xj = dmax(r__1,r__2);
		    if (xj > 1.f) {
			rec = 1.f / xj;
/* Computing MAX */
			r__1 = work[j1], r__2 = work[j2];
			if (dmax(r__1,r__2) > (bignum - xmax) * rec) {
			    sscal_(&n2, &rec, &x[1], &c__1);
			    *scale *= rec;
			}
		    }

/*                 Update the right-hand side. */

		    if (j1 > 1) {
			i__1 = j1 - 1;
			r__1 = -x[j1];
			saxpy_(&i__1, &r__1, &t[j1 * t_dim1 + 1], &c__1, &x[1]
, &c__1);
			i__1 = j1 - 1;
			r__1 = -x[j2];
			saxpy_(&i__1, &r__1, &t[j2 * t_dim1 + 1], &c__1, &x[1]
, &c__1);

			i__1 = j1 - 1;
			r__1 = -x[*n + j1];
			saxpy_(&i__1, &r__1, &t[j1 * t_dim1 + 1], &c__1, &x[*
				n + 1], &c__1);
			i__1 = j1 - 1;
			r__1 = -x[*n + j2];
			saxpy_(&i__1, &r__1, &t[j2 * t_dim1 + 1], &c__1, &x[*
				n + 1], &c__1);

			x[1] = x[1] + b[j1] * x[*n + j1] + b[j2] * x[*n + j2];
			x[*n + 1] = x[*n + 1] - b[j1] * x[j1] - b[j2] * x[j2];

			xmax = 0.f;
			i__1 = j1 - 1;
			for (k = 1; k <= i__1; ++k) {
/* Computing MAX */
			    r__3 = (r__1 = x[k], dabs(r__1)) + (r__2 = x[k + *
				    n], dabs(r__2));
			    xmax = dmax(r__3,xmax);
/* L60: */
			}
		    }

		}
L70:
		;
	    }

	} else {

/*           Solve (T + iB)'*(p+iq) = c+id */

	    jnext = 1;
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		if (j < jnext) {
		    goto L80;
		}
		j1 = j;
		j2 = j;
		jnext = j + 1;
		if (j < *n) {
		    if (t[j + 1 + j * t_dim1] != 0.f) {
			j2 = j + 1;
			jnext = j + 2;
		    }
		}

		if (j1 == j2) {

/*                 1 by 1 diagonal block */

/*                 Scale if necessary to avoid overflow in forming the */
/*                 right-hand side element by inner product. */

		    xj = (r__1 = x[j1], dabs(r__1)) + (r__2 = x[j1 + *n], 
			    dabs(r__2));
		    if (xmax > 1.f) {
			rec = 1.f / xmax;
			if (work[j1] > (bignum - xj) * rec) {
			    sscal_(&n2, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }

		    i__2 = j1 - 1;
		    x[j1] -= sdot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, &x[1], &
			    c__1);
		    i__2 = j1 - 1;
		    x[*n + j1] -= sdot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, &x[
			    *n + 1], &c__1);
		    if (j1 > 1) {
			x[j1] -= b[j1] * x[*n + 1];
			x[*n + j1] += b[j1] * x[1];
		    }
		    xj = (r__1 = x[j1], dabs(r__1)) + (r__2 = x[j1 + *n], 
			    dabs(r__2));

		    z__ = *w;
		    if (j1 == 1) {
			z__ = b[1];
		    }

/*                 Scale if necessary to avoid overflow in */
/*                 complex division */

		    tjj = (r__1 = t[j1 + j1 * t_dim1], dabs(r__1)) + dabs(z__)
			    ;
		    tmp = t[j1 + j1 * t_dim1];
		    if (tjj < sminw) {
			tmp = sminw;
			tjj = sminw;
			*info = 1;
		    }

		    if (tjj < 1.f) {
			if (xj > bignum * tjj) {
			    rec = 1.f / xj;
			    sscal_(&n2, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }
		    r__1 = -z__;
		    sladiv_(&x[j1], &x[*n + j1], &tmp, &r__1, &sr, &si);
		    x[j1] = sr;
		    x[j1 + *n] = si;
/* Computing MAX */
		    r__3 = (r__1 = x[j1], dabs(r__1)) + (r__2 = x[j1 + *n], 
			    dabs(r__2));
		    xmax = dmax(r__3,xmax);

		} else {

/*                 2 by 2 diagonal block */

/*                 Scale if necessary to avoid overflow in forming the */
/*                 right-hand side element by inner product. */

/* Computing MAX */
		    r__5 = (r__1 = x[j1], dabs(r__1)) + (r__2 = x[*n + j1], 
			    dabs(r__2)), r__6 = (r__3 = x[j2], dabs(r__3)) + (
			    r__4 = x[*n + j2], dabs(r__4));
		    xj = dmax(r__5,r__6);
		    if (xmax > 1.f) {
			rec = 1.f / xmax;
/* Computing MAX */
			r__1 = work[j1], r__2 = work[j2];
			if (dmax(r__1,r__2) > (bignum - xj) / xmax) {
			    sscal_(&n2, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }

		    i__2 = j1 - 1;
		    d__[0] = x[j1] - sdot_(&i__2, &t[j1 * t_dim1 + 1], &c__1, 
			    &x[1], &c__1);
		    i__2 = j1 - 1;
		    d__[1] = x[j2] - sdot_(&i__2, &t[j2 * t_dim1 + 1], &c__1, 
			    &x[1], &c__1);
		    i__2 = j1 - 1;
		    d__[2] = x[*n + j1] - sdot_(&i__2, &t[j1 * t_dim1 + 1], &
			    c__1, &x[*n + 1], &c__1);
		    i__2 = j1 - 1;
		    d__[3] = x[*n + j2] - sdot_(&i__2, &t[j2 * t_dim1 + 1], &
			    c__1, &x[*n + 1], &c__1);
		    d__[0] -= b[j1] * x[*n + 1];
		    d__[1] -= b[j2] * x[*n + 1];
		    d__[2] += b[j1] * x[1];
		    d__[3] += b[j2] * x[1];

		    slaln2_(&c_true, &c__2, &c__2, &sminw, &c_b21, &t[j1 + j1 
			    * t_dim1], ldt, &c_b21, &c_b21, d__, &c__2, &
			    c_b25, w, v, &c__2, &scaloc, &xnorm, &ierr);
		    if (ierr != 0) {
			*info = 2;
		    }

		    if (scaloc != 1.f) {
			sscal_(&n2, &scaloc, &x[1], &c__1);
			*scale = scaloc * *scale;
		    }
		    x[j1] = v[0];
		    x[j2] = v[1];
		    x[*n + j1] = v[2];
		    x[*n + j2] = v[3];
/* Computing MAX */
		    r__5 = (r__1 = x[j1], dabs(r__1)) + (r__2 = x[*n + j1], 
			    dabs(r__2)), r__6 = (r__3 = x[j2], dabs(r__3)) + (
			    r__4 = x[*n + j2], dabs(r__4)), r__5 = max(r__5,
			    r__6);
		    xmax = dmax(r__5,xmax);

		}

L80:
		;
	    }

	}

    }

    return 0;

/*     End of SLAQTR */

} /* slaqtr_ */
Esempio n. 24
0
/* Subroutine */ int ssytd2_(char *uplo, integer *n, real *a, integer *lda, 
	real *d__, real *e, real *tau, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;

    /* Local variables */
    integer i__;
    real taui;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, integer *);
    real alpha;
    extern logical lsame_(char *, char *);
    logical upper;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *), ssymv_(char *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, real *, integer *), 
	    xerbla_(char *, integer *), slarfg_(integer *, real *, 
	    real *, integer *, real *);


/*  -- LAPACK routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal */
/*  form T by an orthogonal similarity transformation: Q' * A * Q = T. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the upper or lower triangular part of the */
/*          symmetric matrix A is stored: */
/*          = 'U':  Upper triangular */
/*          = 'L':  Lower triangular */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  A       (input/output) REAL array, dimension (LDA,N) */
/*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading */
/*          n-by-n upper triangular part of A contains the upper */
/*          triangular part of the matrix A, and the strictly lower */
/*          triangular part of A is not referenced.  If UPLO = 'L', the */
/*          leading n-by-n lower triangular part of A contains the lower */
/*          triangular part of the matrix A, and the strictly upper */
/*          triangular part of A is not referenced. */
/*          On exit, if UPLO = 'U', the diagonal and first superdiagonal */
/*          of A are overwritten by the corresponding elements of the */
/*          tridiagonal matrix T, and the elements above the first */
/*          superdiagonal, with the array TAU, represent the orthogonal */
/*          matrix Q as a product of elementary reflectors; if UPLO */
/*          = 'L', the diagonal and first subdiagonal of A are over- */
/*          written by the corresponding elements of the tridiagonal */
/*          matrix T, and the elements below the first subdiagonal, with */
/*          the array TAU, represent the orthogonal matrix Q as a product */
/*          of elementary reflectors. See Further Details. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  D       (output) REAL array, dimension (N) */
/*          The diagonal elements of the tridiagonal matrix T: */
/*          D(i) = A(i,i). */

/*  E       (output) REAL array, dimension (N-1) */
/*          The off-diagonal elements of the tridiagonal matrix T: */
/*          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. */

/*  TAU     (output) REAL array, dimension (N-1) */
/*          The scalar factors of the elementary reflectors (see Further */
/*          Details). */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  Further Details */
/*  =============== */

/*  If UPLO = 'U', the matrix Q is represented as a product of elementary */
/*  reflectors */

/*     Q = H(n-1) . . . H(2) H(1). */

/*  Each H(i) has the form */

/*     H(i) = I - tau * v * v' */

/*  where tau is a real scalar, and v is a real vector with */
/*  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in */
/*  A(1:i-1,i+1), and tau in TAU(i). */

/*  If UPLO = 'L', the matrix Q is represented as a product of elementary */
/*  reflectors */

/*     Q = H(1) H(2) . . . H(n-1). */

/*  Each H(i) has the form */

/*     H(i) = I - tau * v * v' */

/*  where tau is a real scalar, and v is a real vector with */
/*  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), */
/*  and tau in TAU(i). */

/*  The contents of A on exit are illustrated by the following examples */
/*  with n = 5: */

/*  if UPLO = 'U':                       if UPLO = 'L': */

/*    (  d   e   v2  v3  v4 )              (  d                  ) */
/*    (      d   e   v3  v4 )              (  e   d              ) */
/*    (          d   e   v4 )              (  v1  e   d          ) */
/*    (              d   e  )              (  v1  v2  e   d      ) */
/*    (                  d  )              (  v1  v2  v3  e   d  ) */

/*  where d and e denote diagonal and off-diagonal elements of T, and vi */
/*  denotes an element of the vector defining H(i). */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --d__;
    --e;
    --tau;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SSYTD2", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n <= 0) {
	return 0;
    }

    if (upper) {

/*        Reduce the upper triangle of A */

	for (i__ = *n - 1; i__ >= 1; --i__) {

/*           Generate elementary reflector H(i) = I - tau * v * v' */
/*           to annihilate A(1:i-1,i+1) */

	    slarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1 
		    + 1], &c__1, &taui);
	    e[i__] = a[i__ + (i__ + 1) * a_dim1];

	    if (taui != 0.f) {

/*              Apply H(i) from both sides to A(1:i,1:i) */

		a[i__ + (i__ + 1) * a_dim1] = 1.f;

/*              Compute  x := tau * A * v  storing x in TAU(1:i) */

		ssymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * 
			a_dim1 + 1], &c__1, &c_b8, &tau[1], &c__1);

/*              Compute  w := x - 1/2 * tau * (x'*v) * v */

		alpha = taui * -.5f * sdot_(&i__, &tau[1], &c__1, &a[(i__ + 1)
			 * a_dim1 + 1], &c__1);
		saxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[
			1], &c__1);

/*              Apply the transformation as a rank-2 update: */
/*                 A := A - v * w' - w * v' */

		ssyr2_(uplo, &i__, &c_b14, &a[(i__ + 1) * a_dim1 + 1], &c__1, 
			&tau[1], &c__1, &a[a_offset], lda);

		a[i__ + (i__ + 1) * a_dim1] = e[i__];
	    }
	    d__[i__ + 1] = a[i__ + 1 + (i__ + 1) * a_dim1];
	    tau[i__] = taui;
/* L10: */
	}
	d__[1] = a[a_dim1 + 1];
    } else {

/*        Reduce the lower triangle of A */

	i__1 = *n - 1;
	for (i__ = 1; i__ <= i__1; ++i__) {

/*           Generate elementary reflector H(i) = I - tau * v * v' */
/*           to annihilate A(i+2:n,i) */

	    i__2 = *n - i__;
/* Computing MIN */
	    i__3 = i__ + 2;
	    slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3, *n)+ i__ *
		     a_dim1], &c__1, &taui);
	    e[i__] = a[i__ + 1 + i__ * a_dim1];

	    if (taui != 0.f) {

/*              Apply H(i) from both sides to A(i+1:n,i+1:n) */

		a[i__ + 1 + i__ * a_dim1] = 1.f;

/*              Compute  x := tau * A * v  storing y in TAU(i:n-1) */

		i__2 = *n - i__;
		ssymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], 
			lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b8, &tau[
			i__], &c__1);

/*              Compute  w := x - 1/2 * tau * (x'*v) * v */

		i__2 = *n - i__;
		alpha = taui * -.5f * sdot_(&i__2, &tau[i__], &c__1, &a[i__ + 
			1 + i__ * a_dim1], &c__1);
		i__2 = *n - i__;
		saxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
			i__], &c__1);

/*              Apply the transformation as a rank-2 update: */
/*                 A := A - v * w' - w * v' */

		i__2 = *n - i__;
		ssyr2_(uplo, &i__2, &c_b14, &a[i__ + 1 + i__ * a_dim1], &c__1, 
			 &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1], 
			lda);

		a[i__ + 1 + i__ * a_dim1] = e[i__];
	    }
	    d__[i__] = a[i__ + i__ * a_dim1];
	    tau[i__] = taui;
/* L20: */
	}
	d__[*n] = a[*n + *n * a_dim1];
    }

    return 0;

/*     End of SSYTD2 */

} /* ssytd2_ */
Esempio n. 25
0
doublereal sqrt12_(integer *m, integer *n, real *a, integer *lda, real *s, 
	real *work, integer *lwork)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    real ret_val;

    /* Local variables */
    integer i__, j, mn, iscl, info;
    real anrm;
    extern doublereal snrm2_(integer *, real *, integer *), sasum_(integer *, 
	    real *, integer *);
    real dummy[1];
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *), sgebd2_(integer *, integer *, real *, integer 
	    *, real *, real *, real *, real *, real *, integer *), slabad_(
	    real *, real *);
    extern doublereal slamch_(char *), slange_(char *, integer *, 
	    integer *, real *, integer *, real *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    real bignum;
    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, 
	    real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, 
	    real *, integer *), sbdsqr_(char *, integer *, integer *, 
	    integer *, integer *, real *, real *, real *, integer *, real *, 
	    integer *, real *, integer *, real *, integer *);
    real smlnum, nrmsvl;


/*  -- LAPACK test routine (version 3.1.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     January 2007 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SQRT12 computes the singular values `svlues' of the upper trapezoid */
/*  of A(1:M,1:N) and returns the ratio */

/*       || s - svlues||/(||svlues||*eps*max(M,N)) */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A. */

/*  A       (input) REAL array, dimension (LDA,N) */
/*          The M-by-N matrix A. Only the upper trapezoid is referenced. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A. */

/*  S       (input) REAL array, dimension (min(M,N)) */
/*          The singular values of the matrix A. */

/*  WORK    (workspace) REAL array, dimension (LWORK) */

/*  LWORK   (input) INTEGER */
/*          The length of the array WORK. LWORK >= max(M*N + 4*min(M,N) + */
/*          max(M,N), M*N+2*MIN( M, N )+4*N). */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --s;
    --work;

    /* Function Body */
    ret_val = 0.f;

/*     Test that enough workspace is supplied */

/* Computing MAX */
    i__1 = *m * *n + (min(*m,*n) << 2) + max(*m,*n), i__2 = *m * *n + (min(*m,
	    *n) << 1) + (*n << 2);
    if (*lwork < max(i__1,i__2)) {
	xerbla_("SQRT12", &c__7);
	return ret_val;
    }

/*     Quick return if possible */

    mn = min(*m,*n);
    if ((real) mn <= 0.f) {
	return ret_val;
    }

    nrmsvl = snrm2_(&mn, &s[1], &c__1);

/*     Copy upper triangle of A into work */

    slaset_("Full", m, n, &c_b6, &c_b6, &work[1], m);
    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = min(j,*m);
	for (i__ = 1; i__ <= i__2; ++i__) {
	    work[(j - 1) * *m + i__] = a[i__ + j * a_dim1];
/* L10: */
	}
/* L20: */
    }

/*     Get machine parameters */

    smlnum = slamch_("S") / slamch_("P");
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);

/*     Scale work if max entry outside range [SMLNUM,BIGNUM] */

    anrm = slange_("M", m, n, &work[1], m, dummy);
    iscl = 0;
    if (anrm > 0.f && anrm < smlnum) {

/*        Scale matrix norm up to SMLNUM */

	slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &work[1], m, &info);
	iscl = 1;
    } else if (anrm > bignum) {

/*        Scale matrix norm down to BIGNUM */

	slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &work[1], m, &info);
	iscl = 1;
    }

    if (anrm != 0.f) {

/*        Compute SVD of work */

	sgebd2_(m, n, &work[1], m, &work[*m * *n + 1], &work[*m * *n + mn + 1]
, &work[*m * *n + (mn << 1) + 1], &work[*m * *n + mn * 3 + 1], 
		 &work[*m * *n + (mn << 2) + 1], &info);
	sbdsqr_("Upper", &mn, &c__0, &c__0, &c__0, &work[*m * *n + 1], &work[*
		m * *n + mn + 1], dummy, &mn, dummy, &c__1, dummy, &mn, &work[
		*m * *n + (mn << 1) + 1], &info);

	if (iscl == 1) {
	    if (anrm > bignum) {
		slascl_("G", &c__0, &c__0, &bignum, &anrm, &mn, &c__1, &work[*
			m * *n + 1], &mn, &info);
	    }
	    if (anrm < smlnum) {
		slascl_("G", &c__0, &c__0, &smlnum, &anrm, &mn, &c__1, &work[*
			m * *n + 1], &mn, &info);
	    }
	}

    } else {

	i__1 = mn;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    work[*m * *n + i__] = 0.f;
/* L30: */
	}
    }

/*     Compare s and singular values of work */

    saxpy_(&mn, &c_b33, &s[1], &c__1, &work[*m * *n + 1], &c__1);
    ret_val = sasum_(&mn, &work[*m * *n + 1], &c__1) / (slamch_("Epsilon") * (real) max(*m,*n));
    if (nrmsvl != 0.f) {
	ret_val /= nrmsvl;
    }

    return ret_val;

/*     End of SQRT12 */

} /* sqrt12_ */
Esempio n. 26
0
/* Subroutine */ int slatbs_(char *uplo, char *trans, char *diag, char *
	normin, integer *n, integer *kd, real *ab, integer *ldab, real *x, 
	real *scale, real *cnorm, integer *info)
{
    /* System generated locals */
    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4;
    real r__1, r__2, r__3;

    /* Local variables */
    integer i__, j;
    real xj, rec, tjj;
    integer jinc, jlen;
    real xbnd;
    integer imax;
    real tmax, tjjs;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    real xmax, grow, sumj;
    integer maind;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    real tscal, uscal;
    integer jlast;
    extern doublereal sasum_(integer *, real *, integer *);
    logical upper;
    extern /* Subroutine */ int stbsv_(char *, char *, char *, integer *, 
	    integer *, real *, integer *, real *, integer *), saxpy_(integer *, real *, real *, integer *, real *, 
	    integer *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    real bignum;
    extern integer isamax_(integer *, real *, integer *);
    logical notran;
    integer jfirst;
    real smlnum;
    logical nounit;


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLATBS solves one of the triangular systems */

/*     A *x = s*b  or  A'*x = s*b */

/*  with scaling to prevent overflow, where A is an upper or lower */
/*  triangular band matrix.  Here A' denotes the transpose of A, x and b */
/*  are n-element vectors, and s is a scaling factor, usually less than */
/*  or equal to 1, chosen so that the components of x will be less than */
/*  the overflow threshold.  If the unscaled problem will not cause */
/*  overflow, the Level 2 BLAS routine STBSV is called.  If the matrix A */
/*  is singular (A(j,j) = 0 for some j), then s is set to 0 and a */
/*  non-trivial solution to A*x = 0 is returned. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the matrix A is upper or lower triangular. */
/*          = 'U':  Upper triangular */
/*          = 'L':  Lower triangular */

/*  TRANS   (input) CHARACTER*1 */
/*          Specifies the operation applied to A. */
/*          = 'N':  Solve A * x = s*b  (No transpose) */
/*          = 'T':  Solve A'* x = s*b  (Transpose) */
/*          = 'C':  Solve A'* x = s*b  (Conjugate transpose = Transpose) */

/*  DIAG    (input) CHARACTER*1 */
/*          Specifies whether or not the matrix A is unit triangular. */
/*          = 'N':  Non-unit triangular */
/*          = 'U':  Unit triangular */

/*  NORMIN  (input) CHARACTER*1 */
/*          Specifies whether CNORM has been set or not. */
/*          = 'Y':  CNORM contains the column norms on entry */
/*          = 'N':  CNORM is not set on entry.  On exit, the norms will */
/*                  be computed and stored in CNORM. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  KD      (input) INTEGER */
/*          The number of subdiagonals or superdiagonals in the */
/*          triangular matrix A.  KD >= 0. */

/*  AB      (input) REAL array, dimension (LDAB,N) */
/*          The upper or lower triangular band matrix A, stored in the */
/*          first KD+1 rows of the array. The j-th column of A is stored */
/*          in the j-th column of the array AB as follows: */
/*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */
/*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd). */

/*  LDAB    (input) INTEGER */
/*          The leading dimension of the array AB.  LDAB >= KD+1. */

/*  X       (input/output) REAL array, dimension (N) */
/*          On entry, the right hand side b of the triangular system. */
/*          On exit, X is overwritten by the solution vector x. */

/*  SCALE   (output) REAL */
/*          The scaling factor s for the triangular system */
/*             A * x = s*b  or  A'* x = s*b. */
/*          If SCALE = 0, the matrix A is singular or badly scaled, and */
/*          the vector x is an exact or approximate solution to A*x = 0. */

/*  CNORM   (input or output) REAL array, dimension (N) */

/*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j) */
/*          contains the norm of the off-diagonal part of the j-th column */
/*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal */
/*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j) */
/*          must be greater than or equal to the 1-norm. */

/*          If NORMIN = 'N', CNORM is an output argument and CNORM(j) */
/*          returns the 1-norm of the offdiagonal part of the j-th column */
/*          of A. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -k, the k-th argument had an illegal value */

/*  Further Details */
/*  ======= ======= */

/*  A rough bound on x is computed; if that is less than overflow, STBSV */
/*  is called, otherwise, specific code is used which checks for possible */
/*  overflow or divide-by-zero at every operation. */

/*  A columnwise scheme is used for solving A*x = b.  The basic algorithm */
/*  if A is lower triangular is */

/*       x[1:n] := b[1:n] */
/*       for j = 1, ..., n */
/*            x(j) := x(j) / A(j,j) */
/*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j] */
/*       end */

/*  Define bounds on the components of x after j iterations of the loop: */
/*     M(j) = bound on x[1:j] */
/*     G(j) = bound on x[j+1:n] */
/*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}. */

/*  Then for iteration j+1 we have */
/*     M(j+1) <= G(j) / | A(j+1,j+1) | */
/*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] | */
/*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | ) */

/*  where CNORM(j+1) is greater than or equal to the infinity-norm of */
/*  column j+1 of A, not counting the diagonal.  Hence */

/*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | ) */
/*                  1<=i<=j */
/*  and */

/*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) */
/*                                   1<=i< j */

/*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine STBSV if the */
/*  reciprocal of the largest M(j), j=1,..,n, is larger than */
/*  max(underflow, 1/overflow). */

/*  The bound on x(j) is also used to determine when a step in the */
/*  columnwise method can be performed without fear of overflow.  If */
/*  the computed bound is greater than a large constant, x is scaled to */
/*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to */
/*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found. */

/*  Similarly, a row-wise scheme is used to solve A'*x = b.  The basic */
/*  algorithm for A upper triangular is */

/*       for j = 1, ..., n */
/*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) */
/*       end */

/*  We simultaneously compute two bounds */
/*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j */
/*       M(j) = bound on x(i), 1<=i<=j */

/*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we */
/*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1. */
/*  Then the bound on x(j) is */

/*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) | */

/*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| ) */
/*                      1<=i<=j */

/*  and we can safely call STBSV if 1/M(n) and 1/G(n) are both greater */
/*  than max(underflow, 1/overflow). */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1;
    ab -= ab_offset;
    --x;
    --cnorm;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    notran = lsame_(trans, "N");
    nounit = lsame_(diag, "N");

/*     Test the input parameters. */

    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (! notran && ! lsame_(trans, "T") && ! 
	    lsame_(trans, "C")) {
	*info = -2;
    } else if (! nounit && ! lsame_(diag, "U")) {
	*info = -3;
    } else if (! lsame_(normin, "Y") && ! lsame_(normin, 
	     "N")) {
	*info = -4;
    } else if (*n < 0) {
	*info = -5;
    } else if (*kd < 0) {
	*info = -6;
    } else if (*ldab < *kd + 1) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLATBS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine machine dependent parameters to control overflow. */

    smlnum = slamch_("Safe minimum") / slamch_("Precision");
    bignum = 1.f / smlnum;
    *scale = 1.f;

    if (lsame_(normin, "N")) {

/*        Compute the 1-norm of each column, not including the diagonal. */

	if (upper) {

/*           A is upper triangular. */

	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
/* Computing MIN */
		i__2 = *kd, i__3 = j - 1;
		jlen = min(i__2,i__3);
		cnorm[j] = sasum_(&jlen, &ab[*kd + 1 - jlen + j * ab_dim1], &
			c__1);
/* L10: */
	    }
	} else {

/*           A is lower triangular. */

	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
/* Computing MIN */
		i__2 = *kd, i__3 = *n - j;
		jlen = min(i__2,i__3);
		if (jlen > 0) {
		    cnorm[j] = sasum_(&jlen, &ab[j * ab_dim1 + 2], &c__1);
		} else {
		    cnorm[j] = 0.f;
		}
/* L20: */
	    }
	}
    }

/*     Scale the column norms by TSCAL if the maximum element in CNORM is */
/*     greater than BIGNUM. */

    imax = isamax_(n, &cnorm[1], &c__1);
    tmax = cnorm[imax];
    if (tmax <= bignum) {
	tscal = 1.f;
    } else {
	tscal = 1.f / (smlnum * tmax);
	sscal_(n, &tscal, &cnorm[1], &c__1);
    }

/*     Compute a bound on the computed solution vector to see if the */
/*     Level 2 BLAS routine STBSV can be used. */

    j = isamax_(n, &x[1], &c__1);
    xmax = (r__1 = x[j], dabs(r__1));
    xbnd = xmax;
    if (notran) {

/*        Compute the growth in A * x = b. */

	if (upper) {
	    jfirst = *n;
	    jlast = 1;
	    jinc = -1;
	    maind = *kd + 1;
	} else {
	    jfirst = 1;
	    jlast = *n;
	    jinc = 1;
	    maind = 1;
	}

	if (tscal != 1.f) {
	    grow = 0.f;
	    goto L50;
	}

	if (nounit) {

/*           A is non-unit triangular. */

/*           Compute GROW = 1/G(j) and XBND = 1/M(j). */
/*           Initially, G(0) = max{x(i), i=1,...,n}. */

	    grow = 1.f / dmax(xbnd,smlnum);
	    xbnd = grow;
	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Exit the loop if the growth factor is too small. */

		if (grow <= smlnum) {
		    goto L50;
		}

/*              M(j) = G(j-1) / abs(A(j,j)) */

		tjj = (r__1 = ab[maind + j * ab_dim1], dabs(r__1));
/* Computing MIN */
		r__1 = xbnd, r__2 = dmin(1.f,tjj) * grow;
		xbnd = dmin(r__1,r__2);
		if (tjj + cnorm[j] >= smlnum) {

/*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */

		    grow *= tjj / (tjj + cnorm[j]);
		} else {

/*                 G(j) could overflow, set GROW to 0. */

		    grow = 0.f;
		}
/* L30: */
	    }
	    grow = xbnd;
	} else {

/*           A is unit triangular. */

/*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */

/* Computing MIN */
	    r__1 = 1.f, r__2 = 1.f / dmax(xbnd,smlnum);
	    grow = dmin(r__1,r__2);
	    i__2 = jlast;
	    i__1 = jinc;
	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

/*              Exit the loop if the growth factor is too small. */

		if (grow <= smlnum) {
		    goto L50;
		}

/*              G(j) = G(j-1)*( 1 + CNORM(j) ) */

		grow *= 1.f / (cnorm[j] + 1.f);
/* L40: */
	    }
	}
L50:

	;
    } else {

/*        Compute the growth in A' * x = b. */

	if (upper) {
	    jfirst = 1;
	    jlast = *n;
	    jinc = 1;
	    maind = *kd + 1;
	} else {
	    jfirst = *n;
	    jlast = 1;
	    jinc = -1;
	    maind = 1;
	}

	if (tscal != 1.f) {
	    grow = 0.f;
	    goto L80;
	}

	if (nounit) {

/*           A is non-unit triangular. */

/*           Compute GROW = 1/G(j) and XBND = 1/M(j). */
/*           Initially, M(0) = max{x(i), i=1,...,n}. */

	    grow = 1.f / dmax(xbnd,smlnum);
	    xbnd = grow;
	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Exit the loop if the growth factor is too small. */

		if (grow <= smlnum) {
		    goto L80;
		}

/*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */

		xj = cnorm[j] + 1.f;
/* Computing MIN */
		r__1 = grow, r__2 = xbnd / xj;
		grow = dmin(r__1,r__2);

/*              M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */

		tjj = (r__1 = ab[maind + j * ab_dim1], dabs(r__1));
		if (xj > tjj) {
		    xbnd *= tjj / xj;
		}
/* L60: */
	    }
	    grow = dmin(grow,xbnd);
	} else {

/*           A is unit triangular. */

/*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}. */

/* Computing MIN */
	    r__1 = 1.f, r__2 = 1.f / dmax(xbnd,smlnum);
	    grow = dmin(r__1,r__2);
	    i__2 = jlast;
	    i__1 = jinc;
	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

/*              Exit the loop if the growth factor is too small. */

		if (grow <= smlnum) {
		    goto L80;
		}

/*              G(j) = ( 1 + CNORM(j) )*G(j-1) */

		xj = cnorm[j] + 1.f;
		grow /= xj;
/* L70: */
	    }
	}
L80:
	;
    }

    if (grow * tscal > smlnum) {

/*        Use the Level 2 BLAS solve if the reciprocal of the bound on */
/*        elements of X is not too small. */

	stbsv_(uplo, trans, diag, n, kd, &ab[ab_offset], ldab, &x[1], &c__1);
    } else {

/*        Use a Level 1 BLAS solve, scaling intermediate results. */

	if (xmax > bignum) {

/*           Scale X so that its components are less than or equal to */
/*           BIGNUM in absolute value. */

	    *scale = bignum / xmax;
	    sscal_(n, scale, &x[1], &c__1);
	    xmax = bignum;
	}

	if (notran) {

/*           Solve A * x = b */

	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Compute x(j) = b(j) / A(j,j), scaling x if necessary. */

		xj = (r__1 = x[j], dabs(r__1));
		if (nounit) {
		    tjjs = ab[maind + j * ab_dim1] * tscal;
		} else {
		    tjjs = tscal;
		    if (tscal == 1.f) {
			goto L95;
		    }
		}
		tjj = dabs(tjjs);
		if (tjj > smlnum) {

/*                    abs(A(j,j)) > SMLNUM: */

		    if (tjj < 1.f) {
			if (xj > tjj * bignum) {

/*                          Scale x by 1/b(j). */

			    rec = 1.f / xj;
			    sscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }
		    x[j] /= tjjs;
		    xj = (r__1 = x[j], dabs(r__1));
		} else if (tjj > 0.f) {

/*                    0 < abs(A(j,j)) <= SMLNUM: */

		    if (xj > tjj * bignum) {

/*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM */
/*                       to avoid overflow when dividing by A(j,j). */

			rec = tjj * bignum / xj;
			if (cnorm[j] > 1.f) {

/*                          Scale by 1/CNORM(j) to avoid overflow when */
/*                          multiplying x(j) times column j. */

			    rec /= cnorm[j];
			}
			sscal_(n, &rec, &x[1], &c__1);
			*scale *= rec;
			xmax *= rec;
		    }
		    x[j] /= tjjs;
		    xj = (r__1 = x[j], dabs(r__1));
		} else {

/*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and */
/*                    scale = 0, and compute a solution to A*x = 0. */

		    i__3 = *n;
		    for (i__ = 1; i__ <= i__3; ++i__) {
			x[i__] = 0.f;
/* L90: */
		    }
		    x[j] = 1.f;
		    xj = 1.f;
		    *scale = 0.f;
		    xmax = 0.f;
		}
L95:

/*              Scale x if necessary to avoid overflow when adding a */
/*              multiple of column j of A. */

		if (xj > 1.f) {
		    rec = 1.f / xj;
		    if (cnorm[j] > (bignum - xmax) * rec) {

/*                    Scale x by 1/(2*abs(x(j))). */

			rec *= .5f;
			sscal_(n, &rec, &x[1], &c__1);
			*scale *= rec;
		    }
		} else if (xj * cnorm[j] > bignum - xmax) {

/*                 Scale x by 1/2. */

		    sscal_(n, &c_b36, &x[1], &c__1);
		    *scale *= .5f;
		}

		if (upper) {
		    if (j > 1) {

/*                    Compute the update */
/*                       x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) - */
/*                                             x(j)* A(max(1,j-kd):j-1,j) */

/* Computing MIN */
			i__3 = *kd, i__4 = j - 1;
			jlen = min(i__3,i__4);
			r__1 = -x[j] * tscal;
			saxpy_(&jlen, &r__1, &ab[*kd + 1 - jlen + j * ab_dim1]
, &c__1, &x[j - jlen], &c__1);
			i__3 = j - 1;
			i__ = isamax_(&i__3, &x[1], &c__1);
			xmax = (r__1 = x[i__], dabs(r__1));
		    }
		} else if (j < *n) {

/*                 Compute the update */
/*                    x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) - */
/*                                          x(j) * A(j+1:min(j+kd,n),j) */

/* Computing MIN */
		    i__3 = *kd, i__4 = *n - j;
		    jlen = min(i__3,i__4);
		    if (jlen > 0) {
			r__1 = -x[j] * tscal;
			saxpy_(&jlen, &r__1, &ab[j * ab_dim1 + 2], &c__1, &x[
				j + 1], &c__1);
		    }
		    i__3 = *n - j;
		    i__ = j + isamax_(&i__3, &x[j + 1], &c__1);
		    xmax = (r__1 = x[i__], dabs(r__1));
		}
/* L100: */
	    }

	} else {

/*           Solve A' * x = b */

	    i__2 = jlast;
	    i__1 = jinc;
	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

/*              Compute x(j) = b(j) - sum A(k,j)*x(k). */
/*                                    k<>j */

		xj = (r__1 = x[j], dabs(r__1));
		uscal = tscal;
		rec = 1.f / dmax(xmax,1.f);
		if (cnorm[j] > (bignum - xj) * rec) {

/*                 If x(j) could overflow, scale x by 1/(2*XMAX). */

		    rec *= .5f;
		    if (nounit) {
			tjjs = ab[maind + j * ab_dim1] * tscal;
		    } else {
			tjjs = tscal;
		    }
		    tjj = dabs(tjjs);
		    if (tjj > 1.f) {

/*                       Divide by A(j,j) when scaling x if A(j,j) > 1. */

/* Computing MIN */
			r__1 = 1.f, r__2 = rec * tjj;
			rec = dmin(r__1,r__2);
			uscal /= tjjs;
		    }
		    if (rec < 1.f) {
			sscal_(n, &rec, &x[1], &c__1);
			*scale *= rec;
			xmax *= rec;
		    }
		}

		sumj = 0.f;
		if (uscal == 1.f) {

/*                 If the scaling needed for A in the dot product is 1, */
/*                 call SDOT to perform the dot product. */

		    if (upper) {
/* Computing MIN */
			i__3 = *kd, i__4 = j - 1;
			jlen = min(i__3,i__4);
			sumj = sdot_(&jlen, &ab[*kd + 1 - jlen + j * ab_dim1], 
				 &c__1, &x[j - jlen], &c__1);
		    } else {
/* Computing MIN */
			i__3 = *kd, i__4 = *n - j;
			jlen = min(i__3,i__4);
			if (jlen > 0) {
			    sumj = sdot_(&jlen, &ab[j * ab_dim1 + 2], &c__1, &
				    x[j + 1], &c__1);
			}
		    }
		} else {

/*                 Otherwise, use in-line code for the dot product. */

		    if (upper) {
/* Computing MIN */
			i__3 = *kd, i__4 = j - 1;
			jlen = min(i__3,i__4);
			i__3 = jlen;
			for (i__ = 1; i__ <= i__3; ++i__) {
			    sumj += ab[*kd + i__ - jlen + j * ab_dim1] * 
				    uscal * x[j - jlen - 1 + i__];
/* L110: */
			}
		    } else {
/* Computing MIN */
			i__3 = *kd, i__4 = *n - j;
			jlen = min(i__3,i__4);
			i__3 = jlen;
			for (i__ = 1; i__ <= i__3; ++i__) {
			    sumj += ab[i__ + 1 + j * ab_dim1] * uscal * x[j + 
				    i__];
/* L120: */
			}
		    }
		}

		if (uscal == tscal) {

/*                 Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j) */
/*                 was not used to scale the dotproduct. */

		    x[j] -= sumj;
		    xj = (r__1 = x[j], dabs(r__1));
		    if (nounit) {

/*                    Compute x(j) = x(j) / A(j,j), scaling if necessary. */

			tjjs = ab[maind + j * ab_dim1] * tscal;
		    } else {
			tjjs = tscal;
			if (tscal == 1.f) {
			    goto L135;
			}
		    }
		    tjj = dabs(tjjs);
		    if (tjj > smlnum) {

/*                       abs(A(j,j)) > SMLNUM: */

			if (tjj < 1.f) {
			    if (xj > tjj * bignum) {

/*                             Scale X by 1/abs(x(j)). */

				rec = 1.f / xj;
				sscal_(n, &rec, &x[1], &c__1);
				*scale *= rec;
				xmax *= rec;
			    }
			}
			x[j] /= tjjs;
		    } else if (tjj > 0.f) {

/*                       0 < abs(A(j,j)) <= SMLNUM: */

			if (xj > tjj * bignum) {

/*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */

			    rec = tjj * bignum / xj;
			    sscal_(n, &rec, &x[1], &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
			x[j] /= tjjs;
		    } else {

/*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and */
/*                       scale = 0, and compute a solution to A'*x = 0. */

			i__3 = *n;
			for (i__ = 1; i__ <= i__3; ++i__) {
			    x[i__] = 0.f;
/* L130: */
			}
			x[j] = 1.f;
			*scale = 0.f;
			xmax = 0.f;
		    }
L135:
		    ;
		} else {

/*                 Compute x(j) := x(j) / A(j,j) - sumj if the dot */
/*                 product has already been divided by 1/A(j,j). */

		    x[j] = x[j] / tjjs - sumj;
		}
/* Computing MAX */
		r__2 = xmax, r__3 = (r__1 = x[j], dabs(r__1));
		xmax = dmax(r__2,r__3);
/* L140: */
	    }
	}
	*scale /= tscal;
    }

/*     Scale the column norms by 1/TSCAL for return. */

    if (tscal != 1.f) {
	r__1 = 1.f / tscal;
	sscal_(n, &r__1, &cnorm[1], &c__1);
    }

    return 0;

/*     End of SLATBS */

} /* slatbs_ */
Esempio n. 27
0
/* Subroutine */
int stprfs_(char *uplo, char *trans, char *diag, integer *n, integer *nrhs, real *ap, real *b, integer *ldb, real *x, integer *ldx, real *ferr, real *berr, real *work, integer *iwork, integer *info)
{
    /* System generated locals */
    integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3;
    real r__1, r__2, r__3;
    /* Local variables */
    integer i__, j, k;
    real s;
    integer kc;
    real xk;
    integer nz;
    real eps;
    integer kase;
    real safe1, safe2;
    extern logical lsame_(char *, char *);
    integer isave[3];
    logical upper;
    extern /* Subroutine */
    int scopy_(integer *, real *, integer *, real *, integer *), saxpy_(integer *, real *, real *, integer *, real *, integer *), stpmv_(char *, char *, char *, integer *, real *, real *, integer *), stpsv_(char *, char *, char *, integer *, real *, real *, integer *), slacn2_(integer *, real *, real *, integer *, real *, integer *, integer *);
    extern real slamch_(char *);
    real safmin;
    extern /* Subroutine */
    int xerbla_(char *, integer *);
    logical notran;
    char transt[1];
    logical nounit;
    real lstres;
    /* -- LAPACK computational routine (version 3.4.0) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* November 2011 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. Local Arrays .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Intrinsic Functions .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Test the input parameters. */
    /* Parameter adjustments */
    --ap;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --iwork;
    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    notran = lsame_(trans, "N");
    nounit = lsame_(diag, "N");
    if (! upper && ! lsame_(uplo, "L"))
    {
        *info = -1;
    }
    else if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, "C"))
    {
        *info = -2;
    }
    else if (! nounit && ! lsame_(diag, "U"))
    {
        *info = -3;
    }
    else if (*n < 0)
    {
        *info = -4;
    }
    else if (*nrhs < 0)
    {
        *info = -5;
    }
    else if (*ldb < max(1,*n))
    {
        *info = -8;
    }
    else if (*ldx < max(1,*n))
    {
        *info = -10;
    }
    if (*info != 0)
    {
        i__1 = -(*info);
        xerbla_("STPRFS", &i__1);
        return 0;
    }
    /* Quick return if possible */
    if (*n == 0 || *nrhs == 0)
    {
        i__1 = *nrhs;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            ferr[j] = 0.f;
            berr[j] = 0.f;
            /* L10: */
        }
        return 0;
    }
    if (notran)
    {
        *(unsigned char *)transt = 'T';
    }
    else
    {
        *(unsigned char *)transt = 'N';
    }
    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
    nz = *n + 1;
    eps = slamch_("Epsilon");
    safmin = slamch_("Safe minimum");
    safe1 = nz * safmin;
    safe2 = safe1 / eps;
    /* Do for each right hand side */
    i__1 = *nrhs;
    for (j = 1;
            j <= i__1;
            ++j)
    {
        /* Compute residual R = B - op(A) * X, */
        /* where op(A) = A or A**T, depending on TRANS. */
        scopy_(n, &x[j * x_dim1 + 1], &c__1, &work[*n + 1], &c__1);
        stpmv_(uplo, trans, diag, n, &ap[1], &work[*n + 1], &c__1);
        saxpy_(n, &c_b19, &b[j * b_dim1 + 1], &c__1, &work[*n + 1], &c__1);
        /* Compute componentwise relative backward error from formula */
        /* max(i) ( f2c_abs(R(i)) / ( f2c_abs(op(A))*f2c_abs(X) + f2c_abs(B) )(i) ) */
        /* where f2c_abs(Z) is the componentwise absolute value of the matrix */
        /* or vector Z. If the i-th component of the denominator is less */
        /* than SAFE2, then SAFE1 is added to the i-th components of the */
        /* numerator and denominator before dividing. */
        i__2 = *n;
        for (i__ = 1;
                i__ <= i__2;
                ++i__)
        {
            work[i__] = (r__1 = b[i__ + j * b_dim1], f2c_abs(r__1));
            /* L20: */
        }
        if (notran)
        {
            /* Compute f2c_abs(A)*f2c_abs(X) + f2c_abs(B). */
            if (upper)
            {
                kc = 1;
                if (nounit)
                {
                    i__2 = *n;
                    for (k = 1;
                            k <= i__2;
                            ++k)
                    {
                        xk = (r__1 = x[k + j * x_dim1], f2c_abs(r__1));
                        i__3 = k;
                        for (i__ = 1;
                                i__ <= i__3;
                                ++i__)
                        {
                            work[i__] += (r__1 = ap[kc + i__ - 1], f2c_abs(r__1)) * xk;
                            /* L30: */
                        }
                        kc += k;
                        /* L40: */
                    }
                }
                else
                {
                    i__2 = *n;
                    for (k = 1;
                            k <= i__2;
                            ++k)
                    {
                        xk = (r__1 = x[k + j * x_dim1], f2c_abs(r__1));
                        i__3 = k - 1;
                        for (i__ = 1;
                                i__ <= i__3;
                                ++i__)
                        {
                            work[i__] += (r__1 = ap[kc + i__ - 1], f2c_abs(r__1)) * xk;
                            /* L50: */
                        }
                        work[k] += xk;
                        kc += k;
                        /* L60: */
                    }
                }
            }
            else
            {
                kc = 1;
                if (nounit)
                {
                    i__2 = *n;
                    for (k = 1;
                            k <= i__2;
                            ++k)
                    {
                        xk = (r__1 = x[k + j * x_dim1], f2c_abs(r__1));
                        i__3 = *n;
                        for (i__ = k;
                                i__ <= i__3;
                                ++i__)
                        {
                            work[i__] += (r__1 = ap[kc + i__ - k], f2c_abs(r__1)) * xk;
                            /* L70: */
                        }
                        kc = kc + *n - k + 1;
                        /* L80: */
                    }
                }
                else
                {
                    i__2 = *n;
                    for (k = 1;
                            k <= i__2;
                            ++k)
                    {
                        xk = (r__1 = x[k + j * x_dim1], f2c_abs(r__1));
                        i__3 = *n;
                        for (i__ = k + 1;
                                i__ <= i__3;
                                ++i__)
                        {
                            work[i__] += (r__1 = ap[kc + i__ - k], f2c_abs(r__1)) * xk;
                            /* L90: */
                        }
                        work[k] += xk;
                        kc = kc + *n - k + 1;
                        /* L100: */
                    }
                }
            }
        }
        else
        {
            /* Compute f2c_abs(A**T)*f2c_abs(X) + f2c_abs(B). */
            if (upper)
            {
                kc = 1;
                if (nounit)
                {
                    i__2 = *n;
                    for (k = 1;
                            k <= i__2;
                            ++k)
                    {
                        s = 0.f;
                        i__3 = k;
                        for (i__ = 1;
                                i__ <= i__3;
                                ++i__)
                        {
                            s += (r__1 = ap[kc + i__ - 1], f2c_abs(r__1)) * (r__2 = x[i__ + j * x_dim1], f2c_abs(r__2));
                            /* L110: */
                        }
                        work[k] += s;
                        kc += k;
                        /* L120: */
                    }
                }
                else
                {
                    i__2 = *n;
                    for (k = 1;
                            k <= i__2;
                            ++k)
                    {
                        s = (r__1 = x[k + j * x_dim1], f2c_abs(r__1));
                        i__3 = k - 1;
                        for (i__ = 1;
                                i__ <= i__3;
                                ++i__)
                        {
                            s += (r__1 = ap[kc + i__ - 1], f2c_abs(r__1)) * (r__2 = x[i__ + j * x_dim1], f2c_abs(r__2));
                            /* L130: */
                        }
                        work[k] += s;
                        kc += k;
                        /* L140: */
                    }
                }
            }
            else
            {
                kc = 1;
                if (nounit)
                {
                    i__2 = *n;
                    for (k = 1;
                            k <= i__2;
                            ++k)
                    {
                        s = 0.f;
                        i__3 = *n;
                        for (i__ = k;
                                i__ <= i__3;
                                ++i__)
                        {
                            s += (r__1 = ap[kc + i__ - k], f2c_abs(r__1)) * (r__2 = x[i__ + j * x_dim1], f2c_abs(r__2));
                            /* L150: */
                        }
                        work[k] += s;
                        kc = kc + *n - k + 1;
                        /* L160: */
                    }
                }
                else
                {
                    i__2 = *n;
                    for (k = 1;
                            k <= i__2;
                            ++k)
                    {
                        s = (r__1 = x[k + j * x_dim1], f2c_abs(r__1));
                        i__3 = *n;
                        for (i__ = k + 1;
                                i__ <= i__3;
                                ++i__)
                        {
                            s += (r__1 = ap[kc + i__ - k], f2c_abs(r__1)) * (r__2 = x[i__ + j * x_dim1], f2c_abs(r__2));
                            /* L170: */
                        }
                        work[k] += s;
                        kc = kc + *n - k + 1;
                        /* L180: */
                    }
                }
            }
        }
        s = 0.f;
        i__2 = *n;
        for (i__ = 1;
                i__ <= i__2;
                ++i__)
        {
            if (work[i__] > safe2)
            {
                /* Computing MAX */
                r__2 = s;
                r__3 = (r__1 = work[*n + i__], f2c_abs(r__1)) / work[ i__]; // , expr subst
                s = max(r__2,r__3);
            }
            else
            {
                /* Computing MAX */
                r__2 = s;
                r__3 = ((r__1 = work[*n + i__], f2c_abs(r__1)) + safe1) / (work[i__] + safe1); // , expr subst
                s = max(r__2,r__3);
            }
            /* L190: */
        }
        berr[j] = s;
        /* Bound error from formula */
        /* norm(X - XTRUE) / norm(X) .le. FERR = */
        /* norm( f2c_abs(inv(op(A)))* */
        /* ( f2c_abs(R) + NZ*EPS*( f2c_abs(op(A))*f2c_abs(X)+f2c_abs(B) ))) / norm(X) */
        /* where */
        /* norm(Z) is the magnitude of the largest component of Z */
        /* inv(op(A)) is the inverse of op(A) */
        /* f2c_abs(Z) is the componentwise absolute value of the matrix or */
        /* vector Z */
        /* NZ is the maximum number of nonzeros in any row of A, plus 1 */
        /* EPS is machine epsilon */
        /* The i-th component of f2c_abs(R)+NZ*EPS*(f2c_abs(op(A))*f2c_abs(X)+f2c_abs(B)) */
        /* is incremented by SAFE1 if the i-th component of */
        /* f2c_abs(op(A))*f2c_abs(X) + f2c_abs(B) is less than SAFE2. */
        /* Use SLACN2 to estimate the infinity-norm of the matrix */
        /* inv(op(A)) * diag(W), */
        /* where W = f2c_abs(R) + NZ*EPS*( f2c_abs(op(A))*f2c_abs(X)+f2c_abs(B) ))) */
        i__2 = *n;
        for (i__ = 1;
                i__ <= i__2;
                ++i__)
        {
            if (work[i__] > safe2)
            {
                work[i__] = (r__1 = work[*n + i__], f2c_abs(r__1)) + nz * eps * work[i__];
            }
            else
            {
                work[i__] = (r__1 = work[*n + i__], f2c_abs(r__1)) + nz * eps * work[i__] + safe1;
            }
            /* L200: */
        }
        kase = 0;
L210:
        slacn2_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase, isave);
        if (kase != 0)
        {
            if (kase == 1)
            {
                /* Multiply by diag(W)*inv(op(A)**T). */
                stpsv_(uplo, transt, diag, n, &ap[1], &work[*n + 1], &c__1);
                i__2 = *n;
                for (i__ = 1;
                        i__ <= i__2;
                        ++i__)
                {
                    work[*n + i__] = work[i__] * work[*n + i__];
                    /* L220: */
                }
            }
            else
            {
                /* Multiply by inv(op(A))*diag(W). */
                i__2 = *n;
                for (i__ = 1;
                        i__ <= i__2;
                        ++i__)
                {
                    work[*n + i__] = work[i__] * work[*n + i__];
                    /* L230: */
                }
                stpsv_(uplo, trans, diag, n, &ap[1], &work[*n + 1], &c__1);
            }
            goto L210;
        }
        /* Normalize error. */
        lstres = 0.f;
        i__2 = *n;
        for (i__ = 1;
                i__ <= i__2;
                ++i__)
        {
            /* Computing MAX */
            r__2 = lstres;
            r__3 = (r__1 = x[i__ + j * x_dim1], f2c_abs(r__1)); // , expr subst
            lstres = max(r__2,r__3);
            /* L240: */
        }
        if (lstres != 0.f)
        {
            ferr[j] /= lstres;
        }
        /* L250: */
    }
    return 0;
    /* End of STPRFS */
}
Esempio n. 28
0
/* Subroutine */ int slarfy_(char *uplo, integer *n, real *v, integer *incv,
                             real *tau, real *c__, integer *ldc, real *work)
{
    /* System generated locals */
    integer c_dim1, c_offset;
    real r__1;

    /* Local variables */
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *,
                                       integer *, real *, integer *, real *, integer *);
    real alpha;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *,
                                       real *, integer *), ssymv_(char *, integer *, real *, real *,
                                               integer *, real *, integer *, real *, real *, integer *);


    /*  -- LAPACK auxiliary test routine (version 3.1) -- */
    /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
    /*     November 2006 */

    /*     .. Scalar Arguments .. */
    /*     .. */
    /*     .. Array Arguments .. */
    /*     .. */

    /*  Purpose */
    /*  ======= */

    /*  SLARFY applies an elementary reflector, or Householder matrix, H, */
    /*  to an n x n symmetric matrix C, from both the left and the right. */

    /*  H is represented in the form */

    /*     H = I - tau * v * v' */

    /*  where  tau  is a scalar and  v  is a vector. */

    /*  If  tau  is  zero, then  H  is taken to be the unit matrix. */

    /*  Arguments */
    /*  ========= */

    /*  UPLO    (input) CHARACTER*1 */
    /*          Specifies whether the upper or lower triangular part of the */
    /*          symmetric matrix C is stored. */
    /*          = 'U':  Upper triangle */
    /*          = 'L':  Lower triangle */

    /*  N       (input) INTEGER */
    /*          The number of rows and columns of the matrix C.  N >= 0. */

    /*  V       (input) REAL array, dimension */
    /*                  (1 + (N-1)*abs(INCV)) */
    /*          The vector v as described above. */

    /*  INCV    (input) INTEGER */
    /*          The increment between successive elements of v.  INCV must */
    /*          not be zero. */

    /*  TAU     (input) REAL */
    /*          The value tau as described above. */

    /*  C       (input/output) REAL array, dimension (LDC, N) */
    /*          On entry, the matrix C. */
    /*          On exit, C is overwritten by H * C * H'. */

    /*  LDC     (input) INTEGER */
    /*          The leading dimension of the array C.  LDC >= max( 1, N ). */

    /*  WORK    (workspace) REAL array, dimension (N) */

    /*  ===================================================================== */

    /*     .. Parameters .. */
    /*     .. */
    /*     .. Local Scalars .. */
    /*     .. */
    /*     .. External Subroutines .. */
    /*     .. */
    /*     .. External Functions .. */
    /*     .. */
    /*     .. Executable Statements .. */

    /* Parameter adjustments */
    --v;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    if (*tau == 0.f) {
        return 0;
    }

    /*     Form  w:= C * v */

    ssymv_(uplo, n, &c_b2, &c__[c_offset], ldc, &v[1], incv, &c_b3, &work[1],
           &c__1);

    alpha = *tau * -.5f * sdot_(n, &work[1], &c__1, &v[1], incv);
    saxpy_(n, &alpha, &v[1], incv, &work[1], &c__1);

    /*     C := C - v * w' - w * v' */

    r__1 = -(*tau);
    ssyr2_(uplo, n, &r__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], ldc);

    return 0;

    /*     End of SLARFY */

} /* slarfy_ */
Esempio n. 29
0
/* Subroutine */ int sgglse_(integer *m, integer *n, integer *p, real *a, 
	integer *lda, real *b, integer *ldb, real *c__, real *d__, real *x, 
	real *work, integer *lwork, integer *info)
{
/*  -- LAPACK driver routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    SGGLSE solves the linear equality-constrained least squares (LSE)   
    problem:   

            minimize || c - A*x ||_2   subject to   B*x = d   

    where A is an M-by-N matrix, B is a P-by-N matrix, c is a given   
    M-vector, and d is a given P-vector. It is assumed that   
    P <= N <= M+P, and   

             rank(B) = P and  rank( ( A ) ) = N.   
                                  ( ( B ) )   

    These conditions ensure that the LSE problem has a unique solution,   
    which is obtained using a GRQ factorization of the matrices B and A.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrices A and B. N >= 0.   

    P       (input) INTEGER   
            The number of rows of the matrix B. 0 <= P <= N <= M+P.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the M-by-N matrix A.   
            On exit, A is destroyed.   

    LDA     (input) INTEGER   
            The leading dimension of the array A. LDA >= max(1,M).   

    B       (input/output) REAL array, dimension (LDB,N)   
            On entry, the P-by-N matrix B.   
            On exit, B is destroyed.   

    LDB     (input) INTEGER   
            The leading dimension of the array B. LDB >= max(1,P).   

    C       (input/output) REAL array, dimension (M)   
            On entry, C contains the right hand side vector for the   
            least squares part of the LSE problem.   
            On exit, the residual sum of squares for the solution   
            is given by the sum of squares of elements N-P+1 to M of   
            vector C.   

    D       (input/output) REAL array, dimension (P)   
            On entry, D contains the right hand side vector for the   
            constrained equation.   
            On exit, D is destroyed.   

    X       (output) REAL array, dimension (N)   
            On exit, X is the solution of the LSE problem.   

    WORK    (workspace/output) REAL array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK. LWORK >= max(1,M+N+P).   
            For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB,   
            where NB is an upper bound for the optimal blocksizes for   
            SGEQRF, SGERQF, SORMQR and SORMRQ.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    INFO    (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    =====================================================================   


       Test the input parameters   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static real c_b29 = -1.f;
    static real c_b31 = 1.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
    /* Local variables */
    static integer lopt;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), 
	    saxpy_(integer *, real *, real *, integer *, real *, integer *), 
	    strmv_(char *, char *, char *, integer *, real *, integer *, real 
	    *, integer *), strsv_(char *, char *, 
	    char *, integer *, real *, integer *, real *, integer *);
    static integer nb, mn, nr;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int sggrqf_(integer *, integer *, integer *, real 
	    *, integer *, real *, real *, integer *, real *, real *, integer *
	    , integer *);
    static integer nb1, nb2, nb3, nb4, lwkopt;
    static logical lquery;
    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, 
	    integer *, real *, integer *, real *, real *, integer *, real *, 
	    integer *, integer *), sormrq_(char *, char *, 
	    integer *, integer *, integer *, real *, integer *, real *, real *
	    , integer *, real *, integer *, integer *);
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --c__;
    --d__;
    --x;
    --work;

    /* Function Body */
    *info = 0;
    mn = min(*m,*n);
    nb1 = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);
    nb2 = ilaenv_(&c__1, "SGERQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);
    nb3 = ilaenv_(&c__1, "SORMQR", " ", m, n, p, &c_n1, (ftnlen)6, (ftnlen)1);
    nb4 = ilaenv_(&c__1, "SORMRQ", " ", m, n, p, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
    i__1 = max(nb1,nb2), i__1 = max(i__1,nb3);
    nb = max(i__1,nb4);
    lwkopt = *p + mn + max(*m,*n) * nb;
    work[1] = (real) lwkopt;
    lquery = *lwork == -1;
    if (*m < 0) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*p < 0 || *p > *n || *p < *n - *m) {
	*info = -3;
    } else if (*lda < max(1,*m)) {
	*info = -5;
    } else if (*ldb < max(1,*p)) {
	*info = -7;
    } else /* if(complicated condition) */ {
/* Computing MAX */
	i__1 = 1, i__2 = *m + *n + *p;
	if (*lwork < max(i__1,i__2) && ! lquery) {
	    *info = -12;
	}
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SGGLSE", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Compute the GRQ factorization of matrices B and A:   

              B*Q' = (  0  T12 ) P   Z'*A*Q' = ( R11 R12 ) N-P   
                       N-P  P                  (  0  R22 ) M+P-N   
                                                 N-P  P   

       where T12 and R11 are upper triangular, and Q and Z are   
       orthogonal. */

    i__1 = *lwork - *p - mn;
    sggrqf_(p, m, n, &b[b_offset], ldb, &work[1], &a[a_offset], lda, &work[*p 
	    + 1], &work[*p + mn + 1], &i__1, info);
    lopt = work[*p + mn + 1];

/*     Update c = Z'*c = ( c1 ) N-P   
                         ( c2 ) M+P-N */

    i__1 = max(1,*m);
    i__2 = *lwork - *p - mn;
    sormqr_("Left", "Transpose", m, &c__1, &mn, &a[a_offset], lda, &work[*p + 
	    1], &c__[1], &i__1, &work[*p + mn + 1], &i__2, info);
/* Computing MAX */
    i__1 = lopt, i__2 = (integer) work[*p + mn + 1];
    lopt = max(i__1,i__2);

/*     Solve T12*x2 = d for x2 */

    strsv_("Upper", "No transpose", "Non unit", p, &b_ref(1, *n - *p + 1), 
	    ldb, &d__[1], &c__1);

/*     Update c1 */

    i__1 = *n - *p;
    sgemv_("No transpose", &i__1, p, &c_b29, &a_ref(1, *n - *p + 1), lda, &
	    d__[1], &c__1, &c_b31, &c__[1], &c__1);

/*     Sovle R11*x1 = c1 for x1 */

    i__1 = *n - *p;
    strsv_("Upper", "No transpose", "Non unit", &i__1, &a[a_offset], lda, &
	    c__[1], &c__1);

/*     Put the solutions in X */

    i__1 = *n - *p;
    scopy_(&i__1, &c__[1], &c__1, &x[1], &c__1);
    scopy_(p, &d__[1], &c__1, &x[*n - *p + 1], &c__1);

/*     Compute the residual vector: */

    if (*m < *n) {
	nr = *m + *p - *n;
	i__1 = *n - *m;
	sgemv_("No transpose", &nr, &i__1, &c_b29, &a_ref(*n - *p + 1, *m + 1)
		, lda, &d__[nr + 1], &c__1, &c_b31, &c__[*n - *p + 1], &c__1);
    } else {
	nr = *p;
    }
    strmv_("Upper", "No transpose", "Non unit", &nr, &a_ref(*n - *p + 1, *n - 
	    *p + 1), lda, &d__[1], &c__1);
    saxpy_(&nr, &c_b29, &d__[1], &c__1, &c__[*n - *p + 1], &c__1);

/*     Backward transformation x = Q'*x */

    i__1 = *lwork - *p - mn;
    sormrq_("Left", "Transpose", n, &c__1, p, &b[b_offset], ldb, &work[1], &x[
	    1], n, &work[*p + mn + 1], &i__1, info);
/* Computing MAX */
    i__1 = lopt, i__2 = (integer) work[*p + mn + 1];
    work[1] = (real) (*p + mn + max(i__1,i__2));

    return 0;

/*     End of SGGLSE */

} /* sgglse_ */
Esempio n. 30
0
/* Subroutine */ int sspgst_(integer *itype, char *uplo, integer *n, real *ap, 
	 real *bp, integer *info)
{
    /* System generated locals */
    integer i__1, i__2;
    real r__1;

    /* Local variables */
    integer j, k, j1, k1, jj, kk;
    real ct, ajj;
    integer j1j1;
    real akk;
    integer k1k1;
    real bjj, bkk;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    extern /* Subroutine */ int sspr2_(char *, integer *, real *, real *, 
	    integer *, real *, integer *, real *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    logical upper;
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, 
	    real *, integer *), sspmv_(char *, integer *, real *, real *, 
	    real *, integer *, real *, real *, integer *), stpmv_(
	    char *, char *, char *, integer *, real *, real *, integer *), stpsv_(char *, char *, char *, integer *, 
	     real *, real *, integer *), xerbla_(char 
	    *, integer *);


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SSPGST reduces a real symmetric-definite generalized eigenproblem */
/*  to standard form, using packed storage. */

/*  If ITYPE = 1, the problem is A*x = lambda*B*x, */
/*  and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) */

/*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */
/*  B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. */

/*  B must have been previously factorized as U**T*U or L*L**T by SPPTRF. */

/*  Arguments */
/*  ========= */

/*  ITYPE   (input) INTEGER */
/*          = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); */
/*          = 2 or 3: compute U*A*U**T or L**T*A*L. */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  Upper triangle of A is stored and B is factored as */
/*                  U**T*U; */
/*          = 'L':  Lower triangle of A is stored and B is factored as */
/*                  L*L**T. */

/*  N       (input) INTEGER */
/*          The order of the matrices A and B.  N >= 0. */

/*  AP      (input/output) REAL array, dimension (N*(N+1)/2) */
/*          On entry, the upper or lower triangle of the symmetric matrix */
/*          A, packed columnwise in a linear array.  The j-th column of A */
/*          is stored in the array AP as follows: */
/*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j; */
/*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n. */

/*          On exit, if INFO = 0, the transformed matrix, stored in the */
/*          same format as A. */

/*  BP      (input) REAL array, dimension (N*(N+1)/2) */
/*          The triangular factor from the Cholesky factorization of B, */
/*          stored in the same format as A, as returned by SPPTRF. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --bp;
    --ap;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (*itype < 1 || *itype > 3) {
	*info = -1;
    } else if (! upper && ! lsame_(uplo, "L")) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SSPGST", &i__1);
	return 0;
    }

    if (*itype == 1) {
	if (upper) {

/*           Compute inv(U')*A*inv(U) */

/*           J1 and JJ are the indices of A(1,j) and A(j,j) */

	    jj = 0;
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		j1 = jj + 1;
		jj += j;

/*              Compute the j-th column of the upper triangle of A */

		bjj = bp[jj];
		stpsv_(uplo, "Transpose", "Nonunit", &j, &bp[1], &ap[j1], &
			c__1);
		i__2 = j - 1;
		sspmv_(uplo, &i__2, &c_b9, &ap[1], &bp[j1], &c__1, &c_b11, &
			ap[j1], &c__1);
		i__2 = j - 1;
		r__1 = 1.f / bjj;
		sscal_(&i__2, &r__1, &ap[j1], &c__1);
		i__2 = j - 1;
		ap[jj] = (ap[jj] - sdot_(&i__2, &ap[j1], &c__1, &bp[j1], &
			c__1)) / bjj;
/* L10: */
	    }
	} else {

/*           Compute inv(L)*A*inv(L') */

/*           KK and K1K1 are the indices of A(k,k) and A(k+1,k+1) */

	    kk = 1;
	    i__1 = *n;
	    for (k = 1; k <= i__1; ++k) {
		k1k1 = kk + *n - k + 1;

/*              Update the lower triangle of A(k:n,k:n) */

		akk = ap[kk];
		bkk = bp[kk];
/* Computing 2nd power */
		r__1 = bkk;
		akk /= r__1 * r__1;
		ap[kk] = akk;
		if (k < *n) {
		    i__2 = *n - k;
		    r__1 = 1.f / bkk;
		    sscal_(&i__2, &r__1, &ap[kk + 1], &c__1);
		    ct = akk * -.5f;
		    i__2 = *n - k;
		    saxpy_(&i__2, &ct, &bp[kk + 1], &c__1, &ap[kk + 1], &c__1)
			    ;
		    i__2 = *n - k;
		    sspr2_(uplo, &i__2, &c_b9, &ap[kk + 1], &c__1, &bp[kk + 1]
, &c__1, &ap[k1k1]);
		    i__2 = *n - k;
		    saxpy_(&i__2, &ct, &bp[kk + 1], &c__1, &ap[kk + 1], &c__1)
			    ;
		    i__2 = *n - k;
		    stpsv_(uplo, "No transpose", "Non-unit", &i__2, &bp[k1k1], 
			     &ap[kk + 1], &c__1);
		}
		kk = k1k1;
/* L20: */
	    }
	}
    } else {
	if (upper) {

/*           Compute U*A*U' */

/*           K1 and KK are the indices of A(1,k) and A(k,k) */

	    kk = 0;
	    i__1 = *n;
	    for (k = 1; k <= i__1; ++k) {
		k1 = kk + 1;
		kk += k;

/*              Update the upper triangle of A(1:k,1:k) */

		akk = ap[kk];
		bkk = bp[kk];
		i__2 = k - 1;
		stpmv_(uplo, "No transpose", "Non-unit", &i__2, &bp[1], &ap[
			k1], &c__1);
		ct = akk * .5f;
		i__2 = k - 1;
		saxpy_(&i__2, &ct, &bp[k1], &c__1, &ap[k1], &c__1);
		i__2 = k - 1;
		sspr2_(uplo, &i__2, &c_b11, &ap[k1], &c__1, &bp[k1], &c__1, &
			ap[1]);
		i__2 = k - 1;
		saxpy_(&i__2, &ct, &bp[k1], &c__1, &ap[k1], &c__1);
		i__2 = k - 1;
		sscal_(&i__2, &bkk, &ap[k1], &c__1);
/* Computing 2nd power */
		r__1 = bkk;
		ap[kk] = akk * (r__1 * r__1);
/* L30: */
	    }
	} else {

/*           Compute L'*A*L */

/*           JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1) */

	    jj = 1;
	    i__1 = *n;
	    for (j = 1; j <= i__1; ++j) {
		j1j1 = jj + *n - j + 1;

/*              Compute the j-th column of the lower triangle of A */

		ajj = ap[jj];
		bjj = bp[jj];
		i__2 = *n - j;
		ap[jj] = ajj * bjj + sdot_(&i__2, &ap[jj + 1], &c__1, &bp[jj 
			+ 1], &c__1);
		i__2 = *n - j;
		sscal_(&i__2, &bjj, &ap[jj + 1], &c__1);
		i__2 = *n - j;
		sspmv_(uplo, &i__2, &c_b11, &ap[j1j1], &bp[jj + 1], &c__1, &
			c_b11, &ap[jj + 1], &c__1);
		i__2 = *n - j + 1;
		stpmv_(uplo, "Transpose", "Non-unit", &i__2, &bp[jj], &ap[jj], 
			 &c__1);
		jj = j1j1;
/* L40: */
	    }
	}
    }
    return 0;

/*     End of SSPGST */

} /* sspgst_ */