Esempio n. 1
0
void 
qpb_sun_project(qpb_link *u, int n)
{
  for(int k=0; k<n; k++)
    {
      qpb_complex *u_ptr =(qpb_complex *)(u + k);
      qpb_double norm = CNORM(*u_ptr);
      *u_ptr = (qpb_complex){
	u_ptr->re/norm,
	u_ptr->im/norm
      };
    }
  return;
}
Esempio n. 2
0
/* Subroutine */ int zlatps_(char *uplo, char *trans, char *diag, char *
	normin, integer *n, doublecomplex *ap, doublecomplex *x, doublereal *
	scale, doublereal *cnorm, integer *info)
{
/*  -- LAPACK auxiliary routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1992   


    Purpose   
    =======   

    ZLATPS solves one of the triangular systems   

       A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b,   

    with scaling to prevent overflow, where A is an upper or lower   
    triangular matrix stored in packed form.  Here A**T denotes the   
    transpose of A, A**H denotes the conjugate transpose of A, x and b   
    are n-element vectors, and s is a scaling factor, usually less than   
    or equal to 1, chosen so that the components of x will be less than   
    the overflow threshold.  If the unscaled problem will not cause   
    overflow, the Level 2 BLAS routine ZTPSV is called. If the matrix A   
    is singular (A(j,j) = 0 for some j), then s is set to 0 and a   
    non-trivial solution to A*x = 0 is returned.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the matrix A is upper or lower triangular. 
  
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    TRANS   (input) CHARACTER*1   
            Specifies the operation applied to A.   
            = 'N':  Solve A * x = s*b     (No transpose)   
            = 'T':  Solve A**T * x = s*b  (Transpose)   
            = 'C':  Solve A**H * x = s*b  (Conjugate transpose)   

    DIAG    (input) CHARACTER*1   
            Specifies whether or not the matrix A is unit triangular.   
            = 'N':  Non-unit triangular   
            = 'U':  Unit triangular   

    NORMIN  (input) CHARACTER*1   
            Specifies whether CNORM has been set or not.   
            = 'Y':  CNORM contains the column norms on entry   
            = 'N':  CNORM is not set on entry.  On exit, the norms will   
                    be computed and stored in CNORM.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)   
            The upper or lower triangular matrix A, packed columnwise in 
  
            a linear array.  The j-th column of A is stored in the array 
  
            AP as follows:   
            if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;   
            if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.   

    X       (input/output) COMPLEX*16 array, dimension (N)   
            On entry, the right hand side b of the triangular system.   
            On exit, X is overwritten by the solution vector x.   

    SCALE   (output) DOUBLE PRECISION   
            The scaling factor s for the triangular system   
               A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b.   
            If SCALE = 0, the matrix A is singular or badly scaled, and   
            the vector x is an exact or approximate solution to A*x = 0. 
  

    CNORM   (input or output) DOUBLE PRECISION array, dimension (N)   

            If NORMIN = 'Y', CNORM is an input argument and CNORM(j)   
            contains the norm of the off-diagonal part of the j-th column 
  
            of A.  If TRANS = 'N', CNORM(j) must be greater than or equal 
  
            to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)   
            must be greater than or equal to the 1-norm.   

            If NORMIN = 'N', CNORM is an output argument and CNORM(j)   
            returns the 1-norm of the offdiagonal part of the j-th column 
  
            of A.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -k, the k-th argument had an illegal value   

    Further Details   
    ======= =======   

    A rough bound on x is computed; if that is less than overflow, ZTPSV 
  
    is called, otherwise, specific code is used which checks for possible 
  
    overflow or divide-by-zero at every operation.   

    A columnwise scheme is used for solving A*x = b.  The basic algorithm 
  
    if A is lower triangular is   

         x[1:n] := b[1:n]   
         for j = 1, ..., n   
              x(j) := x(j) / A(j,j)   
              x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]   
         end   

    Define bounds on the components of x after j iterations of the loop: 
  
       M(j) = bound on x[1:j]   
       G(j) = bound on x[j+1:n]   
    Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.   

    Then for iteration j+1 we have   
       M(j+1) <= G(j) / | A(j+1,j+1) |   
       G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |   
              <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )   

    where CNORM(j+1) is greater than or equal to the infinity-norm of   
    column j+1 of A, not counting the diagonal.  Hence   

       G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )   
                    1<=i<=j   
    and   

       |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) 
  
                                     1<=i< j   

    Since |x(j)| <= M(j), we use the Level 2 BLAS routine ZTPSV if the   
    reciprocal of the largest M(j), j=1,..,n, is larger than   
    max(underflow, 1/overflow).   

    The bound on x(j) is also used to determine when a step in the   
    columnwise method can be performed without fear of overflow.  If   
    the computed bound is greater than a large constant, x is scaled to   
    prevent overflow, but if the bound overflows, x is set to 0, x(j) to 
  
    1, and scale to 0, and a non-trivial solution to A*x = 0 is found.   

    Similarly, a row-wise scheme is used to solve A**T *x = b  or   
    A**H *x = b.  The basic algorithm for A upper triangular is   

         for j = 1, ..., n   
              x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)   
         end   

    We simultaneously compute two bounds   
         G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j   
         M(j) = bound on x(i), 1<=i<=j   

    The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we   
    add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.   
    Then the bound on x(j) is   

         M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |   

              <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )   
                        1<=i<=j   

    and we can safely call ZTPSV if 1/M(n) and 1/G(n) are both greater   
    than max(underflow, 1/overflow).   

    ===================================================================== 
  


    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    static doublereal c_b36 = .5;
    
    /* System generated locals */
    integer i__1, i__2, i__3, i__4, i__5;
    doublereal d__1, d__2, d__3, d__4;
    doublecomplex z__1, z__2, z__3, z__4;
    /* Builtin functions */
    double d_imag(doublecomplex *);
    void d_cnjg(doublecomplex *, doublecomplex *);
    /* Local variables */
    static integer jinc, jlen;
    static doublereal xbnd;
    static integer imax;
    static doublereal tmax;
    static doublecomplex tjjs;
    static doublereal xmax, grow;
    static integer i, j;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
	    integer *);
    extern logical lsame_(char *, char *);
    static doublereal tscal;
    static doublecomplex uscal;
    static integer jlast;
    static doublecomplex csumj;
    extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *, 
	    doublecomplex *, integer *, doublecomplex *, integer *);
    static logical upper;
    extern /* Double Complex */ VOID zdotu_(doublecomplex *, integer *, 
	    doublecomplex *, integer *, doublecomplex *, integer *);
    extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *, 
	    doublecomplex *, integer *, doublecomplex *, integer *), ztpsv_(
	    char *, char *, char *, integer *, doublecomplex *, doublecomplex 
	    *, integer *), dlabad_(doublereal *, 
	    doublereal *);
    extern doublereal dlamch_(char *);
    static integer ip;
    static doublereal xj;
    extern integer idamax_(integer *, doublereal *, integer *);
    extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_(
	    integer *, doublereal *, doublecomplex *, integer *);
    static doublereal bignum;
    extern integer izamax_(integer *, doublecomplex *, integer *);
    extern /* Double Complex */ VOID zladiv_(doublecomplex *, doublecomplex *,
	     doublecomplex *);
    static logical notran;
    static integer jfirst;
    extern doublereal dzasum_(integer *, doublecomplex *, integer *);
    static doublereal smlnum;
    static logical nounit;
    static doublereal rec, tjj;



#define CNORM(I) cnorm[(I)-1]
#define X(I) x[(I)-1]
#define AP(I) ap[(I)-1]


    *info = 0;
    upper = lsame_(uplo, "U");
    notran = lsame_(trans, "N");
    nounit = lsame_(diag, "N");

/*     Test the input parameters. */

    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, 
	    "C")) {
	*info = -2;
    } else if (! nounit && ! lsame_(diag, "U")) {
	*info = -3;
    } else if (! lsame_(normin, "Y") && ! lsame_(normin, "N"))
	     {
	*info = -4;
    } else if (*n < 0) {
	*info = -5;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZLATPS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine machine dependent parameters to control overflow. */

    smlnum = dlamch_("Safe minimum");
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);
    smlnum /= dlamch_("Precision");
    bignum = 1. / smlnum;
    *scale = 1.;

    if (lsame_(normin, "N")) {

/*        Compute the 1-norm of each column, not including the diagona
l. */

	if (upper) {

/*           A is upper triangular. */

	    ip = 1;
	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
		i__2 = j - 1;
		CNORM(j) = dzasum_(&i__2, &AP(ip), &c__1);
		ip += j;
/* L10: */
	    }
	} else {

/*           A is lower triangular. */

	    ip = 1;
	    i__1 = *n - 1;
	    for (j = 1; j <= *n-1; ++j) {
		i__2 = *n - j;
		CNORM(j) = dzasum_(&i__2, &AP(ip + 1), &c__1);
		ip = ip + *n - j + 1;
/* L20: */
	    }
	    CNORM(*n) = 0.;
	}
    }

/*     Scale the column norms by TSCAL if the maximum element in CNORM is 
  
       greater than BIGNUM/2. */

    imax = idamax_(n, &CNORM(1), &c__1);
    tmax = CNORM(imax);
    if (tmax <= bignum * .5) {
	tscal = 1.;
    } else {
	tscal = .5 / (smlnum * tmax);
	dscal_(n, &tscal, &CNORM(1), &c__1);
    }

/*     Compute a bound on the computed solution vector to see if the   
       Level 2 BLAS routine ZTPSV can be used. */

    xmax = 0.;
    i__1 = *n;
    for (j = 1; j <= *n; ++j) {
/* Computing MAX */
	i__2 = j;
	d__3 = xmax, d__4 = (d__1 = X(j).r / 2., abs(d__1)) + (d__2 = 
		d_imag(&X(j)) / 2., abs(d__2));
	xmax = max(d__3,d__4);
/* L30: */
    }
    xbnd = xmax;
    if (notran) {

/*        Compute the growth in A * x = b. */

	if (upper) {
	    jfirst = *n;
	    jlast = 1;
	    jinc = -1;
	} else {
	    jfirst = 1;
	    jlast = *n;
	    jinc = 1;
	}

	if (tscal != 1.) {
	    grow = 0.;
	    goto L60;
	}

	if (nounit) {

/*           A is non-unit triangular.   

             Compute GROW = 1/G(j) and XBND = 1/M(j).   
             Initially, G(0) = max{x(i), i=1,...,n}. */

	    grow = .5 / max(xbnd,smlnum);
	    xbnd = grow;
	    ip = jfirst * (jfirst + 1) / 2;
	    jlen = *n;
	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; jinc < 0 ? j >= jlast : j <= jlast; j += jinc) {

/*              Exit the loop if the growth factor is too smal
l. */

		if (grow <= smlnum) {
		    goto L60;
		}

		i__3 = ip;
		tjjs.r = AP(ip).r, tjjs.i = AP(ip).i;
		tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), abs(
			d__2));

		if (tjj >= smlnum) {

/*                 M(j) = G(j-1) / abs(A(j,j))   

   Computing MIN */
		    d__1 = xbnd, d__2 = min(1.,tjj) * grow;
		    xbnd = min(d__1,d__2);
		} else {

/*                 M(j) could overflow, set XBND to 0. */

		    xbnd = 0.;
		}

		if (tjj + CNORM(j) >= smlnum) {

/*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,
j)) ) */

		    grow *= tjj / (tjj + CNORM(j));
		} else {

/*                 G(j) could overflow, set GROW to 0. */

		    grow = 0.;
		}
		ip += jinc * jlen;
		--jlen;
/* L40: */
	    }
	    grow = xbnd;
	} else {

/*           A is unit triangular.   

             Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...
,n}.   

   Computing MIN */
	    d__1 = 1., d__2 = .5 / max(xbnd,smlnum);
	    grow = min(d__1,d__2);
	    i__2 = jlast;
	    i__1 = jinc;
	    for (j = jfirst; jinc < 0 ? j >= jlast : j <= jlast; j += jinc) {

/*              Exit the loop if the growth factor is too smal
l. */

		if (grow <= smlnum) {
		    goto L60;
		}

/*              G(j) = G(j-1)*( 1 + CNORM(j) ) */

		grow *= 1. / (CNORM(j) + 1.);
/* L50: */
	    }
	}
L60:

	;
    } else {

/*        Compute the growth in A**T * x = b  or  A**H * x = b. */

	if (upper) {
	    jfirst = 1;
	    jlast = *n;
	    jinc = 1;
	} else {
	    jfirst = *n;
	    jlast = 1;
	    jinc = -1;
	}

	if (tscal != 1.) {
	    grow = 0.;
	    goto L90;
	}

	if (nounit) {

/*           A is non-unit triangular.   

             Compute GROW = 1/G(j) and XBND = 1/M(j).   
             Initially, M(0) = max{x(i), i=1,...,n}. */

	    grow = .5 / max(xbnd,smlnum);
	    xbnd = grow;
	    ip = jfirst * (jfirst + 1) / 2;
	    jlen = 1;
	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; jinc < 0 ? j >= jlast : j <= jlast; j += jinc) {

/*              Exit the loop if the growth factor is too smal
l. */

		if (grow <= smlnum) {
		    goto L90;
		}

/*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) 
*/

		xj = CNORM(j) + 1.;
/* Computing MIN */
		d__1 = grow, d__2 = xbnd / xj;
		grow = min(d__1,d__2);

		i__3 = ip;
		tjjs.r = AP(ip).r, tjjs.i = AP(ip).i;
		tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), abs(
			d__2));

		if (tjj >= smlnum) {

/*                 M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(
j,j)) */

		    if (xj > tjj) {
			xbnd *= tjj / xj;
		    }
		} else {

/*                 M(j) could overflow, set XBND to 0. */

		    xbnd = 0.;
		}
		++jlen;
		ip += jinc * jlen;
/* L70: */
	    }
	    grow = min(grow,xbnd);
	} else {

/*           A is unit triangular.   

             Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...
,n}.   

   Computing MIN */
	    d__1 = 1., d__2 = .5 / max(xbnd,smlnum);
	    grow = min(d__1,d__2);
	    i__2 = jlast;
	    i__1 = jinc;
	    for (j = jfirst; jinc < 0 ? j >= jlast : j <= jlast; j += jinc) {

/*              Exit the loop if the growth factor is too smal
l. */

		if (grow <= smlnum) {
		    goto L90;
		}

/*              G(j) = ( 1 + CNORM(j) )*G(j-1) */

		xj = CNORM(j) + 1.;
		grow /= xj;
/* L80: */
	    }
	}
L90:
	;
    }

    if (grow * tscal > smlnum) {

/*        Use the Level 2 BLAS solve if the reciprocal of the bound on
   
          elements of X is not too small. */

	ztpsv_(uplo, trans, diag, n, &AP(1), &X(1), &c__1);
    } else {

/*        Use a Level 1 BLAS solve, scaling intermediate results. */

	if (xmax > bignum * .5) {

/*           Scale X so that its components are less than or equal
 to   
             BIGNUM in absolute value. */

	    *scale = bignum * .5 / xmax;
	    zdscal_(n, scale, &X(1), &c__1);
	    xmax = bignum;
	} else {
	    xmax *= 2.;
	}

	if (notran) {

/*           Solve A * x = b */

	    ip = jfirst * (jfirst + 1) / 2;
	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; jinc < 0 ? j >= jlast : j <= jlast; j += jinc) {

/*              Compute x(j) = b(j) / A(j,j), scaling x if nec
essary. */

		i__3 = j;
		xj = (d__1 = X(j).r, abs(d__1)) + (d__2 = d_imag(&X(j)), 
			abs(d__2));
		if (nounit) {
		    i__3 = ip;
		    z__1.r = tscal * AP(ip).r, z__1.i = tscal * AP(ip).i;
		    tjjs.r = z__1.r, tjjs.i = z__1.i;
		} else {
		    tjjs.r = tscal, tjjs.i = 0.;
		    if (tscal == 1.) {
			goto L110;
		    }
		}
		tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), abs(
			d__2));
		if (tjj > smlnum) {

/*                    abs(A(j,j)) > SMLNUM: */

		    if (tjj < 1.) {
			if (xj > tjj * bignum) {

/*                          Scale x by 1/b(j). */

			    rec = 1. / xj;
			    zdscal_(n, &rec, &X(1), &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }
		    i__3 = j;
		    zladiv_(&z__1, &X(j), &tjjs);
		    X(j).r = z__1.r, X(j).i = z__1.i;
		    i__3 = j;
		    xj = (d__1 = X(j).r, abs(d__1)) + (d__2 = d_imag(&X(j))
			    , abs(d__2));
		} else if (tjj > 0.) {

/*                    0 < abs(A(j,j)) <= SMLNUM: */

		    if (xj > tjj * bignum) {

/*                       Scale x by (1/abs(x(j)))*abs(
A(j,j))*BIGNUM   
                         to avoid overflow when dividi
ng by A(j,j). */

			rec = tjj * bignum / xj;
			if (CNORM(j) > 1.) {

/*                          Scale by 1/CNORM(j) to
 avoid overflow when   
                            multiplying x(j) times
 column j. */

			    rec /= CNORM(j);
			}
			zdscal_(n, &rec, &X(1), &c__1);
			*scale *= rec;
			xmax *= rec;
		    }
		    i__3 = j;
		    zladiv_(&z__1, &X(j), &tjjs);
		    X(j).r = z__1.r, X(j).i = z__1.i;
		    i__3 = j;
		    xj = (d__1 = X(j).r, abs(d__1)) + (d__2 = d_imag(&X(j))
			    , abs(d__2));
		} else {

/*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 
1, and   
                      scale = 0, and compute a solution to
 A*x = 0. */

		    i__3 = *n;
		    for (i = 1; i <= *n; ++i) {
			i__4 = i;
			X(i).r = 0., X(i).i = 0.;
/* L100: */
		    }
		    i__3 = j;
		    X(j).r = 1., X(j).i = 0.;
		    xj = 1.;
		    *scale = 0.;
		    xmax = 0.;
		}
L110:

/*              Scale x if necessary to avoid overflow when ad
ding a   
                multiple of column j of A. */

		if (xj > 1.) {
		    rec = 1. / xj;
		    if (CNORM(j) > (bignum - xmax) * rec) {

/*                    Scale x by 1/(2*abs(x(j))). */

			rec *= .5;
			zdscal_(n, &rec, &X(1), &c__1);
			*scale *= rec;
		    }
		} else if (xj * CNORM(j) > bignum - xmax) {

/*                 Scale x by 1/2. */

		    zdscal_(n, &c_b36, &X(1), &c__1);
		    *scale *= .5;
		}

		if (upper) {
		    if (j > 1) {

/*                    Compute the update   
                         x(1:j-1) := x(1:j-1) - x(j) *
 A(1:j-1,j) */

			i__3 = j - 1;
			i__4 = j;
			z__2.r = -X(j).r, z__2.i = -X(j).i;
			z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i;
			zaxpy_(&i__3, &z__1, &AP(ip - j + 1), &c__1, &X(1), &
				c__1);
			i__3 = j - 1;
			i = izamax_(&i__3, &X(1), &c__1);
			i__3 = i;
			xmax = (d__1 = X(i).r, abs(d__1)) + (d__2 = d_imag(
				&X(i)), abs(d__2));
		    }
		    ip -= j;
		} else {
		    if (j < *n) {

/*                    Compute the update   
                         x(j+1:n) := x(j+1:n) - x(j) *
 A(j+1:n,j) */

			i__3 = *n - j;
			i__4 = j;
			z__2.r = -X(j).r, z__2.i = -X(j).i;
			z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i;
			zaxpy_(&i__3, &z__1, &AP(ip + 1), &c__1, &X(j + 1), &
				c__1);
			i__3 = *n - j;
			i = j + izamax_(&i__3, &X(j + 1), &c__1);
			i__3 = i;
			xmax = (d__1 = X(i).r, abs(d__1)) + (d__2 = d_imag(
				&X(i)), abs(d__2));
		    }
		    ip = ip + *n - j + 1;
		}
/* L120: */
	    }

	} else if (lsame_(trans, "T")) {

/*           Solve A**T * x = b */

	    ip = jfirst * (jfirst + 1) / 2;
	    jlen = 1;
	    i__2 = jlast;
	    i__1 = jinc;
	    for (j = jfirst; jinc < 0 ? j >= jlast : j <= jlast; j += jinc) {

/*              Compute x(j) = b(j) - sum A(k,j)*x(k).   
                                      k<>j */

		i__3 = j;
		xj = (d__1 = X(j).r, abs(d__1)) + (d__2 = d_imag(&X(j)), 
			abs(d__2));
		uscal.r = tscal, uscal.i = 0.;
		rec = 1. / max(xmax,1.);
		if (CNORM(j) > (bignum - xj) * rec) {

/*                 If x(j) could overflow, scale x by 1/(2
*XMAX). */

		    rec *= .5;
		    if (nounit) {
			i__3 = ip;
			z__1.r = tscal * AP(ip).r, z__1.i = tscal * AP(ip)
				.i;
			tjjs.r = z__1.r, tjjs.i = z__1.i;
		    } else {
			tjjs.r = tscal, tjjs.i = 0.;
		    }
		    tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), 
			    abs(d__2));
		    if (tjj > 1.) {

/*                       Divide by A(j,j) when scaling
 x if A(j,j) > 1.   

   Computing MIN */
			d__1 = 1., d__2 = rec * tjj;
			rec = min(d__1,d__2);
			zladiv_(&z__1, &uscal, &tjjs);
			uscal.r = z__1.r, uscal.i = z__1.i;
		    }
		    if (rec < 1.) {
			zdscal_(n, &rec, &X(1), &c__1);
			*scale *= rec;
			xmax *= rec;
		    }
		}

		csumj.r = 0., csumj.i = 0.;
		if (uscal.r == 1. && uscal.i == 0.) {

/*                 If the scaling needed for A in the dot 
product is 1,   
                   call ZDOTU to perform the dot product. 
*/

		    if (upper) {
			i__3 = j - 1;
			zdotu_(&z__1, &i__3, &AP(ip - j + 1), &c__1, &X(1), &
				c__1);
			csumj.r = z__1.r, csumj.i = z__1.i;
		    } else if (j < *n) {
			i__3 = *n - j;
			zdotu_(&z__1, &i__3, &AP(ip + 1), &c__1, &X(j + 1), &
				c__1);
			csumj.r = z__1.r, csumj.i = z__1.i;
		    }
		} else {

/*                 Otherwise, use in-line code for the dot
 product. */

		    if (upper) {
			i__3 = j - 1;
			for (i = 1; i <= j-1; ++i) {
			    i__4 = ip - j + i;
			    z__3.r = AP(ip-j+i).r * uscal.r - AP(ip-j+i).i * 
				    uscal.i, z__3.i = AP(ip-j+i).r * uscal.i + 
				    AP(ip-j+i).i * uscal.r;
			    i__5 = i;
			    z__2.r = z__3.r * X(i).r - z__3.i * X(i).i, 
				    z__2.i = z__3.r * X(i).i + z__3.i * X(
				    i).r;
			    z__1.r = csumj.r + z__2.r, z__1.i = csumj.i + 
				    z__2.i;
			    csumj.r = z__1.r, csumj.i = z__1.i;
/* L130: */
			}
		    } else if (j < *n) {
			i__3 = *n - j;
			for (i = 1; i <= *n-j; ++i) {
			    i__4 = ip + i;
			    z__3.r = AP(ip+i).r * uscal.r - AP(ip+i).i * 
				    uscal.i, z__3.i = AP(ip+i).r * uscal.i + 
				    AP(ip+i).i * uscal.r;
			    i__5 = j + i;
			    z__2.r = z__3.r * X(j+i).r - z__3.i * X(j+i).i, 
				    z__2.i = z__3.r * X(j+i).i + z__3.i * X(
				    j+i).r;
			    z__1.r = csumj.r + z__2.r, z__1.i = csumj.i + 
				    z__2.i;
			    csumj.r = z__1.r, csumj.i = z__1.i;
/* L140: */
			}
		    }
		}

		z__1.r = tscal, z__1.i = 0.;
		if (uscal.r == z__1.r && uscal.i == z__1.i) {

/*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,
j) if 1/A(j,j)   
                   was not used to scale the dotproduct. 
*/

		    i__3 = j;
		    i__4 = j;
		    z__1.r = X(j).r - csumj.r, z__1.i = X(j).i - 
			    csumj.i;
		    X(j).r = z__1.r, X(j).i = z__1.i;
		    i__3 = j;
		    xj = (d__1 = X(j).r, abs(d__1)) + (d__2 = d_imag(&X(j))
			    , abs(d__2));
		    if (nounit) {

/*                    Compute x(j) = x(j) / A(j,j), sc
aling if necessary. */

			i__3 = ip;
			z__1.r = tscal * AP(ip).r, z__1.i = tscal * AP(ip)
				.i;
			tjjs.r = z__1.r, tjjs.i = z__1.i;
		    } else {
			tjjs.r = tscal, tjjs.i = 0.;
			if (tscal == 1.) {
			    goto L160;
			}
		    }
		    tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), 
			    abs(d__2));
		    if (tjj > smlnum) {

/*                       abs(A(j,j)) > SMLNUM: */

			if (tjj < 1.) {
			    if (xj > tjj * bignum) {

/*                             Scale X by 1/ab
s(x(j)). */

				rec = 1. / xj;
				zdscal_(n, &rec, &X(1), &c__1);
				*scale *= rec;
				xmax *= rec;
			    }
			}
			i__3 = j;
			zladiv_(&z__1, &X(j), &tjjs);
			X(j).r = z__1.r, X(j).i = z__1.i;
		    } else if (tjj > 0.) {

/*                       0 < abs(A(j,j)) <= SMLNUM: */

			if (xj > tjj * bignum) {

/*                          Scale x by (1/abs(x(j)
))*abs(A(j,j))*BIGNUM. */

			    rec = tjj * bignum / xj;
			    zdscal_(n, &rec, &X(1), &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
			i__3 = j;
			zladiv_(&z__1, &X(j), &tjjs);
			X(j).r = z__1.r, X(j).i = z__1.i;
		    } else {

/*                       A(j,j) = 0:  Set x(1:n) = 0, 
x(j) = 1, and   
                         scale = 0 and compute a solut
ion to A**T *x = 0. */

			i__3 = *n;
			for (i = 1; i <= *n; ++i) {
			    i__4 = i;
			    X(i).r = 0., X(i).i = 0.;
/* L150: */
			}
			i__3 = j;
			X(j).r = 1., X(j).i = 0.;
			*scale = 0.;
			xmax = 0.;
		    }
L160:
		    ;
		} else {

/*                 Compute x(j) := x(j) / A(j,j) - CSUMJ i
f the dot   
                   product has already been divided by 1/A
(j,j). */

		    i__3 = j;
		    zladiv_(&z__2, &X(j), &tjjs);
		    z__1.r = z__2.r - csumj.r, z__1.i = z__2.i - csumj.i;
		    X(j).r = z__1.r, X(j).i = z__1.i;
		}
/* Computing MAX */
		i__3 = j;
		d__3 = xmax, d__4 = (d__1 = X(j).r, abs(d__1)) + (d__2 = 
			d_imag(&X(j)), abs(d__2));
		xmax = max(d__3,d__4);
		++jlen;
		ip += jinc * jlen;
/* L170: */
	    }

	} else {

/*           Solve A**H * x = b */

	    ip = jfirst * (jfirst + 1) / 2;
	    jlen = 1;
	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; jinc < 0 ? j >= jlast : j <= jlast; j += jinc) {

/*              Compute x(j) = b(j) - sum A(k,j)*x(k).   
                                      k<>j */

		i__3 = j;
		xj = (d__1 = X(j).r, abs(d__1)) + (d__2 = d_imag(&X(j)), 
			abs(d__2));
		uscal.r = tscal, uscal.i = 0.;
		rec = 1. / max(xmax,1.);
		if (CNORM(j) > (bignum - xj) * rec) {

/*                 If x(j) could overflow, scale x by 1/(2
*XMAX). */

		    rec *= .5;
		    if (nounit) {
			d_cnjg(&z__2, &AP(ip));
			z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i;
			tjjs.r = z__1.r, tjjs.i = z__1.i;
		    } else {
			tjjs.r = tscal, tjjs.i = 0.;
		    }
		    tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), 
			    abs(d__2));
		    if (tjj > 1.) {

/*                       Divide by A(j,j) when scaling
 x if A(j,j) > 1.   

   Computing MIN */
			d__1 = 1., d__2 = rec * tjj;
			rec = min(d__1,d__2);
			zladiv_(&z__1, &uscal, &tjjs);
			uscal.r = z__1.r, uscal.i = z__1.i;
		    }
		    if (rec < 1.) {
			zdscal_(n, &rec, &X(1), &c__1);
			*scale *= rec;
			xmax *= rec;
		    }
		}

		csumj.r = 0., csumj.i = 0.;
		if (uscal.r == 1. && uscal.i == 0.) {

/*                 If the scaling needed for A in the dot 
product is 1,   
                   call ZDOTC to perform the dot product. 
*/

		    if (upper) {
			i__3 = j - 1;
			zdotc_(&z__1, &i__3, &AP(ip - j + 1), &c__1, &X(1), &
				c__1);
			csumj.r = z__1.r, csumj.i = z__1.i;
		    } else if (j < *n) {
			i__3 = *n - j;
			zdotc_(&z__1, &i__3, &AP(ip + 1), &c__1, &X(j + 1), &
				c__1);
			csumj.r = z__1.r, csumj.i = z__1.i;
		    }
		} else {

/*                 Otherwise, use in-line code for the dot
 product. */

		    if (upper) {
			i__3 = j - 1;
			for (i = 1; i <= j-1; ++i) {
			    d_cnjg(&z__4, &AP(ip - j + i));
			    z__3.r = z__4.r * uscal.r - z__4.i * uscal.i, 
				    z__3.i = z__4.r * uscal.i + z__4.i * 
				    uscal.r;
			    i__4 = i;
			    z__2.r = z__3.r * X(i).r - z__3.i * X(i).i, 
				    z__2.i = z__3.r * X(i).i + z__3.i * X(
				    i).r;
			    z__1.r = csumj.r + z__2.r, z__1.i = csumj.i + 
				    z__2.i;
			    csumj.r = z__1.r, csumj.i = z__1.i;
/* L180: */
			}
		    } else if (j < *n) {
			i__3 = *n - j;
			for (i = 1; i <= *n-j; ++i) {
			    d_cnjg(&z__4, &AP(ip + i));
			    z__3.r = z__4.r * uscal.r - z__4.i * uscal.i, 
				    z__3.i = z__4.r * uscal.i + z__4.i * 
				    uscal.r;
			    i__4 = j + i;
			    z__2.r = z__3.r * X(j+i).r - z__3.i * X(j+i).i, 
				    z__2.i = z__3.r * X(j+i).i + z__3.i * X(
				    j+i).r;
			    z__1.r = csumj.r + z__2.r, z__1.i = csumj.i + 
				    z__2.i;
			    csumj.r = z__1.r, csumj.i = z__1.i;
/* L190: */
			}
		    }
		}

		z__1.r = tscal, z__1.i = 0.;
		if (uscal.r == z__1.r && uscal.i == z__1.i) {

/*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,
j) if 1/A(j,j)   
                   was not used to scale the dotproduct. 
*/

		    i__3 = j;
		    i__4 = j;
		    z__1.r = X(j).r - csumj.r, z__1.i = X(j).i - 
			    csumj.i;
		    X(j).r = z__1.r, X(j).i = z__1.i;
		    i__3 = j;
		    xj = (d__1 = X(j).r, abs(d__1)) + (d__2 = d_imag(&X(j))
			    , abs(d__2));
		    if (nounit) {

/*                    Compute x(j) = x(j) / A(j,j), sc
aling if necessary. */

			d_cnjg(&z__2, &AP(ip));
			z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i;
			tjjs.r = z__1.r, tjjs.i = z__1.i;
		    } else {
			tjjs.r = tscal, tjjs.i = 0.;
			if (tscal == 1.) {
			    goto L210;
			}
		    }
		    tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), 
			    abs(d__2));
		    if (tjj > smlnum) {

/*                       abs(A(j,j)) > SMLNUM: */

			if (tjj < 1.) {
			    if (xj > tjj * bignum) {

/*                             Scale X by 1/ab
s(x(j)). */

				rec = 1. / xj;
				zdscal_(n, &rec, &X(1), &c__1);
				*scale *= rec;
				xmax *= rec;
			    }
			}
			i__3 = j;
			zladiv_(&z__1, &X(j), &tjjs);
			X(j).r = z__1.r, X(j).i = z__1.i;
		    } else if (tjj > 0.) {

/*                       0 < abs(A(j,j)) <= SMLNUM: */

			if (xj > tjj * bignum) {

/*                          Scale x by (1/abs(x(j)
))*abs(A(j,j))*BIGNUM. */

			    rec = tjj * bignum / xj;
			    zdscal_(n, &rec, &X(1), &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
			i__3 = j;
			zladiv_(&z__1, &X(j), &tjjs);
			X(j).r = z__1.r, X(j).i = z__1.i;
		    } else {

/*                       A(j,j) = 0:  Set x(1:n) = 0, 
x(j) = 1, and   
                         scale = 0 and compute a solut
ion to A**H *x = 0. */

			i__3 = *n;
			for (i = 1; i <= *n; ++i) {
			    i__4 = i;
			    X(i).r = 0., X(i).i = 0.;
/* L200: */
			}
			i__3 = j;
			X(j).r = 1., X(j).i = 0.;
			*scale = 0.;
			xmax = 0.;
		    }
L210:
		    ;
		} else {

/*                 Compute x(j) := x(j) / A(j,j) - CSUMJ i
f the dot   
                   product has already been divided by 1/A
(j,j). */

		    i__3 = j;
		    zladiv_(&z__2, &X(j), &tjjs);
		    z__1.r = z__2.r - csumj.r, z__1.i = z__2.i - csumj.i;
		    X(j).r = z__1.r, X(j).i = z__1.i;
		}
/* Computing MAX */
		i__3 = j;
		d__3 = xmax, d__4 = (d__1 = X(j).r, abs(d__1)) + (d__2 = 
			d_imag(&X(j)), abs(d__2));
		xmax = max(d__3,d__4);
		++jlen;
		ip += jinc * jlen;
/* L220: */
	    }
	}
	*scale /= tscal;
    }

/*     Scale the column norms by 1/TSCAL for return. */

    if (tscal != 1.) {
	d__1 = 1. / tscal;
	dscal_(n, &d__1, &CNORM(1), &c__1);
    }

    return 0;

/*     End of ZLATPS */

} /* zlatps_ */
Esempio n. 3
0
/* Subroutine */ int dlatbs_(char *uplo, char *trans, char *diag, char *
	normin, integer *n, integer *kd, doublereal *ab, integer *ldab, 
	doublereal *x, doublereal *scale, doublereal *cnorm, integer *info)
{
/*  -- LAPACK auxiliary routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1992   


    Purpose   
    =======   

    DLATBS solves one of the triangular systems   

       A *x = s*b  or  A'*x = s*b   

    with scaling to prevent overflow, where A is an upper or lower   
    triangular band matrix.  Here A' denotes the transpose of A, x and b 
  
    are n-element vectors, and s is a scaling factor, usually less than   
    or equal to 1, chosen so that the components of x will be less than   
    the overflow threshold.  If the unscaled problem will not cause   
    overflow, the Level 2 BLAS routine DTBSV is called.  If the matrix A 
  
    is singular (A(j,j) = 0 for some j), then s is set to 0 and a   
    non-trivial solution to A*x = 0 is returned.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the matrix A is upper or lower triangular. 
  
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    TRANS   (input) CHARACTER*1   
            Specifies the operation applied to A.   
            = 'N':  Solve A * x = s*b  (No transpose)   
            = 'T':  Solve A'* x = s*b  (Transpose)   
            = 'C':  Solve A'* x = s*b  (Conjugate transpose = Transpose) 
  

    DIAG    (input) CHARACTER*1   
            Specifies whether or not the matrix A is unit triangular.   
            = 'N':  Non-unit triangular   
            = 'U':  Unit triangular   

    NORMIN  (input) CHARACTER*1   
            Specifies whether CNORM has been set or not.   
            = 'Y':  CNORM contains the column norms on entry   
            = 'N':  CNORM is not set on entry.  On exit, the norms will   
                    be computed and stored in CNORM.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    KD      (input) INTEGER   
            The number of subdiagonals or superdiagonals in the   
            triangular matrix A.  KD >= 0.   

    AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)   
            The upper or lower triangular band matrix A, stored in the   
            first KD+1 rows of the array. The j-th column of A is stored 
  
            in the j-th column of the array AB as follows:   
            if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; 
  
            if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd). 
  

    LDAB    (input) INTEGER   
            The leading dimension of the array AB.  LDAB >= KD+1.   

    X       (input/output) DOUBLE PRECISION array, dimension (N)   
            On entry, the right hand side b of the triangular system.   
            On exit, X is overwritten by the solution vector x.   

    SCALE   (output) DOUBLE PRECISION   
            The scaling factor s for the triangular system   
               A * x = s*b  or  A'* x = s*b.   
            If SCALE = 0, the matrix A is singular or badly scaled, and   
            the vector x is an exact or approximate solution to A*x = 0. 
  

    CNORM   (input or output) DOUBLE PRECISION array, dimension (N)   

            If NORMIN = 'Y', CNORM is an input argument and CNORM(j)   
            contains the norm of the off-diagonal part of the j-th column 
  
            of A.  If TRANS = 'N', CNORM(j) must be greater than or equal 
  
            to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)   
            must be greater than or equal to the 1-norm.   

            If NORMIN = 'N', CNORM is an output argument and CNORM(j)   
            returns the 1-norm of the offdiagonal part of the j-th column 
  
            of A.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -k, the k-th argument had an illegal value   

    Further Details   
    ======= =======   

    A rough bound on x is computed; if that is less than overflow, DTBSV 
  
    is called, otherwise, specific code is used which checks for possible 
  
    overflow or divide-by-zero at every operation.   

    A columnwise scheme is used for solving A*x = b.  The basic algorithm 
  
    if A is lower triangular is   

         x[1:n] := b[1:n]   
         for j = 1, ..., n   
              x(j) := x(j) / A(j,j)   
              x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]   
         end   

    Define bounds on the components of x after j iterations of the loop: 
  
       M(j) = bound on x[1:j]   
       G(j) = bound on x[j+1:n]   
    Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.   

    Then for iteration j+1 we have   
       M(j+1) <= G(j) / | A(j+1,j+1) |   
       G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |   
              <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )   

    where CNORM(j+1) is greater than or equal to the infinity-norm of   
    column j+1 of A, not counting the diagonal.  Hence   

       G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )   
                    1<=i<=j   
    and   

       |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| ) 
  
                                     1<=i< j   

    Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTBSV if the   
    reciprocal of the largest M(j), j=1,..,n, is larger than   
    max(underflow, 1/overflow).   

    The bound on x(j) is also used to determine when a step in the   
    columnwise method can be performed without fear of overflow.  If   
    the computed bound is greater than a large constant, x is scaled to   
    prevent overflow, but if the bound overflows, x is set to 0, x(j) to 
  
    1, and scale to 0, and a non-trivial solution to A*x = 0 is found.   

    Similarly, a row-wise scheme is used to solve A'*x = b.  The basic   
    algorithm for A upper triangular is   

         for j = 1, ..., n   
              x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)   
         end   

    We simultaneously compute two bounds   
         G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j   
         M(j) = bound on x(i), 1<=i<=j   

    The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we   
    add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.   
    Then the bound on x(j) is   

         M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |   

              <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )   
                        1<=i<=j   

    and we can safely call DTBSV if 1/M(n) and 1/G(n) are both greater   
    than max(underflow, 1/overflow).   

    ===================================================================== 
  


    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    static doublereal c_b36 = .5;
    
    /* System generated locals */
    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4;
    doublereal d__1, d__2, d__3;
    /* Local variables */
    static integer jinc, jlen;
    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, 
	    integer *);
    static doublereal xbnd;
    static integer imax;
    static doublereal tmax, tjjs, xmax, grow, sumj;
    static integer i, j;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
	    integer *);
    static integer maind;
    extern logical lsame_(char *, char *);
    static doublereal tscal, uscal;
    extern doublereal dasum_(integer *, doublereal *, integer *);
    static integer jlast;
    extern /* Subroutine */ int dtbsv_(char *, char *, char *, integer *, 
	    integer *, doublereal *, integer *, doublereal *, integer *), daxpy_(integer *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *);
    static logical upper;
    extern doublereal dlamch_(char *);
    static doublereal xj;
    extern integer idamax_(integer *, doublereal *, integer *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static doublereal bignum;
    static logical notran;
    static integer jfirst;
    static doublereal smlnum;
    static logical nounit;
    static doublereal rec, tjj;



#define X(I) x[(I)-1]
#define CNORM(I) cnorm[(I)-1]

#define AB(I,J) ab[(I)-1 + ((J)-1)* ( *ldab)]

    *info = 0;
    upper = lsame_(uplo, "U");
    notran = lsame_(trans, "N");
    nounit = lsame_(diag, "N");

/*     Test the input parameters. */

    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, 
	    "C")) {
	*info = -2;
    } else if (! nounit && ! lsame_(diag, "U")) {
	*info = -3;
    } else if (! lsame_(normin, "Y") && ! lsame_(normin, "N"))
	     {
	*info = -4;
    } else if (*n < 0) {
	*info = -5;
    } else if (*kd < 0) {
	*info = -6;
    } else if (*ldab < *kd + 1) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DLATBS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine machine dependent parameters to control overflow. */

    smlnum = dlamch_("Safe minimum") / dlamch_("Precision");
    bignum = 1. / smlnum;
    *scale = 1.;

    if (lsame_(normin, "N")) {

/*        Compute the 1-norm of each column, not including the diagona
l. */

	if (upper) {

/*           A is upper triangular. */

	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
/* Computing MIN */
		i__2 = *kd, i__3 = j - 1;
		jlen = min(i__2,i__3);
		CNORM(j) = dasum_(&jlen, &AB(*kd+1-jlen,j), &
			c__1);
/* L10: */
	    }
	} else {

/*           A is lower triangular. */

	    i__1 = *n;
	    for (j = 1; j <= *n; ++j) {
/* Computing MIN */
		i__2 = *kd, i__3 = *n - j;
		jlen = min(i__2,i__3);
		if (jlen > 0) {
		    CNORM(j) = dasum_(&jlen, &AB(2,j), &c__1);
		} else {
		    CNORM(j) = 0.;
		}
/* L20: */
	    }
	}
    }

/*     Scale the column norms by TSCAL if the maximum element in CNORM is 
  
       greater than BIGNUM. */

    imax = idamax_(n, &CNORM(1), &c__1);
    tmax = CNORM(imax);
    if (tmax <= bignum) {
	tscal = 1.;
    } else {
	tscal = 1. / (smlnum * tmax);
	dscal_(n, &tscal, &CNORM(1), &c__1);
    }

/*     Compute a bound on the computed solution vector to see if the   
       Level 2 BLAS routine DTBSV can be used. */

    j = idamax_(n, &X(1), &c__1);
    xmax = (d__1 = X(j), abs(d__1));
    xbnd = xmax;
    if (notran) {

/*        Compute the growth in A * x = b. */

	if (upper) {
	    jfirst = *n;
	    jlast = 1;
	    jinc = -1;
	    maind = *kd + 1;
	} else {
	    jfirst = 1;
	    jlast = *n;
	    jinc = 1;
	    maind = 1;
	}

	if (tscal != 1.) {
	    grow = 0.;
	    goto L50;
	}

	if (nounit) {

/*           A is non-unit triangular.   

             Compute GROW = 1/G(j) and XBND = 1/M(j).   
             Initially, G(0) = max{x(i), i=1,...,n}. */

	    grow = 1. / max(xbnd,smlnum);
	    xbnd = grow;
	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; jinc < 0 ? j >= jlast : j <= jlast; j += jinc) {

/*              Exit the loop if the growth factor is too smal
l. */

		if (grow <= smlnum) {
		    goto L50;
		}

/*              M(j) = G(j-1) / abs(A(j,j)) */

		tjj = (d__1 = AB(maind,j), abs(d__1));
/* Computing MIN */
		d__1 = xbnd, d__2 = min(1.,tjj) * grow;
		xbnd = min(d__1,d__2);
		if (tjj + CNORM(j) >= smlnum) {

/*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,
j)) ) */

		    grow *= tjj / (tjj + CNORM(j));
		} else {

/*                 G(j) could overflow, set GROW to 0. */

		    grow = 0.;
		}
/* L30: */
	    }
	    grow = xbnd;
	} else {

/*           A is unit triangular.   

             Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...
,n}.   

   Computing MIN */
	    d__1 = 1., d__2 = 1. / max(xbnd,smlnum);
	    grow = min(d__1,d__2);
	    i__2 = jlast;
	    i__1 = jinc;
	    for (j = jfirst; jinc < 0 ? j >= jlast : j <= jlast; j += jinc) {

/*              Exit the loop if the growth factor is too smal
l. */

		if (grow <= smlnum) {
		    goto L50;
		}

/*              G(j) = G(j-1)*( 1 + CNORM(j) ) */

		grow *= 1. / (CNORM(j) + 1.);
/* L40: */
	    }
	}
L50:

	;
    } else {

/*        Compute the growth in A' * x = b. */

	if (upper) {
	    jfirst = 1;
	    jlast = *n;
	    jinc = 1;
	    maind = *kd + 1;
	} else {
	    jfirst = *n;
	    jlast = 1;
	    jinc = -1;
	    maind = 1;
	}

	if (tscal != 1.) {
	    grow = 0.;
	    goto L80;
	}

	if (nounit) {

/*           A is non-unit triangular.   

             Compute GROW = 1/G(j) and XBND = 1/M(j).   
             Initially, M(0) = max{x(i), i=1,...,n}. */

	    grow = 1. / max(xbnd,smlnum);
	    xbnd = grow;
	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; jinc < 0 ? j >= jlast : j <= jlast; j += jinc) {

/*              Exit the loop if the growth factor is too smal
l. */

		if (grow <= smlnum) {
		    goto L80;
		}

/*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) 
*/

		xj = CNORM(j) + 1.;
/* Computing MIN */
		d__1 = grow, d__2 = xbnd / xj;
		grow = min(d__1,d__2);

/*              M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) 
*/

		tjj = (d__1 = AB(maind,j), abs(d__1));
		if (xj > tjj) {
		    xbnd *= tjj / xj;
		}
/* L60: */
	    }
	    grow = min(grow,xbnd);
	} else {

/*           A is unit triangular.   

             Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...
,n}.   

   Computing MIN */
	    d__1 = 1., d__2 = 1. / max(xbnd,smlnum);
	    grow = min(d__1,d__2);
	    i__2 = jlast;
	    i__1 = jinc;
	    for (j = jfirst; jinc < 0 ? j >= jlast : j <= jlast; j += jinc) {

/*              Exit the loop if the growth factor is too smal
l. */

		if (grow <= smlnum) {
		    goto L80;
		}

/*              G(j) = ( 1 + CNORM(j) )*G(j-1) */

		xj = CNORM(j) + 1.;
		grow /= xj;
/* L70: */
	    }
	}
L80:
	;
    }

    if (grow * tscal > smlnum) {

/*        Use the Level 2 BLAS solve if the reciprocal of the bound on
   
          elements of X is not too small. */

	dtbsv_(uplo, trans, diag, n, kd, &AB(1,1), ldab, &X(1), &c__1);
    } else {

/*        Use a Level 1 BLAS solve, scaling intermediate results. */

	if (xmax > bignum) {

/*           Scale X so that its components are less than or equal
 to   
             BIGNUM in absolute value. */

	    *scale = bignum / xmax;
	    dscal_(n, scale, &X(1), &c__1);
	    xmax = bignum;
	}

	if (notran) {

/*           Solve A * x = b */

	    i__1 = jlast;
	    i__2 = jinc;
	    for (j = jfirst; jinc < 0 ? j >= jlast : j <= jlast; j += jinc) {

/*              Compute x(j) = b(j) / A(j,j), scaling x if nec
essary. */

		xj = (d__1 = X(j), abs(d__1));
		if (nounit) {
		    tjjs = AB(maind,j) * tscal;
		} else {
		    tjjs = tscal;
		    if (tscal == 1.) {
			goto L100;
		    }
		}
		tjj = abs(tjjs);
		if (tjj > smlnum) {

/*                    abs(A(j,j)) > SMLNUM: */

		    if (tjj < 1.) {
			if (xj > tjj * bignum) {

/*                          Scale x by 1/b(j). */

			    rec = 1. / xj;
			    dscal_(n, &rec, &X(1), &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
		    }
		    X(j) /= tjjs;
		    xj = (d__1 = X(j), abs(d__1));
		} else if (tjj > 0.) {

/*                    0 < abs(A(j,j)) <= SMLNUM: */

		    if (xj > tjj * bignum) {

/*                       Scale x by (1/abs(x(j)))*abs(
A(j,j))*BIGNUM   
                         to avoid overflow when dividi
ng by A(j,j). */

			rec = tjj * bignum / xj;
			if (CNORM(j) > 1.) {

/*                          Scale by 1/CNORM(j) to
 avoid overflow when   
                            multiplying x(j) times
 column j. */

			    rec /= CNORM(j);
			}
			dscal_(n, &rec, &X(1), &c__1);
			*scale *= rec;
			xmax *= rec;
		    }
		    X(j) /= tjjs;
		    xj = (d__1 = X(j), abs(d__1));
		} else {

/*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 
1, and   
                      scale = 0, and compute a solution to
 A*x = 0. */

		    i__3 = *n;
		    for (i = 1; i <= *n; ++i) {
			X(i) = 0.;
/* L90: */
		    }
		    X(j) = 1.;
		    xj = 1.;
		    *scale = 0.;
		    xmax = 0.;
		}
L100:

/*              Scale x if necessary to avoid overflow when ad
ding a   
                multiple of column j of A. */

		if (xj > 1.) {
		    rec = 1. / xj;
		    if (CNORM(j) > (bignum - xmax) * rec) {

/*                    Scale x by 1/(2*abs(x(j))). */

			rec *= .5;
			dscal_(n, &rec, &X(1), &c__1);
			*scale *= rec;
		    }
		} else if (xj * CNORM(j) > bignum - xmax) {

/*                 Scale x by 1/2. */

		    dscal_(n, &c_b36, &X(1), &c__1);
		    *scale *= .5;
		}

		if (upper) {
		    if (j > 1) {

/*                    Compute the update   
                         x(max(1,j-kd):j-1) := x(max(1
,j-kd):j-1) -   
                                               x(j)* A
(max(1,j-kd):j-1,j)   

   Computing MIN */
			i__3 = *kd, i__4 = j - 1;
			jlen = min(i__3,i__4);
			d__1 = -X(j) * tscal;
			daxpy_(&jlen, &d__1, &AB(*kd+1-jlen,j)
				, &c__1, &X(j - jlen), &c__1);
			i__3 = j - 1;
			i = idamax_(&i__3, &X(1), &c__1);
			xmax = (d__1 = X(i), abs(d__1));
		    }
		} else if (j < *n) {

/*                 Compute the update   
                      x(j+1:min(j+kd,n)) := x(j+1:min(j+kd
,n)) -   
                                            x(j) * A(j+1:m
in(j+kd,n),j)   

   Computing MIN */
		    i__3 = *kd, i__4 = *n - j;
		    jlen = min(i__3,i__4);
		    if (jlen > 0) {
			d__1 = -X(j) * tscal;
			daxpy_(&jlen, &d__1, &AB(2,j), &c__1, &X(
				j + 1), &c__1);
		    }
		    i__3 = *n - j;
		    i = j + idamax_(&i__3, &X(j + 1), &c__1);
		    xmax = (d__1 = X(i), abs(d__1));
		}
/* L110: */
	    }

	} else {

/*           Solve A' * x = b */

	    i__2 = jlast;
	    i__1 = jinc;
	    for (j = jfirst; jinc < 0 ? j >= jlast : j <= jlast; j += jinc) {

/*              Compute x(j) = b(j) - sum A(k,j)*x(k).   
                                      k<>j */

		xj = (d__1 = X(j), abs(d__1));
		uscal = tscal;
		rec = 1. / max(xmax,1.);
		if (CNORM(j) > (bignum - xj) * rec) {

/*                 If x(j) could overflow, scale x by 1/(2
*XMAX). */

		    rec *= .5;
		    if (nounit) {
			tjjs = AB(maind,j) * tscal;
		    } else {
			tjjs = tscal;
		    }
		    tjj = abs(tjjs);
		    if (tjj > 1.) {

/*                       Divide by A(j,j) when scaling
 x if A(j,j) > 1.   

   Computing MIN */
			d__1 = 1., d__2 = rec * tjj;
			rec = min(d__1,d__2);
			uscal /= tjjs;
		    }
		    if (rec < 1.) {
			dscal_(n, &rec, &X(1), &c__1);
			*scale *= rec;
			xmax *= rec;
		    }
		}

		sumj = 0.;
		if (uscal == 1.) {

/*                 If the scaling needed for A in the dot 
product is 1,   
                   call DDOT to perform the dot product. 
*/

		    if (upper) {
/* Computing MIN */
			i__3 = *kd, i__4 = j - 1;
			jlen = min(i__3,i__4);
			sumj = ddot_(&jlen, &AB(*kd+1-jlen,j),
				 &c__1, &X(j - jlen), &c__1);
		    } else {
/* Computing MIN */
			i__3 = *kd, i__4 = *n - j;
			jlen = min(i__3,i__4);
			if (jlen > 0) {
			    sumj = ddot_(&jlen, &AB(2,j), &c__1, &
				    X(j + 1), &c__1);
			}
		    }
		} else {

/*                 Otherwise, use in-line code for the dot
 product. */

		    if (upper) {
/* Computing MIN */
			i__3 = *kd, i__4 = j - 1;
			jlen = min(i__3,i__4);
			i__3 = jlen;
			for (i = 1; i <= jlen; ++i) {
			    sumj += AB(*kd+i-jlen,j) * uscal *
				     X(j - jlen - 1 + i);
/* L120: */
			}
		    } else {
/* Computing MIN */
			i__3 = *kd, i__4 = *n - j;
			jlen = min(i__3,i__4);
			i__3 = jlen;
			for (i = 1; i <= jlen; ++i) {
			    sumj += AB(i+1,j) * uscal * X(j + i)
				    ;
/* L130: */
			}
		    }
		}

		if (uscal == tscal) {

/*                 Compute x(j) := ( x(j) - sumj ) / A(j,j
) if 1/A(j,j)   
                   was not used to scale the dotproduct. 
*/

		    X(j) -= sumj;
		    xj = (d__1 = X(j), abs(d__1));
		    if (nounit) {

/*                    Compute x(j) = x(j) / A(j,j), sc
aling if necessary. */

			tjjs = AB(maind,j) * tscal;
		    } else {
			tjjs = tscal;
			if (tscal == 1.) {
			    goto L150;
			}
		    }
		    tjj = abs(tjjs);
		    if (tjj > smlnum) {

/*                       abs(A(j,j)) > SMLNUM: */

			if (tjj < 1.) {
			    if (xj > tjj * bignum) {

/*                             Scale X by 1/ab
s(x(j)). */

				rec = 1. / xj;
				dscal_(n, &rec, &X(1), &c__1);
				*scale *= rec;
				xmax *= rec;
			    }
			}
			X(j) /= tjjs;
		    } else if (tjj > 0.) {

/*                       0 < abs(A(j,j)) <= SMLNUM: */

			if (xj > tjj * bignum) {

/*                          Scale x by (1/abs(x(j)
))*abs(A(j,j))*BIGNUM. */

			    rec = tjj * bignum / xj;
			    dscal_(n, &rec, &X(1), &c__1);
			    *scale *= rec;
			    xmax *= rec;
			}
			X(j) /= tjjs;
		    } else {

/*                       A(j,j) = 0:  Set x(1:n) = 0, 
x(j) = 1, and   
                         scale = 0, and compute a solu
tion to A'*x = 0. */

			i__3 = *n;
			for (i = 1; i <= *n; ++i) {
			    X(i) = 0.;
/* L140: */
			}
			X(j) = 1.;
			*scale = 0.;
			xmax = 0.;
		    }
L150:
		    ;
		} else {

/*                 Compute x(j) := x(j) / A(j,j) - sumj if
 the dot   
                   product has already been divided by 1/A
(j,j). */

		    X(j) = X(j) / tjjs - sumj;
		}
/* Computing MAX */
		d__2 = xmax, d__3 = (d__1 = X(j), abs(d__1));
		xmax = max(d__2,d__3);
/* L160: */
	    }
	}
	*scale /= tscal;
    }

/*     Scale the column norms by 1/TSCAL for return. */

    if (tscal != 1.) {
	d__1 = 1. / tscal;
	dscal_(n, &d__1, &CNORM(1), &c__1);
    }

    return 0;

/*     End of DLATBS */

} /* dlatbs_ */
Esempio n. 4
0
/*
 *  Computes meson 2pt function for gammas: 
 *  g5-g5, g5-g4g5, g4g5-g5, g4g5-g4g5, g1-g1, g2-g2, g3-g3
 *
 *  The function does not return anything. It writes the correlation functions
 *  to a file (as ascii).
 *
 *  Updated for non-zero momentum correlator. Correlator calculated explicitely
 *  for all momentum vectors (i.e. non-FFT)
 *  
 */  
void
qpb_mesons_2pt_corr(qpb_spinor_field *light, qpb_spinor_field *heavy, int max_q2, char outfile[])
{
  if(heavy == NULL)
    heavy = light;

  /* This should never happen. For now the package is built so that
     only x, y and z are parallelized accross MPI and t along OpenMP */
  if(problem_params.par_dir[0] == 1)
    {
      error(" %s() not implemented for distributed t-direction, quiting\n", __func__);
      exit(QPB_NOT_IMPLEMENTED_ERROR);
    }
  
  int lvol = problem_params.l_vol;
  int lt = problem_params.l_dim[0];
  int lvol3d = lvol/lt;
  qpb_complex **corr_x;
  qpb_complex **corr_k;
  qpb_complex **corr[QPB_N_MESON_2PT_CHANNELS];
  int N = (NS*NS*NS*NS);
  qpb_complex prod[N];
  int ndirac = 0;
  int mu[N],nu[N],ku[N],lu[N];
  qpb_complex gamma_5x[NS][NS];
  qpb_complex gamma_5y[NS][NS];
  qpb_complex gamma_5z[NS][NS];
  int nmom = 0, nq = (int)sqrt(max_q2)+1;
  int (*mom)[4];
  /*
    Count momentum vectors <= max_q2
   */
  for(int z=-nq; z<nq; z++)
    for(int y=-nq; y<nq; y++)
      for(int x=-nq; x<nq; x++)
	{
	  double q2 = x*x+y*y+z*z;
	  if(q2 <= max_q2)
	    nmom++;
	}
  
  mom = qpb_alloc(sizeof(int)*4*nmom);
  nmom = 0;

  /*
    Store momentum vectors <= max_q2
   */
  for(int z=-nq; z<nq; z++)
    for(int y=-nq; y<nq; y++)
      for(int x=-nq; x<nq; x++)
	{
	  double q2 = x*x+y*y+z*z;
	  if(q2 <= max_q2)
	    {
	      mom[nmom][3] = x;
	      mom[nmom][2] = y;
	      mom[nmom][1] = z;
	      mom[nmom][0] = q2;
	      nmom++;
	    }
	}
  

  /*
    Sort in ascending q^2 value
   */
  for(int i=0; i<nmom; i++)
    {
      int x = mom[i][0]; /* the q^2 value */
      int k = i;
      for(int j=i+1; j<nmom; j++)
	if(mom[j][0] < x)
	  {
	    k = j;
	    x = mom[j][0];
	  }
      int swap[] = {mom[k][0], mom[k][1], mom[k][2], mom[k][3]};
      for(int j=0; j<4; j++) mom[k][j] = mom[i][j];
      for(int j=0; j<4; j++) mom[i][j] = swap[j];
    }

  corr_x = qpb_alloc(lt * sizeof(qpb_complex *));
  corr_k = qpb_alloc(lt * sizeof(qpb_complex *));
  for(int t=0; t<lt; t++)
    {
      corr_x[t] = qpb_alloc(lvol3d * sizeof(qpb_complex));
      corr_k[t] = qpb_alloc(nmom * sizeof(qpb_complex));
    }

  for(int ich=0; ich<QPB_N_MESON_2PT_CHANNELS; ich++)
    {
      
      corr[ich] = qpb_alloc(nmom * sizeof(qpb_complex *));
      for(int p=0; p<nmom; p++)
	corr[ich][p] = qpb_alloc(lt * sizeof(qpb_complex));

      ndirac = 0;
      switch(ich)
	{
	case S_S:
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      for(int k=0; k<NS; k++)
		for(int l=0; l<NS; l++)
		  {
		    if(CNORM(CMUL(qpb_gamma_5[i][j],qpb_gamma_5[k][l])) > 0.5 )
		      {
			mu[ndirac] = i;
			nu[ndirac] = j;
			ku[ndirac] = k;
			lu[ndirac] = l;
			prod[ndirac] = CMUL(qpb_gamma_5[i][j],qpb_gamma_5[k][l]);
			ndirac++;
		      }
		  }
	  break;
	case G5_G5:
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      for(int k=0; k<NS; k++)
		for(int l=0; l<NS; l++)
		  {
		    if(i==j && k==l)
		      {
			mu[ndirac] = i;
			nu[ndirac] = j;
			ku[ndirac] = k;
			lu[ndirac] = l;
			prod[ndirac] = (qpb_complex){1.,0.};
			ndirac++;
		      }
		  }
	  break;
	case G5_G4G5:	
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      for(int k=0; k<NS; k++)
		for(int l=0; l<NS; l++)
		  {
		    if(i==j && CNORM(qpb_gamma_t[k][l]) > 0.5)
		      {
			mu[ndirac] = i;
			nu[ndirac] = j;
			ku[ndirac] = k;
			lu[ndirac] = l;
			prod[ndirac] = qpb_gamma_t[k][l];
			ndirac++;
		      }
		  }
	  break;
	case G4G5_G5:
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      for(int k=0; k<NS; k++)
		for(int l=0; l<NS; l++)
		  {
		    if(CNORM(qpb_gamma_t[i][j]) > 0.5 && k==l )
		      {
			mu[ndirac] = i;
			nu[ndirac] = j;
			ku[ndirac] = k;
			lu[ndirac] = l;
			prod[ndirac] = qpb_gamma_t[i][j];
			ndirac++;
		      }
		  }
	  break;
	case G4G5_G4G5:
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      for(int k=0; k<NS; k++)
		for(int l=0; l<NS; l++)
		  {
		    if(CNORM(CMUL(qpb_gamma_t[i][j],qpb_gamma_t[k][l])) > 0.5 )
		      {
			mu[ndirac] = i;
			nu[ndirac] = j;
			ku[ndirac] = k;
			lu[ndirac] = l;
			prod[ndirac] = CMUL(qpb_gamma_t[i][j],qpb_gamma_t[k][l]);
			ndirac++;
		      }
		  }
	  break;
	case G1_G1:
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      {
		gamma_5x[i][j] = (qpb_complex){0., 0.};
		for(int k=0; k<NS; k++)
		  {
		    gamma_5x[i][j].re += 
		      CMULR(qpb_gamma_5[i][k], qpb_gamma_x[k][j]);
		    gamma_5x[i][j].im += 
		      CMULI(qpb_gamma_5[i][k], qpb_gamma_x[k][j]);
		  }
	      }
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      for(int k=0; k<NS; k++)
		for(int l=0; l<NS; l++)
		  {
		    if(CNORM(CMUL(gamma_5x[i][j],gamma_5x[k][l])) > 0.5 )
		      {
			mu[ndirac] = i;
			nu[ndirac] = j;
			ku[ndirac] = k;
			lu[ndirac] = l;
			prod[ndirac] = CNEGATE(CMUL(gamma_5x[i][j],gamma_5x[k][l]));
			ndirac++;
		      }
		  }
	  break;
	case G2_G2:
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      {
		gamma_5y[i][j] = (qpb_complex){0., 0.};
		for(int k=0; k<NS; k++)
		  {
		    gamma_5y[i][j].re += 
		      CMULR(qpb_gamma_5[i][k], qpb_gamma_y[k][j]);
		    gamma_5y[i][j].im += 
		      CMULI(qpb_gamma_5[i][k], qpb_gamma_y[k][j]);
		  }
	      }
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      for(int k=0; k<NS; k++)
		for(int l=0; l<NS; l++)
		  {
		    if(CNORM(CMUL(gamma_5y[i][j],gamma_5y[k][l])) > 0.5 )
		      {
			mu[ndirac] = i;
			nu[ndirac] = j;
			ku[ndirac] = k;
			lu[ndirac] = l;
			prod[ndirac] = CNEGATE(CMUL(gamma_5y[i][j],gamma_5y[k][l]));
			ndirac++;
		      }
		  }
	  break;
	case G3_G3:
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      {
		gamma_5z[i][j] = (qpb_complex){0., 0.};
		for(int k=0; k<NS; k++)
		  {
		    gamma_5z[i][j].re += 
		      CMULR(qpb_gamma_5[i][k], qpb_gamma_z[k][j]);
		    gamma_5z[i][j].im += 
		      CMULI(qpb_gamma_5[i][k], qpb_gamma_z[k][j]);
		  }
	      }
	  for(int i=0; i<NS; i++)
	    for(int j=0; j<NS; j++)
	      for(int k=0; k<NS; k++)
		for(int l=0; l<NS; l++)
		  {
		    if(CNORM(CMUL(gamma_5z[i][j],gamma_5z[k][l])) > 0.5 )
		      {
			mu[ndirac] = i;
			nu[ndirac] = j;
			ku[ndirac] = k;
			lu[ndirac] = l;
			prod[ndirac] = CNEGATE(CMUL(gamma_5z[i][j],gamma_5z[k][l]));
			ndirac++;
		      }
		  }
	  break;
	}

      for(int t=0; t<lt; t++)
	for(int lv=0; lv<lvol3d; lv++)
	  corr_x[t][lv] = (qpb_complex){0., 0.};

      for(int col0=0; col0<NC; col0++)
	for(int col1=0; col1<NC; col1++)
	  for(int id=0; id<ndirac; id++)
	    {
	      int i = mu[id];
	      int j = nu[id];
	      int k = ku[id];
	      int l = lu[id];
#ifdef OPENMP
#	pragma omp parallel for
#endif
	      for(int t=0; t<lt; t++)
		for(int lv=0; lv<lvol3d; lv++)
		  {
		    int v = blk_to_ext[lv + t*lvol3d];
		    qpb_complex hp = ((qpb_complex *)(light[col0+NC*l].index[v]))[col1+NC*i];
		    qpb_complex lp = ((qpb_complex *)(heavy[col0+NC*k].index[v]))[col1+NC*j];
		    /* c = x * conj(y) */
		    qpb_complex c = {hp.re*lp.re + hp.im*lp.im, hp.im*lp.re - hp.re*lp.im};
		    corr_x[t][lv].re += CMULR(prod[id], c);
		    corr_x[t][lv].im += CMULI(prod[id], c);
		  }
	    }

      qpb_ft(corr_k, corr_x, lt, mom, nmom);
      for(int t=0; t<lt; t++)
	for(int p=0; p<nmom; p++)
	  corr[ich][p][t] = corr_k[t][p];
      
    }
  
  FILE *fp = NULL;
  if(am_master)
    {
      if((fp = fopen(outfile, "w")) == NULL)
	{
	  error("%s: error opening file in \"w\" mode\n", outfile);
	  MPI_Abort(MPI_COMM_WORLD, QPB_FILE_ERROR);
	  exit(QPB_FILE_ERROR);
	}
    }
  for(int t=0; t<lt; t++)
    {
      char ctag[QPB_MAX_STRING];
      for(int p=0; p<nmom; p++)
	for(int ich=0; ich<QPB_N_MESON_2PT_CHANNELS; ich++)
	  {
	    switch(ich)
	      {
	      case S_S:
		strcpy(ctag ,"1-1");
		break;
	      case G5_G5:
		strcpy(ctag ,"g5-g5");
		break;
	      case G5_G4G5:
		strcpy(ctag ,"g5-g4g5");
		break;
	      case G4G5_G5:
		strcpy(ctag ,"g4g5-g5");
		break;
	      case G4G5_G4G5:
		strcpy(ctag ,"g4g5-g4g5");
		break;
	      case G1_G1:
		strcpy(ctag ,"g1-g1");
		break;
	      case G2_G2:
		strcpy(ctag ,"g2-g2");
		break;
	      case G3_G3:
		strcpy(ctag ,"g3-g3");
		break;
	      }
	    if(am_master)
	      fprintf(fp, " %+2d %+2d %+2d %3d %+e %+e %s\n", 
		      mom[p][3], mom[p][2], mom[p][1], t, corr[ich][p][t].re, corr[ich][p][t].im, ctag);
	  }
    }
  if(am_master)
    fclose(fp);
  
  for(int t=0; t<lt; t++)
    {
      free(corr_x[t]);
      free(corr_k[t]);
    }
  free(corr_x);
  free(corr_k);

  for(int ich=0; ich<QPB_N_MESON_2PT_CHANNELS; ich++)
    {
      for(int p=0; p<nmom; p++)
	free(corr[ich][p]);
      free(corr[ich]);
    }
  free(mom);
  return;
}