Пример #1
0
/*
 * f2c_cgemv -- forwarding shim around cgemv_.
 *
 * Every argument is handed to cgemv_ untouched and in the same order.
 * The function then reports 0 unconditionally; nothing that cgemv_ may
 * return is inspected here.
 */
int
f2c_cgemv(char *trans, integer *M, integer *N,
          complex *alpha,
          complex *A, integer *lda,
          complex *X, integer *incX,
          complex *beta,
          complex *Y, integer *incY)
{
    cgemv_(trans, M, N, alpha,
           A, lda,
           X, incX,
           beta,
           Y, incY);

    return 0;
}
Пример #2
0
/* Subroutine */ int claqps_(integer *m, integer *n, integer *offset, integer 
	*nb, integer *kb, complex *a, integer *lda, integer *jpvt, complex *
	tau, real *vn1, real *vn2, complex *auxv, complex *f, integer *ldf)
{
    /* System generated locals */
    integer a_dim1, a_offset, f_dim1, f_offset, i__1, i__2, i__3;
    real r__1, r__2;
    complex q__1;

    /* Local variables */
    /* j      : loop index over columns. */
    /* k      : number of columns processed so far in this call. */
    /* rk     : OFFSET + k, the row index paired with column k. */
    /* akk    : saved copy of A(rk,k) while that entry is temporarily 1. */
    /* pvt    : index of the pivot column chosen for step k. */
    /* lsticc : head of a chain of columns whose norms must be recomputed; */
    /*          the "next" links are stored (as reals) in VN2. 0 = empty. */
    /* lastrk : min(M, N+OFFSET). */
    integer j, k, rk;
    complex akk;
    integer pvt;
    real temp, temp2, tol3z;
    integer itemp;
    integer lsticc;
    integer lastrk;

/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  CLAQPS computes a step of QR factorization with column pivoting */
/*  of a complex M-by-N matrix A by using Blas-3.  It tries to factorize */
/*  NB columns from A starting from the row OFFSET+1, and updates all */
/*  of the matrix with Blas-3 xGEMM. */

/*  In some cases, due to catastrophic cancellations, it cannot */
/*  factorize NB columns.  Hence, the actual number of factorized */
/*  columns is returned in KB. */

/*  Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. */

/*  The function value returned to the C caller is always 0. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A. M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A. N >= 0 */

/*  OFFSET  (input) INTEGER */
/*          The number of rows of A that have been factorized in */
/*          previous steps. */

/*  NB      (input) INTEGER */
/*          The number of columns to factorize. */

/*  KB      (output) INTEGER */
/*          The number of columns actually factorized. */

/*  A       (input/output) COMPLEX array, dimension (LDA,N) */
/*          On entry, the M-by-N matrix A. */
/*          On exit, block A(OFFSET+1:M,1:KB) is the triangular */
/*          factor obtained and block A(1:OFFSET,1:N) has been */
/*          accordingly pivoted, but not factorized. */
/*          The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has */
/*          been updated. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A. LDA >= max(1,M). */

/*  JPVT    (input/output) INTEGER array, dimension (N) */
/*          JPVT(I) = K <==> Column K of the full matrix A has been */
/*          permuted into position I in AP. */

/*  TAU     (output) COMPLEX array, dimension (KB) */
/*          The scalar factors of the elementary reflectors. */

/*  VN1     (input/output) REAL array, dimension (N) */
/*          The vector with the partial column norms. */

/*  VN2     (input/output) REAL array, dimension (N) */
/*          The vector with the exact column norms. */

/*  AUXV    (input/output) COMPLEX array, dimension (NB) */
/*          Auxiliary vector. */

/*  F       (input/output) COMPLEX array, dimension (LDF,NB) */
/*          Matrix F' = L*Y'*A. */

/*  LDF     (input) INTEGER */
/*          The leading dimension of the array F. LDF >= max(1,N). */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */
/*    X. Sun, Computer Science Dept., Duke University, USA */

/*  Partial column norm updating strategy modified by */
/*    Z. Drmac and Z. Bujanovic, Dept. of Mathematics, */
/*    University of Zagreb, Croatia. */
/*    June 2006. */
/*  For more details see LAPACK Working Note 176. */

/*  Note on external names: c__1, c_b1 and c_b2 are not defined inside */
/*  this routine.  NOTE(review): presumably they are the integer 1, the */
/*  complex zero and the complex one respectively -- confirm against */
/*  their file-scope definitions. */
/*  ===================================================================== */

    /* Parameter adjustments */
    /* Shift every array pointer so the body can use 1-based indices: */
    /* a[i + j * a_dim1] addresses A(i,j) and f[i + j * f_dim1] addresses */
    /* F(i,j); the vectors are addressed as v[1], v[2], ... */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --jpvt;
    --tau;
    --vn1;
    --vn2;
    --auxv;
    f_dim1 = *ldf;
    f_offset = 1 + f_dim1;
    f -= f_offset;

    /* Function Body */
/* Computing MIN */
    i__1 = *m, i__2 = *n + *offset;
    lastrk = min(i__1,i__2);
    lsticc = 0;
    k = 0;
    tol3z = sqrt(slamch_("Epsilon"));

/*     Beginning of while loop. */
/*     The loop stops after NB columns, or as soon as the norm update */
/*     below has flagged at least one column (lsticc != 0). */

L10:
    if (k < *nb && lsticc == 0) {
	++k;
	rk = *offset + k;

/*        Determine ith pivot column and swap if necessary */

	i__1 = *n - k + 1;
	pvt = k - 1 + isamax_(&i__1, &vn1[k], &c__1);
	if (pvt != k) {
	    cswap_(m, &a[pvt * a_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1);
	    i__1 = k - 1;
	    cswap_(&i__1, &f[pvt + f_dim1], ldf, &f[k + f_dim1], ldf);
	    itemp = jpvt[pvt];
	    jpvt[pvt] = jpvt[k];
	    jpvt[k] = itemp;
/*           One-way copy: the norms of slot k move to slot pvt; the */
/*           previous VN1(pvt)/VN2(pvt) values are not written to slot k. */
	    vn1[pvt] = vn1[k];
	    vn2[pvt] = vn2[k];
	}

/*        Apply previous Householder reflectors to column K: */
/*        A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. */

	if (k > 1) {
/*           r_cnjg is applied in place to F(K,1:K-1) before the cgemv_ */
/*           call and again after it, so row K of F is handed to cgemv_ */
/*           in its transformed form and transformed a second time */
/*           afterwards (presumably conjugate, then conjugate back). */
	    i__1 = k - 1;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = k + j * f_dim1;
		r_cnjg(&q__1, &f[k + j * f_dim1]);
		f[i__2].r = q__1.r, f[i__2].i = q__1.i;
	    }
	    i__1 = *m - rk + 1;
	    i__2 = k - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemv_("No transpose", &i__1, &i__2, &q__1, &a[rk + a_dim1], lda, 
		    &f[k + f_dim1], ldf, &c_b2, &a[rk + k * a_dim1], &c__1);
	    i__1 = k - 1;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = k + j * f_dim1;
		r_cnjg(&q__1, &f[k + j * f_dim1]);
		f[i__2].r = q__1.r, f[i__2].i = q__1.i;
	    }
	}

/*        Generate elementary reflector H(k). */
/*        When RK is already the last row (rk >= M) the reflector is */
/*        generated with length 1 and A(RK,K) is passed for both the */
/*        alpha and the x arguments. */

	if (rk < *m) {
	    i__1 = *m - rk + 1;
	    clarfp_(&i__1, &a[rk + k * a_dim1], &a[rk + 1 + k * a_dim1], &
		    c__1, &tau[k]);
	} else {
	    clarfp_(&c__1, &a[rk + k * a_dim1], &a[rk + k * a_dim1], &c__1, &
		    tau[k]);
	}

/*        Save A(RK,K) in akk and overwrite it with 1 for the products */
/*        below; the saved value is written back at the end of the step. */

	i__1 = rk + k * a_dim1;
	akk.r = a[i__1].r, akk.i = a[i__1].i;
	i__1 = rk + k * a_dim1;
	a[i__1].r = 1.f, a[i__1].i = 0.f;

/*        Compute Kth column of F: */

/*        Compute  F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). */

	if (k < *n) {
	    i__1 = *m - rk + 1;
	    i__2 = *n - k;
	    cgemv_("Conjugate transpose", &i__1, &i__2, &tau[k], &a[rk + (k + 
		    1) * a_dim1], lda, &a[rk + k * a_dim1], &c__1, &c_b1, &f[
		    k + 1 + k * f_dim1], &c__1);
	}

/*        Padding F(1:K,K) with zeros. */

	i__1 = k;
	for (j = 1; j <= i__1; ++j) {
	    i__2 = j + k * f_dim1;
	    f[i__2].r = 0.f, f[i__2].i = 0.f;
	}

/*        Incremental updating of F: */
/*        F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)' */
/*                    *A(RK:M,K). */
/*        Done as two cgemv_ calls: the first stores its result in */
/*        AUXV(1:K-1) using the scalar -tau(K), the second uses AUXV to */
/*        update column K of F. */

	if (k > 1) {
	    i__1 = *m - rk + 1;
	    i__2 = k - 1;
	    i__3 = k;
	    q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
	    cgemv_("Conjugate transpose", &i__1, &i__2, &q__1, &a[rk + a_dim1]
, lda, &a[rk + k * a_dim1], &c__1, &c_b1, &auxv[1], &c__1);

	    i__1 = k - 1;
	    cgemv_("No transpose", n, &i__1, &c_b2, &f[f_dim1 + 1], ldf, &
		    auxv[1], &c__1, &c_b2, &f[k * f_dim1 + 1], &c__1);
	}

/*        Update the current row of A: */
/*        A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. */

	if (k < *n) {
	    i__1 = *n - k;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemm_("No transpose", "Conjugate transpose", &c__1, &i__1, &k, &
		    q__1, &a[rk + a_dim1], lda, &f[k + 1 + f_dim1], ldf, &
		    c_b2, &a[rk + (k + 1) * a_dim1], lda);
	}

/*        Update partial column norms. */

	if (rk < lastrk) {
	    i__1 = *n;
	    for (j = k + 1; j <= i__1; ++j) {
		if (vn1[j] != 0.f) {

/*                 NOTE: The following 4 lines follow from the analysis in */
/*                 Lapack Working Note 176. */

		    temp = c_abs(&a[rk + j * a_dim1]) / vn1[j];
/* Computing MAX */
		    r__1 = 0.f, r__2 = (temp + 1.f) * (1.f - temp);
		    temp = dmax(r__1,r__2);
/* Computing 2nd power */
		    r__1 = vn1[j] / vn2[j];
		    temp2 = temp * (r__1 * r__1);
		    if (temp2 <= tol3z) {
/*                    Push column j onto the recomputation chain: the */
/*                    previous head is stored in VN2(j) as a real and j */
/*                    becomes the new head. */
			vn2[j] = (real) lsticc;
			lsticc = j;
		    } else {
			vn1[j] *= sqrt(temp);
		    }
		}
	    }
	}

/*        Restore the value of A(RK,K) saved in akk. */

	i__1 = rk + k * a_dim1;
	a[i__1].r = akk.r, a[i__1].i = akk.i;

/*        End of while loop. */

	goto L10;
    }
    *kb = k;
    rk = *offset + *kb;

/*     Apply the block reflector to the rest of the matrix: */
/*     A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - */
/*                         A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'. */

/* Computing MIN */
    i__1 = *n, i__2 = *m - *offset;
    if (*kb < min(i__1,i__2)) {
	i__1 = *m - rk;
	i__2 = *n - *kb;
	q__1.r = -1.f, q__1.i = -0.f;
	cgemm_("No transpose", "Conjugate transpose", &i__1, &i__2, kb, &q__1, 
		 &a[rk + 1 + a_dim1], lda, &f[*kb + 1 + f_dim1], ldf, &c_b2, &
		a[rk + 1 + (*kb + 1) * a_dim1], lda);
    }

/*     Recomputation of difficult columns. */
/*     Walk the chain headed by lsticc: read the next link out of */
/*     VN2(lsticc) (rounded back to an integer), recompute the norm of */
/*     A(RK+1:M,lsticc) with scnrm2_, store it in both VN1 and VN2, and */
/*     move on until the link is 0. */

L60:
    if (lsticc > 0) {
	itemp = i_nint(&vn2[lsticc]);
	i__1 = *m - rk;
	vn1[lsticc] = scnrm2_(&i__1, &a[rk + 1 + lsticc * a_dim1], &c__1);

/*        NOTE: The computation of VN1( LSTICC ) relies on the fact that */
/*        SCNRM2 does not fail on vectors with norm below the value of */
/*        SQRT(SLAMCH('S')) */

	vn2[lsticc] = vn1[lsticc];
	lsticc = itemp;
	goto L60;
    }

    return 0;

/*     End of CLAQPS */

} /* claqps_ */
Пример #3
0
/*
 * cpanel_bmod -- apply supernodal updates to a panel of w columns.
 *
 * For each of the nseg supernode representatives in segrep[] (visited from
 * index nseg-1 down to 0) and for each of the w panel columns, the segment
 * of the column stored in the sparse accumulator dense[] is updated using
 * the supernode's entries in Glu->lusup.  Consecutive panel columns are m
 * entries apart in both dense[] and repfnz[].
 *
 * Two code paths exist per supernode:
 *   - a 2-D blocked update, taken when nsupc >= colblk and nrow > rowblk:
 *     all triangular solves for the panel are done first, then the
 *     rectangular part is applied in row blocks of at most rowblk rows;
 *   - a 1-D update otherwise: each column does its solve and its full
 *     matrix-vector product before moving to the next column.
 * In both paths segments of size 1, 2 and 3 are handled by unrolled code
 * and segments of size >= 4 by a triangular solve plus matrix-vector
 * product (BLAS routines when USE_VENDOR_BLAS is defined, clsolve/cmatvec
 * otherwise).
 *
 * Side effects visible here: dense[] is updated in place, the entries of
 * tempv[] that were used are reset to zero, and stat->ops[TRSV] and
 * stat->ops[GEMV] are incremented.  Nothing is returned.
 *
 * NOTE(review): maxsuper/rowblk/colblk are cached in function-level statics
 * on the first call and written without any synchronization -- confirm
 * that callers never enter this routine concurrently for the first time.
 */
void
cpanel_bmod (
            const int  m,          /* in - number of rows in the matrix */
            const int  w,          /* in - number of columns in the panel */
            const int  jcol,       /* in - index of the first panel column */
            const int  nseg,       /* in - number of entries used in segrep[] */
            complex     *dense,     /* out, of size n by w */
            complex     *tempv,     /* working array */
            int        *segrep,    /* in */
            int        *repfnz,    /* in, of size n by w */
            GlobalLU_t *Glu,       /* modified */
            SuperLUStat_t *stat    /* output */
            )
{


#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
         ftcs2 = _cptofcd("N", strlen("N")),
         ftcs3 = _cptofcd("U", strlen("U"));
#endif
    int          incx = 1, incy = 1;
    complex       alpha, beta;
#endif

    register int k, ksub;
    int          fsupc, nsupc, nsupr, nrow;
    int          krep, krep_ind;
    complex       ukj, ukj1, ukj2;
    int          luptr, luptr1, luptr2;
    int          segsze;
    int          block_nrow;  /* no of rows in a block row */
    register int lptr;        /* Points to the row subscripts of a supernode */
    int          kfnz, irow, no_zeros;
    register int isub, isub1, i;
    register int jj;          /* Index through each column in the panel */
    int          *xsup, *supno;
    int          *lsub, *xlsub;
    complex       *lusup;
    int          *xlusup;
    int          *repfnz_col; /* repfnz[] for a column in the panel */
    complex       *dense_col;  /* dense[] for a column in the panel */
    complex       *tempv1;             /* Used in 1-D update */
    complex       *TriTmp, *MatvecTmp; /* used in 2-D update */
    complex      zero = {0.0, 0.0};
    complex      one = {1.0, 0.0};
    complex      comp_temp, comp_temp1;
    register int ldaTmp;      /* per-column stride inside tempv (2-D update) */
    register int r_ind, r_hi;
    static   int first = 1, maxsuper, rowblk, colblk;
    flops_t  *ops = stat->ops;

    xsup    = Glu->xsup;
    supno   = Glu->supno;
    lsub    = Glu->lsub;
    xlsub   = Glu->xlsub;
    lusup   = Glu->lusup;
    xlusup  = Glu->xlusup;

    /* The tuning values are queried once and kept in statics afterwards. */
    if ( first ) {
        maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) );
        rowblk   = sp_ienv(4);
        colblk   = sp_ienv(5);
        first = 0;
    }
    /* In the 2-D update each panel column owns ldaTmp entries of tempv:
     * the first maxsuper hold TriTmp, and MatvecTmp starts right after. */
    ldaTmp = maxsuper + rowblk;

    /*
     * For each nonz supernode segment of U[*,j] in topological order
     */
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */

        /* krep = representative of current k-th supernode
         * fsupc = first supernodal column
         * nsupc = no of columns in a supernode
         * nsupr = no of rows in a supernode
         * nrow = nsupr - nsupc, the rows left after the first nsupc
         * krep_ind = position in lsub[] of the nsupc-th row subscript
         */
        krep = segrep[k--];
        fsupc = xsup[supno[krep]];
        nsupc = krep - fsupc + 1;
        nsupr = xlsub[fsupc+1] - xlsub[fsupc];
        nrow = nsupr - nsupc;
        lptr = xlsub[fsupc];
        krep_ind = lptr + nsupc - 1;

        /* Rewind to the first panel column for this supernode. */
        repfnz_col = repfnz;
        dense_col = dense;

        if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */

            TriTmp = tempv;

            /* Sequence through each column in panel -- triangular solves */
            for (jj = jcol; jj < jcol + w; jj++,
                 repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {

                kfnz = repfnz_col[krep];
                if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */

                segsze = krep - kfnz + 1;
                luptr = xlusup[fsupc];

                /* Operation counts are charged here for the whole update,
                 * including the matrix-vector part done further below. */
                ops[TRSV] += 4 * segsze * (segsze - 1);
                ops[GEMV] += 8 * nrow * segsze;

                /* Case 1: Update U-segment of size 1 -- col-col update */
                if ( segsze == 1 ) {
                    ukj = dense_col[lsub[krep_ind]];
                    /* With nsupr as the column stride of lusup, this lands
                     * on row nsupc of the supernode's last column. */
                    luptr += nsupr*(nsupc-1) + nsupc;

                    for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
                        irow = lsub[i];
                        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                        c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        ++luptr;
                    }

                } else if ( segsze <= 3 ) {
                    /* Cases 2 and 3: unrolled updates using the last two
                     * (or three) columns of the supernode.  luptr, luptr1
                     * and luptr2 walk those columns in lock step. */
                    ukj = dense_col[lsub[krep_ind]];
                    ukj1 = dense_col[lsub[krep_ind - 1]];
                    luptr += nsupr*(nsupc-1) + nsupc-1;
                    luptr1 = luptr - nsupr;

                    if ( segsze == 2 ) {
                        cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                        c_sub(&ukj, &ukj, &comp_temp);
                        dense_col[lsub[krep_ind]] = ukj;
                        for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
                            irow = lsub[i];
                            luptr++; luptr1++;
                            cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                            cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        }
                    } else {
                        ukj2 = dense_col[lsub[krep_ind - 2]];
                        luptr2 = luptr1 - nsupr;
                        /* ukj1 is updated first because the update of ukj
                         * below uses the new ukj1. */
                        cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
                        c_sub(&ukj1, &ukj1, &comp_temp);

                        cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                        cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        c_sub(&ukj, &ukj, &comp_temp);
                        dense_col[lsub[krep_ind]] = ukj;
                        dense_col[lsub[krep_ind-1]] = ukj1;
                        for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
                            irow = lsub[i];
                            luptr++; luptr1++; luptr2++;
                            cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                            cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        }
                    }

                } else  {       /* segsze >= 4 */

                    /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
                       holds the result of triangular solves.    */
                    no_zeros = kfnz - fsupc;
                    isub = lptr + no_zeros;
                    for (i = 0; i < segsze; ++i) {
                        irow = lsub[isub];
                        TriTmp[i] = dense_col[irow]; /* Gather */
                        ++isub;
                    }

                    /* start effective triangle */
                    luptr += nsupr * no_zeros + no_zeros;

#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
                    CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
                           &nsupr, TriTmp, &incx );
#else
                    ctrsv_( "L", "N", "U", &segsze, &lusup[luptr],
                           &nsupr, TriTmp, &incx );
#endif
#else
                    clsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
#endif


                } /* else ... */

            }  /* for jj ... end tri-solves */

            /* Block row updates; push all the way into dense[*] block */
            for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {

                r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
                block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
                luptr = xlusup[fsupc] + nsupc + r_ind;
                isub1 = lptr + nsupc + r_ind;

                /* Rewind the per-column cursors for this block row. */
                repfnz_col = repfnz;
                TriTmp = tempv;
                dense_col = dense;

                /* Sequence through each column in panel -- matrix-vector */
                for (jj = jcol; jj < jcol + w; jj++,
                     repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {

                    kfnz = repfnz_col[krep];
                    if ( kfnz == EMPTY ) continue; /* Skip any zero segment */

                    segsze = krep - kfnz + 1;
                    if ( segsze <= 3 ) continue;   /* skip unrolled cases */

                    /* Perform a block update, and scatter the result of
                       matrix-vector to dense[].                 */
                    no_zeros = kfnz - fsupc;
                    luptr1 = luptr + nsupr * no_zeros;
                    MatvecTmp = &TriTmp[maxsuper];

#ifdef USE_VENDOR_BLAS
                    alpha = one;
                    beta = zero;
#ifdef _CRAY
                    CGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1],
                           &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
#else
                    cgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1],
                           &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
#endif
#else
                    cmatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
                           TriTmp, MatvecTmp);
#endif

                    /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
                     * such that MatvecTmp[*] can be re-used for the
                     * next block row update. dense[] will be copied into
                     * global store after the whole panel has been finished.
                     */
                    isub = isub1;
                    for (i = 0; i < block_nrow; i++) {
                        irow = lsub[isub];
                        c_sub(&dense_col[irow], &dense_col[irow],
                              &MatvecTmp[i]);
                        MatvecTmp[i] = zero;
                        ++isub;
                    }

                } /* for jj ... */

            } /* for each block row ... */

            /* Scatter the triangular solves into SPA dense[*] */
            /* This is deferred until after all block rows, because every
             * block-row pass above reads TriTmp as its input vector. */
            repfnz_col = repfnz;
            TriTmp = tempv;
            dense_col = dense;

            for (jj = jcol; jj < jcol + w; jj++,
                 repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
                kfnz = repfnz_col[krep];
                if ( kfnz == EMPTY ) continue; /* Skip any zero segment */

                segsze = krep - kfnz + 1;
                if ( segsze <= 3 ) continue; /* skip unrolled cases */

                no_zeros = kfnz - fsupc;
                isub = lptr + no_zeros;
                for (i = 0; i < segsze; i++) {
                    irow = lsub[isub];
                    dense_col[irow] = TriTmp[i];
                    TriTmp[i] = zero;
                    ++isub;
                }

            } /* for jj ... */

        } else { /* 1-D block modification */


            /* Sequence through each column in the panel */
            for (jj = jcol; jj < jcol + w; jj++,
                 repfnz_col += m, dense_col += m) {

                kfnz = repfnz_col[krep];
                if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */

                segsze = krep - kfnz + 1;
                luptr = xlusup[fsupc];

                ops[TRSV] += 4 * segsze * (segsze - 1);
                ops[GEMV] += 8 * nrow * segsze;

                /* Case 1: Update U-segment of size 1 -- col-col update */
                if ( segsze == 1 ) {
                    ukj = dense_col[lsub[krep_ind]];
                    luptr += nsupr*(nsupc-1) + nsupc;

                    for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
                        irow = lsub[i];
                        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                        c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        ++luptr;
                    }

                } else if ( segsze <= 3 ) {
                    /* Same unrolled size-2 / size-3 updates as in the 2-D
                     * branch of this function. */
                    ukj = dense_col[lsub[krep_ind]];
                    luptr += nsupr*(nsupc-1) + nsupc-1;
                    ukj1 = dense_col[lsub[krep_ind - 1]];
                    luptr1 = luptr - nsupr;

                    if ( segsze == 2 ) {
                        cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                        c_sub(&ukj, &ukj, &comp_temp);
                        dense_col[lsub[krep_ind]] = ukj;
                        for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
                            irow = lsub[i];
                            ++luptr;  ++luptr1;
                            cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                            cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        }
                    } else {
                        ukj2 = dense_col[lsub[krep_ind - 2]];
                        luptr2 = luptr1 - nsupr;
                        cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
                        c_sub(&ukj1, &ukj1, &comp_temp);

                        cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                        cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        c_sub(&ukj, &ukj, &comp_temp);
                        dense_col[lsub[krep_ind]] = ukj;
                        dense_col[lsub[krep_ind-1]] = ukj1;
                        for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
                            irow = lsub[i];
                            ++luptr; ++luptr1; ++luptr2;
                            cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                            cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        }
                    }

                } else  { /* segsze >= 4 */
                    /*
                     * Perform a triangular solve and block update,
                     * then scatter the result of sup-col update to dense[].
                     */
                    no_zeros = kfnz - fsupc;

                    /* Copy U[*,j] segment from dense[*] to tempv[*]:
                     *    The result of triangular solve is in tempv[*];
                     *    The result of matrix vector update is in dense_col[*]
                     */
                    isub = lptr + no_zeros;
                    for (i = 0; i < segsze; ++i) {
                        irow = lsub[isub];
                        tempv[i] = dense_col[irow]; /* Gather */
                        ++isub;
                    }

                    /* start effective triangle */
                    luptr += nsupr * no_zeros + no_zeros;

                    /* In this path tempv is reused by every column:
                     * tempv[0..segsze-1] holds the solve result and
                     * tempv1 = &tempv[segsze] receives the nrow entries of
                     * the matrix-vector product. */
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
                    CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
                           &nsupr, tempv, &incx );
#else
                    ctrsv_( "L", "N", "U", &segsze, &lusup[luptr],
                           &nsupr, tempv, &incx );
#endif

                    luptr += segsze;    /* Dense matrix-vector */
                    tempv1 = &tempv[segsze];
                    alpha = one;
                    beta = zero;
#ifdef _CRAY
                    CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
                           &nsupr, tempv, &incx, &beta, tempv1, &incy );
#else
                    cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
                           &nsupr, tempv, &incx, &beta, tempv1, &incy );
#endif
#else
                    clsolve ( nsupr, segsze, &lusup[luptr], tempv );

                    luptr += segsze;        /* Dense matrix-vector */
                    tempv1 = &tempv[segsze];
                    cmatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
#endif

                    /* Scatter tempv[*] into SPA dense[*] temporarily, such
                     * that tempv[*] can be used for the triangular solve of
                     * the next column of the panel. They will be copied into
                     * ucol[*] after the whole panel has been finished.
                     */
                    isub = lptr + no_zeros;
                    for (i = 0; i < segsze; i++) {
                        irow = lsub[isub];
                        dense_col[irow] = tempv[i];
                        tempv[i] = zero;
                        isub++;
                    }

                    /* Scatter the update from tempv1[*] into SPA dense[*] */
                    /* Start dense rectangular L */
                    /* isub carries on from the loop above, so it now points
                     * at the row subscripts that follow the segment. */
                    for (i = 0; i < nrow; i++) {
                        irow = lsub[isub];
                        c_sub(&dense_col[irow], &dense_col[irow], &tempv1[i]);
                        tempv1[i] = zero;
                        ++isub;
                    }

                } /* else segsze>=4 ... */

            } /* for each column in the panel... */

        } /* else 1-D update ... */

    } /* for each updating supernode ... */

}
Пример #4
0
/* clabrd_ (f2c-style C translation of LAPACK CLABRD): reduces the leading */
/* NB rows and columns of the complex M-by-N matrix A to real bidiagonal */
/* form and fills X and Y, which the caller needs to update the unreduced */
/* part of A.  Always returns 0; returns immediately if M <= 0 or N <= 0. */
/* Subroutine */ int clabrd_(integer *m, integer *n, integer *nb, complex *a, 
	integer *lda, real *d__, real *e, complex *tauq, complex *taup, 
	complex *x, integer *ldx, complex *y, integer *ldy)
{
    /* System generated locals */
    integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2, 
	    i__3;
    complex q__1;

    /* Local variables */
    integer i__;
    complex alpha;
    extern /* Subroutine */ int cscal_(integer *, complex *, complex *, 
	    integer *), cgemv_(char *, integer *, integer *, complex *, 
	    complex *, integer *, complex *, integer *, complex *, complex *, 
	    integer *), clarfg_(integer *, complex *, complex *, 
	    integer *, complex *), clacgv_(integer *, complex *, integer *);


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CLABRD reduces the first NB rows and columns of a complex general */
/*  m by n matrix A to upper or lower real bidiagonal form by a unitary */
/*  transformation Q' * A * P, and returns the matrices X and Y which */
/*  are needed to apply the transformation to the unreduced part of A. */

/*  If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower */
/*  bidiagonal form. */

/*  This is an auxiliary routine called by CGEBRD */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows in the matrix A. */

/*  N       (input) INTEGER */
/*          The number of columns in the matrix A. */

/*  NB      (input) INTEGER */
/*          The number of leading rows and columns of A to be reduced. */

/*  A       (input/output) COMPLEX array, dimension (LDA,N) */
/*          On entry, the m by n general matrix to be reduced. */
/*          On exit, the first NB rows and columns of the matrix are */
/*          overwritten; the rest of the array is unchanged. */
/*          If m >= n, elements on and below the diagonal in the first NB */
/*            columns, with the array TAUQ, represent the unitary */
/*            matrix Q as a product of elementary reflectors; and */
/*            elements above the diagonal in the first NB rows, with the */
/*            array TAUP, represent the unitary matrix P as a product */
/*            of elementary reflectors. */
/*          If m < n, elements below the diagonal in the first NB */
/*            columns, with the array TAUQ, represent the unitary */
/*            matrix Q as a product of elementary reflectors, and */
/*            elements on and above the diagonal in the first NB rows, */
/*            with the array TAUP, represent the unitary matrix P as */
/*            a product of elementary reflectors. */
/*          See Further Details. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,M). */

/*  D       (output) REAL array, dimension (NB) */
/*          The diagonal elements of the first NB rows and columns of */
/*          the reduced matrix.  D(i) = A(i,i). */

/*  E       (output) REAL array, dimension (NB) */
/*          The off-diagonal elements of the first NB rows and columns of */
/*          the reduced matrix. */

/*  TAUQ    (output) COMPLEX array dimension (NB) */
/*          The scalar factors of the elementary reflectors which */
/*          represent the unitary matrix Q. See Further Details. */

/*  TAUP    (output) COMPLEX array, dimension (NB) */
/*          The scalar factors of the elementary reflectors which */
/*          represent the unitary matrix P. See Further Details. */

/*  X       (output) COMPLEX array, dimension (LDX,NB) */
/*          The m-by-nb matrix X required to update the unreduced part */
/*          of A. */

/*  LDX     (input) INTEGER */
/*          The leading dimension of the array X. LDX >= max(1,M). */

/*  Y       (output) COMPLEX array, dimension (LDY,NB) */
/*          The n-by-nb matrix Y required to update the unreduced part */
/*          of A. */

/*  LDY     (input) INTEGER */
/*          The leading dimension of the array Y. LDY >= max(1,N). */

/*  Further Details */
/*  =============== */

/*  The matrices Q and P are represented as products of elementary */
/*  reflectors: */

/*     Q = H(1) H(2) . . . H(nb)  and  P = G(1) G(2) . . . G(nb) */

/*  Each H(i) and G(i) has the form: */

/*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u' */

/*  where tauq and taup are complex scalars, and v and u are complex */
/*  vectors. */

/*  If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in */
/*  A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in */
/*  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). */

/*  If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in */
/*  A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in */
/*  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i). */

/*  The elements of the vectors v and u together form the m-by-nb matrix */
/*  V and the nb-by-n matrix U' which are needed, with X and Y, to apply */
/*  the transformation to the unreduced part of the matrix, using a block */
/*  update of the form:  A := A - V*Y' - X*U'. */

/*  The contents of A on exit are illustrated by the following examples */
/*  with nb = 2: */

/*  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n): */

/*    (  1   1   u1  u1  u1 )           (  1   u1  u1  u1  u1  u1 ) */
/*    (  v1  1   1   u2  u2 )           (  1   1   u2  u2  u2  u2 ) */
/*    (  v1  v2  a   a   a  )           (  v1  1   a   a   a   a  ) */
/*    (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  ) */
/*    (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  ) */
/*    (  v1  v2  a   a   a  ) */

/*  where a denotes an element of the original matrix which is unchanged, */
/*  vi denotes an element of the vector defining H(i), and ui an element */
/*  of the vector defining G(i). */

/*  Notes on this C translation */
/*  =========================== */

/*  Every scalar is passed by address, so i__2 and i__3 are scratch */
/*  integers that are reloaded with the dimensions of the next call, and */
/*  q__1 is reloaded with the scalar -1 before each call that subtracts. */

/*  NOTE(review): c_b1, c_b2 and c__1 are not defined inside this */
/*  function.  The formulas in the comments below assume they are the */
/*  complex constants 0 and 1 and the integer stride 1 respectively, and */
/*  that cgemv_, cscal_, clacgv_ and clarfg_ have their reference */
/*  BLAS/LAPACK semantics.  Confirm against the file-level definitions. */

/*  In those formulas a prime denotes the conjugate transpose, as in the */
/*  Purpose section above, and conjg() the elementwise conjugate. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
/*     Shift every array pointer so that the 1-based Fortran indices can */
/*     be used unchanged: a[i + j * a_dim1] addresses A(i,j) in */
/*     column-major order, likewise for x and y, and d__[i], e[i], */
/*     tauq[i], taup[i] address element i of the respective vector. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --d__;
    --e;
    --tauq;
    --taup;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    y_dim1 = *ldy;
    y_offset = 1 + y_dim1;
    y -= y_offset;

    /* Function Body */

/*     Quick return if possible */

    if (*m <= 0 || *n <= 0) {
	return 0;
    }

    if (*m >= *n) {

/*        Reduce to upper bidiagonal form */

	i__1 = *nb;
	for (i__ = 1; i__ <= i__1; ++i__) {

/*           Update A(i:m,i) */

/*           Conjugate Y(i,1:i-1) in place (undone after the product) */
	    i__2 = i__ - 1;
	    clacgv_(&i__2, &y[i__ + y_dim1], ldy);
/*           A(i:m,i) := A(i:m,i) - A(i:m,1:i-1) * conjg(Y(i,1:i-1)) */
	    i__2 = *m - i__ + 1;
	    i__3 = i__ - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + a_dim1], lda, 
		     &y[i__ + y_dim1], ldy, &c_b2, &a[i__ + i__ * a_dim1], &
		    c__1);
/*           Restore Y(i,1:i-1) */
	    i__2 = i__ - 1;
	    clacgv_(&i__2, &y[i__ + y_dim1], ldy);
/*           A(i:m,i) := A(i:m,i) - X(i:m,1:i-1) * A(1:i-1,i) */
	    i__2 = *m - i__ + 1;
	    i__3 = i__ - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + x_dim1], ldx, 
		     &a[i__ * a_dim1 + 1], &c__1, &c_b2, &a[i__ + i__ * 
		    a_dim1], &c__1);

/*           Generate reflection Q(i) to annihilate A(i+1:m,i) */

/*           alpha starts as A(i,i); the vector handed to clarfg_ starts */
/*           at row min(i+1,m) so the address stays inside column i */
	    i__2 = i__ + i__ * a_dim1;
	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
	    i__2 = *m - i__ + 1;
/* Computing MIN */
	    i__3 = i__ + 1;
	    clarfg_(&i__2, &alpha, &a[min(i__3, *m)+ i__ * a_dim1], &c__1, &
		    tauq[i__]);
/*           D(i) := real part of alpha as returned by clarfg_ */
	    i__2 = i__;
	    d__[i__2] = alpha.r;
	    if (i__ < *n) {
/*              Set A(i,i) := 1 so that A(i:m,i) can be used as the */
/*              reflector vector in the products below */
		i__2 = i__ + i__ * a_dim1;
		a[i__2].r = 1.f, a[i__2].i = 0.f;

/*              Compute Y(i+1:n,i) */

/*              Y(i+1:n,i) := A(i:m,i+1:n)' * A(i:m,i) */
		i__2 = *m - i__ + 1;
		i__3 = *n - i__;
		cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[i__ + (
			i__ + 1) * a_dim1], lda, &a[i__ + i__ * a_dim1], &
			c__1, &c_b1, &y[i__ + 1 + i__ * y_dim1], &c__1);
/*              Y(1:i-1,i) := A(i:m,1:i-1)' * A(i:m,i)   (scratch) */
		i__2 = *m - i__ + 1;
		i__3 = i__ - 1;
		cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[i__ + 
			a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b1, &
			y[i__ * y_dim1 + 1], &c__1);
/*              Y(i+1:n,i) := Y(i+1:n,i) - Y(i+1:n,1:i-1) * Y(1:i-1,i) */
		i__2 = *n - i__;
		i__3 = i__ - 1;
		q__1.r = -1.f, q__1.i = -0.f;
		cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + 1 + 
			y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b2, &y[
			i__ + 1 + i__ * y_dim1], &c__1);
/*              Y(1:i-1,i) := X(i:m,1:i-1)' * A(i:m,i)   (scratch) */
		i__2 = *m - i__ + 1;
		i__3 = i__ - 1;
		cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &x[i__ + 
			x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b1, &
			y[i__ * y_dim1 + 1], &c__1);
/*              Y(i+1:n,i) := Y(i+1:n,i) - A(1:i-1,i+1:n)' * Y(1:i-1,i) */
		i__2 = i__ - 1;
		i__3 = *n - i__;
		q__1.r = -1.f, q__1.i = -0.f;
		cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[(i__ + 
			1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &
			c_b2, &y[i__ + 1 + i__ * y_dim1], &c__1);
/*              Y(i+1:n,i) := tauq(i) * Y(i+1:n,i) */
		i__2 = *n - i__;
		cscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);

/*              Update A(i,i+1:n) */

/*              Conjugate A(i,i+1:n); it stays conjugated until the */
/*              last statement of this branch.  A(i,1:i) is conjugated */
/*              only around the next product. */
		i__2 = *n - i__;
		clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
		clacgv_(&i__, &a[i__ + a_dim1], lda);
/*              A(i,i+1:n) := A(i,i+1:n) - Y(i+1:n,1:i) * A(i,1:i) */
		i__2 = *n - i__;
		q__1.r = -1.f, q__1.i = -0.f;
		cgemv_("No transpose", &i__2, &i__, &q__1, &y[i__ + 1 + 
			y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b2, &a[i__ + (
			i__ + 1) * a_dim1], lda);
/*              Restore A(i,1:i); conjugate X(i,1:i-1) for the next one */
		clacgv_(&i__, &a[i__ + a_dim1], lda);
		i__2 = i__ - 1;
		clacgv_(&i__2, &x[i__ + x_dim1], ldx);
/*              A(i,i+1:n) := A(i,i+1:n) - A(1:i-1,i+1:n)' * X(i,1:i-1) */
		i__2 = i__ - 1;
		i__3 = *n - i__;
		q__1.r = -1.f, q__1.i = -0.f;
		cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[(i__ + 
			1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b2, &
			a[i__ + (i__ + 1) * a_dim1], lda);
/*              Restore X(i,1:i-1) */
		i__2 = i__ - 1;
		clacgv_(&i__2, &x[i__ + x_dim1], ldx);

/*              Generate reflection P(i) to annihilate A(i,i+2:n) */

/*              alpha starts as A(i,i+1); the row vector handed to */
/*              clarfg_ starts at column min(i+2,n) and has stride lda */
		i__2 = i__ + (i__ + 1) * a_dim1;
		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
		i__2 = *n - i__;
/* Computing MIN */
		i__3 = i__ + 2;
		clarfg_(&i__2, &alpha, &a[i__ + min(i__3, *n)* a_dim1], lda, &
			taup[i__]);
/*              E(i) := real part of alpha; then set A(i,i+1) := 1 */
		i__2 = i__;
		e[i__2] = alpha.r;
		i__2 = i__ + (i__ + 1) * a_dim1;
		a[i__2].r = 1.f, a[i__2].i = 0.f;

/*              Compute X(i+1:m,i) */

/*              X(i+1:m,i) := A(i+1:m,i+1:n) * A(i,i+1:n) */
		i__2 = *m - i__;
		i__3 = *n - i__;
		cgemv_("No transpose", &i__2, &i__3, &c_b2, &a[i__ + 1 + (i__ 
			+ 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], 
			lda, &c_b1, &x[i__ + 1 + i__ * x_dim1], &c__1);
/*              X(1:i,i) := Y(i+1:n,1:i)' * A(i,i+1:n)   (scratch) */
		i__2 = *n - i__;
		cgemv_("Conjugate transpose", &i__2, &i__, &c_b2, &y[i__ + 1 
			+ y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &
			c_b1, &x[i__ * x_dim1 + 1], &c__1);
/*              X(i+1:m,i) := X(i+1:m,i) - A(i+1:m,1:i) * X(1:i,i) */
		i__2 = *m - i__;
		q__1.r = -1.f, q__1.i = -0.f;
		cgemv_("No transpose", &i__2, &i__, &q__1, &a[i__ + 1 + 
			a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b2, &x[
			i__ + 1 + i__ * x_dim1], &c__1);
/*              X(1:i-1,i) := A(1:i-1,i+1:n) * A(i,i+1:n)   (scratch) */
		i__2 = i__ - 1;
		i__3 = *n - i__;
		cgemv_("No transpose", &i__2, &i__3, &c_b2, &a[(i__ + 1) * 
			a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, &
			c_b1, &x[i__ * x_dim1 + 1], &c__1);
/*              X(i+1:m,i) := X(i+1:m,i) - X(i+1:m,1:i-1) * X(1:i-1,i) */
		i__2 = *m - i__;
		i__3 = i__ - 1;
		q__1.r = -1.f, q__1.i = -0.f;
		cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + 1 + 
			x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b2, &x[
			i__ + 1 + i__ * x_dim1], &c__1);
/*              X(i+1:m,i) := taup(i) * X(i+1:m,i) */
		i__2 = *m - i__;
		cscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
/*              Undo the conjugation of A(i,i+1:n) applied above */
		i__2 = *n - i__;
		clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
	    }
/* L10: */
	}
    } else {

/*        Reduce to lower bidiagonal form */

	i__1 = *nb;
	for (i__ = 1; i__ <= i__1; ++i__) {

/*           Update A(i,i:n) */

/*           Conjugate A(i,i:n); it stays conjugated until the end of */
/*           the X computation (or the else branch when i = m). */
/*           A(i,1:i-1) is conjugated only around the next product. */
	    i__2 = *n - i__ + 1;
	    clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
	    i__2 = i__ - 1;
	    clacgv_(&i__2, &a[i__ + a_dim1], lda);
/*           A(i,i:n) := A(i,i:n) - Y(i:n,1:i-1) * A(i,1:i-1) */
	    i__2 = *n - i__ + 1;
	    i__3 = i__ - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + y_dim1], ldy, 
		     &a[i__ + a_dim1], lda, &c_b2, &a[i__ + i__ * a_dim1], 
		    lda);
/*           Restore A(i,1:i-1); conjugate X(i,1:i-1) for the next one */
	    i__2 = i__ - 1;
	    clacgv_(&i__2, &a[i__ + a_dim1], lda);
	    i__2 = i__ - 1;
	    clacgv_(&i__2, &x[i__ + x_dim1], ldx);
/*           A(i,i:n) := A(i,i:n) - A(1:i-1,i:n)' * X(i,1:i-1) */
	    i__2 = i__ - 1;
	    i__3 = *n - i__ + 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[i__ * 
		    a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b2, &a[i__ + 
		    i__ * a_dim1], lda);
/*           Restore X(i,1:i-1) */
	    i__2 = i__ - 1;
	    clacgv_(&i__2, &x[i__ + x_dim1], ldx);

/*           Generate reflection P(i) to annihilate A(i,i+1:n) */

/*           alpha starts as A(i,i); the row vector handed to clarfg_ */
/*           starts at column min(i+1,n) and has stride lda */
	    i__2 = i__ + i__ * a_dim1;
	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
	    i__2 = *n - i__ + 1;
/* Computing MIN */
	    i__3 = i__ + 1;
	    clarfg_(&i__2, &alpha, &a[i__ + min(i__3, *n)* a_dim1], lda, &
		    taup[i__]);
/*           D(i) := real part of alpha as returned by clarfg_ */
	    i__2 = i__;
	    d__[i__2] = alpha.r;
	    if (i__ < *m) {
/*              Set A(i,i) := 1 so that A(i,i:n) can be used as the */
/*              reflector vector in the products below */
		i__2 = i__ + i__ * a_dim1;
		a[i__2].r = 1.f, a[i__2].i = 0.f;

/*              Compute X(i+1:m,i) */

/*              X(i+1:m,i) := A(i+1:m,i:n) * A(i,i:n) */
		i__2 = *m - i__;
		i__3 = *n - i__ + 1;
		cgemv_("No transpose", &i__2, &i__3, &c_b2, &a[i__ + 1 + i__ *
			 a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b1, &x[
			i__ + 1 + i__ * x_dim1], &c__1);
/*              X(1:i-1,i) := Y(i:n,1:i-1)' * A(i,i:n)   (scratch) */
		i__2 = *n - i__ + 1;
		i__3 = i__ - 1;
		cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &y[i__ + 
			y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b1, &x[
			i__ * x_dim1 + 1], &c__1);
/*              X(i+1:m,i) := X(i+1:m,i) - A(i+1:m,1:i-1) * X(1:i-1,i) */
		i__2 = *m - i__;
		i__3 = i__ - 1;
		q__1.r = -1.f, q__1.i = -0.f;
		cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + 
			a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b2, &x[
			i__ + 1 + i__ * x_dim1], &c__1);
/*              X(1:i-1,i) := A(1:i-1,i:n) * A(i,i:n)   (scratch) */
		i__2 = i__ - 1;
		i__3 = *n - i__ + 1;
		cgemv_("No transpose", &i__2, &i__3, &c_b2, &a[i__ * a_dim1 + 
			1], lda, &a[i__ + i__ * a_dim1], lda, &c_b1, &x[i__ * 
			x_dim1 + 1], &c__1);
/*              X(i+1:m,i) := X(i+1:m,i) - X(i+1:m,1:i-1) * X(1:i-1,i) */
		i__2 = *m - i__;
		i__3 = i__ - 1;
		q__1.r = -1.f, q__1.i = -0.f;
		cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + 1 + 
			x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b2, &x[
			i__ + 1 + i__ * x_dim1], &c__1);
/*              X(i+1:m,i) := taup(i) * X(i+1:m,i) */
		i__2 = *m - i__;
		cscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
/*              Undo the conjugation of A(i,i:n) applied at loop entry */
		i__2 = *n - i__ + 1;
		clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);

/*              Update A(i+1:m,i) */

/*              Conjugate Y(i,1:i-1) in place (undone after the product) */
		i__2 = i__ - 1;
		clacgv_(&i__2, &y[i__ + y_dim1], ldy);
/*              A(i+1:m,i) := A(i+1:m,i) */
/*                            - A(i+1:m,1:i-1) * conjg(Y(i,1:i-1)) */
		i__2 = *m - i__;
		i__3 = i__ - 1;
		q__1.r = -1.f, q__1.i = -0.f;
		cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + 
			a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b2, &a[i__ + 
			1 + i__ * a_dim1], &c__1);
/*              Restore Y(i,1:i-1) */
		i__2 = i__ - 1;
		clacgv_(&i__2, &y[i__ + y_dim1], ldy);
/*              A(i+1:m,i) := A(i+1:m,i) - X(i+1:m,1:i) * A(1:i,i) */
		i__2 = *m - i__;
		q__1.r = -1.f, q__1.i = -0.f;
		cgemv_("No transpose", &i__2, &i__, &q__1, &x[i__ + 1 + 
			x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b2, &a[
			i__ + 1 + i__ * a_dim1], &c__1);

/*              Generate reflection Q(i) to annihilate A(i+2:m,i) */

/*              alpha starts as A(i+1,i); the vector handed to clarfg_ */
/*              starts at row min(i+2,m) of column i */
		i__2 = i__ + 1 + i__ * a_dim1;
		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
		i__2 = *m - i__;
/* Computing MIN */
		i__3 = i__ + 2;
		clarfg_(&i__2, &alpha, &a[min(i__3, *m)+ i__ * a_dim1], &c__1, 
			 &tauq[i__]);
/*              E(i) := real part of alpha; then set A(i+1,i) := 1 */
		i__2 = i__;
		e[i__2] = alpha.r;
		i__2 = i__ + 1 + i__ * a_dim1;
		a[i__2].r = 1.f, a[i__2].i = 0.f;

/*              Compute Y(i+1:n,i) */

/*              Y(i+1:n,i) := A(i+1:m,i+1:n)' * A(i+1:m,i) */
		i__2 = *m - i__;
		i__3 = *n - i__;
		cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[i__ + 1 
			+ (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1]
, &c__1, &c_b1, &y[i__ + 1 + i__ * y_dim1], &c__1);
/*              Y(1:i-1,i) := A(i+1:m,1:i-1)' * A(i+1:m,i)   (scratch) */
		i__2 = *m - i__;
		i__3 = i__ - 1;
		cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[i__ + 1 
			+ a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
			c_b1, &y[i__ * y_dim1 + 1], &c__1);
/*              Y(i+1:n,i) := Y(i+1:n,i) - Y(i+1:n,1:i-1) * Y(1:i-1,i) */
		i__2 = *n - i__;
		i__3 = i__ - 1;
		q__1.r = -1.f, q__1.i = -0.f;
		cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + 1 + 
			y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b2, &y[
			i__ + 1 + i__ * y_dim1], &c__1);
/*              Y(1:i,i) := X(i+1:m,1:i)' * A(i+1:m,i)   (scratch) */
		i__2 = *m - i__;
		cgemv_("Conjugate transpose", &i__2, &i__, &c_b2, &x[i__ + 1 
			+ x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &
			c_b1, &y[i__ * y_dim1 + 1], &c__1);
/*              Y(i+1:n,i) := Y(i+1:n,i) - A(1:i,i+1:n)' * Y(1:i,i) */
		i__2 = *n - i__;
		q__1.r = -1.f, q__1.i = -0.f;
		cgemv_("Conjugate transpose", &i__, &i__2, &q__1, &a[(i__ + 1)
			 * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &
			c_b2, &y[i__ + 1 + i__ * y_dim1], &c__1);
/*              Y(i+1:n,i) := tauq(i) * Y(i+1:n,i) */
		i__2 = *n - i__;
		cscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
	    } else {
/*              i = m: no X or Y column is formed; only undo the */
/*              conjugation of A(i,i:n) applied at loop entry */
		i__2 = *n - i__ + 1;
		clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
	    }
/* L20: */
	}
    }
    return 0;

/*     End of CLABRD */

} /* clabrd_ */
Пример #5
0
/* Subroutine */
int ctgevc_(char *side, char *howmny, logical *select, integer *n, complex *s, integer *lds, complex *p, integer *ldp, complex *vl, integer *ldvl, complex *vr, integer *ldvr, integer *mm, integer *m, complex *work, real *rwork, integer *info)
{
    /* System generated locals */
    integer p_dim1, p_offset, s_dim1, s_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3, i__4, i__5;
    real r__1, r__2, r__3, r__4, r__5, r__6;
    complex q__1, q__2, q__3, q__4;
    /* Builtin functions */
    double r_imag(complex *);
    void r_cnjg(complex *, complex *);
    /* Local variables */
    complex d__;
    integer i__, j;
    complex ca, cb;
    integer je, im, jr;
    real big;
    logical lsa, lsb;
    real ulp;
    complex sum;
    integer ibeg, ieig, iend;
    real dmin__;
    integer isrc;
    real temp;
    complex suma, sumb;
    real xmax, scale;
    logical ilall;
    integer iside;
    real sbeta;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */
    int cgemv_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *);
    real small;
    logical compl;
    real anorm, bnorm;
    logical compr, ilbbad;
    real acoefa, bcoefa, acoeff;
    complex bcoeff;
    logical ilback;
    extern /* Subroutine */
    int slabad_(real *, real *);
    real ascale, bscale;
    extern /* Complex */
    VOID cladiv_(complex *, complex *, complex *);
    extern real slamch_(char *);
    complex salpha;
    real safmin;
    extern /* Subroutine */
    int xerbla_(char *, integer *);
    real bignum;
    logical ilcomp;
    integer ihwmny;
    /* -- LAPACK computational routine (version 3.4.0) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* November 2011 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Intrinsic Functions .. */
    /* .. */
    /* .. Statement Functions .. */
    /* .. */
    /* .. Statement Function definitions .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Decode and Test the input parameters */
    /* Parameter adjustments */
    --select;
    s_dim1 = *lds;
    s_offset = 1 + s_dim1;
    s -= s_offset;
    p_dim1 = *ldp;
    p_offset = 1 + p_dim1;
    p -= p_offset;
    vl_dim1 = *ldvl;
    vl_offset = 1 + vl_dim1;
    vl -= vl_offset;
    vr_dim1 = *ldvr;
    vr_offset = 1 + vr_dim1;
    vr -= vr_offset;
    --work;
    --rwork;
    /* Function Body */
    if (lsame_(howmny, "A"))
    {
        ihwmny = 1;
        ilall = TRUE_;
        ilback = FALSE_;
    }
    else if (lsame_(howmny, "S"))
    {
        ihwmny = 2;
        ilall = FALSE_;
        ilback = FALSE_;
    }
    else if (lsame_(howmny, "B"))
    {
        ihwmny = 3;
        ilall = TRUE_;
        ilback = TRUE_;
    }
    else
    {
        ihwmny = -1;
    }
    if (lsame_(side, "R"))
    {
        iside = 1;
        compl = FALSE_;
        compr = TRUE_;
    }
    else if (lsame_(side, "L"))
    {
        iside = 2;
        compl = TRUE_;
        compr = FALSE_;
    }
    else if (lsame_(side, "B"))
    {
        iside = 3;
        compl = TRUE_;
        compr = TRUE_;
    }
    else
    {
        iside = -1;
    }
    *info = 0;
    if (iside < 0)
    {
        *info = -1;
    }
    else if (ihwmny < 0)
    {
        *info = -2;
    }
    else if (*n < 0)
    {
        *info = -4;
    }
    else if (*lds < max(1,*n))
    {
        *info = -6;
    }
    else if (*ldp < max(1,*n))
    {
        *info = -8;
    }
    if (*info != 0)
    {
        i__1 = -(*info);
        xerbla_("CTGEVC", &i__1);
        return 0;
    }
    /* Count the number of eigenvectors */
    if (! ilall)
    {
        im = 0;
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            if (select[j])
            {
                ++im;
            }
            /* L10: */
        }
    }
    else
    {
        im = *n;
    }
    /* Check diagonal of B */
    ilbbad = FALSE_;
    i__1 = *n;
    for (j = 1;
            j <= i__1;
            ++j)
    {
        if (r_imag(&p[j + j * p_dim1]) != 0.f)
        {
            ilbbad = TRUE_;
        }
        /* L20: */
    }
    if (ilbbad)
    {
        *info = -7;
    }
    else if (compl && *ldvl < *n || *ldvl < 1)
    {
        *info = -10;
    }
    else if (compr && *ldvr < *n || *ldvr < 1)
    {
        *info = -12;
    }
    else if (*mm < im)
    {
        *info = -13;
    }
    if (*info != 0)
    {
        i__1 = -(*info);
        xerbla_("CTGEVC", &i__1);
        return 0;
    }
    /* Quick return if possible */
    *m = im;
    if (*n == 0)
    {
        return 0;
    }
    /* Machine Constants */
    safmin = slamch_("Safe minimum");
    big = 1.f / safmin;
    slabad_(&safmin, &big);
    ulp = slamch_("Epsilon") * slamch_("Base");
    small = safmin * *n / ulp;
    big = 1.f / small;
    bignum = 1.f / (safmin * *n);
    /* Compute the 1-norm of each column of the strictly upper triangular */
    /* part of A and B to check for possible overflow in the triangular */
    /* solver. */
    i__1 = s_dim1 + 1;
    anorm = (r__1 = s[i__1].r, abs(r__1)) + (r__2 = r_imag(&s[s_dim1 + 1]), abs(r__2));
    i__1 = p_dim1 + 1;
    bnorm = (r__1 = p[i__1].r, abs(r__1)) + (r__2 = r_imag(&p[p_dim1 + 1]), abs(r__2));
    rwork[1] = 0.f;
    rwork[*n + 1] = 0.f;
    i__1 = *n;
    for (j = 2;
            j <= i__1;
            ++j)
    {
        rwork[j] = 0.f;
        rwork[*n + j] = 0.f;
        i__2 = j - 1;
        for (i__ = 1;
                i__ <= i__2;
                ++i__)
        {
            i__3 = i__ + j * s_dim1;
            rwork[j] += (r__1 = s[i__3].r, abs(r__1)) + (r__2 = r_imag(&s[i__ + j * s_dim1]), abs(r__2));
            i__3 = i__ + j * p_dim1;
            rwork[*n + j] += (r__1 = p[i__3].r, abs(r__1)) + (r__2 = r_imag(& p[i__ + j * p_dim1]), abs(r__2));
            /* L30: */
        }
        /* Computing MAX */
        i__2 = j + j * s_dim1;
        r__3 = anorm;
        r__4 = rwork[j] + ((r__1 = s[i__2].r, abs(r__1)) + ( r__2 = r_imag(&s[j + j * s_dim1]), abs(r__2))); // , expr subst
        anorm = max(r__3,r__4);
        /* Computing MAX */
        i__2 = j + j * p_dim1;
        r__3 = bnorm;
        r__4 = rwork[*n + j] + ((r__1 = p[i__2].r, abs(r__1)) + (r__2 = r_imag(&p[j + j * p_dim1]), abs(r__2))); // , expr subst
        bnorm = max(r__3,r__4);
        /* L40: */
    }
    ascale = 1.f / max(anorm,safmin);
    bscale = 1.f / max(bnorm,safmin);
    /* Left eigenvectors */
    if (compl)
    {
        ieig = 0;
        /* Main loop over eigenvalues */
        i__1 = *n;
        for (je = 1;
                je <= i__1;
                ++je)
        {
            if (ilall)
            {
                ilcomp = TRUE_;
            }
            else
            {
                ilcomp = select[je];
            }
            if (ilcomp)
            {
                ++ieig;
                i__2 = je + je * s_dim1;
                i__3 = je + je * p_dim1;
                if ((r__2 = s[i__2].r, abs(r__2)) + (r__3 = r_imag(&s[je + je * s_dim1]), abs(r__3)) <= safmin && (r__1 = p[i__3].r, abs(r__1)) <= safmin)
                {
                    /* Singular matrix pencil -- return unit eigenvector */
                    i__2 = *n;
                    for (jr = 1;
                            jr <= i__2;
                            ++jr)
                    {
                        i__3 = jr + ieig * vl_dim1;
                        vl[i__3].r = 0.f;
                        vl[i__3].i = 0.f; // , expr subst
                        /* L50: */
                    }
                    i__2 = ieig + ieig * vl_dim1;
                    vl[i__2].r = 1.f;
                    vl[i__2].i = 0.f; // , expr subst
                    goto L140;
                }
                /* Non-singular eigenvalue: */
                /* Compute coefficients a and b in */
                /* H */
                /* y ( a A - b B ) = 0 */
                /* Computing MAX */
                i__2 = je + je * s_dim1;
                i__3 = je + je * p_dim1;
                r__4 = ((r__2 = s[i__2].r, abs(r__2)) + (r__3 = r_imag(&s[je + je * s_dim1]), abs(r__3))) * ascale;
                r__5 = (r__1 = p[i__3].r, abs(r__1)) * bscale;
                r__4 = max(r__4,r__5); // ; expr subst
                temp = 1.f / max(r__4,safmin);
                i__2 = je + je * s_dim1;
                q__2.r = temp * s[i__2].r;
                q__2.i = temp * s[i__2].i; // , expr subst
                q__1.r = ascale * q__2.r;
                q__1.i = ascale * q__2.i; // , expr subst
                salpha.r = q__1.r;
                salpha.i = q__1.i; // , expr subst
                i__2 = je + je * p_dim1;
                sbeta = temp * p[i__2].r * bscale;
                acoeff = sbeta * ascale;
                q__1.r = bscale * salpha.r;
                q__1.i = bscale * salpha.i; // , expr subst
                bcoeff.r = q__1.r;
                bcoeff.i = q__1.i; // , expr subst
                /* Scale to avoid underflow */
                lsa = abs(sbeta) >= safmin && abs(acoeff) < small;
                lsb = (r__1 = salpha.r, abs(r__1)) + (r__2 = r_imag(&salpha), abs(r__2)) >= safmin && (r__3 = bcoeff.r, abs(r__3)) + (r__4 = r_imag(&bcoeff), abs(r__4)) < small;
                scale = 1.f;
                if (lsa)
                {
                    scale = small / abs(sbeta) * min(anorm,big);
                }
                if (lsb)
                {
                    /* Computing MAX */
                    r__3 = scale;
                    r__4 = small / ((r__1 = salpha.r, abs(r__1)) + (r__2 = r_imag(&salpha), abs(r__2))) * min( bnorm,big); // , expr subst
                    scale = max(r__3,r__4);
                }
                if (lsa || lsb)
                {
                    /* Computing MIN */
                    /* Computing MAX */
                    r__5 = 1.f, r__6 = abs(acoeff);
                    r__5 = max(r__5,r__6);
                    r__6 = (r__1 = bcoeff.r, abs(r__1)) + (r__2 = r_imag(&bcoeff), abs(r__2)); // ; expr subst
                    r__3 = scale;
                    r__4 = 1.f / (safmin * max(r__5,r__6)); // , expr subst
                    scale = min(r__3,r__4);
                    if (lsa)
                    {
                        acoeff = ascale * (scale * sbeta);
                    }
                    else
                    {
                        acoeff = scale * acoeff;
                    }
                    if (lsb)
                    {
                        q__2.r = scale * salpha.r;
                        q__2.i = scale * salpha.i; // , expr subst
                        q__1.r = bscale * q__2.r;
                        q__1.i = bscale * q__2.i; // , expr subst
                        bcoeff.r = q__1.r;
                        bcoeff.i = q__1.i; // , expr subst
                    }
                    else
                    {
                        q__1.r = scale * bcoeff.r;
                        q__1.i = scale * bcoeff.i; // , expr subst
                        bcoeff.r = q__1.r;
                        bcoeff.i = q__1.i; // , expr subst
                    }
                }
                acoefa = abs(acoeff);
                bcoefa = (r__1 = bcoeff.r, abs(r__1)) + (r__2 = r_imag(& bcoeff), abs(r__2));
                xmax = 1.f;
                i__2 = *n;
                for (jr = 1;
                        jr <= i__2;
                        ++jr)
                {
                    i__3 = jr;
                    work[i__3].r = 0.f;
                    work[i__3].i = 0.f; // , expr subst
                    /* L60: */
                }
                i__2 = je;
                work[i__2].r = 1.f;
                work[i__2].i = 0.f; // , expr subst
                /* Computing MAX */
                r__1 = ulp * acoefa * anorm;
                r__2 = ulp * bcoefa * bnorm;
                r__1 = max(r__1,r__2); // ; expr subst
                dmin__ = max(r__1,safmin);
                /* H */
                /* Triangular solve of (a A - b B) y = 0 */
                /* H */
                /* (rowwise in (a A - b B) , or columnwise in a A - b B) */
                i__2 = *n;
                for (j = je + 1;
                        j <= i__2;
                        ++j)
                {
                    /* Compute */
                    /* j-1 */
                    /* SUM = sum conjg( a*S(k,j) - b*P(k,j) )*x(k) */
                    /* k=je */
                    /* (Scale if necessary) */
                    temp = 1.f / xmax;
                    if (acoefa * rwork[j] + bcoefa * rwork[*n + j] > bignum * temp)
                    {
                        i__3 = j - 1;
                        for (jr = je;
                                jr <= i__3;
                                ++jr)
                        {
                            i__4 = jr;
                            i__5 = jr;
                            q__1.r = temp * work[i__5].r;
                            q__1.i = temp * work[i__5].i; // , expr subst
                            work[i__4].r = q__1.r;
                            work[i__4].i = q__1.i; // , expr subst
                            /* L70: */
                        }
                        xmax = 1.f;
                    }
                    suma.r = 0.f;
                    suma.i = 0.f; // , expr subst
                    sumb.r = 0.f;
                    sumb.i = 0.f; // , expr subst
                    i__3 = j - 1;
                    for (jr = je;
                            jr <= i__3;
                            ++jr)
                    {
                        r_cnjg(&q__3, &s[jr + j * s_dim1]);
                        i__4 = jr;
                        q__2.r = q__3.r * work[i__4].r - q__3.i * work[i__4] .i;
                        q__2.i = q__3.r * work[i__4].i + q__3.i * work[i__4].r; // , expr subst
                        q__1.r = suma.r + q__2.r;
                        q__1.i = suma.i + q__2.i; // , expr subst
                        suma.r = q__1.r;
                        suma.i = q__1.i; // , expr subst
                        r_cnjg(&q__3, &p[jr + j * p_dim1]);
                        i__4 = jr;
                        q__2.r = q__3.r * work[i__4].r - q__3.i * work[i__4] .i;
                        q__2.i = q__3.r * work[i__4].i + q__3.i * work[i__4].r; // , expr subst
                        q__1.r = sumb.r + q__2.r;
                        q__1.i = sumb.i + q__2.i; // , expr subst
                        sumb.r = q__1.r;
                        sumb.i = q__1.i; // , expr subst
                        /* L80: */
                    }
                    q__2.r = acoeff * suma.r;
                    q__2.i = acoeff * suma.i; // , expr subst
                    r_cnjg(&q__4, &bcoeff);
                    q__3.r = q__4.r * sumb.r - q__4.i * sumb.i;
                    q__3.i = q__4.r * sumb.i + q__4.i * sumb.r; // , expr subst
                    q__1.r = q__2.r - q__3.r;
                    q__1.i = q__2.i - q__3.i; // , expr subst
                    sum.r = q__1.r;
                    sum.i = q__1.i; // , expr subst
                    /* Form x(j) = - SUM / conjg( a*S(j,j) - b*P(j,j) ) */
                    /* with scaling and perturbation of the denominator */
                    i__3 = j + j * s_dim1;
                    q__3.r = acoeff * s[i__3].r;
                    q__3.i = acoeff * s[i__3].i; // , expr subst
                    i__4 = j + j * p_dim1;
                    q__4.r = bcoeff.r * p[i__4].r - bcoeff.i * p[i__4].i;
                    q__4.i = bcoeff.r * p[i__4].i + bcoeff.i * p[i__4] .r; // , expr subst
                    q__2.r = q__3.r - q__4.r;
                    q__2.i = q__3.i - q__4.i; // , expr subst
                    r_cnjg(&q__1, &q__2);
                    d__.r = q__1.r;
                    d__.i = q__1.i; // , expr subst
                    if ((r__1 = d__.r, abs(r__1)) + (r__2 = r_imag(&d__), abs( r__2)) <= dmin__)
                    {
                        q__1.r = dmin__;
                        q__1.i = 0.f; // , expr subst
                        d__.r = q__1.r;
                        d__.i = q__1.i; // , expr subst
                    }
                    if ((r__1 = d__.r, abs(r__1)) + (r__2 = r_imag(&d__), abs( r__2)) < 1.f)
                    {
                        if ((r__1 = sum.r, abs(r__1)) + (r__2 = r_imag(&sum), abs(r__2)) >= bignum * ((r__3 = d__.r, abs( r__3)) + (r__4 = r_imag(&d__), abs(r__4))))
                        {
                            temp = 1.f / ((r__1 = sum.r, abs(r__1)) + (r__2 = r_imag(&sum), abs(r__2)));
                            i__3 = j - 1;
                            for (jr = je;
                                    jr <= i__3;
                                    ++jr)
                            {
                                i__4 = jr;
                                i__5 = jr;
                                q__1.r = temp * work[i__5].r;
                                q__1.i = temp * work[i__5].i; // , expr subst
                                work[i__4].r = q__1.r;
                                work[i__4].i = q__1.i; // , expr subst
                                /* L90: */
                            }
                            xmax = temp * xmax;
                            q__1.r = temp * sum.r;
                            q__1.i = temp * sum.i; // , expr subst
                            sum.r = q__1.r;
                            sum.i = q__1.i; // , expr subst
                        }
                    }
                    i__3 = j;
                    q__2.r = -sum.r;
                    q__2.i = -sum.i; // , expr subst
                    cladiv_(&q__1, &q__2, &d__);
                    work[i__3].r = q__1.r;
                    work[i__3].i = q__1.i; // , expr subst
                    /* Computing MAX */
                    i__3 = j;
                    r__3 = xmax;
                    r__4 = (r__1 = work[i__3].r, abs(r__1)) + ( r__2 = r_imag(&work[j]), abs(r__2)); // , expr subst
                    xmax = max(r__3,r__4);
                    /* L100: */
                }
                /* Back transform eigenvector if HOWMNY='B'. */
                if (ilback)
                {
                    i__2 = *n + 1 - je;
                    cgemv_("N", n, &i__2, &c_b2, &vl[je * vl_dim1 + 1], ldvl, &work[je], &c__1, &c_b1, &work[*n + 1], &c__1);
                    isrc = 2;
                    ibeg = 1;
                }
                else
                {
                    isrc = 1;
                    ibeg = je;
                }
                /* Copy and scale eigenvector into column of VL */
                xmax = 0.f;
                i__2 = *n;
                for (jr = ibeg;
                        jr <= i__2;
                        ++jr)
                {
                    /* Computing MAX */
                    i__3 = (isrc - 1) * *n + jr;
                    r__3 = xmax;
                    r__4 = (r__1 = work[i__3].r, abs(r__1)) + ( r__2 = r_imag(&work[(isrc - 1) * *n + jr]), abs( r__2)); // , expr subst
                    xmax = max(r__3,r__4);
                    /* L110: */
                }
                if (xmax > safmin)
                {
                    temp = 1.f / xmax;
                    i__2 = *n;
                    for (jr = ibeg;
                            jr <= i__2;
                            ++jr)
                    {
                        i__3 = jr + ieig * vl_dim1;
                        i__4 = (isrc - 1) * *n + jr;
                        q__1.r = temp * work[i__4].r;
                        q__1.i = temp * work[ i__4].i; // , expr subst
                        vl[i__3].r = q__1.r;
                        vl[i__3].i = q__1.i; // , expr subst
                        /* L120: */
                    }
                }
                else
                {
                    ibeg = *n + 1;
                }
                i__2 = ibeg - 1;
                for (jr = 1;
                        jr <= i__2;
                        ++jr)
                {
                    i__3 = jr + ieig * vl_dim1;
                    vl[i__3].r = 0.f;
                    vl[i__3].i = 0.f; // , expr subst
                    /* L130: */
                }
            }
L140:
            ;
        }
    }
    /* Right eigenvectors */
    if (compr)
    {
        ieig = im + 1;
        /* Main loop over eigenvalues */
        for (je = *n;
                je >= 1;
                --je)
        {
            if (ilall)
            {
                ilcomp = TRUE_;
            }
            else
            {
                ilcomp = select[je];
            }
            if (ilcomp)
            {
                --ieig;
                i__1 = je + je * s_dim1;
                i__2 = je + je * p_dim1;
                if ((r__2 = s[i__1].r, abs(r__2)) + (r__3 = r_imag(&s[je + je * s_dim1]), abs(r__3)) <= safmin && (r__1 = p[i__2].r, abs(r__1)) <= safmin)
                {
                    /* Singular matrix pencil -- return unit eigenvector */
                    i__1 = *n;
                    for (jr = 1;
                            jr <= i__1;
                            ++jr)
                    {
                        i__2 = jr + ieig * vr_dim1;
                        vr[i__2].r = 0.f;
                        vr[i__2].i = 0.f; // , expr subst
                        /* L150: */
                    }
                    i__1 = ieig + ieig * vr_dim1;
                    vr[i__1].r = 1.f;
                    vr[i__1].i = 0.f; // , expr subst
                    goto L250;
                }
                /* Non-singular eigenvalue: */
                /* Compute coefficients a and b in */
                /* ( a A - b B ) x = 0 */
                /* Computing MAX */
                i__1 = je + je * s_dim1;
                i__2 = je + je * p_dim1;
                r__4 = ((r__2 = s[i__1].r, abs(r__2)) + (r__3 = r_imag(&s[je + je * s_dim1]), abs(r__3))) * ascale;
                r__5 = (r__1 = p[i__2].r, abs(r__1)) * bscale;
                r__4 = max(r__4,r__5); // ; expr subst
                temp = 1.f / max(r__4,safmin);
                i__1 = je + je * s_dim1;
                q__2.r = temp * s[i__1].r;
                q__2.i = temp * s[i__1].i; // , expr subst
                q__1.r = ascale * q__2.r;
                q__1.i = ascale * q__2.i; // , expr subst
                salpha.r = q__1.r;
                salpha.i = q__1.i; // , expr subst
                i__1 = je + je * p_dim1;
                sbeta = temp * p[i__1].r * bscale;
                acoeff = sbeta * ascale;
                q__1.r = bscale * salpha.r;
                q__1.i = bscale * salpha.i; // , expr subst
                bcoeff.r = q__1.r;
                bcoeff.i = q__1.i; // , expr subst
                /* Scale to avoid underflow */
                lsa = abs(sbeta) >= safmin && abs(acoeff) < small;
                lsb = (r__1 = salpha.r, abs(r__1)) + (r__2 = r_imag(&salpha), abs(r__2)) >= safmin && (r__3 = bcoeff.r, abs(r__3)) + (r__4 = r_imag(&bcoeff), abs(r__4)) < small;
                scale = 1.f;
                if (lsa)
                {
                    scale = small / abs(sbeta) * min(anorm,big);
                }
                if (lsb)
                {
                    /* Computing MAX */
                    r__3 = scale;
                    r__4 = small / ((r__1 = salpha.r, abs(r__1)) + (r__2 = r_imag(&salpha), abs(r__2))) * min( bnorm,big); // , expr subst
                    scale = max(r__3,r__4);
                }
                if (lsa || lsb)
                {
                    /* Computing MIN */
                    /* Computing MAX */
                    r__5 = 1.f, r__6 = abs(acoeff);
                    r__5 = max(r__5,r__6);
                    r__6 = (r__1 = bcoeff.r, abs(r__1)) + (r__2 = r_imag(&bcoeff), abs(r__2)); // ; expr subst
                    r__3 = scale;
                    r__4 = 1.f / (safmin * max(r__5,r__6)); // , expr subst
                    scale = min(r__3,r__4);
                    if (lsa)
                    {
                        acoeff = ascale * (scale * sbeta);
                    }
                    else
                    {
                        acoeff = scale * acoeff;
                    }
                    if (lsb)
                    {
                        q__2.r = scale * salpha.r;
                        q__2.i = scale * salpha.i; // , expr subst
                        q__1.r = bscale * q__2.r;
                        q__1.i = bscale * q__2.i; // , expr subst
                        bcoeff.r = q__1.r;
                        bcoeff.i = q__1.i; // , expr subst
                    }
                    else
                    {
                        q__1.r = scale * bcoeff.r;
                        q__1.i = scale * bcoeff.i; // , expr subst
                        bcoeff.r = q__1.r;
                        bcoeff.i = q__1.i; // , expr subst
                    }
                }
                acoefa = abs(acoeff);
                bcoefa = (r__1 = bcoeff.r, abs(r__1)) + (r__2 = r_imag(& bcoeff), abs(r__2));
                xmax = 1.f;
                i__1 = *n;
                for (jr = 1;
                        jr <= i__1;
                        ++jr)
                {
                    i__2 = jr;
                    work[i__2].r = 0.f;
                    work[i__2].i = 0.f; // , expr subst
                    /* L160: */
                }
                i__1 = je;
                work[i__1].r = 1.f;
                work[i__1].i = 0.f; // , expr subst
                /* Computing MAX */
                r__1 = ulp * acoefa * anorm;
                r__2 = ulp * bcoefa * bnorm;
                r__1 = max(r__1,r__2); // ; expr subst
                dmin__ = max(r__1,safmin);
                /* Triangular solve of (a A - b B) x = 0 (columnwise) */
                /* WORK(1:j-1) contains sums w, */
                /* WORK(j+1:JE) contains x */
                i__1 = je - 1;
                for (jr = 1;
                        jr <= i__1;
                        ++jr)
                {
                    i__2 = jr;
                    i__3 = jr + je * s_dim1;
                    q__2.r = acoeff * s[i__3].r;
                    q__2.i = acoeff * s[i__3].i; // , expr subst
                    i__4 = jr + je * p_dim1;
                    q__3.r = bcoeff.r * p[i__4].r - bcoeff.i * p[i__4].i;
                    q__3.i = bcoeff.r * p[i__4].i + bcoeff.i * p[i__4] .r; // , expr subst
                    q__1.r = q__2.r - q__3.r;
                    q__1.i = q__2.i - q__3.i; // , expr subst
                    work[i__2].r = q__1.r;
                    work[i__2].i = q__1.i; // , expr subst
                    /* L170: */
                }
                i__1 = je;
                work[i__1].r = 1.f;
                work[i__1].i = 0.f; // , expr subst
                for (j = je - 1;
                        j >= 1;
                        --j)
                {
                    /* Form x(j) := - w(j) / d */
                    /* with scaling and perturbation of the denominator */
                    i__1 = j + j * s_dim1;
                    q__2.r = acoeff * s[i__1].r;
                    q__2.i = acoeff * s[i__1].i; // , expr subst
                    i__2 = j + j * p_dim1;
                    q__3.r = bcoeff.r * p[i__2].r - bcoeff.i * p[i__2].i;
                    q__3.i = bcoeff.r * p[i__2].i + bcoeff.i * p[i__2] .r; // , expr subst
                    q__1.r = q__2.r - q__3.r;
                    q__1.i = q__2.i - q__3.i; // , expr subst
                    d__.r = q__1.r;
                    d__.i = q__1.i; // , expr subst
                    if ((r__1 = d__.r, abs(r__1)) + (r__2 = r_imag(&d__), abs( r__2)) <= dmin__)
                    {
                        q__1.r = dmin__;
                        q__1.i = 0.f; // , expr subst
                        d__.r = q__1.r;
                        d__.i = q__1.i; // , expr subst
                    }
                    if ((r__1 = d__.r, abs(r__1)) + (r__2 = r_imag(&d__), abs( r__2)) < 1.f)
                    {
                        i__1 = j;
                        if ((r__1 = work[i__1].r, abs(r__1)) + (r__2 = r_imag( &work[j]), abs(r__2)) >= bignum * ((r__3 = d__.r, abs(r__3)) + (r__4 = r_imag(&d__), abs( r__4))))
                        {
                            i__1 = j;
                            temp = 1.f / ((r__1 = work[i__1].r, abs(r__1)) + ( r__2 = r_imag(&work[j]), abs(r__2)));
                            i__1 = je;
                            for (jr = 1;
                                    jr <= i__1;
                                    ++jr)
                            {
                                i__2 = jr;
                                i__3 = jr;
                                q__1.r = temp * work[i__3].r;
                                q__1.i = temp * work[i__3].i; // , expr subst
                                work[i__2].r = q__1.r;
                                work[i__2].i = q__1.i; // , expr subst
                                /* L180: */
                            }
                        }
                    }
                    i__1 = j;
                    i__2 = j;
                    q__2.r = -work[i__2].r;
                    q__2.i = -work[i__2].i; // , expr subst
                    cladiv_(&q__1, &q__2, &d__);
                    work[i__1].r = q__1.r;
                    work[i__1].i = q__1.i; // , expr subst
                    if (j > 1)
                    {
                        /* w = w + x(j)*(a S(*,j) - b P(*,j) ) with scaling */
                        i__1 = j;
                        if ((r__1 = work[i__1].r, abs(r__1)) + (r__2 = r_imag( &work[j]), abs(r__2)) > 1.f)
                        {
                            i__1 = j;
                            temp = 1.f / ((r__1 = work[i__1].r, abs(r__1)) + ( r__2 = r_imag(&work[j]), abs(r__2)));
                            if (acoefa * rwork[j] + bcoefa * rwork[*n + j] >= bignum * temp)
                            {
                                i__1 = je;
                                for (jr = 1;
                                        jr <= i__1;
                                        ++jr)
                                {
                                    i__2 = jr;
                                    i__3 = jr;
                                    q__1.r = temp * work[i__3].r;
                                    q__1.i = temp * work[i__3].i; // , expr subst
                                    work[i__2].r = q__1.r;
                                    work[i__2].i = q__1.i; // , expr subst
                                    /* L190: */
                                }
                            }
                        }
                        i__1 = j;
                        q__1.r = acoeff * work[i__1].r;
                        q__1.i = acoeff * work[i__1].i; // , expr subst
                        ca.r = q__1.r;
                        ca.i = q__1.i; // , expr subst
                        i__1 = j;
                        q__1.r = bcoeff.r * work[i__1].r - bcoeff.i * work[ i__1].i;
                        q__1.i = bcoeff.r * work[i__1].i + bcoeff.i * work[i__1].r; // , expr subst
                        cb.r = q__1.r;
                        cb.i = q__1.i; // , expr subst
                        i__1 = j - 1;
                        for (jr = 1;
                                jr <= i__1;
                                ++jr)
                        {
                            i__2 = jr;
                            i__3 = jr;
                            i__4 = jr + j * s_dim1;
                            q__3.r = ca.r * s[i__4].r - ca.i * s[i__4].i;
                            q__3.i = ca.r * s[i__4].i + ca.i * s[i__4] .r; // , expr subst
                            q__2.r = work[i__3].r + q__3.r;
                            q__2.i = work[ i__3].i + q__3.i; // , expr subst
                            i__5 = jr + j * p_dim1;
                            q__4.r = cb.r * p[i__5].r - cb.i * p[i__5].i;
                            q__4.i = cb.r * p[i__5].i + cb.i * p[i__5] .r; // , expr subst
                            q__1.r = q__2.r - q__4.r;
                            q__1.i = q__2.i - q__4.i; // , expr subst
                            work[i__2].r = q__1.r;
                            work[i__2].i = q__1.i; // , expr subst
                            /* L200: */
                        }
                    }
                    /* L210: */
                }
                /* Back transform eigenvector if HOWMNY='B'. */
                if (ilback)
                {
                    cgemv_("N", n, &je, &c_b2, &vr[vr_offset], ldvr, &work[1], &c__1, &c_b1, &work[*n + 1], &c__1);
                    isrc = 2;
                    iend = *n;
                }
                else
                {
                    isrc = 1;
                    iend = je;
                }
                /* Copy and scale eigenvector into column of VR */
                xmax = 0.f;
                i__1 = iend;
                for (jr = 1;
                        jr <= i__1;
                        ++jr)
                {
                    /* Computing MAX */
                    i__2 = (isrc - 1) * *n + jr;
                    r__3 = xmax;
                    r__4 = (r__1 = work[i__2].r, abs(r__1)) + ( r__2 = r_imag(&work[(isrc - 1) * *n + jr]), abs( r__2)); // , expr subst
                    xmax = max(r__3,r__4);
                    /* L220: */
                }
                if (xmax > safmin)
                {
                    temp = 1.f / xmax;
                    i__1 = iend;
                    for (jr = 1;
                            jr <= i__1;
                            ++jr)
                    {
                        i__2 = jr + ieig * vr_dim1;
                        i__3 = (isrc - 1) * *n + jr;
                        q__1.r = temp * work[i__3].r;
                        q__1.i = temp * work[ i__3].i; // , expr subst
                        vr[i__2].r = q__1.r;
                        vr[i__2].i = q__1.i; // , expr subst
                        /* L230: */
                    }
                }
                else
                {
                    iend = 0;
                }
                i__1 = *n;
                for (jr = iend + 1;
                        jr <= i__1;
                        ++jr)
                {
                    i__2 = jr + ieig * vr_dim1;
                    vr[i__2].r = 0.f;
                    vr[i__2].i = 0.f; // , expr subst
                    /* L240: */
                }
            }
L250:
            ;
        }
    }
    return 0;
    /* End of CTGEVC */
}
Пример #6
0
/* Subroutine */ int clarf_(char *side, integer *m, integer *n, complex *v, 
	integer *incv, complex *tau, complex *c__, integer *ldc, complex *
	work)
{
/*  CLARF: apply a complex elementary reflector */

/*        H = I - tau * v * v' */

/*  to the complex M-by-N matrix C, overwriting C with H * C (SIDE = 'L') */
/*  or with C * H (SIDE = 'R').  tau is a complex scalar, v a complex */
/*  vector.  When tau = 0, H is the identity and C is left untouched. */
/*  Passing conjg(tau) in place of tau applies H' instead of H. */

/*  Arguments */
/*  ========= */

/*  SIDE    (input) CHARACTER*1 */
/*          'L' selects H * C, 'R' selects C * H. */

/*  M, N    (input) INTEGER */
/*          Row and column counts of C. */

/*  V       (input) COMPLEX array, dimension */
/*          (1 + (M-1)*abs(INCV)) for SIDE = 'L', */
/*          (1 + (N-1)*abs(INCV)) for SIDE = 'R'. */
/*          The reflector vector v; not read when TAU = 0. */

/*  INCV    (input) INTEGER */
/*          Stride between consecutive elements of v.  INCV <> 0. */

/*  TAU     (input) COMPLEX */
/*          The scalar tau. */

/*  C       (input/output) COMPLEX array, dimension (LDC,N) */
/*          The matrix C on entry, H * C or C * H on exit. */

/*  LDC     (input) INTEGER */
/*          Leading dimension of C.  LDC >= max(1,M). */

/*  WORK    (workspace) COMPLEX array, dimension */
/*          (N) for SIDE = 'L', (M) for SIDE = 'R'. */

/*  Always returns 0. */

/*  ===================================================================== */

    /* Local variables */
    logical from_left;
    integer vlen, clen, vpos;
    complex neg_tau;

    /* Function Body */
    from_left = lsame_(side, "L");

/*     tau = 0 means H = I: there is nothing to apply. */
    if (tau->r == 0.f && tau->i == 0.f) {
	return 0;
    }

/*     vlen starts as the full length of v and is shrunk past trailing */
/*     zero entries.  vpos is the zero-based storage index of logical */
/*     element vlen; for a non-positive stride that element sits at the */
/*     start of the array and the walk moves forward. */
    vlen = from_left ? *m : *n;
    vpos = (*incv > 0) ? (vlen - 1) * *incv : 0;
    while (vlen > 0 && v[vpos].r == 0.f && v[vpos].i == 0.f) {
	--vlen;
	vpos -= *incv;
    }

/*     Trim C as well: last non-zero column of C(1:vlen,:) when applying */
/*     from the left, last non-zero row of C(:,1:vlen) otherwise. */
    if (from_left) {
	clen = ilaclc_(&vlen, n, c__, ldc);
    } else {
	clen = ilaclr_(m, &vlen, c__, ldc);
    }

/*     v is entirely zero, so H = I.  (clen = 0 needs no special case: */
/*     it turns the BLAS calls below into no-ops.) */
    if (vlen <= 0) {
	return 0;
    }

    neg_tau.r = -tau->r;
    neg_tau.i = -tau->i;

/*     NOTE(review): c_b1, c_b2 and c__1 are file-level constants that are */
/*     not visible in this excerpt; the w := ... formulas below presume */
/*     c_b1 = 1, c_b2 = 0 and c__1 = 1 -- confirm against their definitions. */
    if (from_left) {

/*        H * C:  w(1:clen) := C(1:vlen,1:clen)' * v(1:vlen), */
/*        followed by the rank-1 update with -tau, v and w. */

	cgemv_("Conjugate transpose", &vlen, &clen, &c_b1, c__, ldc, v, 
		incv, &c_b2, work, &c__1);
	cgerc_(&vlen, &clen, &neg_tau, v, incv, work, &c__1, c__, ldc);
    } else {

/*        C * H:  w(1:clen) := C(1:clen,1:vlen) * v(1:vlen), */
/*        followed by the rank-1 update with -tau, w and v. */

	cgemv_("No transpose", &clen, &vlen, &c_b1, c__, ldc, v, incv, 
		&c_b2, work, &c__1);
	cgerc_(&clen, &vlen, &neg_tau, work, &c__1, v, incv, c__, ldc);
    }
    return 0;

/*     End of CLARF */

} /* clarf_ */
Пример #7
0
/* Subroutine */ int csytrs_(char *uplo, integer *n, integer *nrhs, complex *
	a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer *
	info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
    complex q__1, q__2, q__3;

    /* Builtin functions */
    void c_div(complex *, complex *, complex *);

    /* Local variables */
    integer j, k;
    complex ak, bk;
    integer kp;
    complex akm1, bkm1, akm1k;
    extern /* Subroutine */ int cscal_(integer *, complex *, complex *, 
	    integer *);
    extern logical lsame_(char *, char *);
    complex denom;
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
, complex *, integer *, complex *, integer *, complex *, complex *
, integer *), cgeru_(integer *, integer *, complex *, 
	    complex *, integer *, complex *, integer *, complex *, integer *),
	     cswap_(integer *, complex *, integer *, complex *, integer *);
    logical upper;
    extern /* Subroutine */ int xerbla_(char *, integer *);


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CSYTRS solves a system of linear equations A*X = B with a complex */
/*  symmetric matrix A using the factorization A = U*D*U**T or */
/*  A = L*D*L**T computed by CSYTRF. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the details of the factorization are stored */
/*          as an upper or lower triangular matrix. */
/*          = 'U':  Upper triangular, form is A = U*D*U**T; */
/*          = 'L':  Lower triangular, form is A = L*D*L**T. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  NRHS    (input) INTEGER */
/*          The number of right hand sides, i.e., the number of columns */
/*          of the matrix B.  NRHS >= 0. */

/*  A       (input) COMPLEX array, dimension (LDA,N) */
/*          The block diagonal matrix D and the multipliers used to */
/*          obtain the factor U or L as computed by CSYTRF. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  IPIV    (input) INTEGER array, dimension (N) */
/*          Details of the interchanges and the block structure of D */
/*          as determined by CSYTRF. */

/*  B       (input/output) COMPLEX array, dimension (LDB,NRHS) */
/*          On entry, the right hand side matrix B. */
/*          On exit, the solution matrix X. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= max(1,N). */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CSYTRS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
	return 0;
    }

    if (upper) {

/*        Solve A*X = B, where A = U*D*U'. */

/*        First solve U*D*X = B, overwriting B with X. */

/*        K is the main loop index, decreasing from N to 1 in steps of */
/*        1 or 2, depending on the size of the diagonal blocks. */

	k = *n;
L10:

/*        If K < 1, exit from loop. */

	if (k < 1) {
	    goto L30;
	}

	if (ipiv[k] > 0) {

/*           1 x 1 diagonal block */

/*           Interchange rows K and IPIV(K). */

	    kp = ipiv[k];
	    if (kp != k) {
		cswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb);
	    }

/*           Multiply by inv(U(K)), where U(K) is the transformation */
/*           stored in column K of A. */

	    i__1 = k - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgeru_(&i__1, nrhs, &q__1, &a[k * a_dim1 + 1], &c__1, &b[k + 
		    b_dim1], ldb, &b[b_dim1 + 1], ldb);

/*           Multiply by the inverse of the diagonal block. */

	    c_div(&q__1, &c_b1, &a[k + k * a_dim1]);
	    cscal_(nrhs, &q__1, &b[k + b_dim1], ldb);
	    --k;
	} else {

/*           2 x 2 diagonal block */

/*           Interchange rows K-1 and -IPIV(K). */

	    kp = -ipiv[k];
	    if (kp != k - 1) {
		cswap_(nrhs, &b[k - 1 + b_dim1], ldb, &b[kp + b_dim1], ldb);
	    }

/*           Multiply by inv(U(K)), where U(K) is the transformation */
/*           stored in columns K-1 and K of A. */

	    i__1 = k - 2;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgeru_(&i__1, nrhs, &q__1, &a[k * a_dim1 + 1], &c__1, &b[k + 
		    b_dim1], ldb, &b[b_dim1 + 1], ldb);
	    i__1 = k - 2;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgeru_(&i__1, nrhs, &q__1, &a[(k - 1) * a_dim1 + 1], &c__1, &b[k 
		    - 1 + b_dim1], ldb, &b[b_dim1 + 1], ldb);

/*           Multiply by the inverse of the diagonal block. */

	    i__1 = k - 1 + k * a_dim1;
	    akm1k.r = a[i__1].r, akm1k.i = a[i__1].i;
	    c_div(&q__1, &a[k - 1 + (k - 1) * a_dim1], &akm1k);
	    akm1.r = q__1.r, akm1.i = q__1.i;
	    c_div(&q__1, &a[k + k * a_dim1], &akm1k);
	    ak.r = q__1.r, ak.i = q__1.i;
	    q__2.r = akm1.r * ak.r - akm1.i * ak.i, q__2.i = akm1.r * ak.i + 
		    akm1.i * ak.r;
	    q__1.r = q__2.r - 1.f, q__1.i = q__2.i - 0.f;
	    denom.r = q__1.r, denom.i = q__1.i;
	    i__1 = *nrhs;
	    for (j = 1; j <= i__1; ++j) {
		c_div(&q__1, &b[k - 1 + j * b_dim1], &akm1k);
		bkm1.r = q__1.r, bkm1.i = q__1.i;
		c_div(&q__1, &b[k + j * b_dim1], &akm1k);
		bk.r = q__1.r, bk.i = q__1.i;
		i__2 = k - 1 + j * b_dim1;
		q__3.r = ak.r * bkm1.r - ak.i * bkm1.i, q__3.i = ak.r * 
			bkm1.i + ak.i * bkm1.r;
		q__2.r = q__3.r - bk.r, q__2.i = q__3.i - bk.i;
		c_div(&q__1, &q__2, &denom);
		b[i__2].r = q__1.r, b[i__2].i = q__1.i;
		i__2 = k + j * b_dim1;
		q__3.r = akm1.r * bk.r - akm1.i * bk.i, q__3.i = akm1.r * 
			bk.i + akm1.i * bk.r;
		q__2.r = q__3.r - bkm1.r, q__2.i = q__3.i - bkm1.i;
		c_div(&q__1, &q__2, &denom);
		b[i__2].r = q__1.r, b[i__2].i = q__1.i;
/* L20: */
	    }
	    k += -2;
	}

	goto L10;
L30:

/*        Next solve U'*X = B, overwriting B with X. */

/*        K is the main loop index, increasing from 1 to N in steps of */
/*        1 or 2, depending on the size of the diagonal blocks. */

	k = 1;
L40:

/*        If K > N, exit from loop. */

	if (k > *n) {
	    goto L50;
	}

	if (ipiv[k] > 0) {

/*           1 x 1 diagonal block */

/*           Multiply by inv(U'(K)), where U(K) is the transformation */
/*           stored in column K of A. */

	    i__1 = k - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemv_("Transpose", &i__1, nrhs, &q__1, &b[b_offset], ldb, &a[k * 
		    a_dim1 + 1], &c__1, &c_b1, &b[k + b_dim1], ldb)
		    ;

/*           Interchange rows K and IPIV(K). */

	    kp = ipiv[k];
	    if (kp != k) {
		cswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb);
	    }
	    ++k;
	} else {

/*           2 x 2 diagonal block */

/*           Multiply by inv(U'(K+1)), where U(K+1) is the transformation */
/*           stored in columns K and K+1 of A. */

	    i__1 = k - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemv_("Transpose", &i__1, nrhs, &q__1, &b[b_offset], ldb, &a[k * 
		    a_dim1 + 1], &c__1, &c_b1, &b[k + b_dim1], ldb)
		    ;
	    i__1 = k - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemv_("Transpose", &i__1, nrhs, &q__1, &b[b_offset], ldb, &a[(k 
		    + 1) * a_dim1 + 1], &c__1, &c_b1, &b[k + 1 + b_dim1], ldb);

/*           Interchange rows K and -IPIV(K). */

	    kp = -ipiv[k];
	    if (kp != k) {
		cswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb);
	    }
	    k += 2;
	}

	goto L40;
L50:

	;
    } else {

/*        Solve A*X = B, where A = L*D*L'. */

/*        First solve L*D*X = B, overwriting B with X. */

/*        K is the main loop index, increasing from 1 to N in steps of */
/*        1 or 2, depending on the size of the diagonal blocks. */

	k = 1;
L60:

/*        If K > N, exit from loop. */

	if (k > *n) {
	    goto L80;
	}

	if (ipiv[k] > 0) {

/*           1 x 1 diagonal block */

/*           Interchange rows K and IPIV(K). */

	    kp = ipiv[k];
	    if (kp != k) {
		cswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb);
	    }

/*           Multiply by inv(L(K)), where L(K) is the transformation */
/*           stored in column K of A. */

	    if (k < *n) {
		i__1 = *n - k;
		q__1.r = -1.f, q__1.i = -0.f;
		cgeru_(&i__1, nrhs, &q__1, &a[k + 1 + k * a_dim1], &c__1, &b[
			k + b_dim1], ldb, &b[k + 1 + b_dim1], ldb);
	    }

/*           Multiply by the inverse of the diagonal block. */

	    c_div(&q__1, &c_b1, &a[k + k * a_dim1]);
	    cscal_(nrhs, &q__1, &b[k + b_dim1], ldb);
	    ++k;
	} else {

/*           2 x 2 diagonal block */

/*           Interchange rows K+1 and -IPIV(K). */

	    kp = -ipiv[k];
	    if (kp != k + 1) {
		cswap_(nrhs, &b[k + 1 + b_dim1], ldb, &b[kp + b_dim1], ldb);
	    }

/*           Multiply by inv(L(K)), where L(K) is the transformation */
/*           stored in columns K and K+1 of A. */

	    if (k < *n - 1) {
		i__1 = *n - k - 1;
		q__1.r = -1.f, q__1.i = -0.f;
		cgeru_(&i__1, nrhs, &q__1, &a[k + 2 + k * a_dim1], &c__1, &b[
			k + b_dim1], ldb, &b[k + 2 + b_dim1], ldb);
		i__1 = *n - k - 1;
		q__1.r = -1.f, q__1.i = -0.f;
		cgeru_(&i__1, nrhs, &q__1, &a[k + 2 + (k + 1) * a_dim1], &
			c__1, &b[k + 1 + b_dim1], ldb, &b[k + 2 + b_dim1], 
			ldb);
	    }

/*           Multiply by the inverse of the diagonal block. */

	    i__1 = k + 1 + k * a_dim1;
	    akm1k.r = a[i__1].r, akm1k.i = a[i__1].i;
	    c_div(&q__1, &a[k + k * a_dim1], &akm1k);
	    akm1.r = q__1.r, akm1.i = q__1.i;
	    c_div(&q__1, &a[k + 1 + (k + 1) * a_dim1], &akm1k);
	    ak.r = q__1.r, ak.i = q__1.i;
	    q__2.r = akm1.r * ak.r - akm1.i * ak.i, q__2.i = akm1.r * ak.i + 
		    akm1.i * ak.r;
	    q__1.r = q__2.r - 1.f, q__1.i = q__2.i - 0.f;
	    denom.r = q__1.r, denom.i = q__1.i;
	    i__1 = *nrhs;
	    for (j = 1; j <= i__1; ++j) {
		c_div(&q__1, &b[k + j * b_dim1], &akm1k);
		bkm1.r = q__1.r, bkm1.i = q__1.i;
		c_div(&q__1, &b[k + 1 + j * b_dim1], &akm1k);
		bk.r = q__1.r, bk.i = q__1.i;
		i__2 = k + j * b_dim1;
		q__3.r = ak.r * bkm1.r - ak.i * bkm1.i, q__3.i = ak.r * 
			bkm1.i + ak.i * bkm1.r;
		q__2.r = q__3.r - bk.r, q__2.i = q__3.i - bk.i;
		c_div(&q__1, &q__2, &denom);
		b[i__2].r = q__1.r, b[i__2].i = q__1.i;
		i__2 = k + 1 + j * b_dim1;
		q__3.r = akm1.r * bk.r - akm1.i * bk.i, q__3.i = akm1.r * 
			bk.i + akm1.i * bk.r;
		q__2.r = q__3.r - bkm1.r, q__2.i = q__3.i - bkm1.i;
		c_div(&q__1, &q__2, &denom);
		b[i__2].r = q__1.r, b[i__2].i = q__1.i;
/* L70: */
	    }
	    k += 2;
	}

	goto L60;
L80:

/*        Next solve L'*X = B, overwriting B with X. */

/*        K is the main loop index, decreasing from N to 1 in steps of */
/*        1 or 2, depending on the size of the diagonal blocks. */

	k = *n;
L90:

/*        If K < 1, exit from loop. */

	if (k < 1) {
	    goto L100;
	}

	if (ipiv[k] > 0) {

/*           1 x 1 diagonal block */

/*           Multiply by inv(L'(K)), where L(K) is the transformation */
/*           stored in column K of A. */

	    if (k < *n) {
		i__1 = *n - k;
		q__1.r = -1.f, q__1.i = -0.f;
		cgemv_("Transpose", &i__1, nrhs, &q__1, &b[k + 1 + b_dim1], 
			ldb, &a[k + 1 + k * a_dim1], &c__1, &c_b1, &b[k + 
			b_dim1], ldb);
	    }

/*           Interchange rows K and IPIV(K). */

	    kp = ipiv[k];
	    if (kp != k) {
		cswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb);
	    }
	    --k;
	} else {

/*           2 x 2 diagonal block */

/*           Multiply by inv(L'(K-1)), where L(K-1) is the transformation */
/*           stored in columns K-1 and K of A. */

	    if (k < *n) {
		i__1 = *n - k;
		q__1.r = -1.f, q__1.i = -0.f;
		cgemv_("Transpose", &i__1, nrhs, &q__1, &b[k + 1 + b_dim1], 
			ldb, &a[k + 1 + k * a_dim1], &c__1, &c_b1, &b[k + 
			b_dim1], ldb);
		i__1 = *n - k;
		q__1.r = -1.f, q__1.i = -0.f;
		cgemv_("Transpose", &i__1, nrhs, &q__1, &b[k + 1 + b_dim1], 
			ldb, &a[k + 1 + (k - 1) * a_dim1], &c__1, &c_b1, &b[k 
			- 1 + b_dim1], ldb);
	    }

/*           Interchange rows K and -IPIV(K). */

	    kp = -ipiv[k];
	    if (kp != k) {
		cswap_(nrhs, &b[k + b_dim1], ldb, &b[kp + b_dim1], ldb);
	    }
	    k += -2;
	}

	goto L90;
L100:
	;
    }

    return 0;

/*     End of CSYTRS */

} /* csytrs_ */
Пример #8
0
 int ctgsna_(char *job, char *howmny, int *select, 
	int *n, complex *a, int *lda, complex *b, int *ldb, 
	complex *vl, int *ldvl, complex *vr, int *ldvr, float *s, float 
	*dif, int *mm, int *m, complex *work, int *lwork, int 
	*iwork, int *info)
{
    /* Leading dimensions and 1-based index shifts of the 2-D arguments */
    int a_dim1, a_offset, b_dim1, b_offset, vl_dim1, vl_offset, vr_dim1,
        vr_offset;

    /* Builtin functions */
    double c_abs(complex *);

    /* External BLAS / LAPACK routines */
    extern int lsame_(char *, char *);
    extern int xerbla_(char *, int *);
    extern double slamch_(char *);
    extern int slabad_(float *, float *);
    extern double scnrm2_(int *, complex *, int *);
    extern double slapy2_(float *, float *);
    extern /* Complex */ VOID cdotc_(complex *, int *, complex *, int *,
        complex *, int *);
    extern int cgemv_(char *, int *, int *, complex *, complex *, int *,
        complex *, int *, complex *, complex *, int *);
    extern int clacpy_(char *, int *, int *, complex *, int *, complex *,
        int *);
    extern int ctgexc_(int *, int *, int *, complex *, int *, complex *,
        int *, complex *, int *, complex *, int *, int *, int *, int *);
    extern int ctgsyl_(char *, int *, int *, int *, complex *, int *,
        complex *, int *, complex *, int *, complex *, int *, complex *,
        int *, complex *, int *, float *, float *, complex *, int *, int *,
        int *);

    /* Option flags decoded from JOB and HOWMNY */
    int want_both, want_s, want_dif, by_select;
    int lquery, lwmin;

    /* Loop state: col walks the eigenpairs, slot is the output position */
    int col, last, slot;
    int bad_arg;

    /* Scratch for the eigenvalue condition estimate */
    float eps, smlnum, bignum;
    float rnrm, lnrm, cond, mag_a, mag_b;
    complex yhax, yhbx;

    /* Scratch for the eigenvector condition estimate */
    int ifst, ilst, ierr;
    int blk1, blk2, b_start;
    float scale;
    complex dummy[1];
    complex dummy1[1];


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  CTGSNA estimates reciprocal condition numbers for selected */
/*  eigenvalues and/or eigenvectors of a complex matrix pair (A, B). */
/*  The pair must already be in generalized Schur canonical form, i.e. */
/*  both A and B are upper triangular. */

/*  Arguments */
/*  ========= */

/*  JOB     (input) CHARACTER*1 */
/*          Which condition numbers to compute: */
/*          = 'E': eigenvalues only (array S); */
/*          = 'V': eigenvectors only (array DIF); */
/*          = 'B': both (S and DIF). */

/*  HOWMNY  (input) CHARACTER*1 */
/*          = 'A': handle every eigenpair; */
/*          = 'S': handle only the eigenpairs flagged in SELECT. */

/*  SELECT  (input) LOGICAL array, dimension (N) */
/*          Used only when HOWMNY = 'S': SELECT(j) must be .TRUE. for */
/*          each j-th eigenvalue and/or eigenvector whose condition */
/*          number is wanted.  Not referenced when HOWMNY = 'A'. */

/*  N       (input) INTEGER */
/*          Order of the square matrix pair (A, B).  N >= 0. */

/*  A       (input) COMPLEX array, dimension (LDA,N) */
/*          Upper triangular matrix A of the pair. */

/*  LDA     (input) INTEGER */
/*          Leading dimension of A.  LDA >= MAX(1,N). */

/*  B       (input) COMPLEX array, dimension (LDB,N) */
/*          Upper triangular matrix B of the pair. */

/*  LDB     (input) INTEGER */
/*          Leading dimension of B.  LDB >= MAX(1,N). */

/*  VL      (input) COMPLEX array, dimension (LDVL,M) */
/*          For JOB = 'E' or 'B': left eigenvectors of (A, B) for the */
/*          eigenpairs chosen through HOWMNY and SELECT, stored in */
/*          consecutive columns as returned by CTGEVC. */
/*          Not referenced when JOB = 'V'. */

/*  LDVL    (input) INTEGER */
/*          Leading dimension of VL.  LDVL >= 1, and LDVL >= N when */
/*          JOB = 'E' or 'B'. */

/*  VR      (input) COMPLEX array, dimension (LDVR,M) */
/*          For JOB = 'E' or 'B': right eigenvectors of (A, B) for the */
/*          eigenpairs chosen through HOWMNY and SELECT, stored in */
/*          consecutive columns as returned by CTGEVC. */
/*          Not referenced when JOB = 'V'. */

/*  LDVR    (input) INTEGER */
/*          Leading dimension of VR.  LDVR >= 1, and LDVR >= N when */
/*          JOB = 'E' or 'B'. */

/*  S       (output) REAL array, dimension (MM) */
/*          For JOB = 'E' or 'B': reciprocal condition numbers of the */
/*          selected eigenvalues, in consecutive elements. */
/*          Not referenced when JOB = 'V'. */

/*  DIF     (output) REAL array, dimension (MM) */
/*          For JOB = 'V' or 'B': estimated reciprocal condition numbers */
/*          of the selected eigenvectors, in consecutive elements.  Each */
/*          entry is a Frobenius norm-based estimate of Difl.  When the */
/*          eigenvalues cannot be reordered to compute DIF(j), DIF(j) is */
/*          set to 0; this only happens when the true value would be */
/*          very small anyway.  Not referenced when JOB = 'E'. */

/*  MM      (input) INTEGER */
/*          Number of elements in S and DIF.  MM >= M. */

/*  M       (output) INTEGER */
/*          Number of elements of S and DIF actually used, one per */
/*          selected eigenvalue.  Set to N when HOWMNY = 'A'. */

/*  WORK    (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) */
/*          On exit with INFO = 0, WORK(1) holds the optimal LWORK. */

/*  LWORK   (input) INTEGER */
/*          Dimension of WORK.  LWORK >= MAX(1,N); when JOB = 'V' or */
/*          'B', LWORK >= MAX(1,2*N*N).  LWORK = -1 is treated as a */
/*          workspace query: WORK(1) is set and the routine returns */
/*          without computing anything. */

/*  IWORK   (workspace) INTEGER array, dimension (N+2) */
/*          Not referenced when JOB = 'E'. */

/*  INFO    (output) INTEGER */
/*          = 0: successful exit */
/*          < 0: INFO = -i means the i-th argument had an illegal value */

/*  Further Details */
/*  =============== */

/*  For the i-th generalized eigenvalue w = (a, b) the reciprocal */
/*  condition number is */

/*          S(I) = (|v'Au|**2 + |v'Bu|**2)**(1/2) / (norm(u)*norm(v)) */

/*  with u and v the right and left eigenvectors of (A, B) belonging to */
/*  w, |z| the absolute value of a complex number and norm(u) the */
/*  2-norm of u.  The pair (a, b) corresponds to the eigenvalue */
/*  w = a/b (= v'Au/v'Bu).  If a and b are both zero, (A,B) is singular */
/*  and S(I) = -1 is returned. */

/*  An approximate bound on the chordal distance between the computed */
/*  eigenvalue w and the exact eigenvalue lambda is */

/*          chord(w, lambda) <=   EPS * norm(A, B) / S(I), */

/*  where EPS is the machine precision. */

/*  For the eigenvectors u (right) and v (left) belonging to w, write */

/*                   (A, B) = ( a   *  ) ( b  *  )  1 */
/*                            ( 0  A22 ),( 0 B22 )  n-1 */
/*                              1  n-1     1 n-1 */

/*  The reciprocal condition number DIF(I) is then */

/*          Difl[(a, b), (A22, B22)]  = sigma-MIN( Zl ) */

/*  the smallest singular value of */

/*         Zl = [ kron(a, In-1) -kron(1, A22) ] */
/*              [ kron(b, In-1) -kron(1, B22) ]. */

/*  In-1 is the identity of size n-1, X' the conjugate transpose of X */
/*  and kron(X, Y) the Kronecker product of X and Y.  The smallest */
/*  singular value of Zl is approximated by an upper bound; this is */
/*  done by CLATDF. */

/*  An approximate error bound for a computed eigenvector VL(i) or */
/*  VR(i) is */

/*                      EPS * norm(A, B) / DIF(i). */

/*  See ref. [2-3] for more details and further references. */

/*  Based on contributions by */
/*     Bo Kagstrom and Peter Poromaa, Department of Computing Science, */
/*     Umea University, S-901 87 Umea, Sweden. */

/*  References */
/*  ========== */

/*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the */
/*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in */
/*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and */
/*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218. */

/*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified */
/*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition */
/*      Estimation: Theory, Algorithms and Software, Report */
/*      UMINF - 94.04, Department of Computing Science, Umea University, */
/*      S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87. */
/*      To appear in Numerical Algorithms, 1996. */

/*  [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software */
/*      for Solving the Generalized Sylvester Equation and Estimating the */
/*      Separation between Regular Matrix Pairs, Report UMINF - 93.23, */
/*      Department of Computing Science, Umea University, S-901 87 Umea, */
/*      Sweden, December 1993, Revised April 1994, Also as LAPACK Working */
/*      Note 75. */
/*      To appear in ACM Trans. on Math. Software, Vol 22, No 1, 1996. */

/*  ===================================================================== */

    /* Shift every array pointer so the Fortran 1-based subscripts work */
    --select;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    vl_dim1 = *ldvl;
    vl_offset = 1 + vl_dim1;
    vl -= vl_offset;
    vr_dim1 = *ldvr;
    vr_offset = 1 + vr_dim1;
    vr -= vr_offset;
    --s;
    --dif;
    --work;
    --iwork;

    /* Decode the option characters */
    want_both = lsame_(job, "B");
    want_s = lsame_(job, "E") || want_both;
    want_dif = lsame_(job, "V") || want_both;

    by_select = lsame_(howmny, "S");

    *info = 0;
    lquery = *lwork == -1;

    /* Validate the arguments that do not depend on M or LWMIN */
    if (! want_s && ! want_dif) {
        *info = -1;
    } else if (! lsame_(howmny, "A") && ! by_select) {
        *info = -2;
    } else if (*n < 0) {
        *info = -4;
    } else if (*lda < MAX(1,*n)) {
        *info = -6;
    } else if (*ldb < MAX(1,*n)) {
        *info = -8;
    } else if (want_s && *ldvl < *n) {
        *info = -10;
    } else if (want_s && *ldvr < *n) {
        *info = -12;
    }

    if (*info == 0) {

        /* M = number of eigenpairs that will be processed */
        if (by_select) {
            *m = 0;
            last = *n;
            for (col = 1; col <= last; ++col) {
                if (select[col]) {
                    ++(*m);
                }
            }
        } else {
            *m = *n;
        }

        /* Minimum workspace: 2*N*N when DIF is wanted, else N */
        if (*n == 0) {
            lwmin = 1;
        } else if (lsame_(job, "V") || lsame_(job, "B")) {
            lwmin = 2 * *n * *n;
        } else {
            lwmin = *n;
        }
        work[1].r = (float) lwmin;
        work[1].i = 0.f;

        /* Now MM and LWORK can be checked */
        if (*mm < *m) {
            *info = -15;
        } else if (*lwork < lwmin && ! lquery) {
            *info = -18;
        }
    }

    if (*info != 0) {
        bad_arg = -(*info);
        xerbla_("CTGSNA", &bad_arg);
        return 0;
    }
    if (lquery) {
        return 0;
    }

    /* Nothing to do for an empty problem */
    if (*n == 0) {
        return 0;
    }

    /* Machine constants */
    eps = slamch_("P");
    smlnum = slamch_("S") / eps;
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);

    slot = 0;
    last = *n;
    for (col = 1; col <= last; ++col) {

        /* Skip eigenpairs that were not selected */
        if (by_select && ! select[col]) {
            continue;
        }

        ++slot;

        if (want_s) {

            /* Reciprocal condition number of the eigenvalue: */
            /* hypot(|v'Au|, |v'Bu|) / (norm(u) * norm(v)) */

            complex *u_col = &vr[slot * vr_dim1 + 1];
            complex *v_col = &vl[slot * vl_dim1 + 1];

            rnrm = scnrm2_(n, u_col, &c__1);
            lnrm = scnrm2_(n, v_col, &c__1);

            cgemv_("N", n, n, &c_b19, &a[a_offset], lda, u_col, &c__1,
                   &c_b20, &work[1], &c__1);
            cdotc_(&yhax, n, &work[1], &c__1, v_col, &c__1);

            cgemv_("N", n, n, &c_b19, &b[b_offset], ldb, u_col, &c__1,
                   &c_b20, &work[1], &c__1);
            cdotc_(&yhbx, n, &work[1], &c__1, v_col, &c__1);

            mag_a = c_abs(&yhax);
            mag_b = c_abs(&yhbx);
            cond = slapy2_(&mag_a, &mag_b);

            /* cond == 0 flags a singular pair */
            s[slot] = (cond == 0.f) ? -1.f : cond / (rnrm * lnrm);
        }

        if (! want_dif) {
            continue;
        }

        if (*n == 1) {

            /* 1-by-1 pair: use hypot(|A(1,1)|, |B(1,1)|) directly */
            mag_a = c_abs(&a[a_dim1 + 1]);
            mag_b = c_abs(&b[b_dim1 + 1]);
            dif[slot] = slapy2_(&mag_a, &mag_b);
            continue;
        }

        /* Reciprocal condition number of the eigenvectors. */
        /* Copy (A, B) into WORK (A first, B at offset N*N) and move the */
        /* (col,col) pair to position (1,1). */

        clacpy_("Full", n, n, &a[a_offset], lda, &work[1], n);
        clacpy_("Full", n, n, &b[b_offset], ldb, &work[*n * *n + 1], n);
        ifst = col;
        ilst = 1;

        ctgexc_(&c_false, &c_false, n, &work[1], n, &work[*n * *n + 1], n,
                dummy, &c__1, dummy1, &c__1, &ifst, &ilst, &ierr);

        if (ierr > 0) {

            /* Ill-conditioned problem - swap rejected */
            dif[slot] = 0.f;
        } else {

            /* Reordering succeeded: solve the generalized Sylvester */
            /* equation for R and L, */
            /*            A22 * R - L * A11 = A12 */
            /*            B22 * R - L * B11 = B12, */
            /* and estimate Difl[(A11,B11), (A22, B22)]. */

            blk1 = 1;
            blk2 = *n - blk1;
            b_start = *n * *n + 1;
            ctgsyl_("N", &c__3, &blk2, &blk1,
                    &work[*n * blk1 + blk1 + 1], n,
                    &work[1], n,
                    &work[blk1 + 1], n,
                    &work[*n * blk1 + blk1 + b_start], n,
                    &work[b_start], n,
                    &work[blk1 + b_start], n,
                    &scale, &dif[slot], dummy, &c__1, &iwork[1], &ierr);
        }
    }

    /* Report the workspace size again on normal exit */
    work[1].r = (float) lwmin;
    work[1].i = 0.f;
    return 0;

/*     End of CTGSNA */

} /* ctgsna_ */
Пример #9
0
/* Subroutine */ int clagsy_(integer *n, integer *k, real *d__, complex *a, 
	integer *lda, integer *iseed, complex *work, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, 
	    i__9;
    real r__1;
    complex q__1, q__2, q__3, q__4;

    /* Local variables */
    integer i__, j, ii, jj;
    complex wa, wb;
    real wn;
    complex tau;
    complex alpha;

/*  -- LAPACK auxiliary test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  CLAGSY generates a complex symmetric matrix A, by pre- and post- */
/*  multiplying a real diagonal matrix D with a random unitary matrix: */
/*  A = U*D*U**T. The semi-bandwidth may then be reduced to k by */
/*  additional unitary transformations. */

/*  The routine works on the lower triangle only and mirrors it into */
/*  the upper triangle as its last step. */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  K       (input) INTEGER */
/*          The number of nonzero subdiagonals within the band of A. */
/*          0 <= K <= N-1. */
/*          NOTE(review): the band-reduction step passes K-1 as a */
/*          column count to cgemv_ and cgerc_, which is -1 for K = 0; */
/*          confirm how those routines treat it before relying on */
/*          K = 0 with N > 1. */

/*  D       (input) REAL array, dimension (N) */
/*          The diagonal elements of the diagonal matrix D. */

/*  A       (output) COMPLEX array, dimension (LDA,N) */
/*          The generated n by n symmetric matrix A (the full matrix is */
/*          stored). */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= N. */

/*  ISEED   (input/output) INTEGER array, dimension (4) */
/*          On entry, the seed of the random number generator; the array */
/*          elements must be between 0 and 4095, and ISEED(4) must be */
/*          odd. */
/*          On exit, the seed is updated. */

/*  WORK    (workspace) COMPLEX array, dimension (2*N) */

/*  INFO    (output) INTEGER */
/*          = 0: successful exit */
/*          < 0: if INFO = -i, the i-th argument had an illegal value */

/*  Implementation note */
/*  =================== */

/*  Each reflector is built from a vector x with leading entry x1 and */
/*  2-norm wn, using wa = (wn/|x1|)*x1.  When x1 is exactly zero that */
/*  quotient is 0/0 (or wn/0) and would inject NaN into A, e.g. for */
/*  an all-zero D.  In that case wa is set to (wn, 0) instead, which */
/*  is the same formula with the arbitrary phase of x1 taken as +1. */
/*  Results for nonzero x1 are unchanged. */

/*  ===================================================================== */

/*     Test the input arguments */

    /* Parameter adjustments (enable Fortran-style 1-based indexing) */
    --d__;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --iseed;
    --work;

    /* Function Body */
    *info = 0;
    if (*n < 0) {
	*info = -1;
    } else if (*k < 0 || *k > *n - 1) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    }
    if (*info < 0) {
	i__1 = -(*info);
	xerbla_("CLAGSY", &i__1);
	return 0;
    }

/*     initialize lower triangle of A to diagonal matrix */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *n;
	for (i__ = j + 1; i__ <= i__2; ++i__) {
	    i__3 = i__ + j * a_dim1;
	    a[i__3].r = 0.f, a[i__3].i = 0.f;
/* L10: */
	}
/* L20: */
    }
    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = i__ + i__ * a_dim1;
	i__3 = i__;
	a[i__2].r = d__[i__3], a[i__2].i = 0.f;
/* L30: */
    }

/*     Generate lower triangle of symmetric matrix */

    for (i__ = *n - 1; i__ >= 1; --i__) {

/*        generate random reflection */

	i__1 = *n - i__ + 1;
	clarnv_(&c__3, &iseed[1], &i__1, &work[1]);
	i__1 = *n - i__ + 1;
	wn = scnrm2_(&i__1, &work[1], &c__1);
	if (work[1].r == 0.f && work[1].i == 0.f) {

/*           leading entry is zero: avoid wn/0, take the phase as +1 */

	    wa.r = wn, wa.i = 0.f;
	} else {
	    r__1 = wn / c_abs(&work[1]);
	    q__1.r = r__1 * work[1].r, q__1.i = r__1 * work[1].i;
	    wa.r = q__1.r, wa.i = q__1.i;
	}
	if (wn == 0.f) {
	    tau.r = 0.f, tau.i = 0.f;
	} else {
	    q__1.r = work[1].r + wa.r, q__1.i = work[1].i + wa.i;
	    wb.r = q__1.r, wb.i = q__1.i;
	    i__1 = *n - i__;
	    c_div(&q__1, &c_b2, &wb);
	    cscal_(&i__1, &q__1, &work[2], &c__1);
	    work[1].r = 1.f, work[1].i = 0.f;
	    c_div(&q__1, &wb, &wa);
	    r__1 = q__1.r;
	    tau.r = r__1, tau.i = 0.f;
	}

/*        apply random reflection to A(i:n,i:n) from the left */
/*        and the right */

/*        compute  y := tau * A * conjg(u) */
/*        (u is conjugated in place, used, then conjugated back; */
/*        y is stored in WORK(N+1:...)) */

	i__1 = *n - i__ + 1;
	clacgv_(&i__1, &work[1], &c__1);
	i__1 = *n - i__ + 1;
	csymv_("Lower", &i__1, &tau, &a[i__ + i__ * a_dim1], lda, &work[1], &
		c__1, &c_b1, &work[*n + 1], &c__1);
	i__1 = *n - i__ + 1;
	clacgv_(&i__1, &work[1], &c__1);

/*        compute  v := y - 1/2 * tau * ( u, y ) * u */

	q__3.r = -.5f, q__3.i = -0.f;
	q__2.r = q__3.r * tau.r - q__3.i * tau.i, q__2.i = q__3.r * tau.i + 
		q__3.i * tau.r;
	i__1 = *n - i__ + 1;
	cdotc_(&q__4, &i__1, &work[1], &c__1, &work[*n + 1], &c__1);
	q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * q__4.i 
		+ q__2.i * q__4.r;
	alpha.r = q__1.r, alpha.i = q__1.i;
	i__1 = *n - i__ + 1;
	caxpy_(&i__1, &alpha, &work[1], &c__1, &work[*n + 1], &c__1);

/*        apply the transformation as a rank-2 update to A(i:n,i:n) */

/*        This open-coded loop stands in for the call */
/*        CALL CSYR2( 'Lower', N-I+1, -ONE, WORK, 1, WORK( N+1 ), 1, */
/*        $               A( I, I ), LDA ) */
/*        i.e. A(ii,jj) -= u(ii)*v(jj) + v(ii)*u(jj) on the lower part. */

	i__1 = *n;
	for (jj = i__; jj <= i__1; ++jj) {
	    i__2 = *n;
	    for (ii = jj; ii <= i__2; ++ii) {
		i__3 = ii + jj * a_dim1;
		i__4 = ii + jj * a_dim1;
		i__5 = ii - i__ + 1;
		i__6 = *n + jj - i__ + 1;
		q__3.r = work[i__5].r * work[i__6].r - work[i__5].i * work[
			i__6].i, q__3.i = work[i__5].r * work[i__6].i + work[
			i__5].i * work[i__6].r;
		q__2.r = a[i__4].r - q__3.r, q__2.i = a[i__4].i - q__3.i;
		i__7 = *n + ii - i__ + 1;
		i__8 = jj - i__ + 1;
		q__4.r = work[i__7].r * work[i__8].r - work[i__7].i * work[
			i__8].i, q__4.i = work[i__7].r * work[i__8].i + work[
			i__7].i * work[i__8].r;
		q__1.r = q__2.r - q__4.r, q__1.i = q__2.i - q__4.i;
		a[i__3].r = q__1.r, a[i__3].i = q__1.i;
/* L40: */
	    }
/* L50: */
	}
/* L60: */
    }

/*     Reduce number of subdiagonals to K */

    i__1 = *n - 1 - *k;
    for (i__ = 1; i__ <= i__1; ++i__) {

/*        generate reflection to annihilate A(k+i+1:n,i) */

	i__2 = *n - *k - i__ + 1;
	wn = scnrm2_(&i__2, &a[*k + i__ + i__ * a_dim1], &c__1);
	i__2 = *k + i__ + i__ * a_dim1;
	if (a[i__2].r == 0.f && a[i__2].i == 0.f) {

/*           leading entry is zero: avoid wn/0, take the phase as +1. */
/*           wa is stored back into A below, so it must never be NaN. */

	    wa.r = wn, wa.i = 0.f;
	} else {
	    r__1 = wn / c_abs(&a[i__2]);
	    q__1.r = r__1 * a[i__2].r, q__1.i = r__1 * a[i__2].i;
	    wa.r = q__1.r, wa.i = q__1.i;
	}
	if (wn == 0.f) {
	    tau.r = 0.f, tau.i = 0.f;
	} else {
	    i__2 = *k + i__ + i__ * a_dim1;
	    q__1.r = a[i__2].r + wa.r, q__1.i = a[i__2].i + wa.i;
	    wb.r = q__1.r, wb.i = q__1.i;
	    i__2 = *n - *k - i__;
	    c_div(&q__1, &c_b2, &wb);
	    cscal_(&i__2, &q__1, &a[*k + i__ + 1 + i__ * a_dim1], &c__1);
	    i__2 = *k + i__ + i__ * a_dim1;
	    a[i__2].r = 1.f, a[i__2].i = 0.f;
	    c_div(&q__1, &wb, &wa);
	    r__1 = q__1.r;
	    tau.r = r__1, tau.i = 0.f;
	}

/*        apply reflection to A(k+i:n,i+1:k+i-1) from the left */

	i__2 = *n - *k - i__ + 1;
	i__3 = *k - 1;
	cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[*k + i__ + (i__ 
		+ 1) * a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &
		c_b1, &work[1], &c__1);
	i__2 = *n - *k - i__ + 1;
	i__3 = *k - 1;
	q__1.r = -tau.r, q__1.i = -tau.i;
	cgerc_(&i__2, &i__3, &q__1, &a[*k + i__ + i__ * a_dim1], &c__1, &work[
		1], &c__1, &a[*k + i__ + (i__ + 1) * a_dim1], lda);

/*        apply reflection to A(k+i:n,k+i:n) from the left and the right */

/*        compute  y := tau * A * conjg(u) */
/*        (u lives in column i of A; it is conjugated in place, used, */
/*        then conjugated back) */

	i__2 = *n - *k - i__ + 1;
	clacgv_(&i__2, &a[*k + i__ + i__ * a_dim1], &c__1);
	i__2 = *n - *k - i__ + 1;
	csymv_("Lower", &i__2, &tau, &a[*k + i__ + (*k + i__) * a_dim1], lda, 
		&a[*k + i__ + i__ * a_dim1], &c__1, &c_b1, &work[1], &c__1);
	i__2 = *n - *k - i__ + 1;
	clacgv_(&i__2, &a[*k + i__ + i__ * a_dim1], &c__1);

/*        compute  v := y - 1/2 * tau * ( u, y ) * u */

	q__3.r = -.5f, q__3.i = -0.f;
	q__2.r = q__3.r * tau.r - q__3.i * tau.i, q__2.i = q__3.r * tau.i + 
		q__3.i * tau.r;
	i__2 = *n - *k - i__ + 1;
	cdotc_(&q__4, &i__2, &a[*k + i__ + i__ * a_dim1], &c__1, &work[1], &
		c__1);
	q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * q__4.i 
		+ q__2.i * q__4.r;
	alpha.r = q__1.r, alpha.i = q__1.i;
	i__2 = *n - *k - i__ + 1;
	caxpy_(&i__2, &alpha, &a[*k + i__ + i__ * a_dim1], &c__1, &work[1], &
		c__1);

/*        apply symmetric rank-2 update to A(k+i:n,k+i:n) */

/*        This open-coded loop stands in for the call */
/*        CALL CSYR2( 'Lower', N-K-I+1, -ONE, A( K+I, I ), 1, WORK, 1, */
/*        $               A( K+I, K+I ), LDA ) */

	i__2 = *n;
	for (jj = *k + i__; jj <= i__2; ++jj) {
	    i__3 = *n;
	    for (ii = jj; ii <= i__3; ++ii) {
		i__4 = ii + jj * a_dim1;
		i__5 = ii + jj * a_dim1;
		i__6 = ii + i__ * a_dim1;
		i__7 = jj - *k - i__ + 1;
		q__3.r = a[i__6].r * work[i__7].r - a[i__6].i * work[i__7].i, 
			q__3.i = a[i__6].r * work[i__7].i + a[i__6].i * work[
			i__7].r;
		q__2.r = a[i__5].r - q__3.r, q__2.i = a[i__5].i - q__3.i;
		i__8 = ii - *k - i__ + 1;
		i__9 = jj + i__ * a_dim1;
		q__4.r = work[i__8].r * a[i__9].r - work[i__8].i * a[i__9].i, 
			q__4.i = work[i__8].r * a[i__9].i + work[i__8].i * a[
			i__9].r;
		q__1.r = q__2.r - q__4.r, q__1.i = q__2.i - q__4.i;
		a[i__4].r = q__1.r, a[i__4].i = q__1.i;
/* L70: */
	    }
/* L80: */
	}

/*        column i no longer needs to hold u: store the reflected */
/*        value -wa in A(k+i,i) and zero the annihilated entries */

	i__2 = *k + i__ + i__ * a_dim1;
	q__1.r = -wa.r, q__1.i = -wa.i;
	a[i__2].r = q__1.r, a[i__2].i = q__1.i;
	i__2 = *n;
	for (j = *k + i__ + 1; j <= i__2; ++j) {
	    i__3 = j + i__ * a_dim1;
	    a[i__3].r = 0.f, a[i__3].i = 0.f;
/* L90: */
	}
/* L100: */
    }

/*     Store full symmetric matrix (copy lower triangle to upper) */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *n;
	for (i__ = j + 1; i__ <= i__2; ++i__) {
	    i__3 = j + i__ * a_dim1;
	    i__4 = i__ + j * a_dim1;
	    a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i;
/* L110: */
	}
/* L120: */
    }
    return 0;

/*     End of CLAGSY */

} /* clagsy_ */
Пример #10
0
/* Subroutine */ int cggglm_(integer *n, integer *m, integer *p, complex *a, 
	integer *lda, complex *b, integer *ldb, complex *d__, complex *x, 
	complex *y, complex *work, integer *lwork, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
    complex q__1;

    /* Local variables */
    integer i__, nb, np, nb1, nb2, nb3, nb4, lopt;
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
, complex *, integer *, complex *, integer *, complex *, complex *
, integer *), ccopy_(integer *, complex *, integer *, 
	    complex *, integer *), cggqrf_(integer *, integer *, integer *, 
	    complex *, integer *, complex *, complex *, integer *, complex *, 
	    complex *, integer *, integer *), xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *);
    integer lwkmin;
    extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, 
	    integer *, complex *, integer *, complex *, complex *, integer *, 
	    complex *, integer *, integer *), cunmrq_(char *, 
	    char *, integer *, integer *, integer *, complex *, integer *, 
	    complex *, complex *, integer *, complex *, integer *, integer *);
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int ctrtrs_(char *, char *, char *, integer *, 
	    integer *, complex *, integer *, complex *, integer *, integer *);


/*  -- LAPACK driver routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  CGGGLM solves a general Gauss-Markov linear model (GLM) problem: */

/*          minimize || y ||_2   subject to   d = A*x + B*y */
/*              x */

/*  where A is an N-by-M matrix, B is an N-by-P matrix, and d is a */
/*  given N-vector. It is assumed that M <= N <= M+P, and */

/*             rank(A) = M    and    rank( A B ) = N. */

/*  Under these assumptions, the constrained equation is always */
/*  consistent, and there is a unique solution x and a minimal 2-norm */
/*  solution y, which is obtained using a generalized QR factorization */
/*  of the matrices (A, B) given by */

/*     A = Q*(R),   B = Q*T*Z. */
/*           (0) */

/*  In particular, if matrix B is square nonsingular, then the problem */
/*  GLM is equivalent to the following weighted linear least squares */
/*  problem */

/*               minimize || inv(B)*(d-A*x) ||_2 */
/*                   x */

/*  where inv(B) denotes the inverse of B. */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The number of rows of the matrices A and B.  N >= 0. */

/*  M       (input) INTEGER */
/*          The number of columns of the matrix A.  0 <= M <= N. */

/*  P       (input) INTEGER */
/*          The number of columns of the matrix B.  P >= N-M. */

/*  A       (input/output) COMPLEX array, dimension (LDA,M) */
/*          On entry, the N-by-M matrix A. */
/*          On exit, the upper triangular part of the array A contains */
/*          the M-by-M upper triangular matrix R. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A. LDA >= max(1,N). */

/*  B       (input/output) COMPLEX array, dimension (LDB,P) */
/*          On entry, the N-by-P matrix B. */
/*          On exit, if N <= P, the upper triangle of the subarray */
/*          B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */
/*          if N > P, the elements on and above the (N-P)th subdiagonal */
/*          contain the N-by-P upper trapezoidal matrix T. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B. LDB >= max(1,N). */

/*  D       (input/output) COMPLEX array, dimension (N) */
/*          On entry, D is the left hand side of the GLM equation. */
/*          On exit, D is destroyed. */

/*  X       (output) COMPLEX array, dimension (M) */
/*  Y       (output) COMPLEX array, dimension (P) */
/*          On exit, X and Y are the solutions of the GLM problem. */
/*          When N = 0 both vectors are set to zero. */

/*  WORK    (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) */
/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK. LWORK >= max(1,N+M+P). */
/*          For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB, */
/*          where NB is an upper bound for the optimal blocksizes for */
/*          CGEQRF, CGERQF, CUNMQR and CUNMRQ. */

/*          If LWORK = -1, then a workspace query is assumed; the routine */
/*          only calculates the optimal size of the WORK array, returns */
/*          this value as the first entry of the WORK array, and no error */
/*          message related to LWORK is issued by XERBLA. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
/*          = 1:  CTRTRS reported a singular matrix while solving */
/*                T22*y2 = d2, where T22 is the bottom (N-M) by (N-M) */
/*                part of the upper trapezoidal factor T associated */
/*                with B in the generalized QR factorization of the */
/*                pair (A, B), so that rank( A B ) < N; the least */
/*                squares solution could not be computed. */
/*          = 2:  CTRTRS reported a singular matrix while solving */
/*                R11*x = d1, where R11 is the upper triangular factor */
/*                R associated with A in the generalized QR */
/*                factorization of the pair (A, B), so that */
/*                rank(A) < M; the least squares solution could not */
/*                be computed. */
/*          NOTE: the LAPACK 3.2 header text lists the meanings of 1 */
/*          and 2 the other way round; the description above is what */
/*          this code actually returns. */

/*  =================================================================== */

/*     Test the input parameters */

    /* Parameter adjustments: shift the pointers so that the 1-based */
    /* Fortran indices can be used unchanged below. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --d__;
    --x;
    --y;
    --work;

    /* Function Body */
    *info = 0;
    np = min(*n,*p);
    lquery = *lwork == -1;
    if (*n < 0) {
	*info = -1;
    } else if (*m < 0 || *m > *n) {
	*info = -2;
    } else if (*p < 0 || *p < *n - *m) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -7;
    }

/*     Calculate workspace */

    if (*info == 0) {
	if (*n == 0) {
	    lwkmin = 1;
	    lwkopt = 1;
	} else {
	    nb1 = ilaenv_(&c__1, "CGEQRF", " ", n, m, &c_n1, &c_n1);
	    nb2 = ilaenv_(&c__1, "CGERQF", " ", n, m, &c_n1, &c_n1);
	    nb3 = ilaenv_(&c__1, "CUNMQR", " ", n, m, p, &c_n1);
	    nb4 = ilaenv_(&c__1, "CUNMRQ", " ", n, m, p, &c_n1);
/* Computing MAX */
	    i__1 = max(nb1,nb2), i__1 = max(i__1,nb3);
	    nb = max(i__1,nb4);
	    lwkmin = *m + *n + *p;
	    lwkopt = *m + np + max(*n,*p) * nb;
	}
	work[1].r = (real) lwkopt, work[1].i = 0.f;

	if (*lwork < lwkmin && ! lquery) {
	    *info = -12;
	}
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CGGGLM", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible. */
/*     N = 0 forces M = 0 (checked above) but P may still be positive, */
/*     so the outputs are zeroed explicitly instead of being left */
/*     uninitialized. */

    if (*n == 0) {
	i__1 = *m;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    x[i__].r = 0.f, x[i__].i = 0.f;
	}
	i__1 = *p;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    y[i__].r = 0.f, y[i__].i = 0.f;
	}
	return 0;
    }

/*     Compute the GQR factorization of matrices A and B: */

/*            Q'*A = ( R11 ) M,    Q'*B*Z' = ( T11   T12 ) M */
/*                   (  0  ) N-M             (  0    T22 ) N-M */
/*                      M                     M+P-N  N-M */

/*     where R11 and T22 are upper triangular, and Q and Z are */
/*     unitary. */

/*     WORK(1:M) and WORK(M+1:M+NP) are passed as the two scalar-factor */
/*     arrays of CGGQRF; the remainder is scratch space. */

    i__1 = *lwork - *m - np;
    cggqrf_(n, m, p, &a[a_offset], lda, &work[1], &b[b_offset], ldb, &work[*m 
	    + 1], &work[*m + np + 1], &i__1, info);
    i__1 = *m + np + 1;
    lopt = (integer) work[i__1].r;

/*     Update left-hand-side vector d = Q'*d = ( d1 ) M */
/*                                             ( d2 ) N-M */

    i__1 = max(1,*n);
    i__2 = *lwork - *m - np;
    cunmqr_("Left", "Conjugate transpose", n, &c__1, m, &a[a_offset], lda, &
	    work[1], &d__[1], &i__1, &work[*m + np + 1], &i__2, info);
/* Computing MAX */
    i__3 = *m + np + 1;
    i__1 = lopt, i__2 = (integer) work[i__3].r;
    lopt = max(i__1,i__2);

/*     Solve T22*y2 = d2 for y2 */

    if (*n > *m) {
	i__1 = *n - *m;
	i__2 = *n - *m;
	ctrtrs_("Upper", "No transpose", "Non unit", &i__1, &c__1, &b[*m + 1 
		+ (*m + *p - *n + 1) * b_dim1], ldb, &d__[*m + 1], &i__2, 
		info);

	if (*info > 0) {
	    *info = 1;
	    return 0;
	}

	i__1 = *n - *m;
	ccopy_(&i__1, &d__[*m + 1], &c__1, &y[*m + *p - *n + 1], &c__1);
    }

/*     Set y1 = 0 */

    i__1 = *m + *p - *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = i__;
	y[i__2].r = 0.f, y[i__2].i = 0.f;
/* L10: */
    }

/*     Update d1 = d1 - T12*y2 */

    i__1 = *n - *m;
    q__1.r = -1.f, q__1.i = -0.f;
    cgemv_("No transpose", m, &i__1, &q__1, &b[(*m + *p - *n + 1) * b_dim1 + 
	    1], ldb, &y[*m + *p - *n + 1], &c__1, &c_b2, &d__[1], &c__1);

/*     Solve triangular system: R11*x = d1 */

    if (*m > 0) {
	ctrtrs_("Upper", "No Transpose", "Non unit", m, &c__1, &a[a_offset], 
		lda, &d__[1], m, info);

	if (*info > 0) {
	    *info = 2;
	    return 0;
	}

/*        Copy D to X */

	ccopy_(m, &d__[1], &c__1, &x[1], &c__1);
    }

/*     Backward transformation y = Z'*y */

/* Computing MAX */
    i__1 = 1, i__2 = *n - *p + 1;
    i__3 = max(1,*p);
    i__4 = *lwork - *m - np;
    cunmrq_("Left", "Conjugate transpose", p, &c__1, &np, &b[max(i__1, i__2)+ 
	    b_dim1], ldb, &work[*m + 1], &y[1], &i__3, &work[*m + np + 1], &
	    i__4, info);
/* Computing MAX */
    i__4 = *m + np + 1;
    i__2 = lopt, i__3 = (integer) work[i__4].r;
    i__1 = *m + np + max(i__2,i__3);
    work[1].r = (real) i__1, work[1].i = 0.f;

    return 0;

/*     End of CGGGLM */

} /* cggglm_ */
Пример #11
0
/* Subroutine */ int chetrs_(char *uplo, integer *n, integer *nrhs, complex *
	a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer *
	info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CHETRS solves a system of linear equations A*X = B with a complex   
    Hermitian matrix A using the factorization A = U*D*U**H or   
    A = L*D*L**H computed by CHETRF.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the details of the factorization are stored   
            as an upper or lower triangular matrix.   
            = 'U':  Upper triangular, form is A = U*D*U**H;   
            = 'L':  Lower triangular, form is A = L*D*L**H.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrix B.  NRHS >= 0.   

    A       (input) COMPLEX array, dimension (LDA,N)   
            The block diagonal matrix D and the multipliers used to   
            obtain the factor U or L as computed by CHETRF.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    IPIV    (input) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D   
            as determined by CHETRF.  As read by this routine:   
            IPIV(K) > 0 marks a 1 x 1 diagonal block whose row K was   
            interchanged with row IPIV(K); IPIV(K) <= 0 marks a 2 x 2   
            diagonal block and -IPIV(K) is the interchanged row.   

    B       (input/output) COMPLEX array, dimension (LDB,NRHS)   
            On entry, the right hand side matrix B.   
            On exit, the solution matrix X.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    Implementation note: all working variables are automatic, so the   
    routine keeps no state between calls.   

    =====================================================================   
*/
    /* Table of constant values (passed by address to the BLAS) */
    static complex c_b1 = {1.f,0.f};
    static integer c__1 = 1;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
    complex q__1, q__2, q__3;
    /* Builtin functions */
    void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *);
    /* Local variables */
    complex akm1k;
    integer j, k;
    real s;
    extern logical lsame_(char *, char *);
    complex denom;
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
	    , complex *, integer *, complex *, integer *, complex *, complex *
	    , integer *), cgeru_(integer *, integer *, complex *, 
	    complex *, integer *, complex *, integer *, complex *, integer *),
	     cswap_(integer *, complex *, integer *, complex *, integer *);
    logical upper;
    complex ak, bk;
    integer kp;
    extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), 
	    csscal_(integer *, real *, complex *, integer *), xerbla_(char *, 
	    integer *);
    complex akm1, bkm1;
/* Column-major, 1-based element access helpers; they rely on the local */
/* a_dim1 / b_dim1 and are removed again right after the function. */
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]
#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]


    /* Parameter adjustments: shift the pointers so that the 1-based */
    /* Fortran indices can be used unchanged below. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CHETRS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
	return 0;
    }

    if (upper) {

/*        Solve A*X = B, where A = U*D*U'.   

          First solve U*D*X = B, overwriting B with X.   

          K is the main loop index, decreasing from N to 1 in steps of   
          1 or 2, depending on the size of the diagonal blocks. */

	k = *n;
	while (k >= 1) {
	    if (ipiv[k] > 0) {

/*               1 x 1 diagonal block   

                 Interchange rows K and IPIV(K). */

		kp = ipiv[k];
		if (kp != k) {
		    cswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb);
		}

/*               Multiply by inv(U(K)), where U(K) is the transformation   
                 stored in column K of A. */

		i__1 = k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		cgeru_(&i__1, nrhs, &q__1, &a_ref(1, k), &c__1, &b_ref(k, 1), 
			ldb, &b_ref(1, 1), ldb);

/*               Multiply by the inverse of the diagonal block   
                 (only the real part of A(K,K) is used). */

		i__1 = a_subscr(k, k);
		s = 1.f / a[i__1].r;
		csscal_(nrhs, &s, &b_ref(k, 1), ldb);
		--k;
	    } else {

/*               2 x 2 diagonal block   

                 Interchange rows K-1 and -IPIV(K). */

		kp = -ipiv[k];
		if (kp != k - 1) {
		    cswap_(nrhs, &b_ref(k - 1, 1), ldb, &b_ref(kp, 1), ldb);
		}

/*               Multiply by inv(U(K)), where U(K) is the transformation   
                 stored in columns K-1 and K of A. */

		i__1 = k - 2;
		q__1.r = -1.f, q__1.i = 0.f;
		cgeru_(&i__1, nrhs, &q__1, &a_ref(1, k), &c__1, &b_ref(k, 1), 
			ldb, &b_ref(1, 1), ldb);
		i__1 = k - 2;
		q__1.r = -1.f, q__1.i = 0.f;
		cgeru_(&i__1, nrhs, &q__1, &a_ref(1, k - 1), &c__1, &b_ref(k 
			- 1, 1), ldb, &b_ref(1, 1), ldb);

/*               Multiply by the inverse of the diagonal block.   
                 Everything is scaled by the off-diagonal entry AKM1K   
                 so that the 2 x 2 solve needs only DENOM = AKM1*AK - 1. */

		i__1 = a_subscr(k - 1, k);
		akm1k.r = a[i__1].r, akm1k.i = a[i__1].i;
		c_div(&q__1, &a_ref(k - 1, k - 1), &akm1k);
		akm1.r = q__1.r, akm1.i = q__1.i;
		r_cnjg(&q__2, &akm1k);
		c_div(&q__1, &a_ref(k, k), &q__2);
		ak.r = q__1.r, ak.i = q__1.i;
		q__2.r = akm1.r * ak.r - akm1.i * ak.i, q__2.i = akm1.r * 
			ak.i + akm1.i * ak.r;
		q__1.r = q__2.r - 1.f, q__1.i = q__2.i + 0.f;
		denom.r = q__1.r, denom.i = q__1.i;
		i__1 = *nrhs;
		for (j = 1; j <= i__1; ++j) {
		    c_div(&q__1, &b_ref(k - 1, j), &akm1k);
		    bkm1.r = q__1.r, bkm1.i = q__1.i;
		    r_cnjg(&q__2, &akm1k);
		    c_div(&q__1, &b_ref(k, j), &q__2);
		    bk.r = q__1.r, bk.i = q__1.i;
		    i__2 = b_subscr(k - 1, j);
		    q__3.r = ak.r * bkm1.r - ak.i * bkm1.i, q__3.i = ak.r * 
			    bkm1.i + ak.i * bkm1.r;
		    q__2.r = q__3.r - bk.r, q__2.i = q__3.i - bk.i;
		    c_div(&q__1, &q__2, &denom);
		    b[i__2].r = q__1.r, b[i__2].i = q__1.i;
		    i__2 = b_subscr(k, j);
		    q__3.r = akm1.r * bk.r - akm1.i * bk.i, q__3.i = akm1.r * 
			    bk.i + akm1.i * bk.r;
		    q__2.r = q__3.r - bkm1.r, q__2.i = q__3.i - bkm1.i;
		    c_div(&q__1, &q__2, &denom);
		    b[i__2].r = q__1.r, b[i__2].i = q__1.i;
		}
		k += -2;
	    }
	}

/*        Next solve U'*X = B, overwriting B with X.   

          K is the main loop index, increasing from 1 to N in steps of   
          1 or 2, depending on the size of the diagonal blocks. */

	k = 1;
	while (k <= *n) {
	    if (ipiv[k] > 0) {

/*               1 x 1 diagonal block   

                 Multiply by inv(U'(K)), where U(K) is the transformation   
                 stored in column K of A. */

		if (k > 1) {
		    clacgv_(nrhs, &b_ref(k, 1), ldb);
		    i__1 = k - 1;
		    q__1.r = -1.f, q__1.i = 0.f;
		    cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &b[
			    b_offset], ldb, &a_ref(1, k), &c__1, &c_b1, &
			    b_ref(k, 1), ldb);
		    clacgv_(nrhs, &b_ref(k, 1), ldb);
		}

/*               Interchange rows K and IPIV(K). */

		kp = ipiv[k];
		if (kp != k) {
		    cswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb);
		}
		++k;
	    } else {

/*               2 x 2 diagonal block   

                 Multiply by inv(U'(K+1)), where U(K+1) is the   
                 transformation stored in columns K and K+1 of A. */

		if (k > 1) {
		    clacgv_(nrhs, &b_ref(k, 1), ldb);
		    i__1 = k - 1;
		    q__1.r = -1.f, q__1.i = 0.f;
		    cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &b[
			    b_offset], ldb, &a_ref(1, k), &c__1, &c_b1, &
			    b_ref(k, 1), ldb);
		    clacgv_(nrhs, &b_ref(k, 1), ldb);

		    clacgv_(nrhs, &b_ref(k + 1, 1), ldb);
		    i__1 = k - 1;
		    q__1.r = -1.f, q__1.i = 0.f;
		    cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &b[
			    b_offset], ldb, &a_ref(1, k + 1), &c__1, &c_b1, &
			    b_ref(k + 1, 1), ldb);
		    clacgv_(nrhs, &b_ref(k + 1, 1), ldb);
		}

/*               Interchange rows K and -IPIV(K). */

		kp = -ipiv[k];
		if (kp != k) {
		    cswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb);
		}
		k += 2;
	    }
	}
    } else {

/*        Solve A*X = B, where A = L*D*L'.   

          First solve L*D*X = B, overwriting B with X.   

          K is the main loop index, increasing from 1 to N in steps of   
          1 or 2, depending on the size of the diagonal blocks. */

	k = 1;
	while (k <= *n) {
	    if (ipiv[k] > 0) {

/*               1 x 1 diagonal block   

                 Interchange rows K and IPIV(K). */

		kp = ipiv[k];
		if (kp != k) {
		    cswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb);
		}

/*               Multiply by inv(L(K)), where L(K) is the transformation   
                 stored in column K of A. */

		if (k < *n) {
		    i__1 = *n - k;
		    q__1.r = -1.f, q__1.i = 0.f;
		    cgeru_(&i__1, nrhs, &q__1, &a_ref(k + 1, k), &c__1, &
			    b_ref(k, 1), ldb, &b_ref(k + 1, 1), ldb);
		}

/*               Multiply by the inverse of the diagonal block   
                 (only the real part of A(K,K) is used). */

		i__1 = a_subscr(k, k);
		s = 1.f / a[i__1].r;
		csscal_(nrhs, &s, &b_ref(k, 1), ldb);
		++k;
	    } else {

/*               2 x 2 diagonal block   

                 Interchange rows K+1 and -IPIV(K). */

		kp = -ipiv[k];
		if (kp != k + 1) {
		    cswap_(nrhs, &b_ref(k + 1, 1), ldb, &b_ref(kp, 1), ldb);
		}

/*               Multiply by inv(L(K)), where L(K) is the transformation   
                 stored in columns K and K+1 of A. */

		if (k < *n - 1) {
		    i__1 = *n - k - 1;
		    q__1.r = -1.f, q__1.i = 0.f;
		    cgeru_(&i__1, nrhs, &q__1, &a_ref(k + 2, k), &c__1, &
			    b_ref(k, 1), ldb, &b_ref(k + 2, 1), ldb);
		    i__1 = *n - k - 1;
		    q__1.r = -1.f, q__1.i = 0.f;
		    cgeru_(&i__1, nrhs, &q__1, &a_ref(k + 2, k + 1), &c__1, &
			    b_ref(k + 1, 1), ldb, &b_ref(k + 2, 1), ldb);
		}

/*               Multiply by the inverse of the diagonal block.   
                 Everything is scaled by the off-diagonal entry AKM1K   
                 so that the 2 x 2 solve needs only DENOM = AKM1*AK - 1. */

		i__1 = a_subscr(k + 1, k);
		akm1k.r = a[i__1].r, akm1k.i = a[i__1].i;
		r_cnjg(&q__2, &akm1k);
		c_div(&q__1, &a_ref(k, k), &q__2);
		akm1.r = q__1.r, akm1.i = q__1.i;
		c_div(&q__1, &a_ref(k + 1, k + 1), &akm1k);
		ak.r = q__1.r, ak.i = q__1.i;
		q__2.r = akm1.r * ak.r - akm1.i * ak.i, q__2.i = akm1.r * 
			ak.i + akm1.i * ak.r;
		q__1.r = q__2.r - 1.f, q__1.i = q__2.i + 0.f;
		denom.r = q__1.r, denom.i = q__1.i;
		i__1 = *nrhs;
		for (j = 1; j <= i__1; ++j) {
		    r_cnjg(&q__2, &akm1k);
		    c_div(&q__1, &b_ref(k, j), &q__2);
		    bkm1.r = q__1.r, bkm1.i = q__1.i;
		    c_div(&q__1, &b_ref(k + 1, j), &akm1k);
		    bk.r = q__1.r, bk.i = q__1.i;
		    i__2 = b_subscr(k, j);
		    q__3.r = ak.r * bkm1.r - ak.i * bkm1.i, q__3.i = ak.r * 
			    bkm1.i + ak.i * bkm1.r;
		    q__2.r = q__3.r - bk.r, q__2.i = q__3.i - bk.i;
		    c_div(&q__1, &q__2, &denom);
		    b[i__2].r = q__1.r, b[i__2].i = q__1.i;
		    i__2 = b_subscr(k + 1, j);
		    q__3.r = akm1.r * bk.r - akm1.i * bk.i, q__3.i = akm1.r * 
			    bk.i + akm1.i * bk.r;
		    q__2.r = q__3.r - bkm1.r, q__2.i = q__3.i - bkm1.i;
		    c_div(&q__1, &q__2, &denom);
		    b[i__2].r = q__1.r, b[i__2].i = q__1.i;
		}
		k += 2;
	    }
	}

/*        Next solve L'*X = B, overwriting B with X.   

          K is the main loop index, decreasing from N to 1 in steps of   
          1 or 2, depending on the size of the diagonal blocks. */

	k = *n;
	while (k >= 1) {
	    if (ipiv[k] > 0) {

/*               1 x 1 diagonal block   

                 Multiply by inv(L'(K)), where L(K) is the transformation   
                 stored in column K of A. */

		if (k < *n) {
		    clacgv_(nrhs, &b_ref(k, 1), ldb);
		    i__1 = *n - k;
		    q__1.r = -1.f, q__1.i = 0.f;
		    cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &b_ref(
			    k + 1, 1), ldb, &a_ref(k + 1, k), &c__1, &c_b1, &
			    b_ref(k, 1), ldb);
		    clacgv_(nrhs, &b_ref(k, 1), ldb);
		}

/*               Interchange rows K and IPIV(K). */

		kp = ipiv[k];
		if (kp != k) {
		    cswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb);
		}
		--k;
	    } else {

/*               2 x 2 diagonal block   

                 Multiply by inv(L'(K-1)), where L(K-1) is the   
                 transformation stored in columns K-1 and K of A. */

		if (k < *n) {
		    clacgv_(nrhs, &b_ref(k, 1), ldb);
		    i__1 = *n - k;
		    q__1.r = -1.f, q__1.i = 0.f;
		    cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &b_ref(
			    k + 1, 1), ldb, &a_ref(k + 1, k), &c__1, &c_b1, &
			    b_ref(k, 1), ldb);
		    clacgv_(nrhs, &b_ref(k, 1), ldb);

		    clacgv_(nrhs, &b_ref(k - 1, 1), ldb);
		    i__1 = *n - k;
		    q__1.r = -1.f, q__1.i = 0.f;
		    cgemv_("Conjugate transpose", &i__1, nrhs, &q__1, &b_ref(
			    k + 1, 1), ldb, &a_ref(k + 1, k - 1), &c__1, &
			    c_b1, &b_ref(k - 1, 1), ldb);
		    clacgv_(nrhs, &b_ref(k - 1, 1), ldb);
		}

/*               Interchange rows K and -IPIV(K). */

		kp = -ipiv[k];
		if (kp != k) {
		    cswap_(nrhs, &b_ref(k, 1), ldb, &b_ref(kp, 1), ldb);
		}
		k += -2;
	    }
	}
    }

    return 0;

/*     End of CHETRS */

} /* chetrs_ */

#undef b_ref
#undef b_subscr
#undef a_ref
#undef a_subscr
Пример #12
0
/* Subroutine */
int clasyf_(char *uplo, integer *n, integer *nb, integer *kb, complex *a, integer *lda, integer *ipiv, complex *w, integer *ldw, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5;
    real r__1, r__2, r__3, r__4;
    complex q__1, q__2, q__3;
    /* Builtin functions */
    double sqrt(doublereal), r_imag(complex *);
    void c_div(complex *, complex *, complex *);
    /* Local variables */
    integer j, k;
    complex t, r1, d11, d21, d22;
    integer jb, jj, kk, jp, kp, kw, kkw, imax, jmax;
    real alpha;
    extern /* Subroutine */
    int cscal_(integer *, complex *, complex *, integer *), cgemm_(char *, char *, integer *, integer *, integer * , complex *, complex *, integer *, complex *, integer *, complex * , complex *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */
    int cgemv_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *), ccopy_(integer *, complex *, integer *, complex *, integer *), cswap_(integer *, complex *, integer *, complex *, integer *);
    integer kstep;
    real absakk;
    extern integer icamax_(integer *, complex *, integer *);
    real colmax, rowmax;
    /* -- LAPACK computational routine (version 3.5.0) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* November 2013 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Intrinsic Functions .. */
    /* .. */
    /* .. Statement Functions .. */
    /* .. */
    /* .. Statement Function definitions .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --ipiv;
    w_dim1 = *ldw;
    w_offset = 1 + w_dim1;
    w -= w_offset;
    /* Function Body */
    *info = 0;
    /* Initialize ALPHA for use in choosing pivot block size. */
    alpha = (sqrt(17.f) + 1.f) / 8.f;
    if (lsame_(uplo, "U"))
    {
        /* Factorize the trailing columns of A using the upper triangle */
        /* of A and working backwards, and compute the matrix W = U12*D */
        /* for use in updating A11 */
        /* K is the main loop index, decreasing from N in steps of 1 or 2 */
        /* KW is the column of W which corresponds to column K of A */
        k = *n;
L10:
        kw = *nb + k - *n;
        /* Exit from loop */
        if (k <= *n - *nb + 1 && *nb < *n || k < 1)
        {
            goto L30;
        }
        /* Copy column K of A to column KW of W and update it */
        ccopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1);
        if (k < *n)
        {
            i__1 = *n - k;
            q__1.r = -1.f;
            q__1.i = -0.f; // , expr subst
            cgemv_("No transpose", &k, &i__1, &q__1, &a[(k + 1) * a_dim1 + 1], lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw * w_dim1 + 1], &c__1);
        }
        kstep = 1;
        /* Determine rows and columns to be interchanged and whether */
        /* a 1-by-1 or 2-by-2 pivot block will be used */
        i__1 = k + kw * w_dim1;
        absakk = (r__1 = w[i__1].r, f2c_abs(r__1)) + (r__2 = r_imag(&w[k + kw * w_dim1]), f2c_abs(r__2));
        /* IMAX is the row-index of the largest off-diagonal element in */
        /* column K, and COLMAX is its absolute value. */
        /* Determine both COLMAX and IMAX. */
        if (k > 1)
        {
            i__1 = k - 1;
            imax = icamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
            i__1 = imax + kw * w_dim1;
            colmax = (r__1 = w[i__1].r, f2c_abs(r__1)) + (r__2 = r_imag(&w[imax + kw * w_dim1]), f2c_abs(r__2));
        }
        else
        {
            colmax = 0.f;
        }
        if (max(absakk,colmax) == 0.f)
        {
            /* Column K is zero or underflow: set INFO and continue */
            if (*info == 0)
            {
                *info = k;
            }
            kp = k;
        }
        else
        {
            if (absakk >= alpha * colmax)
            {
                /* no interchange, use 1-by-1 pivot block */
                kp = k;
            }
            else
            {
                /* Copy column IMAX to column KW-1 of W and update it */
                ccopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
                i__1 = k - imax;
                ccopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax + 1 + (kw - 1) * w_dim1], &c__1);
                if (k < *n)
                {
                    i__1 = *n - k;
                    q__1.r = -1.f;
                    q__1.i = -0.f; // , expr subst
                    cgemv_("No transpose", &k, &i__1, &q__1, &a[(k + 1) * a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1], ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], &c__1);
                }
                /* JMAX is the column-index of the largest off-diagonal */
                /* element in row IMAX, and ROWMAX is its absolute value */
                i__1 = k - imax;
                jmax = imax + icamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1);
                i__1 = jmax + (kw - 1) * w_dim1;
                rowmax = (r__1 = w[i__1].r, f2c_abs(r__1)) + (r__2 = r_imag(&w[ jmax + (kw - 1) * w_dim1]), f2c_abs(r__2));
                if (imax > 1)
                {
                    i__1 = imax - 1;
                    jmax = icamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
                    /* Computing MAX */
                    i__1 = jmax + (kw - 1) * w_dim1;
                    r__3 = rowmax;
                    r__4 = (r__1 = w[i__1].r, f2c_abs(r__1)) + ( r__2 = r_imag(&w[jmax + (kw - 1) * w_dim1]), f2c_abs( r__2)); // , expr subst
                    rowmax = max(r__3,r__4);
                }
                if (absakk >= alpha * colmax * (colmax / rowmax))
                {
                    /* no interchange, use 1-by-1 pivot block */
                    kp = k;
                }
                else /* if(complicated condition) */
                {
                    i__1 = imax + (kw - 1) * w_dim1;
                    if ((r__1 = w[i__1].r, f2c_abs(r__1)) + (r__2 = r_imag(&w[ imax + (kw - 1) * w_dim1]), f2c_abs(r__2)) >= alpha * rowmax)
                    {
                        /* interchange rows and columns K and IMAX, use 1-by-1 */
                        /* pivot block */
                        kp = imax;
                        /* copy column KW-1 of W to column KW of W */
                        ccopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1);
                    }
                    else
                    {
                        /* interchange rows and columns K-1 and IMAX, use 2-by-2 */
                        /* pivot block */
                        kp = imax;
                        kstep = 2;
                    }
                }
            }
            /* ============================================================ */
            /* KK is the column of A where pivoting step stopped */
            kk = k - kstep + 1;
            /* KKW is the column of W which corresponds to column KK of A */
            kkw = *nb + kk - *n;
            /* Interchange rows and columns KP and KK. */
            /* Updated column KP is already stored in column KKW of W. */
            if (kp != kk)
            {
                /* Copy non-updated column KK to column KP of submatrix A */
                /* at step K. No need to copy element into column K */
                /* (or K and K-1 for 2-by-2 pivot) of A, since these columns */
                /* will be later overwritten. */
                i__1 = kp + kp * a_dim1;
                i__2 = kk + kk * a_dim1;
                a[i__1].r = a[i__2].r;
                a[i__1].i = a[i__2].i; // , expr subst
                i__1 = kk - 1 - kp;
                ccopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda);
                if (kp > 1)
                {
                    i__1 = kp - 1;
                    ccopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1);
                }
                /* Interchange rows KK and KP in last K+1 to N columns of A */
                /* (columns K (or K and K-1 for 2-by-2 pivot) of A will be */
                /* later overwritten). Interchange rows KK and KP */
                /* in last KKW to NB columns of W. */
                if (k < *n)
                {
                    i__1 = *n - k;
                    cswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k + 1) * a_dim1], lda);
                }
                i__1 = *n - kk + 1;
                cswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw * w_dim1], ldw);
            }
            if (kstep == 1)
            {
                /* 1-by-1 pivot block D(k): column kw of W now holds */
                /* W(kw) = U(k)*D(k), */
                /* where U(k) is the k-th column of U */
                /* Store subdiag. elements of column U(k) */
                /* and 1-by-1 block D(k) in column k of A. */
                /* NOTE: Diagonal element U(k,k) is a UNIT element */
                /* and not stored. */
                /* A(k,k) := D(k,k) = W(k,kw) */
                /* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k) */
                ccopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], & c__1);
                c_div(&q__1, &c_b1, &a[k + k * a_dim1]);
                r1.r = q__1.r;
                r1.i = q__1.i; // , expr subst
                i__1 = k - 1;
                cscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
            }
            else
            {
                /* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold */
                /* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k) */
                /* where U(k) and U(k-1) are the k-th and (k-1)-th columns */
                /* of U */
                /* Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2 */
                /* block D(k-1:k,k-1:k) in columns k-1 and k of A. */
                /* NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT */
                /* block and not stored. */
                /* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw) */
                /* A(1:k-2,k-1:k) := U(1:k-2,k-1:k) = */
                /* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) ) */
                if (k > 2)
                {
                    /* Compose the columns of the inverse of 2-by-2 pivot */
                    /* block D in the following way to reduce the number */
                    /* of FLOPS when we multiply panel ( W(kw-1) W(kw) ) by */
                    /* this inverse */
                    /* D**(-1) = ( d11 d21 )**(-1) = */
                    /* ( d21 d22 ) */
                    /* = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = */
                    /* ( (-d21 ) ( d11 ) ) */
                    /* = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * */
                    /* * ( ( d22/d21 ) ( -1 ) ) = */
                    /* ( ( -1 ) ( d11/d21 ) ) */
                    /* = 1/d21 * 1/(D22*D11-1) * ( ( D11 ) ( -1 ) ) = */
                    /* ( ( -1 ) ( D22 ) ) */
                    /* = 1/d21 * T * ( ( D11 ) ( -1 ) ) */
                    /* ( ( -1 ) ( D22 ) ) */
                    /* = D21 * ( ( D11 ) ( -1 ) ) */
                    /* ( ( -1 ) ( D22 ) ) */
                    i__1 = k - 1 + kw * w_dim1;
                    d21.r = w[i__1].r;
                    d21.i = w[i__1].i; // , expr subst
                    c_div(&q__1, &w[k + kw * w_dim1], &d21);
                    d11.r = q__1.r;
                    d11.i = q__1.i; // , expr subst
                    c_div(&q__1, &w[k - 1 + (kw - 1) * w_dim1], &d21);
                    d22.r = q__1.r;
                    d22.i = q__1.i; // , expr subst
                    q__3.r = d11.r * d22.r - d11.i * d22.i;
                    q__3.i = d11.r * d22.i + d11.i * d22.r; // , expr subst
                    q__2.r = q__3.r - 1.f;
                    q__2.i = q__3.i - 0.f; // , expr subst
                    c_div(&q__1, &c_b1, &q__2);
                    t.r = q__1.r;
                    t.i = q__1.i; // , expr subst
                    /* Update elements in columns A(k-1) and A(k) as */
                    /* dot products of rows of ( W(kw-1) W(kw) ) and columns */
                    /* of D**(-1) */
                    c_div(&q__1, &t, &d21);
                    d21.r = q__1.r;
                    d21.i = q__1.i; // , expr subst
                    i__1 = k - 2;
                    for (j = 1;
                            j <= i__1;
                            ++j)
                    {
                        i__2 = j + (k - 1) * a_dim1;
                        i__3 = j + (kw - 1) * w_dim1;
                        q__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i;
                        q__3.i = d11.r * w[i__3].i + d11.i * w[i__3] .r; // , expr subst
                        i__4 = j + kw * w_dim1;
                        q__2.r = q__3.r - w[i__4].r;
                        q__2.i = q__3.i - w[i__4] .i; // , expr subst
                        q__1.r = d21.r * q__2.r - d21.i * q__2.i;
                        q__1.i = d21.r * q__2.i + d21.i * q__2.r; // , expr subst
                        a[i__2].r = q__1.r;
                        a[i__2].i = q__1.i; // , expr subst
                        i__2 = j + k * a_dim1;
                        i__3 = j + kw * w_dim1;
                        q__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i;
                        q__3.i = d22.r * w[i__3].i + d22.i * w[i__3] .r; // , expr subst
                        i__4 = j + (kw - 1) * w_dim1;
                        q__2.r = q__3.r - w[i__4].r;
                        q__2.i = q__3.i - w[i__4] .i; // , expr subst
                        q__1.r = d21.r * q__2.r - d21.i * q__2.i;
                        q__1.i = d21.r * q__2.i + d21.i * q__2.r; // , expr subst
                        a[i__2].r = q__1.r;
                        a[i__2].i = q__1.i; // , expr subst
                        /* L20: */
                    }
                }
                /* Copy D(k) to A */
                i__1 = k - 1 + (k - 1) * a_dim1;
                i__2 = k - 1 + (kw - 1) * w_dim1;
                a[i__1].r = w[i__2].r;
                a[i__1].i = w[i__2].i; // , expr subst
                i__1 = k - 1 + k * a_dim1;
                i__2 = k - 1 + kw * w_dim1;
                a[i__1].r = w[i__2].r;
                a[i__1].i = w[i__2].i; // , expr subst
                i__1 = k + k * a_dim1;
                i__2 = k + kw * w_dim1;
                a[i__1].r = w[i__2].r;
                a[i__1].i = w[i__2].i; // , expr subst
            }
        }
        /* Store details of the interchanges in IPIV */
        if (kstep == 1)
        {
            ipiv[k] = kp;
        }
        else
        {
            ipiv[k] = -kp;
            ipiv[k - 1] = -kp;
        }
        /* Decrease K and return to the start of the main loop */
        k -= kstep;
        goto L10;
L30: /* Update the upper triangle of A11 (= A(1:k,1:k)) as */
        /* A11 := A11 - U12*D*U12**T = A11 - U12*W**T */
        /* computing blocks of NB columns at a time */
        i__1 = -(*nb);
        for (j = (k - 1) / *nb * *nb + 1;
                i__1 < 0 ? j >= 1 : j <= 1;
                j += i__1)
        {
            /* Computing MIN */
            i__2 = *nb;
            i__3 = k - j + 1; // , expr subst
            jb = min(i__2,i__3);
            /* Update the upper triangle of the diagonal block */
            i__2 = j + jb - 1;
            for (jj = j;
                    jj <= i__2;
                    ++jj)
            {
                i__3 = jj - j + 1;
                i__4 = *n - k;
                q__1.r = -1.f;
                q__1.i = -0.f; // , expr subst
                cgemv_("No transpose", &i__3, &i__4, &q__1, &a[j + (k + 1) * a_dim1], lda, &w[jj + (kw + 1) * w_dim1], ldw, &c_b1, &a[j + jj * a_dim1], &c__1);
                /* L40: */
            }
            /* Update the rectangular superdiagonal block */
            i__2 = j - 1;
            i__3 = *n - k;
            q__1.r = -1.f;
            q__1.i = -0.f; // , expr subst
            cgemm_("No transpose", "Transpose", &i__2, &jb, &i__3, &q__1, &a[( k + 1) * a_dim1 + 1], lda, &w[j + (kw + 1) * w_dim1], ldw, &c_b1, &a[j * a_dim1 + 1], lda);
            /* L50: */
        }
        /* Put U12 in standard form by partially undoing the interchanges */
        /* in columns k+1:n, looping from k+1 up to n (i.e. in the reverse */
        /* of the order in which the factorization loop above visited them) */
        j = k + 1;
L60: /* Undo the interchanges (if any) of rows JJ and JP at each */
        /* step J */
        /* (Here, J is a diagonal index) */
        jj = j;
        jp = ipiv[j];
        if (jp < 0)
        {
            jp = -jp;
            /* (Here, J is a diagonal index) */
            ++j;
        }
        /* (NOTE: Here, J is used to determine row length. Length N-J+1 */
        /* of the rows to swap back doesn't include diagonal element) */
        ++j;
        if (jp != jj && j <= *n)
        {
            i__1 = *n - j + 1;
            cswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda);
        }
        if (j < *n)
        {
            goto L60;
        }
        /* Set KB to the number of columns factorized */
        *kb = *n - k;
    }
    else
    {
        /* Factorize the leading columns of A using the lower triangle */
        /* of A and working forwards, and compute the matrix W = L21*D */
        /* for use in updating A22 */
        /* K is the main loop index, increasing from 1 in steps of 1 or 2 */
        k = 1;
L70: /* Exit from loop */
        if (k >= *nb && *nb < *n || k > *n)
        {
            goto L90;
        }
        /* Copy column K of A to column K of W and update it */
        i__1 = *n - k + 1;
        ccopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1);
        i__1 = *n - k + 1;
        i__2 = k - 1;
        q__1.r = -1.f;
        q__1.i = -0.f; // , expr subst
        cgemv_("No transpose", &i__1, &i__2, &q__1, &a[k + a_dim1], lda, &w[k + w_dim1], ldw, &c_b1, &w[k + k * w_dim1], &c__1);
        kstep = 1;
        /* Determine rows and columns to be interchanged and whether */
        /* a 1-by-1 or 2-by-2 pivot block will be used */
        i__1 = k + k * w_dim1;
        absakk = (r__1 = w[i__1].r, f2c_abs(r__1)) + (r__2 = r_imag(&w[k + k * w_dim1]), f2c_abs(r__2));
        /* IMAX is the row-index of the largest off-diagonal element in */
        /* column K, and COLMAX is its absolute value. */
        /* Determine both COLMAX and IMAX. */
        if (k < *n)
        {
            i__1 = *n - k;
            imax = k + icamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
            i__1 = imax + k * w_dim1;
            colmax = (r__1 = w[i__1].r, f2c_abs(r__1)) + (r__2 = r_imag(&w[imax + k * w_dim1]), f2c_abs(r__2));
        }
        else
        {
            colmax = 0.f;
        }
        if (max(absakk,colmax) == 0.f)
        {
            /* Column K is zero or underflow: set INFO and continue */
            if (*info == 0)
            {
                *info = k;
            }
            kp = k;
        }
        else
        {
            if (absakk >= alpha * colmax)
            {
                /* no interchange, use 1-by-1 pivot block */
                kp = k;
            }
            else
            {
                /* Copy column IMAX to column K+1 of W and update it */
                i__1 = imax - k;
                ccopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) * w_dim1], &c__1);
                i__1 = *n - imax + 1;
                ccopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k + 1) * w_dim1], &c__1);
                i__1 = *n - k + 1;
                i__2 = k - 1;
                q__1.r = -1.f;
                q__1.i = -0.f; // , expr subst
                cgemv_("No transpose", &i__1, &i__2, &q__1, &a[k + a_dim1], lda, &w[imax + w_dim1], ldw, &c_b1, &w[k + (k + 1) * w_dim1], &c__1);
                /* JMAX is the column-index of the largest off-diagonal */
                /* element in row IMAX, and ROWMAX is its absolute value */
                i__1 = imax - k;
                jmax = k - 1 + icamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1) ;
                i__1 = jmax + (k + 1) * w_dim1;
                rowmax = (r__1 = w[i__1].r, f2c_abs(r__1)) + (r__2 = r_imag(&w[ jmax + (k + 1) * w_dim1]), f2c_abs(r__2));
                if (imax < *n)
                {
                    i__1 = *n - imax;
                    jmax = imax + icamax_(&i__1, &w[imax + 1 + (k + 1) * w_dim1], &c__1);
                    /* Computing MAX */
                    i__1 = jmax + (k + 1) * w_dim1;
                    r__3 = rowmax;
                    r__4 = (r__1 = w[i__1].r, f2c_abs(r__1)) + ( r__2 = r_imag(&w[jmax + (k + 1) * w_dim1]), f2c_abs( r__2)); // , expr subst
                    rowmax = max(r__3,r__4);
                }
                if (absakk >= alpha * colmax * (colmax / rowmax))
                {
                    /* no interchange, use 1-by-1 pivot block */
                    kp = k;
                }
                else /* if(complicated condition) */
                {
                    i__1 = imax + (k + 1) * w_dim1;
                    if ((r__1 = w[i__1].r, f2c_abs(r__1)) + (r__2 = r_imag(&w[ imax + (k + 1) * w_dim1]), f2c_abs(r__2)) >= alpha * rowmax)
                    {
                        /* interchange rows and columns K and IMAX, use 1-by-1 */
                        /* pivot block */
                        kp = imax;
                        /* copy column K+1 of W to column K of W */
                        i__1 = *n - k + 1;
                        ccopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k * w_dim1], &c__1);
                    }
                    else
                    {
                        /* interchange rows and columns K+1 and IMAX, use 2-by-2 */
                        /* pivot block */
                        kp = imax;
                        kstep = 2;
                    }
                }
            }
            /* ============================================================ */
            /* KK is the column of A where pivoting step stopped */
            kk = k + kstep - 1;
            /* Interchange rows and columns KP and KK. */
            /* Updated column KP is already stored in column KK of W. */
            if (kp != kk)
            {
                /* Copy non-updated column KK to column KP of submatrix A */
                /* at step K. No need to copy element into column K */
                /* (or K and K+1 for 2-by-2 pivot) of A, since these columns */
                /* will be later overwritten. */
                i__1 = kp + kp * a_dim1;
                i__2 = kk + kk * a_dim1;
                a[i__1].r = a[i__2].r;
                a[i__1].i = a[i__2].i; // , expr subst
                i__1 = kp - kk - 1;
                ccopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda);
                if (kp < *n)
                {
                    i__1 = *n - kp;
                    ccopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1);
                }
                /* Interchange rows KK and KP in first K-1 columns of A */
                /* (columns K (or K and K+1 for 2-by-2 pivot) of A will be */
                /* later overwritten). Interchange rows KK and KP */
                /* in first KK columns of W. */
                if (k > 1)
                {
                    i__1 = k - 1;
                    cswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
                }
                cswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
            }
            if (kstep == 1)
            {
                /* 1-by-1 pivot block D(k): column k of W now holds */
                /* W(k) = L(k)*D(k), */
                /* where L(k) is the k-th column of L */
                /* Store subdiag. elements of column L(k) */
                /* and 1-by-1 block D(k) in column k of A. */
                /* (NOTE: Diagonal element L(k,k) is a UNIT element */
                /* and not stored) */
                /* A(k,k) := D(k,k) = W(k,k) */
                /* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k) */
                i__1 = *n - k + 1;
                ccopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], & c__1);
                if (k < *n)
                {
                    c_div(&q__1, &c_b1, &a[k + k * a_dim1]);
                    r1.r = q__1.r;
                    r1.i = q__1.i; // , expr subst
                    i__1 = *n - k;
                    cscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
                }
            }
            else
            {
                /* 2-by-2 pivot block D(k): columns k and k+1 of W now hold */
                /* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) */
                /* where L(k) and L(k+1) are the k-th and (k+1)-th columns */
                /* of L */
                /* Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2 */
                /* block D(k:k+1,k:k+1) in columns k and k+1 of A. */
                /* (NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT */
                /* block and not stored) */
                /* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1) */
                /* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) = */
                /* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) ) */
                if (k < *n - 1)
                {
                    /* Compose the columns of the inverse of 2-by-2 pivot */
                    /* block D in the following way to reduce the number */
                    /* of FLOPS when we multiply panel ( W(k) W(k+1) ) by */
                    /* this inverse */
                    /* D**(-1) = ( d11 d21 )**(-1) = */
                    /* ( d21 d22 ) */
                    /* = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = */
                    /* ( (-d21 ) ( d11 ) ) */
                    /* = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * */
                    /* * ( ( d22/d21 ) ( -1 ) ) = */
                    /* ( ( -1 ) ( d11/d21 ) ) */
                    /* = 1/d21 * 1/(D22*D11-1) * ( ( D11 ) ( -1 ) ) = */
                    /* ( ( -1 ) ( D22 ) ) */
                    /* = 1/d21 * T * ( ( D11 ) ( -1 ) ) */
                    /* ( ( -1 ) ( D22 ) ) */
                    /* = D21 * ( ( D11 ) ( -1 ) ) */
                    /* ( ( -1 ) ( D22 ) ) */
                    i__1 = k + 1 + k * w_dim1;
                    d21.r = w[i__1].r;
                    d21.i = w[i__1].i; // , expr subst
                    c_div(&q__1, &w[k + 1 + (k + 1) * w_dim1], &d21);
                    d11.r = q__1.r;
                    d11.i = q__1.i; // , expr subst
                    c_div(&q__1, &w[k + k * w_dim1], &d21);
                    d22.r = q__1.r;
                    d22.i = q__1.i; // , expr subst
                    q__3.r = d11.r * d22.r - d11.i * d22.i;
                    q__3.i = d11.r * d22.i + d11.i * d22.r; // , expr subst
                    q__2.r = q__3.r - 1.f;
                    q__2.i = q__3.i - 0.f; // , expr subst
                    c_div(&q__1, &c_b1, &q__2);
                    t.r = q__1.r;
                    t.i = q__1.i; // , expr subst
                    c_div(&q__1, &t, &d21);
                    d21.r = q__1.r;
                    d21.i = q__1.i; // , expr subst
                    /* Update elements in columns A(k) and A(k+1) as */
                    /* dot products of rows of ( W(k) W(k+1) ) and columns */
                    /* of D**(-1) */
                    i__1 = *n;
                    for (j = k + 2;
                            j <= i__1;
                            ++j)
                    {
                        i__2 = j + k * a_dim1;
                        i__3 = j + k * w_dim1;
                        q__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i;
                        q__3.i = d11.r * w[i__3].i + d11.i * w[i__3] .r; // , expr subst
                        i__4 = j + (k + 1) * w_dim1;
                        q__2.r = q__3.r - w[i__4].r;
                        q__2.i = q__3.i - w[i__4] .i; // , expr subst
                        q__1.r = d21.r * q__2.r - d21.i * q__2.i;
                        q__1.i = d21.r * q__2.i + d21.i * q__2.r; // , expr subst
                        a[i__2].r = q__1.r;
                        a[i__2].i = q__1.i; // , expr subst
                        i__2 = j + (k + 1) * a_dim1;
                        i__3 = j + (k + 1) * w_dim1;
                        q__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i;
                        q__3.i = d22.r * w[i__3].i + d22.i * w[i__3] .r; // , expr subst
                        i__4 = j + k * w_dim1;
                        q__2.r = q__3.r - w[i__4].r;
                        q__2.i = q__3.i - w[i__4] .i; // , expr subst
                        q__1.r = d21.r * q__2.r - d21.i * q__2.i;
                        q__1.i = d21.r * q__2.i + d21.i * q__2.r; // , expr subst
                        a[i__2].r = q__1.r;
                        a[i__2].i = q__1.i; // , expr subst
                        /* L80: */
                    }
                }
                /* Copy D(k) to A */
                i__1 = k + k * a_dim1;
                i__2 = k + k * w_dim1;
                a[i__1].r = w[i__2].r;
                a[i__1].i = w[i__2].i; // , expr subst
                i__1 = k + 1 + k * a_dim1;
                i__2 = k + 1 + k * w_dim1;
                a[i__1].r = w[i__2].r;
                a[i__1].i = w[i__2].i; // , expr subst
                i__1 = k + 1 + (k + 1) * a_dim1;
                i__2 = k + 1 + (k + 1) * w_dim1;
                a[i__1].r = w[i__2].r;
                a[i__1].i = w[i__2].i; // , expr subst
            }
        }
        /* Store details of the interchanges in IPIV */
        if (kstep == 1)
        {
            ipiv[k] = kp;
        }
        else
        {
            ipiv[k] = -kp;
            ipiv[k + 1] = -kp;
        }
        /* Increase K and return to the start of the main loop */
        k += kstep;
        goto L70;
L90: /* Update the lower triangle of A22 (= A(k:n,k:n)) as */
        /* A22 := A22 - L21*D*L21**T = A22 - L21*W**T */
        /* computing blocks of NB columns at a time */
        i__1 = *n;
        i__2 = *nb;
        for (j = k;
                i__2 < 0 ? j >= i__1 : j <= i__1;
                j += i__2)
        {
            /* Computing MIN */
            i__3 = *nb;
            i__4 = *n - j + 1; // , expr subst
            jb = min(i__3,i__4);
            /* Update the lower triangle of the diagonal block */
            i__3 = j + jb - 1;
            for (jj = j;
                    jj <= i__3;
                    ++jj)
            {
                i__4 = j + jb - jj;
                i__5 = k - 1;
                q__1.r = -1.f;
                q__1.i = -0.f; // , expr subst
                cgemv_("No transpose", &i__4, &i__5, &q__1, &a[jj + a_dim1], lda, &w[jj + w_dim1], ldw, &c_b1, &a[jj + jj * a_dim1] , &c__1);
                /* L100: */
            }
            /* Update the rectangular subdiagonal block */
            if (j + jb <= *n)
            {
                i__3 = *n - j - jb + 1;
                i__4 = k - 1;
                q__1.r = -1.f;
                q__1.i = -0.f; // , expr subst
                cgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &q__1, &a[j + jb + a_dim1], lda, &w[j + w_dim1], ldw, &c_b1, &a[j + jb + j * a_dim1], lda);
            }
            /* L110: */
        }
        /* Put L21 in standard form by partially undoing the interchanges */
        /* of rows in columns 1:k-1 looping backwards from k-1 to 1 */
        j = k - 1;
L120: /* Undo the interchanges (if any) of rows JJ and JP at each */
        /* step J */
        /* (Here, J is a diagonal index) */
        jj = j;
        jp = ipiv[j];
        if (jp < 0)
        {
            jp = -jp;
            /* (Here, J is a diagonal index) */
            --j;
        }
        /* (NOTE: Here, J is used to determine row length. Length J */
        /* of the rows to swap back doesn't include diagonal element) */
        --j;
        if (jp != jj && j >= 1)
        {
            cswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda);
        }
        if (j > 1)
        {
            goto L120;
        }
        /* Set KB to the number of columns factorized */
        *kb = k - 1;
    }
    return 0;
    /* End of CLASYF */
}
Пример #13
0
/*! \brief Solves one of the systems of equations A*x = b,   or   A'*x = b
 * 
 * <pre>
 *   Purpose
 *   =======
 *
 *   sp_ctrsv() solves one of the systems of equations   
 *       A*x = b,   or   A'*x = b,
 *   where b and x are n element vectors and A is a sparse unit, or
 *   non-unit, upper or lower triangular matrix.   
 *   No test for singularity or near-singularity is included in this   
 *   routine. Such tests must be performed before calling this routine.   
 *
 *   Parameters   
 *   ==========   
 *
 *   uplo   - (input) char*
 *            On entry, uplo specifies whether the matrix is an upper or   
 *             lower triangular matrix as follows:   
 *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
 *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
 *
 *   trans  - (input) char*
 *             On entry, trans specifies the equations to be solved as   
 *             follows:   
 *                trans = 'N' or 'n'   A*x = b.   
 *                trans = 'T' or 't'   A'*x = b.
 *                trans = 'C' or 'c'   A^H*x = b.   
 *
 *   diag   - (input) char*
 *             On entry, diag specifies whether or not A is unit   
 *             triangular as follows:   
 *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
 *                diag = 'N' or 'n'   A is not assumed to be unit   
 *                                    triangular.   
 *	     
 *   L       - (input) SuperMatrix*
 *	       The factor L from the factorization Pr*A*Pc=L*U. Use
 *             compressed row subscripts storage for supernodes,
 *             i.e., L has types: Stype = SC, Dtype = SLU_C, Mtype = TRLU.
 *
 *   U       - (input) SuperMatrix*
 *	        The factor U from the factorization Pr*A*Pc=L*U.
 *	        U has types: Stype = NC, Dtype = SLU_C, Mtype = TRU.
 *    
 *   x       - (input/output) complex*
 *             Before entry, the incremented array X must contain the n   
 *             element right-hand side vector b. On exit, X is overwritten 
 *             with the solution vector x.
 *
 *   info    - (output) int*
 *             If *info = -i, the i-th argument had an illegal value.
 * </pre>
 */
int
sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
         SuperMatrix *U, complex *x, SuperLUStat_t *stat, int *info)
{
#ifdef _CRAY
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
	 ftcs2 = _cptofcd("N", strlen("N")),
	 ftcs3 = _cptofcd("U", strlen("U"));
#endif
    SCformat *Lstore;
    NCformat *Ustore;
    complex   *Lval, *Uval;
    int incx = 1, incy = 1;
    complex temp;
    complex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
    complex comp_zero = {0.0, 0.0};
    int nrow;
    int fsupc, nsupr, nsupc, luptr, istart, irow;
    int i, k, iptr, jcol;
    complex *work;
    flops_t solve_ops;

    /* Test the input parameters */
    *info = 0;
    if ( strncmp(uplo,"L", 1)!=0 && strncmp(uplo, "U", 1)!=0 ) *info = -1;
    else if ( strncmp(trans, "N", 1)!=0 && strncmp(trans, "T", 1)!=0 && 
              strncmp(trans, "C", 1)!=0) *info = -2;
    else if ( strncmp(diag, "U", 1)!=0 && strncmp(diag, "N", 1)!=0 )
         *info = -3;
    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
    if ( *info ) {
	i = -(*info);
	input_error("sp_ctrsv", &i);
	return 0;
    }

    Lstore = L->Store;
    Lval = Lstore->nzval;
    Ustore = U->Store;
    Uval = Ustore->nzval;
    solve_ops = 0;

    if ( !(work = complexCalloc(L->nrow)) )
	ABORT("Malloc fails for work in sp_ctrsv().");
    
    if ( strncmp(trans, "N", 1)==0 ) {	/* Form x := inv(A)*x. */
	
	if ( strncmp(uplo, "L", 1)==0 ) {
	    /* Form x := inv(L)*x */
    	    if ( L->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = 0; k <= Lstore->nsuper; k++) {
		fsupc = L_FST_SUPC(k);
		istart = L_SUB_START(fsupc);
		nsupr = L_SUB_START(fsupc+1) - istart;
		nsupc = L_FST_SUPC(k+1) - fsupc;
		luptr = L_NZ_START(fsupc);
		nrow = nsupr - nsupc;

                /* 1 c_div costs 10 flops */
	        solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc;
	        solve_ops += 8 * nrow * nsupc;

		if ( nsupc == 1 ) {
		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
			irow = L_SUB(iptr);
			++luptr;
			cc_mult(&comp_zero, &x[fsupc], &Lval[luptr]);
			c_sub(&x[irow], &x[irow], &comp_zero);
		    }
		} else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
		       	&x[fsupc], &incx);
		
		    CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#else
		    ctrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
		       	&x[fsupc], &incx);
		
		    cgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#endif
#else
		    clsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
		
		    cmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
                             &x[fsupc], &work[0] );
#endif		
		
		    iptr = istart + nsupc;
		    for (i = 0; i < nrow; ++i, ++iptr) {
			irow = L_SUB(iptr);
			c_sub(&x[irow], &x[irow], &work[i]); /* Scatter */
			work[i] = comp_zero;

		    }
	 	}
	    } /* for k ... */
	    
	} else {
	    /* Form x := inv(U)*x */
	    
	    if ( U->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = Lstore->nsuper; k >= 0; k--) {
	    	fsupc = L_FST_SUPC(k);
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);
		
                /* 1 c_div costs 10 flops */
    	        solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;

		if ( nsupc == 1 ) {
		    c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
		    for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
			irow = U_SUB(i);
			cc_mult(&comp_zero, &x[fsupc], &Uval[i]);
			c_sub(&x[irow], &x[irow], &comp_zero);
		    }
		} else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		    CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
		       &x[fsupc], &incx);
#else
		    ctrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
                           &x[fsupc], &incx);
#endif
#else		
		    cusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
#endif		

		    for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		        solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    	for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
				i++) {
			    irow = U_SUB(i);
			cc_mult(&comp_zero, &x[jcol], &Uval[i]);
			c_sub(&x[irow], &x[irow], &comp_zero);
		    	}
                    }
		}
	    } /* for k ... */
	    
	}
    } else if ( strncmp(trans, "T", 1)==0 ) { /* Form x := inv(A')*x */
	
	if ( strncmp(uplo, "L", 1)==0 ) {
	    /* Form x := inv(L')*x */
    	    if ( L->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = Lstore->nsuper; k >= 0; --k) {
	    	fsupc = L_FST_SUPC(k);
	    	istart = L_SUB_START(fsupc);
	    	nsupr = L_SUB_START(fsupc+1) - istart;
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		solve_ops += 8 * (nsupr - nsupc) * nsupc;

		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    iptr = istart + nsupc;
		    for (i = L_NZ_START(jcol) + nsupc; 
				i < L_NZ_START(jcol+1); i++) {
			irow = L_SUB(iptr);
			cc_mult(&comp_zero, &x[irow], &Lval[i]);
		    	c_sub(&x[jcol], &x[jcol], &comp_zero);
			iptr++;
		    }
		}
		
		if ( nsupc > 1 ) {
		    solve_ops += 4 * nsupc * (nsupc - 1);
#ifdef _CRAY
                    ftcs1 = _cptofcd("L", strlen("L"));
                    ftcs2 = _cptofcd("T", strlen("T"));
                    ftcs3 = _cptofcd("U", strlen("U"));
		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#else
		    ctrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#endif
		}
	    }
	} else {
	    /* Form x := inv(U')*x */
	    if ( U->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = 0; k <= Lstore->nsuper; k++) {
	    	fsupc = L_FST_SUPC(k);
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
			irow = U_SUB(i);
			cc_mult(&comp_zero, &x[irow], &Uval[i]);
		    	c_sub(&x[jcol], &x[jcol], &comp_zero);
		    }
		}

                /* 1 c_div costs 10 flops */
		solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;

		if ( nsupc == 1 ) {
		    c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
		} else {
#ifdef _CRAY
                    ftcs1 = _cptofcd("U", strlen("U"));
                    ftcs2 = _cptofcd("T", strlen("T"));
                    ftcs3 = _cptofcd("N", strlen("N"));
		    CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#else
		    ctrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#endif
		}
	    } /* for k ... */
	}
    } else { /* Form x := conj(inv(A'))*x */
	
	if ( strncmp(uplo, "L", 1)==0 ) {
	    /* Form x := conj(inv(L'))*x */
    	    if ( L->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = Lstore->nsuper; k >= 0; --k) {
	    	fsupc = L_FST_SUPC(k);
	    	istart = L_SUB_START(fsupc);
	    	nsupr = L_SUB_START(fsupc+1) - istart;
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		solve_ops += 8 * (nsupr - nsupc) * nsupc;

		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    iptr = istart + nsupc;
		    for (i = L_NZ_START(jcol) + nsupc; 
				i < L_NZ_START(jcol+1); i++) {
			irow = L_SUB(iptr);
                        cc_conj(&temp, &Lval[i]);
			cc_mult(&comp_zero, &x[irow], &temp);
		    	c_sub(&x[jcol], &x[jcol], &comp_zero);
			iptr++;
		    }
 		}
 		
 		if ( nsupc > 1 ) {
		    solve_ops += 4 * nsupc * (nsupc - 1);
#ifdef _CRAY
                    ftcs1 = _cptofcd("L", strlen("L"));
                    ftcs2 = _cptofcd(trans, strlen("T"));
                    ftcs3 = _cptofcd("U", strlen("U"));
		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#else
                    ctrsv_("L", trans, "U", &nsupc, &Lval[luptr], &nsupr,
                           &x[fsupc], &incx);
#endif
		}
	    }
	} else {
	    /* Form x := conj(inv(U'))*x */
	    if ( U->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = 0; k <= Lstore->nsuper; k++) {
	    	fsupc = L_FST_SUPC(k);
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
			irow = U_SUB(i);
                        cc_conj(&temp, &Uval[i]);
			cc_mult(&comp_zero, &x[irow], &temp);
		    	c_sub(&x[jcol], &x[jcol], &comp_zero);
		    }
		}

                /* 1 c_div costs 10 flops */
		solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
 
		if ( nsupc == 1 ) {
                    cc_conj(&temp, &Lval[luptr]);
		    c_div(&x[fsupc], &x[fsupc], &temp);
		} else {
#ifdef _CRAY
                    ftcs1 = _cptofcd("U", strlen("U"));
                    ftcs2 = _cptofcd(trans, strlen("T"));
                    ftcs3 = _cptofcd("N", strlen("N"));
		    CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#else
                    ctrsv_("U", trans, "N", &nsupc, &Lval[luptr], &nsupr,
                               &x[fsupc], &incx);
#endif
  		}
  	    } /* for k ... */
  	}
    }

    stat->ops[SOLVE] += solve_ops;
    SUPERLU_FREE(work);
    return 0;
}
Пример #14
0
/* Subroutine */ int cget52_(logical *left, integer *n, complex *a, integer *
	lda, complex *b, integer *ldb, complex *e, integer *lde, complex *
	alpha, complex *beta, complex *work, real *rwork, real *result)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, e_dim1, e_offset, i__1, i__2, 
	    i__3;
    real r__1, r__2, r__3, r__4, r__5, r__6;
    complex q__1;

    /* Builtin functions */
    double r_imag(complex *);
    void r_cnjg(complex *, complex *);

    /* Local variables.  Every one of them is assigned before it is read,
       so they are plain automatics (no state is carried between calls). */
    integer jvec;
    real temp1;
    integer j;
    complex betai;
    real scale, abmax;
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
	    , complex *, integer *, complex *, integer *, complex *, complex *
	    , integer *);
    real anorm, bnorm, enorm;
    char trans[1];
    complex acoeff, bcoeff;
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *);
    complex alphai;
    extern doublereal slamch_(char *);
    real alfmax, safmin;
    char normab[1];
    real safmax, betmax, enrmer, errnrm, ulp;


#define e_subscr(a_1,a_2) (a_2)*e_dim1 + a_1
#define e_ref(a_1,a_2) e[e_subscr(a_1,a_2)]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CGET52  does an eigenvector check for the generalized eigenvalue   
    problem.   

    The basic test for right eigenvectors is:   

                              | b(i) A E(i) -  a(i) B E(i) |   
            RESULT(1) = max   -------------------------------   
                         i    n ulp max( |b(i) A|, |a(i) B| )   

    using the 1-norm.  Here, a(i)/b(i) = w is the i-th generalized   
    eigenvalue of A - w B, or, equivalently, b(i)/a(i) = m is the i-th   
    generalized eigenvalue of m A - B.   

                            H   H  _      _   
    For left eigenvectors, A , B , a, and b  are used.   

    CGET52 also tests the normalization of E.  Each eigenvector is   
    supposed to be normalized so that the maximum "absolute value"   
    of its elements is 1, where in this case, "absolute value"   
    of a complex value x is  |Re(x)| + |Im(x)| ; let us call this   
    maximum "absolute value" norm of a vector v  M(v).   
    if a(i)=b(i)=0, then the eigenvector is set to be the jth coordinate   
    vector. The normalization test is:   

            RESULT(2) =      max       | M(v(i)) - 1 | / ( n ulp )   
                       eigenvectors v(i)   

    Arguments   
    =========   

    LEFT    (input) LOGICAL   
            =.TRUE.:  The eigenvectors in the columns of E are assumed   
                      to be *left* eigenvectors.   
            =.FALSE.: The eigenvectors in the columns of E are assumed   
                      to be *right* eigenvectors.   

    N       (input) INTEGER   
            The size of the matrices.  If it is zero, CGET52 does   
            nothing.  It must be at least zero.   

    A       (input) COMPLEX array, dimension (LDA, N)   
            The matrix A.   

    LDA     (input) INTEGER   
            The leading dimension of A.  It must be at least 1   
            and at least N.   

    B       (input) COMPLEX array, dimension (LDB, N)   
            The matrix B.   

    LDB     (input) INTEGER   
            The leading dimension of B.  It must be at least 1   
            and at least N.   

    E       (input) COMPLEX array, dimension (LDE, N)   
            The matrix of eigenvectors.  It must be O( 1 ).   

    LDE     (input) INTEGER   
            The leading dimension of E.  It must be at least 1 and at   
            least N.   

    ALPHA   (input) COMPLEX array, dimension (N)   
            The values a(i) as described above, which, along with b(i),   
            define the generalized eigenvalues.   

    BETA    (input) COMPLEX array, dimension (N)   
            The values b(i) as described above, which, along with a(i),   
            define the generalized eigenvalues.   

    WORK    (workspace) COMPLEX array, dimension (N**2)   

    RWORK   (workspace) REAL array, dimension (N)   

    RESULT  (output) REAL array, dimension (2)   
            The values computed by the test described above.  If A E or   
            B E is likely to overflow, then RESULT(1:2) is set to   
            10 / ulp.   

    =====================================================================   


       Parameter adjustments (shift pointers so Fortran 1-based indexing
       can be used below) */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    e_dim1 = *lde;
    e_offset = 1 + e_dim1 * 1;
    e -= e_offset;
    --alpha;
    --beta;
    --work;
    --rwork;
    --result;

    /* Function Body */
    result[1] = 0.f;
    result[2] = 0.f;
    if (*n <= 0) {
	return 0;
    }

    safmin = slamch_("Safe minimum");
    safmax = 1.f / safmin;
    ulp = slamch_("Epsilon") * slamch_("Base");

    /* Left eigenvectors use the conjugate transpose of A and B, and the
       matching (infinity) norm; right eigenvectors use A, B and the
       one-norm. */
    if (*left) {
	*(unsigned char *)trans = 'C';
	*(unsigned char *)normab = 'I';
    } else {
	*(unsigned char *)trans = 'N';
	*(unsigned char *)normab = 'O';
    }

/*     Norm of A, B, and E:   

   Computing MAX */
    r__1 = clange_(normab, n, n, &a[a_offset], lda, &rwork[1]);
    anorm = dmax(r__1,safmin);
/* Computing MAX */
    r__1 = clange_(normab, n, n, &b[b_offset], ldb, &rwork[1]);
    bnorm = dmax(r__1,safmin);
/* Computing MAX */
    r__1 = clange_("O", n, n, &e[e_offset], lde, &rwork[1]);
    enorm = dmax(r__1,ulp);
    alfmax = safmax / dmax(1.f,bnorm);
    betmax = safmax / dmax(1.f,anorm);

/*     Compute error matrix.   
       Column i = ( b(i) A - a(i) B ) E(i) / max( |a(i) B| |b(i) A| ) */

    i__1 = *n;
    for (jvec = 1; jvec <= i__1; ++jvec) {
	i__2 = jvec;
	alphai.r = alpha[i__2].r, alphai.i = alpha[i__2].i;
	i__2 = jvec;
	betai.r = beta[i__2].r, betai.i = beta[i__2].i;
/* Computing MAX */
	r__5 = (r__1 = alphai.r, dabs(r__1)) + (r__2 = r_imag(&alphai), dabs(
		r__2)), r__6 = (r__3 = betai.r, dabs(r__3)) + (r__4 = r_imag(&
		betai), dabs(r__4));
	abmax = dmax(r__5,r__6);
	/* Rescale (alpha, beta) when either is too large to multiply by
	   the matrix norms safely, or when both are below 1. */
	if ((r__1 = alphai.r, dabs(r__1)) + (r__2 = r_imag(&alphai), dabs(
		r__2)) > alfmax || (r__3 = betai.r, dabs(r__3)) + (r__4 = 
		r_imag(&betai), dabs(r__4)) > betmax || abmax < 1.f) {
	    scale = 1.f / dmax(abmax,safmin);
	    q__1.r = scale * alphai.r, q__1.i = scale * alphai.i;
	    alphai.r = q__1.r, alphai.i = q__1.i;
	    q__1.r = scale * betai.r, q__1.i = scale * betai.i;
	    betai.r = q__1.r, betai.i = q__1.i;
	}
/* Computing MAX */
	r__5 = ((r__1 = alphai.r, dabs(r__1)) + (r__2 = r_imag(&alphai), dabs(
		r__2))) * bnorm, r__6 = ((r__3 = betai.r, dabs(r__3)) + (r__4 
		= r_imag(&betai), dabs(r__4))) * anorm, r__5 = max(r__5,r__6);
	scale = 1.f / dmax(r__5,safmin);
	q__1.r = scale * betai.r, q__1.i = scale * betai.i;
	acoeff.r = q__1.r, acoeff.i = q__1.i;
	q__1.r = scale * alphai.r, q__1.i = scale * alphai.i;
	bcoeff.r = q__1.r, bcoeff.i = q__1.i;
	if (*left) {
	    r_cnjg(&q__1, &acoeff);
	    acoeff.r = q__1.r, acoeff.i = q__1.i;
	    r_cnjg(&q__1, &bcoeff);
	    bcoeff.r = q__1.r, bcoeff.i = q__1.i;
	}
	/* work(:,jvec) := acoeff * op(A) * E(:,jvec)
	   (c_b1 / c_b2 are file-scope complex constants; presumably 0 and 1
	   respectively -- confirm against their definitions) */
	cgemv_(trans, n, n, &acoeff, &a[a_offset], lda, &e_ref(1, jvec), &
		c__1, &c_b1, &work[*n * (jvec - 1) + 1], &c__1);
	/* work(:,jvec) -= bcoeff * op(B) * E(:,jvec).  B is stored with
	   leading dimension LDB, so LDB (not LDA) must be passed here. */
	q__1.r = -bcoeff.r, q__1.i = -bcoeff.i;
	cgemv_(trans, n, n, &q__1, &b[b_offset], ldb, &e_ref(1, jvec), &c__1, 
		&c_b2, &work[*n * (jvec - 1) + 1], &c__1);
/* L10: */
    }

    errnrm = clange_("One", n, n, &work[1], n, &rwork[1]) / enorm;

/*     Compute RESULT(1) */

    result[1] = errnrm / ulp;

/*     Normalization of E: */

    enrmer = 0.f;
    i__1 = *n;
    for (jvec = 1; jvec <= i__1; ++jvec) {
	temp1 = 0.f;
	i__2 = *n;
	for (j = 1; j <= i__2; ++j) {
/* Computing MAX */
	    i__3 = e_subscr(j, jvec);
	    r__3 = temp1, r__4 = (r__1 = e[i__3].r, dabs(r__1)) + (r__2 = 
		    r_imag(&e_ref(j, jvec)), dabs(r__2));
	    temp1 = dmax(r__3,r__4);
/* L20: */
	}
/* Computing MAX */
	/* Use | M(v) - 1 | as documented, so that vectors whose largest
	   entry is below 1 are flagged as well as those above 1. */
	r__3 = temp1 - 1.f;
	r__1 = enrmer, r__2 = dabs(r__3);
	enrmer = dmax(r__1,r__2);
/* L30: */
    }

/*     Compute RESULT(2) : the normalization error in E. */

    result[2] = enrmer / ((real) (*n) * ulp);

    return 0;

/*     End of CGET52 */

} /* cget52_ */
Пример #15
0
/* Subroutine */ int claror_(char *side, char *init, integer *m, integer *n, 
	complex *a, integer *lda, integer *iseed, complex *x, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    complex q__1, q__2;

    /* Builtin functions */
    double c_abs(complex *);
    void r_cnjg(complex *, complex *);

    /* Local variables.  Every one of them is assigned before it is read,
       so they are plain automatics (no state is carried between calls). */
    integer kbeg, jcol;
    real xabs;
    integer irow, j;
    extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, 
	    complex *, integer *, complex *, integer *, complex *, integer *),
	     cscal_(integer *, complex *, complex *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
	    , complex *, integer *, complex *, integer *, complex *, complex *
	    , integer *);
    complex csign;
    integer ixfrm, itype, nxfrm;
    real xnorm;
    extern real scnrm2_(integer *, complex *, integer *);
    extern /* Subroutine */ int clacgv_(integer *, complex *, integer *);
    extern /* Complex */ VOID clarnd_(complex *, integer *, integer *);
    extern /* Subroutine */ int claset_(char *, integer *, integer *, complex 
	    *, complex *, complex *, integer *), xerbla_(char *, 
	    integer *);
    real factor;
    complex xnorms;


/*  -- LAPACK auxiliary test routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

       CLAROR pre- or post-multiplies an M by N matrix A by a random   
       unitary matrix U, overwriting A. A may optionally be   
       initialized to the identity matrix before multiplying by U.   
       U is generated using the method of G.W. Stewart   
       ( SIAM J. Numer. Anal. 17, 1980, pp. 403-409 ).   
       (BLAS-2 version)   

    Arguments   
    =========   

    SIDE   - CHARACTER*1   
             SIDE specifies whether A is multiplied on the left or right   
             by U.   
         SIDE = 'L'   Multiply A on the left (premultiply) by U   
         SIDE = 'R'   Multiply A on the right (postmultiply) by U*   
         SIDE = 'C'   Multiply A on the left by U and the right by U*   
         SIDE = 'T'   Multiply A on the left by U and the right by U'   
             Not modified.   

    INIT   - CHARACTER*1   
             INIT specifies whether or not A should be initialized to   
             the identity matrix.   
                INIT = 'I'   Initialize A to (a section of) the   
                             identity matrix before applying U.   
                INIT = 'N'   No initialization.  Apply U to the   
                             input matrix A.   

             INIT = 'I' may be used to generate square (i.e., unitary)   
             or rectangular orthogonal matrices (orthogonality being   
             in the sense of CDOTC):   

             For square matrices, M=N, and SIDE may be either 'L' or   
             'R'; the rows will be orthogonal to each other, as will the   
             columns.   
             For rectangular matrices where M < N, SIDE = 'R' will   
             produce a dense matrix whose rows will be orthogonal and   
             whose columns will not, while SIDE = 'L' will produce a   
             matrix whose rows will be orthogonal, and whose first M   
             columns will be orthogonal, the remaining columns being   
             zero.   
             For matrices where M > N, just use the previous   
             explanation, interchanging 'L' and 'R' and "rows" and   
             "columns".   

             Not modified.   

    M      - INTEGER   
             Number of rows of A. Not modified.   

    N      - INTEGER   
             Number of columns of A. Not modified.   

    A      - COMPLEX array, dimension ( LDA, N )   
             Input and output array. Overwritten by U A ( if SIDE = 'L' )   
             or by A U ( if SIDE = 'R' )   
             or by U A U* ( if SIDE = 'C')   
             or by U A U' ( if SIDE = 'T') on exit.   

    LDA    - INTEGER   
             Leading dimension of A. Must be at least MAX ( 1, M ).   
             Not modified.   

    ISEED  - INTEGER array, dimension ( 4 )   
             On entry ISEED specifies the seed of the random number   
             generator. The array elements should be between 0 and 4095;   
             if not they will be reduced mod 4096.  Also, ISEED(4) must   
             be odd.  The random number generator uses a linear   
             congruential sequence limited to small integers, and so   
             should produce machine independent random numbers. The   
             values of ISEED are changed on exit, and can be used in the   
             next call to CLAROR to continue the same random number   
             sequence.   
             Modified.   

    X      - COMPLEX array, dimension ( 3*MAX( M, N ) )   
             Workspace. Of length:   
                 2*M + N if SIDE = 'L',   
                 2*N + M if SIDE = 'R',   
                 3*N     if SIDE = 'C' or 'T'.   
             Modified.   

    INFO   - INTEGER   
             An error flag.  It is set to:   
              0  if no error (including the quick return for M = 0 or   
                 N = 0).   
              1  if CLARND returned a bad random number (installation   
                 problem)   
             -1  if SIDE is not L, R, C, or T.   
             -3  if M is negative.   
             -4  if N is negative or if SIDE is C or T and N is not equal   
                 to M.   
             -6  if LDA is less than M.   

    =====================================================================   


       Parameter adjustments (shift pointers so Fortran 1-based indexing
       can be used below) */
    a_dim1 = *lda;
    a_offset = a_dim1 + 1;
    a -= a_offset;
    --iseed;
    --x;

    /* Function Body */

    /* Set INFO before the quick return so the caller never reads an
       unset error flag. */
    *info = 0;
    if (*n == 0 || *m == 0) {
	return 0;
    }

    itype = 0;
    if (lsame_(side, "L")) {
	itype = 1;
    } else if (lsame_(side, "R")) {
	itype = 2;
    } else if (lsame_(side, "C")) {
	itype = 3;
    } else if (lsame_(side, "T")) {
	itype = 4;
    }

/*     Check for argument errors. */

    if (itype == 0) {
	*info = -1;
    } else if (*m < 0) {
	*info = -3;
    } else if (*n < 0 || ((itype == 3 || itype == 4) && *n != *m)) {
	/* Both two-sided modes ('C' and 'T') apply NXFRM = N reflectors
	   to the rows as well as the columns, so A must be square. */
	*info = -4;
    } else if (*lda < *m) {
	*info = -6;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CLAROR", &i__1);
	return 0;
    }

    /* Order of the random unitary matrix: M when acting on the left only,
       otherwise N. */
    if (itype == 1) {
	nxfrm = *m;
    } else {
	nxfrm = *n;
    }

/*     Initialize A to the identity matrix if desired   
       (c_b1 / c_b2 are file-scope complex constants; presumably 0 and 1   
       respectively -- confirm against their definitions) */

    if (lsame_(init, "I")) {
	claset_("Full", m, n, &c_b1, &c_b2, &a[a_offset], lda);
    }

/*     If no rotation possible, still multiply by   
       a random complex number from the circle |x| = 1   

        2)      Compute Rotation by computing Householder   
                Transformations H(2), H(3), ..., H(n).  Note that the   
                order in which they are computed is irrelevant. */

    i__1 = nxfrm;
    for (j = 1; j <= i__1; ++j) {
	i__2 = j;
	x[i__2].r = 0.f, x[i__2].i = 0.f;
/* L40: */
    }

    i__1 = nxfrm;
    for (ixfrm = 2; ixfrm <= i__1; ++ixfrm) {
	kbeg = nxfrm - ixfrm + 1;

/*        Generate independent normal( 0, 1 ) random numbers */

	i__2 = nxfrm;
	for (j = kbeg; j <= i__2; ++j) {
	    i__3 = j;
	    clarnd_(&q__1, &c__3, &iseed[1]);
	    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
/* L50: */
	}

/*        Generate a Householder transformation from the random vector X */

	xnorm = scnrm2_(&ixfrm, &x[kbeg], &c__1);
	xabs = c_abs(&x[kbeg]);
	if (xabs != 0.f) {
	    i__2 = kbeg;
	    q__1.r = x[i__2].r / xabs, q__1.i = x[i__2].i / xabs;
	    csign.r = q__1.r, csign.i = q__1.i;
	} else {
	    csign.r = 1.f, csign.i = 0.f;
	}
	q__1.r = xnorm * csign.r, q__1.i = xnorm * csign.i;
	xnorms.r = q__1.r, xnorms.i = q__1.i;
	/* X(NXFRM+1 : 2*NXFRM) collects the unit-modulus scale factors
	   applied to A after all reflectors have been applied. */
	i__2 = nxfrm + kbeg;
	q__1.r = -(doublereal)csign.r, q__1.i = -(doublereal)csign.i;
	x[i__2].r = q__1.r, x[i__2].i = q__1.i;
	factor = xnorm * (xnorm + xabs);
	if (dabs(factor) < 1e-20f) {
	    *info = 1;
	    i__2 = -(*info);
	    xerbla_("CLAROR", &i__2);
	    return 0;
	} else {
	    factor = 1.f / factor;
	}
	i__2 = kbeg;
	i__3 = kbeg;
	q__1.r = x[i__3].r + xnorms.r, q__1.i = x[i__3].i + xnorms.i;
	x[i__2].r = q__1.r, x[i__2].i = q__1.i;

/*        Apply Householder transformation to A */

	if (itype == 1 || itype == 3 || itype == 4) {

/*           Apply H(k) on the left of A */

	    cgemv_("C", &ixfrm, n, &c_b2, &a[kbeg + a_dim1], lda, &x[kbeg], &
		    c__1, &c_b1, &x[(nxfrm << 1) + 1], &c__1);
	    q__2.r = factor, q__2.i = 0.f;
	    q__1.r = -(doublereal)q__2.r, q__1.i = -(doublereal)q__2.i;
	    cgerc_(&ixfrm, n, &q__1, &x[kbeg], &c__1, &x[(nxfrm << 1) + 1], &
		    c__1, &a[kbeg + a_dim1], lda);

	}

	if (itype >= 2 && itype <= 4) {

/*           Apply H(k)* (or H(k)') on the right of A */

	    if (itype == 4) {
		clacgv_(&ixfrm, &x[kbeg], &c__1);
	    }

	    cgemv_("N", m, &ixfrm, &c_b2, &a[kbeg * a_dim1 + 1], lda, &x[kbeg]
		    , &c__1, &c_b1, &x[(nxfrm << 1) + 1], &c__1);
	    q__2.r = factor, q__2.i = 0.f;
	    q__1.r = -(doublereal)q__2.r, q__1.i = -(doublereal)q__2.i;
	    cgerc_(m, &ixfrm, &q__1, &x[(nxfrm << 1) + 1], &c__1, &x[kbeg], &
		    c__1, &a[kbeg * a_dim1 + 1], lda);

	}
/* L60: */
    }

    /* Last scale factor: a random point on the unit circle, stored in
       X(2*NXFRM). */
    clarnd_(&q__1, &c__3, &iseed[1]);
    x[1].r = q__1.r, x[1].i = q__1.i;
    xabs = c_abs(&x[1]);
    if (xabs != 0.f) {
	q__1.r = x[1].r / xabs, q__1.i = x[1].i / xabs;
	csign.r = q__1.r, csign.i = q__1.i;
    } else {
	csign.r = 1.f, csign.i = 0.f;
    }
    i__1 = nxfrm << 1;
    x[i__1].r = csign.r, x[i__1].i = csign.i;

/*     Scale the matrix A by D. */

    if (itype == 1 || itype == 3 || itype == 4) {
	i__1 = *m;
	for (irow = 1; irow <= i__1; ++irow) {
	    r_cnjg(&q__1, &x[nxfrm + irow]);
	    cscal_(n, &q__1, &a[irow + a_dim1], lda);
/* L70: */
	}
    }

    if (itype == 2 || itype == 3) {
	i__1 = *n;
	for (jcol = 1; jcol <= i__1; ++jcol) {
	    cscal_(m, &x[nxfrm + jcol], &a[jcol * a_dim1 + 1], &c__1);
/* L80: */
	}
    }

    if (itype == 4) {
	i__1 = *n;
	for (jcol = 1; jcol <= i__1; ++jcol) {
	    r_cnjg(&q__1, &x[nxfrm + jcol]);
	    cscal_(m, &q__1, &a[jcol * a_dim1 + 1], &c__1);
/* L90: */
	}
    }
    return 0;

/*     End of CLAROR */

} /* claror_ */
Пример #16
0
/* Subroutine */ int clahrd_(integer *n, integer *k, integer *nb, complex *a, 
	integer *lda, complex *tau, complex *t, integer *ldt, complex *y, 
	integer *ldy)
{
    /* System generated locals */
    integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2, 
	    i__3;
    complex q__1;

    /* Local variables */
    integer i__;
    complex ei;
    extern /* Subroutine */ int cscal_(integer *, complex *, complex *, 
	    integer *), cgemv_(char *, integer *, integer *, complex *, 
	    complex *, integer *, complex *, integer *, complex *, complex *, 
	    integer *), ccopy_(integer *, complex *, integer *, 
	    complex *, integer *), caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *), ctrmv_(char *, char *, char *, 
	    integer *, complex *, integer *, complex *, integer *), 
	    clarfg_(integer *, complex *, complex *, integer *, complex *), 
	    clacgv_(integer *, complex *, integer *);


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) */
/*  matrix A so that elements below the k-th subdiagonal are zero. The */
/*  reduction is performed by a unitary similarity transformation */
/*  Q' * A * Q. The routine returns the matrices V and T which determine */
/*  Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. */

/*  This is an OBSOLETE auxiliary routine. */
/*  This routine will be 'deprecated' in a  future release. */
/*  Please use the new routine CLAHR2 instead. */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The order of the matrix A. */

/*  K       (input) INTEGER */
/*          The offset for the reduction. Elements below the k-th */
/*          subdiagonal in the first NB columns are reduced to zero. */

/*  NB      (input) INTEGER */
/*          The number of columns to be reduced.  If NB < 1 the routine */
/*          returns immediately without touching any array. */

/*  A       (input/output) COMPLEX array, dimension (LDA,N-K+1) */
/*          On entry, the n-by-(n-k+1) general matrix A. */
/*          On exit, the elements on and above the k-th subdiagonal in */
/*          the first NB columns are overwritten with the corresponding */
/*          elements of the reduced matrix; the elements below the k-th */
/*          subdiagonal, with the array TAU, represent the matrix Q as a */
/*          product of elementary reflectors. The other columns of A are */
/*          unchanged. See Further Details. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  TAU     (output) COMPLEX array, dimension (NB) */
/*          The scalar factors of the elementary reflectors. See Further */
/*          Details. */

/*  T       (output) COMPLEX array, dimension (LDT,NB) */
/*          The upper triangular matrix T. */

/*  LDT     (input) INTEGER */
/*          The leading dimension of the array T.  LDT >= NB. */

/*  Y       (output) COMPLEX array, dimension (LDY,NB) */
/*          The n-by-nb matrix Y. */

/*  LDY     (input) INTEGER */
/*          The leading dimension of the array Y. LDY >= max(1,N). */

/*  Further Details */
/*  =============== */

/*  The matrix Q is represented as a product of nb elementary reflectors */

/*     Q = H(1) H(2) . . . H(nb). */

/*  Each H(i) has the form */

/*     H(i) = I - tau * v * v' */

/*  where tau is a complex scalar, and v is a complex vector with */
/*  v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in */
/*  A(i+k+1:n,i), and tau in TAU(i). */

/*  The elements of the vectors v together form the (n-k+1)-by-nb matrix */
/*  V which is needed, with T and Y, to apply the transformation to the */
/*  unreduced part of the matrix, using an update of the form: */
/*  A := (I - V*T*V') * (A - Y*V'). */

/*  The contents of A on exit are illustrated by the following example */
/*  with n = 7, k = 3 and nb = 2: */

/*     ( a   h   a   a   a ) */
/*     ( a   h   a   a   a ) */
/*     ( a   h   a   a   a ) */
/*     ( h   h   a   a   a ) */
/*     ( v1  h   a   a   a ) */
/*     ( v1  v2  a   a   a ) */
/*     ( v1  v2  a   a   a ) */

/*  where a denotes an element of the original matrix A, h denotes a */
/*  modified element of the upper Hessenberg matrix H, and vi denotes an */
/*  element of the vector defining H(i). */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments (shift pointers so Fortran 1-based indexing
       can be used below) */
    --tau;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1;
    t -= t_offset;
    y_dim1 = *ldy;
    y_offset = 1 + y_dim1;
    y -= y_offset;

    /* Function Body */

/*     Quick return if possible.  NB < 1 must return here as well: the */
/*     loop below would not execute, and the final store of EI into */
/*     A(K+NB,NB) would then write an unset value outside the array. */

    if (*n <= 1 || *nb < 1) {
	return 0;
    }

    i__1 = *nb;
    for (i__ = 1; i__ <= i__1; ++i__) {
	if (i__ > 1) {

/*           Update A(1:n,i) */

/*           Compute i-th column of A - Y * V' */

	    i__2 = i__ - 1;
	    clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda);
	    i__2 = i__ - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &a[*k 
		    + i__ - 1 + a_dim1], lda, &c_b2, &a[i__ * a_dim1 + 1], &
		    c__1);
	    i__2 = i__ - 1;
	    clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda);

/*           Apply I - V * T' * V' to this column (call it b) from the */
/*           left, using the last column of T as workspace */

/*           Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows) */
/*                    ( V2 )             ( b2 ) */

/*           where V1 is unit lower triangular */

/*           w := V1' * b1 */

	    i__2 = i__ - 1;
	    ccopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + 
		    1], &c__1);
	    i__2 = i__ - 1;
	    ctrmv_("Lower", "Conjugate transpose", "Unit", &i__2, &a[*k + 1 + 
		    a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1);

/*           w := w + V2'*b2 */

	    i__2 = *n - *k - i__ + 1;
	    i__3 = i__ - 1;
	    cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[*k + i__ + 
		    a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b2, &
		    t[*nb * t_dim1 + 1], &c__1);

/*           w := T'*w */

	    i__2 = i__ - 1;
	    ctrmv_("Upper", "Conjugate transpose", "Non-unit", &i__2, &t[
		    t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1);

/*           b2 := b2 - V2*w */

	    i__2 = *n - *k - i__ + 1;
	    i__3 = i__ - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemv_("No transpose", &i__2, &i__3, &q__1, &a[*k + i__ + a_dim1], 
		     lda, &t[*nb * t_dim1 + 1], &c__1, &c_b2, &a[*k + i__ + 
		    i__ * a_dim1], &c__1);

/*           b1 := b1 - V1*w */

	    i__2 = i__ - 1;
	    ctrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1], 
		    lda, &t[*nb * t_dim1 + 1], &c__1);
	    i__2 = i__ - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    caxpy_(&i__2, &q__1, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__ 
		    * a_dim1], &c__1);

	    /* Restore A(k+i-1,i-1), which the previous iteration
	       temporarily overwrote with 1 (EI holds the saved value). */
	    i__2 = *k + i__ - 1 + (i__ - 1) * a_dim1;
	    a[i__2].r = ei.r, a[i__2].i = ei.i;
	}

/*        Generate the elementary reflector H(i) to annihilate */
/*        A(k+i+1:n,i) */

	i__2 = *k + i__ + i__ * a_dim1;
	ei.r = a[i__2].r, ei.i = a[i__2].i;
	i__2 = *n - *k - i__ + 1;
/* Computing MIN */
	i__3 = *k + i__ + 1;
	clarfg_(&i__2, &ei, &a[min(i__3, *n)+ i__ * a_dim1], &c__1, &tau[i__])
		;
	/* Temporarily set A(k+i,i) to 1 so the column can be used as the
	   reflector vector in the products below. */
	i__2 = *k + i__ + i__ * a_dim1;
	a[i__2].r = 1.f, a[i__2].i = 0.f;

/*        Compute  Y(1:n,i) */

	i__2 = *n - *k - i__ + 1;
	cgemv_("No transpose", n, &i__2, &c_b2, &a[(i__ + 1) * a_dim1 + 1], 
		lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b1, &y[i__ * 
		y_dim1 + 1], &c__1);
	i__2 = *n - *k - i__ + 1;
	i__3 = i__ - 1;
	cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[*k + i__ + 
		a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b1, &t[
		i__ * t_dim1 + 1], &c__1);
	i__2 = i__ - 1;
	q__1.r = -1.f, q__1.i = -0.f;
	cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &t[i__ * 
		t_dim1 + 1], &c__1, &c_b2, &y[i__ * y_dim1 + 1], &c__1);
	cscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1);

/*        Compute T(1:i,i) */

	i__2 = i__ - 1;
	i__3 = i__;
	q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
	cscal_(&i__2, &q__1, &t[i__ * t_dim1 + 1], &c__1);
	i__2 = i__ - 1;
	ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, 
		&t[i__ * t_dim1 + 1], &c__1)
		;
	i__2 = i__ + i__ * t_dim1;
	i__3 = i__;
	t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i;

/* L10: */
    }
    /* Restore A(k+nb,nb), overwritten with 1 in the last iteration. */
    i__1 = *k + *nb + *nb * a_dim1;
    a[i__1].r = ei.r, a[i__1].i = ei.i;

    return 0;

/*     End of CLAHRD */

} /* clahrd_ */
Пример #17
0
/* Subroutine */ int clauu2_(char *uplo, integer *n, complex *a, integer *lda,
	 integer *info)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CLAUU2 computes the product U * U' or L' * L, where the triangular   
    factor U or L is stored in the upper or lower triangular part of   
    the array A.   

    If UPLO = 'U' or 'u' then the upper triangle of the result is stored,   
    overwriting the factor U in A.   
    If UPLO = 'L' or 'l' then the lower triangle of the result is stored,   
    overwriting the factor L in A.   

    This is the unblocked form of the algorithm, calling Level 2 BLAS.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the triangular factor stored in the array A   
            is upper or lower triangular:   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    N       (input) INTEGER   
            The order of the triangular factor U or L.  N >= 0.   

    A       (input/output) COMPLEX array, dimension (LDA,N)   
            On entry, the triangular factor U or L.   
            On exit, if UPLO = 'U', the upper triangle of A is   
            overwritten with the upper triangle of the product U * U';   
            if UPLO = 'L', the lower triangle of A is overwritten with   
            the lower triangle of the product L' * L.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -k, the k-th argument had an illegal value   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static complex c_b1 = {1.f,0.f};
    static integer c__1 = 1;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    real r__1;
    complex q__1;
    /* Local variables */
    static integer i__;
    extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer 
	    *, complex *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
	    , complex *, integer *, complex *, integer *, complex *, complex *
	    , integer *);
    static logical upper;
    extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), 
	    csscal_(integer *, real *, complex *, integer *), xerbla_(char *, 
	    integer *);
    static real aii;
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CLAUU2", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

    if (upper) {

/*        Compute the product U * U'. */

	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    i__2 = a_subscr(i__, i__);
	    aii = a[i__2].r;
	    if (i__ < *n) {
		i__2 = a_subscr(i__, i__);
		i__3 = *n - i__;
		cdotc_(&q__1, &i__3, &a_ref(i__, i__ + 1), lda, &a_ref(i__, 
			i__ + 1), lda);
		r__1 = aii * aii + q__1.r;
		a[i__2].r = r__1, a[i__2].i = 0.f;
		i__2 = *n - i__;
		clacgv_(&i__2, &a_ref(i__, i__ + 1), lda);
		i__2 = i__ - 1;
		i__3 = *n - i__;
		q__1.r = aii, q__1.i = 0.f;
		cgemv_("No transpose", &i__2, &i__3, &c_b1, &a_ref(1, i__ + 1)
			, lda, &a_ref(i__, i__ + 1), lda, &q__1, &a_ref(1, 
			i__), &c__1);
		i__2 = *n - i__;
		clacgv_(&i__2, &a_ref(i__, i__ + 1), lda);
	    } else {
		csscal_(&i__, &aii, &a_ref(1, i__), &c__1);
	    }
/* L10: */
	}

    } else {

/*        Compute the product L' * L. */

	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    i__2 = a_subscr(i__, i__);
	    aii = a[i__2].r;
	    if (i__ < *n) {
		i__2 = a_subscr(i__, i__);
		i__3 = *n - i__;
		cdotc_(&q__1, &i__3, &a_ref(i__ + 1, i__), &c__1, &a_ref(i__ 
			+ 1, i__), &c__1);
		r__1 = aii * aii + q__1.r;
		a[i__2].r = r__1, a[i__2].i = 0.f;
		i__2 = i__ - 1;
		clacgv_(&i__2, &a_ref(i__, 1), lda);
		i__2 = *n - i__;
		i__3 = i__ - 1;
		q__1.r = aii, q__1.i = 0.f;
		cgemv_("Conjugate transpose", &i__2, &i__3, &c_b1, &a_ref(i__ 
			+ 1, 1), lda, &a_ref(i__ + 1, i__), &c__1, &q__1, &
			a_ref(i__, 1), lda);
		i__2 = i__ - 1;
		clacgv_(&i__2, &a_ref(i__, 1), lda);
	    } else {
		csscal_(&i__, &aii, &a_ref(i__, 1), lda);
	    }
/* L20: */
	}
    }

    return 0;

/*     End of CLAUU2 */

} /* clauu2_ */
Пример #18
0
/* Subroutine */ int clarge_(integer *n, complex *a, integer *lda, integer *
                             iseed, complex *work, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1;
    real r__1;
    complex q__1;

    /* Builtin functions */
    double c_abs(complex *);
    void c_div(complex *, complex *, complex *);

    /* Local variables.
       These have automatic storage: every one of them is assigned before it
       is read on each call, so nothing has to persist between calls and the
       routine carries no hidden state across (possibly concurrent) calls. */
    integer i__;
    extern /* Subroutine */ int cgerc_(integer *, integer *, complex *,
                                       complex *, integer *, complex *,
                                       integer *, complex *, integer *),
                                cscal_(integer *, complex *, complex *,
                                       integer *),
                                cgemv_(char *, integer *, integer *,
                                       complex *, complex *, integer *,
                                       complex *, integer *, complex *,
                                       complex *, integer *);
    extern doublereal scnrm2_(integer *, complex *, integer *);
    complex wa, wb;
    real wn;
    extern /* Subroutine */ int xerbla_(char *, integer *), clarnv_(
        integer *, integer *, integer *, complex *);
    complex tau;


#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]


    /*  -- LAPACK auxiliary test routine (version 3.0) --
           Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
           Courant Institute, Argonne National Lab, and Rice University
           September 30, 1994


        Purpose
        =======

        CLARGE pre- and post-multiplies a complex general n by n matrix A
        with a random unitary matrix U:  A := U*A*U'.

        Arguments
        =========

        N       (input) INTEGER
                The order of the matrix A.  N >= 0.

        A       (input/output) COMPLEX array, dimension (LDA,N)
                On entry, the original n by n matrix A.
                On exit, A is overwritten by U*A*U' for some random
                unitary matrix U.

        LDA     (input) INTEGER
                The leading dimension of the array A.  LDA >= max(1,N)
                (this is the bound the argument check below enforces).

        ISEED   (input/output) INTEGER array, dimension (4)
                On entry, the seed of the random number generator; the array
                elements must be between 0 and 4095, and ISEED(4) must be
                odd.
                On exit, the seed is updated.

        WORK    (workspace) COMPLEX array, dimension (2*N)
                WORK(1:N) holds the current reflection vector and
                WORK(N+1:2*N) the matrix-vector product used to apply it.

        INFO    (output) INTEGER
                = 0: successful exit
                < 0: if INFO = -i, the i-th argument had an illegal value
                     (-1 for N, -3 for LDA)

        Returns 0 on every path; errors are reported through INFO and
        xerbla_.

        c__1, c__3, c_b1 and c_b2 are file-scope constants defined outside
        this function.  NOTE(review): the calls below read as if c_b1 = 0
        and c_b2 = 1 -- confirm against their definitions.

        =====================================================================


           Parameter adjustments: shift the pointers so that the 1-based
           Fortran subscripts used below address the caller's arrays. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --iseed;
    --work;

    /*     Test the input arguments */

    *info = 0;
    if (*n < 0) {
        *info = -1;
    } else if (*lda < max(1,*n)) {
        *info = -3;
    }
    if (*info < 0) {
        i__1 = -(*info);
        xerbla_("CLARGE", &i__1);
        return 0;
    }

    /*     pre- and post-multiply A by random unitary matrix */

    for (i__ = *n; i__ >= 1; --i__) {

        /*        generate random reflection of length n-i+1 in WORK(1:) */

        i__1 = *n - i__ + 1;
        clarnv_(&c__3, &iseed[1], &i__1, &work[1]);
        i__1 = *n - i__ + 1;
        wn = scnrm2_(&i__1, &work[1], &c__1);
        if (wn == 0.f) {

            /*           Zero vector: no reflection.  wa is deliberately not
                         formed here -- it is only needed in the other
                         branch, and forming it would divide 0 by 0. */

            tau.r = 0.f, tau.i = 0.f;
        } else {

            /*           wa = (wn / |work(1)|) * work(1) */

            r__1 = wn / c_abs(&work[1]);
            wa.r = r__1 * work[1].r, wa.i = r__1 * work[1].i;

            /*           wb = work(1) + wa */

            wb.r = work[1].r + wa.r, wb.i = work[1].i + wa.i;

            /*           scale work(2:n-i+1) by c_b2 / wb and set work(1) = 1 */

            i__1 = *n - i__;
            c_div(&q__1, &c_b2, &wb);
            cscal_(&i__1, &q__1, &work[2], &c__1);
            work[1].r = 1.f, work[1].i = 0.f;

            /*           tau = real part of wb / wa */

            c_div(&q__1, &wb, &wa);
            r__1 = q__1.r;
            tau.r = r__1, tau.i = 0.f;
        }

        /*        multiply A(i:n,1:n) by random reflection from the left */

        i__1 = *n - i__ + 1;
        cgemv_("Conjugate transpose", &i__1, n, &c_b2, &a_ref(i__, 1), lda, &
               work[1], &c__1, &c_b1, &work[*n + 1], &c__1);
        i__1 = *n - i__ + 1;
        q__1.r = -tau.r, q__1.i = -tau.i;
        cgerc_(&i__1, n, &q__1, &work[1], &c__1, &work[*n + 1], &c__1, &a_ref(
                   i__, 1), lda);

        /*        multiply A(1:n,i:n) by random reflection from the right */

        i__1 = *n - i__ + 1;
        cgemv_("No transpose", n, &i__1, &c_b2, &a_ref(1, i__), lda, &work[1],
               &c__1, &c_b1, &work[*n + 1], &c__1);
        i__1 = *n - i__ + 1;
        q__1.r = -tau.r, q__1.i = -tau.i;
        cgerc_(n, &i__1, &q__1, &work[*n + 1], &c__1, &work[1], &c__1, &a_ref(
                   1, i__), lda);
        /* L10: */
    }
    return 0;

    /*     End of CLARGE */

} /* clarge_ */
Пример #19
0
/* Subroutine */ int chseqr_(char *job, char *compz, integer *n, integer *ilo,
	 integer *ihi, complex *h__, integer *ldh, complex *w, complex *z__, 
	integer *ldz, complex *work, integer *lwork, integer *info)
{
    /* System generated locals */
    address a__1[2];
    integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4[2], 
	    i__5, i__6;
    real r__1, r__2, r__3, r__4;
    complex q__1;
    char ch__1[2];

    /* Builtin functions */
    double r_imag(complex *);
    void r_cnjg(complex *, complex *);
    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);

    /* Local variables.
       All of these have automatic storage, including the scratch arrays
       s, v, vv and rwork: each is written before it is read on every call,
       so no value has to survive between calls and separate calls do not
       share scratch state. */
    integer maxb, ierr;
    real unfl;
    complex temp;
    real ovfl, opst;
    integer i__, j, k, l;
    complex s[225]	/* was [15][15] */;
    extern /* Subroutine */ int cscal_(integer *, complex *, complex *, 
	    integer *);
    complex v[16];
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
	    , complex *, integer *, complex *, integer *, complex *, complex *
	    , integer *), ccopy_(integer *, complex *, integer *, 
	    complex *, integer *);
    integer itemp;
    real rtemp;
    integer i1, i2;
    logical initz, wantt, wantz;
    real rwork[1];
    extern doublereal slapy2_(real *, real *);
    integer ii, nh;
    extern /* Subroutine */ int slabad_(real *, real *), clarfg_(integer *, 
	    complex *, complex *, integer *, complex *);
    integer nr, ns;
    extern integer icamax_(integer *, complex *, integer *);
    integer nv;
    extern doublereal slamch_(char *), clanhs_(char *, integer *, 
	    complex *, integer *, real *);
    extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer 
	    *), clahqr_(logical *, logical *, integer *, integer *, integer *,
	     complex *, integer *, complex *, integer *, integer *, complex *,
	     integer *, integer *), clacpy_(char *, integer *, integer *, 
	    complex *, integer *, complex *, integer *);
    complex vv[16];
    extern /* Subroutine */ int claset_(char *, integer *, integer *, complex 
	    *, complex *, complex *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int clarfx_(char *, integer *, integer *, complex 
	    *, complex *, complex *, integer *, complex *), xerbla_(
	    char *, integer *);
    real smlnum;
    logical lquery;
    integer itn;
    complex tau;
    integer its;
    real ulp, tst1;


#define h___subscr(a_1,a_2) (a_2)*h_dim1 + a_1
#define h___ref(a_1,a_2) h__[h___subscr(a_1,a_2)]
#define s_subscr(a_1,a_2) (a_2)*15 + a_1 - 16
#define s_ref(a_1,a_2) s[s_subscr(a_1,a_2)]
#define z___subscr(a_1,a_2) (a_2)*z_dim1 + a_1
#define z___ref(a_1,a_2) z__[z___subscr(a_1,a_2)]


/*  -- LAPACK routine (instrumented to count operations, version 3.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       June 30, 1999

    The operation count is returned through latime_1.ops, a common block
    declared outside this function; opst is the local running total that
    is folded into it at label L180.

    Purpose
    =======

    CHSEQR computes the eigenvalues of a complex upper Hessenberg
    matrix H, and, optionally, the matrices T and Z from the Schur
    decomposition H = Z T Z**H, where T is an upper triangular matrix
    (the Schur form), and Z is the unitary matrix of Schur vectors.

    Optionally Z may be postmultiplied into an input unitary matrix Q,
    so that this routine can give the Schur factorization of a matrix A
    which has been reduced to the Hessenberg form H by the unitary
    matrix Q:  A = Q*H*Q**H = (QZ)*T*(QZ)**H.

    Arguments
    =========

    JOB     (input) CHARACTER*1
            = 'E': compute eigenvalues only;
            = 'S': compute eigenvalues and the Schur form T.

    COMPZ   (input) CHARACTER*1
            = 'N': no Schur vectors are computed;
            = 'I': Z is initialized to the unit matrix and the matrix Z
                   of Schur vectors of H is returned;
            = 'V': Z must contain an unitary matrix Q on entry, and
                   the product Q*Z is returned.

    N       (input) INTEGER
            The order of the matrix H.  N >= 0.

    ILO     (input) INTEGER
    IHI     (input) INTEGER
            It is assumed that H is already upper triangular in rows
            and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
            set by a previous call to CGEBAL, and then passed to CGEHRD
            when the matrix output by CGEBAL is reduced to Hessenberg
            form. Otherwise ILO and IHI should be set to 1 and N
            respectively.
            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.

    H       (input/output) COMPLEX array, dimension (LDH,N)
            On entry, the upper Hessenberg matrix H.
            On exit, if JOB = 'S', H contains the upper triangular matrix
            T from the Schur decomposition (the Schur form). If
            JOB = 'E', the contents of H are unspecified on exit.

    LDH     (input) INTEGER
            The leading dimension of the array H. LDH >= max(1,N).

    W       (output) COMPLEX array, dimension (N)
            The computed eigenvalues. If JOB = 'S', the eigenvalues are
            stored in the same order as on the diagonal of the Schur form
            returned in H, with W(i) = H(i,i).

    Z       (input/output) COMPLEX array, dimension (LDZ,N)
            If COMPZ = 'N': Z is not referenced.
            If COMPZ = 'I': on entry, Z need not be set, and on exit, Z
            contains the unitary matrix Z of the Schur vectors of H.
            If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q,
            which is assumed to be equal to the unit matrix except for
            the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z.
            Normally Q is the unitary matrix generated by CUNGHR after
            the call to CGEHRD which formed the Hessenberg matrix H.

    LDZ     (input) INTEGER
            The leading dimension of the array Z.
            LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise.

    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

    LWORK   (input) INTEGER
            The dimension of the array WORK.  LWORK >= max(1,N).

            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value
            > 0:  if INFO = i, CHSEQR failed to compute all the
                  eigenvalues in a total of 30*(IHI-ILO+1) iterations;
                  elements 1:ilo-1 and i+1:n of W contain those
                  eigenvalues which have been successfully computed.

    The function itself returns 0 on every path.

    =====================================================================


       Parameter adjustments: shift the pointers so that the 1-based
       Fortran subscripts used below address the caller's arrays. */
    h_dim1 = *ldh;
    h_offset = 1 + h_dim1 * 1;
    h__ -= h_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;

/*     Decode and test the input parameters */

    wantt = lsame_(job, "S");
    initz = lsame_(compz, "I");
    wantz = initz || lsame_(compz, "V");

    *info = 0;
/*     WORK(1) is set to max(1,N) up front so that a workspace query
       (LWORK = -1) can return right after the argument checks. */
    i__1 = max(1,*n);
    work[1].r = (real) i__1, work[1].i = 0.f;
    lquery = *lwork == -1;
    if (! lsame_(job, "E") && ! wantt) {
	*info = -1;
    } else if (! lsame_(compz, "N") && ! wantz) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*ilo < 1 || *ilo > max(1,*n)) {
	*info = -4;
    } else if (*ihi < min(*ilo,*n) || *ihi > *n) {
	*info = -5;
    } else if (*ldh < max(1,*n)) {
	*info = -7;
    } else if (*ldz < 1 || (wantz && *ldz < max(1,*n))) {
	*info = -10;
    } else if (*lwork < max(1,*n) && ! lquery) {
	*info = -12;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CHSEQR", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Initialize the local operation count */

    opst = 0.f;

/*     Initialize Z, if necessary */

    if (initz) {
	claset_("Full", n, n, &c_b1, &c_b2, &z__[z_offset], ldz);
    }

/*     Store the eigenvalues isolated by CGEBAL. */

    i__1 = *ilo - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = i__;
	i__3 = h___subscr(i__, i__);
	w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i;
/* L10: */
    }
    i__1 = *n;
    for (i__ = *ihi + 1; i__ <= i__1; ++i__) {
	i__2 = i__;
	i__3 = h___subscr(i__, i__);
	w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i;
/* L20: */
    }

/*     Quick return if possible. */

    if (*n == 0) {
	return 0;
    }
    if (*ilo == *ihi) {
	i__1 = *ilo;
	i__2 = h___subscr(*ilo, *ilo);
	w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i;
	return 0;
    }

/*     Set rows and columns ILO to IHI to zero below the first
       subdiagonal. */

    i__1 = *ihi - 2;
    for (j = *ilo; j <= i__1; ++j) {
	i__2 = *n;
	for (i__ = j + 2; i__ <= i__2; ++i__) {
	    i__3 = h___subscr(i__, j);
	    h__[i__3].r = 0.f, h__[i__3].i = 0.f;
/* L30: */
	}
/* L40: */
    }
    nh = *ihi - *ilo + 1;

/*     I1 and I2 are the indices of the first row and last column of H
       to which transformations must be applied. If eigenvalues only are
       being computed, I1 and I2 are re-set inside the main loop. */

    if (wantt) {
	i1 = 1;
	i2 = *n;
    } else {
	i1 = *ilo;
	i2 = *ihi;
    }

/*     Ensure that the subdiagonal elements are real. */

    i__1 = *ihi;
    for (i__ = *ilo + 1; i__ <= i__1; ++i__) {
	i__2 = h___subscr(i__, i__ - 1);
	temp.r = h__[i__2].r, temp.i = h__[i__2].i;
	if (r_imag(&temp) != 0.f) {
/*           Replace H(i,i-1) by its modulus and keep the unit-modulus
             factor in temp; the scalings below apply that factor (or its
             conjugate) to row i and column i. */
	    r__1 = temp.r;
	    r__2 = r_imag(&temp);
	    rtemp = slapy2_(&r__1, &r__2);
	    i__2 = h___subscr(i__, i__ - 1);
	    h__[i__2].r = rtemp, h__[i__2].i = 0.f;
	    q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp;
	    temp.r = q__1.r, temp.i = q__1.i;
	    if (i2 > i__) {
		i__2 = i2 - i__;
		r_cnjg(&q__1, &temp);
		cscal_(&i__2, &q__1, &h___ref(i__, i__ + 1), ldh);
	    }
	    i__2 = i__ - i1;
	    cscal_(&i__2, &temp, &h___ref(i1, i__), &c__1);
	    if (i__ < *ihi) {
		i__2 = h___subscr(i__ + 1, i__);
		i__3 = h___subscr(i__ + 1, i__);
		q__1.r = temp.r * h__[i__3].r - temp.i * h__[i__3].i, q__1.i =
			 temp.r * h__[i__3].i + temp.i * h__[i__3].r;
		h__[i__2].r = q__1.r, h__[i__2].i = q__1.i;
	    }
/*           Increment op count */
	    opst += (i2 - i1 + 2) * 6;
	    if (wantz) {
		cscal_(&nh, &temp, &z___ref(*ilo, i__), &c__1);
/*              Increment op count */
		opst += nh * 6;
	    }
	}
/* L50: */
    }

/*     Determine the order of the multi-shift QR algorithm to be used.
       ch__1 is the two-character concatenation JOB // COMPZ handed to
       ilaenv_.  (The second s_cat rebuilds the same string; it is kept
       as generated.) */

    i__4[0] = 1, a__1[0] = job;
    i__4[1] = 1, a__1[1] = compz;
    s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2);
    ns = ilaenv_(&c__4, "CHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, (
	    ftnlen)2);
    i__4[0] = 1, a__1[0] = job;
    i__4[1] = 1, a__1[1] = compz;
    s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2);
    maxb = ilaenv_(&c__8, "CHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, (
	    ftnlen)2);
    if (ns <= 1 || ns > nh || maxb >= nh) {

/*        Use the standard double-shift algorithm.
          NOTE(review): this path returns without adding opst to
          latime_1.ops (that only happens at L180) -- confirm intended. */

	clahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], ilo, 
		ihi, &z__[z_offset], ldz, info);
	return 0;
    }
    maxb = max(2,maxb);
/*     NS is capped at 15, the order of the local array s. */
    i__1 = min(ns,maxb);
    ns = min(i__1,15);

/*     Now 1 < NS <= MAXB < NH.

       Set machine-dependent constants for the stopping criterion.
       If norm(H) <= sqrt(OVFL), overflow should not occur. */

    unfl = slamch_("Safe minimum");
    ovfl = 1.f / unfl;
    slabad_(&unfl, &ovfl);
    ulp = slamch_("Precision");
    smlnum = unfl * (nh / ulp);

/*     ITN is the total number of multiple-shift QR iterations allowed. */

    itn = nh * 30;

/*     The main loop begins here. I is the loop index and decreases from
       IHI to ILO in steps of at most MAXB. Each iteration of the loop
       works with the active submatrix in rows and columns L to I.
       Eigenvalues I+1 to IHI have already converged. Either L = ILO, or
       H(L,L-1) is negligible so that the matrix splits. */

    i__ = *ihi;
L60:
    if (i__ < *ilo) {
	goto L180;
    }

/*     Perform multiple-shift QR iterations on rows and columns ILO to I
       until a submatrix of order at most MAXB splits off at the bottom
       because a subdiagonal element has become negligible. */

    l = *ilo;
    i__1 = itn;
    for (its = 0; its <= i__1; ++its) {

/*        Look for a single small subdiagonal element. */

	i__2 = l + 1;
	for (k = i__; k >= i__2; --k) {
	    i__3 = h___subscr(k - 1, k - 1);
	    i__5 = h___subscr(k, k);
	    tst1 = (r__1 = h__[i__3].r, dabs(r__1)) + (r__2 = r_imag(&h___ref(
		    k - 1, k - 1)), dabs(r__2)) + ((r__3 = h__[i__5].r, dabs(
		    r__3)) + (r__4 = r_imag(&h___ref(k, k)), dabs(r__4)));
	    if (tst1 == 0.f) {
		i__3 = i__ - l + 1;
		tst1 = clanhs_("1", &i__3, &h___ref(l, l), ldh, rwork);
/*              Increment op count */
		latime_1.ops += (i__ - l + 1) * 5 * (i__ - l) / 2;
	    }
	    i__3 = h___subscr(k, k - 1);
/*           Negligible if |Re H(k,k-1)| <= max(ulp*tst1, smlnum) */
	    r__2 = ulp * tst1;
	    if ((r__1 = h__[i__3].r, dabs(r__1)) <= dmax(r__2,smlnum)) {
		goto L80;
	    }
/* L70: */
	}
L80:
	l = k;
/*        Increment op count */
	opst += (i__ - l + 1) * 5;
	if (l > *ilo) {

/*           H(L,L-1) is negligible. */

	    i__2 = h___subscr(l, l - 1);
	    h__[i__2].r = 0.f, h__[i__2].i = 0.f;
	}

/*        Exit from loop if a submatrix of order <= MAXB has split off. */

	if (l >= i__ - maxb + 1) {
	    goto L170;
	}

/*        Now the active submatrix is in rows and columns L to I. If
          eigenvalues only are being computed, only the active submatrix
          need be transformed. */

	if (! wantt) {
	    i1 = l;
	    i2 = i__;
	}

	if (its == 20 || its == 30) {

/*           Exceptional shifts. */

	    i__2 = i__;
	    for (ii = i__ - ns + 1; ii <= i__2; ++ii) {
		i__3 = ii;
		i__5 = h___subscr(ii, ii - 1);
		i__6 = h___subscr(ii, ii);
		r__3 = ((r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = h__[i__6]
			.r, dabs(r__2))) * 1.5f;
		w[i__3].r = r__3, w[i__3].i = 0.f;
/* L90: */
	    }
/*           Increment op count */
	    opst += ns << 1;
	} else {

/*           Use eigenvalues of trailing submatrix of order NS as shifts. */

	    clacpy_("Full", &ns, &ns, &h___ref(i__ - ns + 1, i__ - ns + 1), 
		    ldh, s, &c__15);
	    clahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &w[i__ - 
		    ns + 1], &c__1, &ns, &z__[z_offset], ldz, &ierr);
	    if (ierr > 0) {

/*              If CLAHQR failed to compute all NS eigenvalues, use the
                unconverged diagonal elements as the remaining shifts. */

		i__2 = ierr;
		for (ii = 1; ii <= i__2; ++ii) {
		    i__3 = i__ - ns + ii;
		    i__5 = s_subscr(ii, ii);
		    w[i__3].r = s[i__5].r, w[i__3].i = s[i__5].i;
/* L100: */
		}
	    }
	}

/*        Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns))
          where G is the Hessenberg submatrix H(L:I,L:I) and w is
          the vector of shifts (stored in W). The result is
          stored in the local array V. */

	v[0].r = 1.f, v[0].i = 0.f;
	i__2 = ns + 1;
	for (ii = 2; ii <= i__2; ++ii) {
	    i__3 = ii - 1;
	    v[i__3].r = 0.f, v[i__3].i = 0.f;
/* L110: */
	}
	nv = 1;
	i__2 = i__;
	for (j = i__ - ns + 1; j <= i__2; ++j) {
	    i__3 = nv + 1;
	    ccopy_(&i__3, v, &c__1, vv, &c__1);
	    i__3 = nv + 1;
	    i__5 = j;
	    q__1.r = -w[i__5].r, q__1.i = -w[i__5].i;
	    cgemv_("No transpose", &i__3, &nv, &c_b2, &h___ref(l, l), ldh, vv,
		     &c__1, &q__1, v, &c__1);
	    ++nv;
/*           Increment op count */
	    opst = opst + (nv << 3) * (*n + 1) + (nv + 1) * 6;

/*           Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero,
             reset it to the unit vector. */

	    itemp = icamax_(&nv, v, &c__1);
/*           Increment op count */
	    opst += nv << 1;
	    i__3 = itemp - 1;
	    rtemp = (r__1 = v[i__3].r, dabs(r__1)) + (r__2 = r_imag(&v[itemp 
		    - 1]), dabs(r__2));
	    if (rtemp == 0.f) {
		v[0].r = 1.f, v[0].i = 0.f;
		i__3 = nv;
		for (ii = 2; ii <= i__3; ++ii) {
		    i__5 = ii - 1;
		    v[i__5].r = 0.f, v[i__5].i = 0.f;
/* L120: */
		}
	    } else {
		rtemp = dmax(rtemp,smlnum);
		r__1 = 1.f / rtemp;
		csscal_(&nv, &r__1, v, &c__1);
/*              Increment op count */
		opst += nv << 1;
	    }
/* L130: */
	}

/*        Multiple-shift QR step */

	i__2 = i__ - 1;
	for (k = l; k <= i__2; ++k) {

/*           The first iteration of this loop determines a reflection G
             from the vector V and applies it from left and right to H,
             thus creating a nonzero bulge below the subdiagonal.

             Each subsequent iteration determines a reflection G to
             restore the Hessenberg form in the (K-1)th column, and thus
             chases the bulge one step toward the bottom of the active
             submatrix. NR is the order of G:  NR = min(NS+1, I-K+1). */

	    i__3 = ns + 1, i__5 = i__ - k + 1;
	    nr = min(i__3,i__5);
	    if (k > l) {
		ccopy_(&nr, &h___ref(k, k - 1), &c__1, v, &c__1);
	    }
	    clarfg_(&nr, v, &v[1], &c__1, &tau);
/*           Increment op count */
	    opst = opst + nr * 10 + 12;
	    if (k > l) {
		i__3 = h___subscr(k, k - 1);
		h__[i__3].r = v[0].r, h__[i__3].i = v[0].i;
		i__3 = i__;
		for (ii = k + 1; ii <= i__3; ++ii) {
		    i__5 = h___subscr(ii, k - 1);
		    h__[i__5].r = 0.f, h__[i__5].i = 0.f;
/* L140: */
		}
	    }
	    v[0].r = 1.f, v[0].i = 0.f;

/*           Apply G' from the left to transform the rows of the matrix
             in columns K to I2. */

	    i__3 = i2 - k + 1;
	    r_cnjg(&q__1, &tau);
	    clarfx_("Left", &nr, &i__3, v, &q__1, &h___ref(k, k), ldh, &work[
		    1]);

/*           Apply G from the right to transform the columns of the
             matrix in rows I1 to min(K+NR,I). */

	    i__5 = k + nr;
	    i__3 = min(i__5,i__) - i1 + 1;
	    clarfx_("Right", &i__3, &nr, v, &tau, &h___ref(i1, k), ldh, &work[
		    1]);
/*           Increment op count */
	    i__3 = nr, i__5 = i__ - k;
	    latime_1.ops += (((nr << 2) - 2) << 2) * (i2 - i1 + 2 + min(i__3,
		    i__5));

	    if (wantz) {

/*              Accumulate transformations in the matrix Z */

		clarfx_("Right", &nh, &nr, v, &tau, &z___ref(*ilo, k), ldz, &
			work[1]);
/*              Increment op count */
		latime_1.ops += (((nr << 2) - 2) << 2) * nh;
	    }
/* L150: */
	}

/*        Ensure that H(I,I-1) is real. */

	i__2 = h___subscr(i__, i__ - 1);
	temp.r = h__[i__2].r, temp.i = h__[i__2].i;
	if (r_imag(&temp) != 0.f) {
	    r__1 = temp.r;
	    r__2 = r_imag(&temp);
	    rtemp = slapy2_(&r__1, &r__2);
	    i__2 = h___subscr(i__, i__ - 1);
	    h__[i__2].r = rtemp, h__[i__2].i = 0.f;
	    q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp;
	    temp.r = q__1.r, temp.i = q__1.i;
	    if (i2 > i__) {
		i__2 = i2 - i__;
		r_cnjg(&q__1, &temp);
		cscal_(&i__2, &q__1, &h___ref(i__, i__ + 1), ldh);
	    }
	    i__2 = i__ - i1;
	    cscal_(&i__2, &temp, &h___ref(i1, i__), &c__1);
/*           Increment op count */
	    opst += (i2 - i1 + 1) * 6;
	    if (wantz) {
		cscal_(&nh, &temp, &z___ref(*ilo, i__), &c__1);
/*              Increment op count */
		opst += nh * 6;
	    }
	}

/* L160: */
    }

/*     Failure to converge in remaining number of iterations.
       NOTE(review): opst is not added to latime_1.ops on this path
       either -- confirm intended. */

    *info = i__;
    return 0;

L170:

/*     A submatrix of order <= MAXB in rows and columns L to I has split
       off. Use the double-shift QR algorithm to handle it. */

    clahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &w[1], ilo, ihi,
	     &z__[z_offset], ldz, info);
    if (*info > 0) {
	return 0;
    }

/*     Decrement number of remaining iterations, and return to start of
       the main loop with a new value of I. */

    itn -= its;
    i__ = l - 1;
    goto L60;

L180:
/*     Compute final op count and report the workspace size in WORK(1) */
    latime_1.ops += opst;
    i__1 = max(1,*n);
    work[1].r = (real) i__1, work[1].i = 0.f;
    return 0;

/*     End of CHSEQR */

} /* chseqr_ */
Пример #20
0
 int cgerfs_(char *trans, int *n, int *nrhs, complex *
	a, int *lda, complex *af, int *ldaf, int *ipiv, complex *
	b, int *ldb, complex *x, int *ldx, float *ferr, float *berr, 
	complex *work, float *rwork, int *info)
{
    /* System generated locals */
    int a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
	    x_offset, i__1, i__2, i__3, i__4, i__5;
    float r__1, r__2, r__3, r__4;
    complex q__1;

    /* Builtin functions */
    double r_imag(complex *);

    /* Local variables */
    int i__, j, k;
    float s, xk;
    int nz;
    float eps;
    int kase;
    float safe1, safe2;
    extern int lsame_(char *, char *);
    extern  int cgemv_(char *, int *, int *, complex *
, complex *, int *, complex *, int *, complex *, complex *
, int *);
    int isave[3];
    extern  int ccopy_(int *, complex *, int *, 
	    complex *, int *), caxpy_(int *, complex *, complex *, 
	    int *, complex *, int *);
    int count;
    extern  int clacn2_(int *, complex *, complex *, float 
	    *, int *, int *);
    extern double slamch_(char *);
    float safmin;
    extern  int xerbla_(char *, int *), cgetrs_(
	    char *, int *, int *, complex *, int *, int *, 
	    complex *, int *, int *);
    int notran;
    char transn[1], transt[1];
    float lstres;


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     Modified to call CLACN2 in place of CLACON, 10 Feb 03, SJH. */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CGERFS improves the computed solution to a system of linear */
/*  equations and provides error bounds and backward error estimates for */
/*  the solution. */

/*  Arguments */
/*  ========= */

/*  TRANS   (input) CHARACTER*1 */
/*          Specifies the form of the system of equations: */
/*          = 'N':  A * X = B     (No transpose) */
/*          = 'T':  A**T * X = B  (Transpose) */
/*          = 'C':  A**H * X = B  (Conjugate transpose) */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  NRHS    (input) INTEGER */
/*          The number of right hand sides, i.e., the number of columns */
/*          of the matrices B and X.  NRHS >= 0. */

/*  A       (input) COMPLEX array, dimension (LDA,N) */
/*          The original N-by-N matrix A. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= MAX(1,N). */

/*  AF      (input) COMPLEX array, dimension (LDAF,N) */
/*          The factors L and U from the factorization A = P*L*U */
/*          as computed by CGETRF. */

/*  LDAF    (input) INTEGER */
/*          The leading dimension of the array AF.  LDAF >= MAX(1,N). */

/*  IPIV    (input) INTEGER array, dimension (N) */
/*          The pivot indices from CGETRF; for 1<=i<=N, row i of the */
/*          matrix was interchanged with row IPIV(i). */

/*  B       (input) COMPLEX array, dimension (LDB,NRHS) */
/*          The right hand side matrix B. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= MAX(1,N). */

/*  X       (input/output) COMPLEX array, dimension (LDX,NRHS) */
/*          On entry, the solution matrix X, as computed by CGETRS. */
/*          On exit, the improved solution matrix X. */

/*  LDX     (input) INTEGER */
/*          The leading dimension of the array X.  LDX >= MAX(1,N). */

/*  FERR    (output) REAL array, dimension (NRHS) */
/*          The estimated forward error bound for each solution vector */
/*          X(j) (the j-th column of the solution matrix X). */
/*          If XTRUE is the true solution corresponding to X(j), FERR(j) */
/*          is an estimated upper bound for the magnitude of the largest */
/*          element in (X(j) - XTRUE) divided by the magnitude of the */
/*          largest element in X(j).  The estimate is as reliable as */
/*          the estimate for RCOND, and is almost always a slight */
/*          overestimate of the true error. */

/*  BERR    (output) REAL array, dimension (NRHS) */
/*          The componentwise relative backward error of each solution */
/*          vector X(j) (i.e., the smallest relative change in */
/*          any element of A or B that makes X(j) an exact solution). */

/*  WORK    (workspace) COMPLEX array, dimension (2*N) */

/*  RWORK   (workspace) REAL array, dimension (N) */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  Internal Parameters */
/*  =================== */

/*  ITMAX is the maximum number of steps of iterative refinement. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Statement Functions .. */
/*     .. */
/*     .. Statement Function definitions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    af_dim1 = *ldaf;
    af_offset = 1 + af_dim1;
    af -= af_offset;
    --ipiv;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    --ferr;
    --berr;
    --work;
    --rwork;

    /* Function Body */
    *info = 0;
    notran = lsame_(trans, "N");
    if (! notran && ! lsame_(trans, "T") && ! lsame_(
	    trans, "C")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*lda < MAX(1,*n)) {
	*info = -5;
    } else if (*ldaf < MAX(1,*n)) {
	*info = -7;
    } else if (*ldb < MAX(1,*n)) {
	*info = -10;
    } else if (*ldx < MAX(1,*n)) {
	*info = -12;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CGERFS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
	i__1 = *nrhs;
	for (j = 1; j <= i__1; ++j) {
	    ferr[j] = 0.f;
	    berr[j] = 0.f;
/* L10: */
	}
	return 0;
    }

    if (notran) {
	*(unsigned char *)transn = 'N';
	*(unsigned char *)transt = 'C';
    } else {
	*(unsigned char *)transn = 'C';
	*(unsigned char *)transt = 'N';
    }

/*     NZ = maximum number of nonzero elements in each row of A, plus 1 */

    nz = *n + 1;
    eps = slamch_("Epsilon");
    safmin = slamch_("Safe minimum");
    safe1 = nz * safmin;
    safe2 = safe1 / eps;

/*     Do for each right hand side */

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {

	count = 1;
	lstres = 3.f;
L20:

/*        Loop until stopping criterion is satisfied. */

/*        Compute residual R = B - op(A) * X, */
/*        where op(A) = A, A**T, or A**H, depending on TRANS. */

	ccopy_(n, &b[j * b_dim1 + 1], &c__1, &work[1], &c__1);
	q__1.r = -1.f, q__1.i = -0.f;
	cgemv_(trans, n, n, &q__1, &a[a_offset], lda, &x[j * x_dim1 + 1], &
		c__1, &c_b1, &work[1], &c__1);

/*        Compute componentwise relative backward error from formula */

/*        MAX(i) ( ABS(R(i)) / ( ABS(op(A))*ABS(X) + ABS(B) )(i) ) */

/*        where ABS(Z) is the componentwise absolute value of the matrix */
/*        or vector Z.  If the i-th component of the denominator is less */
/*        than SAFE2, then SAFE1 is added to the i-th components of the */
/*        numerator and denominator before dividing. */

	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = i__ + j * b_dim1;
	    rwork[i__] = (r__1 = b[i__3].r, ABS(r__1)) + (r__2 = r_imag(&b[
		    i__ + j * b_dim1]), ABS(r__2));
/* L30: */
	}

/*        Compute ABS(op(A))*ABS(X) + ABS(B). */

	if (notran) {
	    i__2 = *n;
	    for (k = 1; k <= i__2; ++k) {
		i__3 = k + j * x_dim1;
		xk = (r__1 = x[i__3].r, ABS(r__1)) + (r__2 = r_imag(&x[k + j 
			* x_dim1]), ABS(r__2));
		i__3 = *n;
		for (i__ = 1; i__ <= i__3; ++i__) {
		    i__4 = i__ + k * a_dim1;
		    rwork[i__] += ((r__1 = a[i__4].r, ABS(r__1)) + (r__2 = 
			    r_imag(&a[i__ + k * a_dim1]), ABS(r__2))) * xk;
/* L40: */
		}
/* L50: */
	    }
	} else {
	    i__2 = *n;
	    for (k = 1; k <= i__2; ++k) {
		s = 0.f;
		i__3 = *n;
		for (i__ = 1; i__ <= i__3; ++i__) {
		    i__4 = i__ + k * a_dim1;
		    i__5 = i__ + j * x_dim1;
		    s += ((r__1 = a[i__4].r, ABS(r__1)) + (r__2 = r_imag(&a[
			    i__ + k * a_dim1]), ABS(r__2))) * ((r__3 = x[
			    i__5].r, ABS(r__3)) + (r__4 = r_imag(&x[i__ + j *
			     x_dim1]), ABS(r__4)));
/* L60: */
		}
		rwork[k] += s;
/* L70: */
	    }
	}
	s = 0.f;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    if (rwork[i__] > safe2) {
/* Computing MAX */
		i__3 = i__;
		r__3 = s, r__4 = ((r__1 = work[i__3].r, ABS(r__1)) + (r__2 = 
			r_imag(&work[i__]), ABS(r__2))) / rwork[i__];
		s = MAX(r__3,r__4);
	    } else {
/* Computing MAX */
		i__3 = i__;
		r__3 = s, r__4 = ((r__1 = work[i__3].r, ABS(r__1)) + (r__2 = 
			r_imag(&work[i__]), ABS(r__2)) + safe1) / (rwork[i__]
			 + safe1);
		s = MAX(r__3,r__4);
	    }
/* L80: */
	}
	berr[j] = s;

/*        Test stopping criterion. Continue iterating if */
/*           1) The residual BERR(J) is larger than machine epsilon, and */
/*           2) BERR(J) decreased by at least a factor of 2 during the */
/*              last iteration, and */
/*           3) At most ITMAX iterations tried. */

	if (berr[j] > eps && berr[j] * 2.f <= lstres && count <= 5) {

/*           Update solution and try again. */

	    cgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &work[1], 
		     n, info);
	    caxpy_(n, &c_b1, &work[1], &c__1, &x[j * x_dim1 + 1], &c__1);
	    lstres = berr[j];
	    ++count;
	    goto L20;
	}

/*        Bound error from formula */

/*        norm(X - XTRUE) / norm(X) .le. FERR = */
/*        norm( ABS(inv(op(A)))* */
/*           ( ABS(R) + NZ*EPS*( ABS(op(A))*ABS(X)+ABS(B) ))) / norm(X) */

/*        where */
/*          norm(Z) is the magnitude of the largest component of Z */
/*          inv(op(A)) is the inverse of op(A) */
/*          ABS(Z) is the componentwise absolute value of the matrix or */
/*             vector Z */
/*          NZ is the maximum number of nonzeros in any row of A, plus 1 */
/*          EPS is machine epsilon */

/*        The i-th component of ABS(R)+NZ*EPS*(ABS(op(A))*ABS(X)+ABS(B)) */
/*        is incremented by SAFE1 if the i-th component of */
/*        ABS(op(A))*ABS(X) + ABS(B) is less than SAFE2. */

/*        Use CLACN2 to estimate the infinity-norm of the matrix */
/*           inv(op(A)) * diag(W), */
/*        where W = ABS(R) + NZ*EPS*( ABS(op(A))*ABS(X)+ABS(B) ))) */

	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    if (rwork[i__] > safe2) {
		i__3 = i__;
		rwork[i__] = (r__1 = work[i__3].r, ABS(r__1)) + (r__2 = 
			r_imag(&work[i__]), ABS(r__2)) + nz * eps * rwork[
			i__];
	    } else {
		i__3 = i__;
		rwork[i__] = (r__1 = work[i__3].r, ABS(r__1)) + (r__2 = 
			r_imag(&work[i__]), ABS(r__2)) + nz * eps * rwork[
			i__] + safe1;
	    }
/* L90: */
	}

	kase = 0;
L100:
	clacn2_(n, &work[*n + 1], &work[1], &ferr[j], &kase, isave);
	if (kase != 0) {
	    if (kase == 1) {

/*              Multiply by diag(W)*inv(op(A)**H). */

		cgetrs_(transt, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &
			work[1], n, info);
		i__2 = *n;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    i__3 = i__;
		    i__4 = i__;
		    i__5 = i__;
		    q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] 
			    * work[i__5].i;
		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
/* L110: */
		}
	    } else {

/*              Multiply by inv(op(A))*diag(W). */

		i__2 = *n;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    i__3 = i__;
		    i__4 = i__;
		    i__5 = i__;
		    q__1.r = rwork[i__4] * work[i__5].r, q__1.i = rwork[i__4] 
			    * work[i__5].i;
		    work[i__3].r = q__1.r, work[i__3].i = q__1.i;
/* L120: */
		}
		cgetrs_(transn, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &
			work[1], n, info);
	    }
	    goto L100;
	}

/*        Normalize error. */

	lstres = 0.f;
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    i__3 = i__ + j * x_dim1;
	    r__3 = lstres, r__4 = (r__1 = x[i__3].r, ABS(r__1)) + (r__2 = 
		    r_imag(&x[i__ + j * x_dim1]), ABS(r__2));
	    lstres = MAX(r__3,r__4);
/* L130: */
	}
	if (lstres != 0.f) {
	    ferr[j] /= lstres;
	}

/* L140: */
    }

    return 0;

/*     End of CGERFS */

} /* cgerfs_ */
Пример #21
0
/**
 * CHETRF_ROOK_REC2 computes a partial factorization of a complex Hermitian
 * indefinite matrix using the bounded Bunch-Kaufman ("rook") diagonal
 * pivoting method.
 *
 * This routine is a minor modification of LAPACK's clahef_rook.
 * It serves as an unblocked kernel in the recursive algorithms.
 * The blocked BLAS Level 3 updates were removed and moved to the
 * recursive algorithm.
 *
 * All arguments are passed by pointer (Fortran calling convention).  After
 * the "parameter adjustments" at the top of the body, a, w and ipiv are
 * indexed 1-based, with a and w in column-major order.
 *
 *   uplo      Selects the branch.  A value that lsame_ matches against "U"
 *             takes the first branch, which processes columns from n
 *             downwards; any other value takes the second branch, which
 *             processes columns from 1 upwards (presumably 'L' -- confirm
 *             against the callers).
 *   n         Order of the matrix stored in a.
 *   nb        Column budget: when nb < n the column loop stops once k has
 *             passed nb - 1 columns (see the tests at L10 and L70).
 *   kb        (output) Number of columns processed: n - k in the first
 *             branch, k - 1 in the second.
 *   a, lda    The matrix and its leading dimension.  Processed columns are
 *             overwritten with the computed factor columns and the pivot
 *             block entries.
 *   ipiv      (output) Interchange record.  For a 1-by-1 step ipiv[k] = kp.
 *             For a 2-by-2 step ipiv[k] = -p and the neighbouring entry
 *             (k - 1 in the first branch, k + 1 in the second) is -kp.
 *   w, ldw    Workspace matrix and its leading dimension.  The first branch
 *             uses columns kw = nb + k - n and kw - 1, the second branch
 *             uses columns k and k + 1.
 *   info      (output) Set to 0 on entry.  Set to k for the first column k
 *             whose diagonal magnitude and largest off-diagonal magnitude
 *             are both exactly zero.
 *   uplo_len  Not referenced in the body.
 *
 * The constants c__1 and c_b1 are defined outside this function; from their
 * use as a stride and as the cgemv_ "beta" argument they are presumably the
 * integer 1 and the complex value 1 -- confirm at their definition.
 * */
/* Subroutine */ void RELAPACK_chetrf_rook_rec2(char *uplo, int *n,
	int *nb, int *kb, complex *a, int *lda, int *ipiv,
	complex *w, int *ldw, int *info, ftnlen uplo_len)
{
    /* System generated locals */
    int a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4;
    float r__1, r__2;
    complex q__1, q__2, q__3, q__4, q__5;

    /* Builtin functions */
    double sqrt(double), r_imag(complex *);
    void r_cnjg(complex *, complex *), c_div(complex *, complex *, complex *);

    /* Local variables */
    /* NOTE(review): every local below is declared static, so all calls of */
    /* this function share the same storage for them.  Each one is written */
    /* before it is read within a call, but concurrent calls would */
    /* interfere with each other. */
    static int j, k, p;
    static float t, r1;
    static complex d11, d21, d22;
    static int ii, jj, kk, kp, kw, jp1, jp2, kkw;
    static logical done;
    static int imax, jmax;
    static float alpha;
    extern logical lsame_(char *, char *, ftnlen, ftnlen);
    extern /* Subroutine */ int cgemv_(char *, int *, int *, complex *
	    , complex *, int *, complex *, int *, complex *, complex *
	    , int *, ftnlen);
    static float sfmin;
    extern /* Subroutine */ int ccopy_(int *, complex *, int *,
	    complex *, int *);
    static int itemp;
    extern /* Subroutine */ int cswap_(int *, complex *, int *,
	    complex *, int *);
    static int kstep;
    static float stemp, absakk;
    extern /* Subroutine */ int clacgv_(int *, complex *, int *);
    extern int icamax_(int *, complex *, int *);
    extern double slamch_(char *, ftnlen);
    extern /* Subroutine */ int csscal_(int *, float *, complex *, int
	    *);
    static float colmax, rowmax;

    /* Parameter adjustments */
    /* Shift the base pointers so that a[i + j * a_dim1], w[i + j * w_dim1] */
    /* and ipiv[i] can be used with 1-based indices. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --ipiv;
    w_dim1 = *ldw;
    w_offset = 1 + w_dim1;
    w -= w_offset;

    /* Function Body */
    *info = 0;

    /* alpha = (1 + sqrt(17)) / 8 is the threshold used in every pivot */
    /* acceptance test below. */
    alpha = (sqrt(17.f) + 1.f) / 8.f;

    /* sfmin decides below whether the reciprocal 1/t is formed or each */
    /* entry is divided by t instead (slamch_("S") is presumably LAPACK's */
    /* "safe minimum" query -- confirm). */
    sfmin = slamch_("S", (ftnlen)1);
    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {

	/* First branch: columns are processed from k = n downwards. */
	/* Column kw of W is the working copy of column k of A. */
	k = *n;
L10:
	kw = *nb + k - *n;

	/* Leave the column loop when the nb budget is used up (only */
	/* checked when nb < n) or when no column is left. */
	if ((k <= *n - *nb + 1 && *nb < *n) || k < 1) {
	    goto L30;
	}
	kstep = 1;
	p = k;

	/* Copy rows 1..k of column k of A into column kw of W; only the */
	/* real part of the diagonal entry is kept. */
	if (k > 1) {
	    i__1 = k - 1;
	    ccopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &
		    c__1);
	}
	i__1 = k + kw * w_dim1;
	i__2 = k + k * a_dim1;
	r__1 = a[i__2].r;
	w[i__1].r = r__1, w[i__1].i = 0.f;

	/* Update the working column with the columns processed so far: */
	/* W(1:k,kw) := c_b1 * W(1:k,kw) - A(1:k,k+1:n) * (row k of W from */
	/* column kw+1 on).  The diagonal entry is then made real again. */
	if (k < *n) {
	    i__1 = *n - k;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemv_("No transpose", &k, &i__1, &q__1, &a[(k + 1) * a_dim1 + 1],
		     lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw *
		    w_dim1 + 1], &c__1, (ftnlen)12);
	    i__1 = k + kw * w_dim1;
	    i__2 = k + kw * w_dim1;
	    r__1 = w[i__2].r;
	    w[i__1].r = r__1, w[i__1].i = 0.f;
	}

	/* absakk is the magnitude of the (real) diagonal entry. */
	i__1 = k + kw * w_dim1;
	absakk = (r__1 = w[i__1].r, dabs(r__1));

	/* imax is the row picked by icamax_ among rows 1..k-1 of the */
	/* working column and colmax is |re| + |im| of that entry */
	/* (icamax_ is presumably the BLAS "index of largest entry" */
	/* routine and r_imag the f2c imaginary-part helper -- confirm). */
	if (k > 1) {
	    i__1 = k - 1;
	    imax = icamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
	    i__1 = imax + kw * w_dim1;
	    colmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[imax
		    + kw * w_dim1]), dabs(r__2));
	} else {
	    colmax = 0.f;
	}
	if (dmax(absakk,colmax) == 0.f) {

	    /* Both magnitudes are exactly zero: remember the first such */
	    /* column in info, do no interchange, and copy the working */
	    /* column back into A. */
	    if (*info == 0) {
		*info = k;
	    }
	    kp = k;
	    i__1 = k + k * a_dim1;
	    i__2 = k + kw * w_dim1;
	    r__1 = w[i__2].r;
	    a[i__1].r = r__1, a[i__1].i = 0.f;
	    if (k > 1) {
		i__1 = k - 1;
		ccopy_(&i__1, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1],
			&c__1);
	    }
	} else {
	    if (! (absakk < alpha * colmax)) {

		/* The diagonal entry passes the alpha test: 1-by-1 step */
		/* without interchange. */
		kp = k;
	    } else {

		/* Pivot search loop.  Each pass builds row/column imax in */
		/* column kw-1 of W and ends in one of three outcomes (see */
		/* the if / else-if / else chain further down). */
		done = FALSE_;
L12:

		/* Assemble column kw-1 of W from A: rows 1..imax-1 of */
		/* column imax, the real diagonal entry, and columns */
		/* imax+1..k of row imax (read with stride lda), the last */
		/* part passed through clacgv_ (presumably conjugation). */
		if (imax > 1) {
		    i__1 = imax - 1;
		    ccopy_(&i__1, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) *
			    w_dim1 + 1], &c__1);
		}
		i__1 = imax + (kw - 1) * w_dim1;
		i__2 = imax + imax * a_dim1;
		r__1 = a[i__2].r;
		w[i__1].r = r__1, w[i__1].i = 0.f;
		i__1 = k - imax;
		ccopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax +
			1 + (kw - 1) * w_dim1], &c__1);
		i__1 = k - imax;
		clacgv_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1);

		/* Same update as for the working column, now using row */
		/* imax of W; the diagonal entry is made real again. */
		if (k < *n) {
		    i__1 = *n - k;
		    q__1.r = -1.f, q__1.i = -0.f;
		    cgemv_("No transpose", &k, &i__1, &q__1, &a[(k + 1) *
			    a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1],
			    ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], &c__1, (
			    ftnlen)12);
		    i__1 = imax + (kw - 1) * w_dim1;
		    i__2 = imax + (kw - 1) * w_dim1;
		    r__1 = w[i__2].r;
		    w[i__1].r = r__1, w[i__1].i = 0.f;
		}

		/* rowmax / jmax: magnitude and row index of the entry */
		/* picked among the off-diagonal rows of column kw-1, */
		/* first from rows imax+1..k, then from rows 1..imax-1. */
		if (imax != k) {
		    i__1 = k - imax;
		    jmax = imax + icamax_(&i__1, &w[imax + 1 + (kw - 1) *
			    w_dim1], &c__1);
		    i__1 = jmax + (kw - 1) * w_dim1;
		    rowmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&
			    w[jmax + (kw - 1) * w_dim1]), dabs(r__2));
		} else {
		    rowmax = 0.f;
		}
		if (imax > 1) {
		    i__1 = imax - 1;
		    itemp = icamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
		    i__1 = itemp + (kw - 1) * w_dim1;
		    stemp = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&
			    w[itemp + (kw - 1) * w_dim1]), dabs(r__2));
		    if (stemp > rowmax) {
			rowmax = stemp;
			jmax = itemp;
		    }
		}
		i__1 = imax + (kw - 1) * w_dim1;
		if (! ((r__1 = w[i__1].r, dabs(r__1)) < alpha * rowmax)) {

		    /* Outcome 1: diagonal entry imax passes the alpha */
		    /* test.  1-by-1 step with kp = imax; column kw-1 of W */
		    /* replaces column kw. */
		    kp = imax;
		    ccopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
			    w_dim1 + 1], &c__1);
		    done = TRUE_;
		} else if (p == jmax || rowmax <= colmax) {

		    /* Outcome 2: 2-by-2 step with kp = imax. */
		    kp = imax;
		    kstep = 2;
		    done = TRUE_;
		} else {

		    /* Outcome 3: keep searching from row jmax; column */
		    /* kw-1 of W replaces column kw. */
		    p = imax;
		    colmax = rowmax;
		    imax = jmax;
		    ccopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw *
			    w_dim1 + 1], &c__1);
		}
		if (! done) {
		    goto L12;
		}
	    }

	    /* kk is the column that receives row/column kp; kkw is the */
	    /* matching column of W. */
	    kk = k - kstep + 1;
	    kkw = *nb + kk - *n;
	    if (kstep == 2 && p != k) {

		/* 2-by-2 step, first interchange (k with p): the parts of */
		/* column k of A are copied into row/column p (nothing is */
		/* copied back into column k here), then rows k and p are */
		/* swapped in columns k+1..n of A and in columns kkw.. of W. */
		i__1 = p + p * a_dim1;
		i__2 = k + k * a_dim1;
		r__1 = a[i__2].r;
		a[i__1].r = r__1, a[i__1].i = 0.f;
		i__1 = k - 1 - p;
		ccopy_(&i__1, &a[p + 1 + k * a_dim1], &c__1, &a[p + (p + 1) *
			a_dim1], lda);
		i__1 = k - 1 - p;
		clacgv_(&i__1, &a[p + (p + 1) * a_dim1], lda);
		if (p > 1) {
		    i__1 = p - 1;
		    ccopy_(&i__1, &a[k * a_dim1 + 1], &c__1, &a[p * a_dim1 +
			    1], &c__1);
		}
		if (k < *n) {
		    i__1 = *n - k;
		    cswap_(&i__1, &a[k + (k + 1) * a_dim1], lda, &a[p + (k +
			    1) * a_dim1], lda);
		}
		i__1 = *n - kk + 1;
		cswap_(&i__1, &w[k + kkw * w_dim1], ldw, &w[p + kkw * w_dim1],
			 ldw);
	    }
	    if (kp != kk) {

		/* Interchange kk with kp, following the same pattern as */
		/* the k / p interchange above. */
		i__1 = kp + kp * a_dim1;
		i__2 = kk + kk * a_dim1;
		r__1 = a[i__2].r;
		a[i__1].r = r__1, a[i__1].i = 0.f;
		i__1 = kk - 1 - kp;
		ccopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp +
			1) * a_dim1], lda);
		i__1 = kk - 1 - kp;
		clacgv_(&i__1, &a[kp + (kp + 1) * a_dim1], lda);
		if (kp > 1) {
		    i__1 = kp - 1;
		    ccopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1
			    + 1], &c__1);
		}
		if (k < *n) {
		    i__1 = *n - k;
		    cswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k
			    + 1) * a_dim1], lda);
		}
		i__1 = *n - kk + 1;
		cswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw *
			w_dim1], ldw);
	    }
	    if (kstep == 1) {

		/* 1-by-1 step: store column kw of W as column k of A, */
		/* then divide rows 1..k-1 by the real diagonal t -- via */
		/* csscal_ with 1/t when |t| >= sfmin, entry by entry */
		/* otherwise.  Finally clacgv_ is applied to rows 1..k-1 */
		/* of column kw of W. */
		ccopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &
			c__1);
		if (k > 1) {
		    i__1 = k + k * a_dim1;
		    t = a[i__1].r;
		    if (dabs(t) >= sfmin) {
			r1 = 1.f / t;
			i__1 = k - 1;
			csscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
		    } else {
			i__1 = k - 1;
			for (ii = 1; ii <= i__1; ++ii) {
			    i__2 = ii + k * a_dim1;
			    i__3 = ii + k * a_dim1;
			    q__1.r = a[i__3].r / t, q__1.i = a[i__3].i / t;
			    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
/* L14: */
			}
		    }
		    i__1 = k - 1;
		    clacgv_(&i__1, &w[kw * w_dim1 + 1], &c__1);
		}
	    } else {

		/* 2-by-2 step: columns k-1 and k of A are computed from */
		/* columns kw-1 and kw of W.  With */
		/*   d21 = W(k-1,kw), */
		/*   d11 = W(k,kw) / conj(d21), */
		/*   d22 = W(k-1,kw-1) / d21, */
		/*   t   = 1 / (Re(d11*d22) - 1), */
		/* rows j = 1..k-2 get */
		/*   A(j,k-1) = t * ((d11*W(j,kw-1) - W(j,kw)) / d21), */
		/*   A(j,k)   = t * ((d22*W(j,kw) - W(j,kw-1)) / conj(d21)). */
		if (k > 2) {
		    i__1 = k - 1 + kw * w_dim1;
		    d21.r = w[i__1].r, d21.i = w[i__1].i;
		    r_cnjg(&q__2, &d21);
		    c_div(&q__1, &w[k + kw * w_dim1], &q__2);
		    d11.r = q__1.r, d11.i = q__1.i;
		    c_div(&q__1, &w[k - 1 + (kw - 1) * w_dim1], &d21);
		    d22.r = q__1.r, d22.i = q__1.i;
		    q__1.r = d11.r * d22.r - d11.i * d22.i, q__1.i = d11.r *
			    d22.i + d11.i * d22.r;
		    t = 1.f / (q__1.r - 1.f);
		    i__1 = k - 2;
		    for (j = 1; j <= i__1; ++j) {
			i__2 = j + (k - 1) * a_dim1;
			i__3 = j + (kw - 1) * w_dim1;
			q__4.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
				q__4.i = d11.r * w[i__3].i + d11.i * w[i__3]
				.r;
			i__4 = j + kw * w_dim1;
			q__3.r = q__4.r - w[i__4].r, q__3.i = q__4.i - w[i__4]
				.i;
			c_div(&q__2, &q__3, &d21);
			q__1.r = t * q__2.r, q__1.i = t * q__2.i;
			a[i__2].r = q__1.r, a[i__2].i = q__1.i;
			i__2 = j + k * a_dim1;
			i__3 = j + kw * w_dim1;
			q__4.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
				q__4.i = d22.r * w[i__3].i + d22.i * w[i__3]
				.r;
			i__4 = j + (kw - 1) * w_dim1;
			q__3.r = q__4.r - w[i__4].r, q__3.i = q__4.i - w[i__4]
				.i;
			r_cnjg(&q__5, &d21);
			c_div(&q__2, &q__3, &q__5);
			q__1.r = t * q__2.r, q__1.i = t * q__2.i;
			a[i__2].r = q__1.r, a[i__2].i = q__1.i;
/* L20: */
		    }
		}

		/* Copy the three stored entries of the 2-by-2 block from */
		/* W into A, then apply clacgv_ to the off-diagonal parts */
		/* of columns kw and kw-1 of W. */
		i__1 = k - 1 + (k - 1) * a_dim1;
		i__2 = k - 1 + (kw - 1) * w_dim1;
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
		i__1 = k - 1 + k * a_dim1;
		i__2 = k - 1 + kw * w_dim1;
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
		i__1 = k + k * a_dim1;
		i__2 = k + kw * w_dim1;
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
		i__1 = k - 1;
		clacgv_(&i__1, &w[kw * w_dim1 + 1], &c__1);
		i__1 = k - 2;
		clacgv_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
	    }
	}

	/* Record the interchanges; a 2-by-2 step is marked by negative */
	/* entries in both positions. */
	if (kstep == 1) {
	    ipiv[k] = kp;
	} else {
	    ipiv[k] = -p;
	    ipiv[k - 1] = -kp;
	}

	/* Move on to the next column (or skip two after a 2-by-2 step). */
	k -= kstep;
	goto L10;
L30:

	/* Walk the pivot blocks of the processed columns k+1..n in */
	/* increasing order.  For each block, swap the recorded row pairs */
	/* (jp2 with jj, and for a 2-by-2 block also jp1 with jj+1) in the */
	/* columns to the right of the block, i.e. columns j..n. */
	/* NOTE(review): if the loop above exits before processing any */
	/* column (n < 1, or nb <= 1 with nb < n), j starts at n + 1 and */
	/* ipiv[j] is read beyond index n -- presumably callers never do */
	/* this; confirm. */
	j = k + 1;
L60:
	kstep = 1;
	jp1 = 1;
	jj = j;
	jp2 = ipiv[j];
	if (jp2 < 0) {
	    jp2 = -jp2;
	    ++j;
	    jp1 = -ipiv[j];
	    kstep = 2;
	}
	++j;
	if (jp2 != jj && j <= *n) {
	    i__1 = *n - j + 1;
	    cswap_(&i__1, &a[jp2 + j * a_dim1], lda, &a[jj + j * a_dim1], lda)
		    ;
	}
	++jj;
	if (kstep == 2 && jp1 != jj && j <= *n) {
	    i__1 = *n - j + 1;
	    cswap_(&i__1, &a[jp1 + j * a_dim1], lda, &a[jj + j * a_dim1], lda)
		    ;
	}
	if (j < *n) {
	    goto L60;
	}

	/* Number of columns processed in this call. */
	*kb = *n - k;
    } else {

	/* Second branch: columns are processed from k = 1 upwards. */
	/* Column k of W is the working copy of column k of A and column */
	/* k+1 of W holds the candidate row/column during the search. */
	k = 1;
L70:

	/* Leave the column loop when the nb budget is used up (only */
	/* checked when nb < n) or when no column is left. */
	if ((k >= *nb && *nb < *n) || k > *n) {
	    goto L90;
	}
	kstep = 1;
	p = k;

	/* Copy rows k..n of column k of A into column k of W; only the */
	/* real part of the diagonal entry is kept. */
	i__1 = k + k * w_dim1;
	i__2 = k + k * a_dim1;
	r__1 = a[i__2].r;
	w[i__1].r = r__1, w[i__1].i = 0.f;
	if (k < *n) {
	    i__1 = *n - k;
	    ccopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &w[k + 1 + k *
		    w_dim1], &c__1);
	}

	/* Update the working column with the columns processed so far: */
	/* W(k:n,k) := c_b1 * W(k:n,k) - A(k:n,1:k-1) * (row k of W, */
	/* columns 1..k-1).  The diagonal entry is then made real again. */
	if (k > 1) {
	    i__1 = *n - k + 1;
	    i__2 = k - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemv_("No transpose", &i__1, &i__2, &q__1, &a[k + a_dim1], lda, &
		    w[k + w_dim1], ldw, &c_b1, &w[k + k * w_dim1], &c__1, (
		    ftnlen)12);
	    i__1 = k + k * w_dim1;
	    i__2 = k + k * w_dim1;
	    r__1 = w[i__2].r;
	    w[i__1].r = r__1, w[i__1].i = 0.f;
	}

	/* absakk is the magnitude of the (real) diagonal entry. */
	i__1 = k + k * w_dim1;
	absakk = (r__1 = w[i__1].r, dabs(r__1));

	/* imax is the row picked by icamax_ among rows k+1..n of the */
	/* working column and colmax is |re| + |im| of that entry. */
	if (k < *n) {
	    i__1 = *n - k;
	    imax = k + icamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
	    i__1 = imax + k * w_dim1;
	    colmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&w[imax
		    + k * w_dim1]), dabs(r__2));
	} else {
	    colmax = 0.f;
	}
	if (dmax(absakk,colmax) == 0.f) {

	    /* Both magnitudes are exactly zero: remember the first such */
	    /* column in info, do no interchange, and copy the working */
	    /* column back into A. */
	    if (*info == 0) {
		*info = k;
	    }
	    kp = k;
	    i__1 = k + k * a_dim1;
	    i__2 = k + k * w_dim1;
	    r__1 = w[i__2].r;
	    a[i__1].r = r__1, a[i__1].i = 0.f;
	    if (k < *n) {
		i__1 = *n - k;
		ccopy_(&i__1, &w[k + 1 + k * w_dim1], &c__1, &a[k + 1 + k *
			a_dim1], &c__1);
	    }
	} else {
	    if (! (absakk < alpha * colmax)) {

		/* The diagonal entry passes the alpha test: 1-by-1 step */
		/* without interchange. */
		kp = k;
	    } else {

		/* Pivot search loop.  Each pass builds row/column imax in */
		/* column k+1 of W and ends in one of three outcomes (see */
		/* the if / else-if / else chain further down). */
		done = FALSE_;
L72:

		/* Assemble column k+1 of W from A: columns k..imax-1 of */
		/* row imax (read with stride lda, then passed through */
		/* clacgv_, presumably conjugation), the real diagonal */
		/* entry, and rows imax+1..n of column imax. */
		i__1 = imax - k;
		ccopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) *
			w_dim1], &c__1);
		i__1 = imax - k;
		clacgv_(&i__1, &w[k + (k + 1) * w_dim1], &c__1);
		i__1 = imax + (k + 1) * w_dim1;
		i__2 = imax + imax * a_dim1;
		r__1 = a[i__2].r;
		w[i__1].r = r__1, w[i__1].i = 0.f;
		if (imax < *n) {
		    i__1 = *n - imax;
		    ccopy_(&i__1, &a[imax + 1 + imax * a_dim1], &c__1, &w[
			    imax + 1 + (k + 1) * w_dim1], &c__1);
		}

		/* Same update as for the working column, now using row */
		/* imax of W; the diagonal entry is made real again. */
		if (k > 1) {
		    i__1 = *n - k + 1;
		    i__2 = k - 1;
		    q__1.r = -1.f, q__1.i = -0.f;
		    cgemv_("No transpose", &i__1, &i__2, &q__1, &a[k + a_dim1]
			    , lda, &w[imax + w_dim1], ldw, &c_b1, &w[k + (k +
			    1) * w_dim1], &c__1, (ftnlen)12);
		    i__1 = imax + (k + 1) * w_dim1;
		    i__2 = imax + (k + 1) * w_dim1;
		    r__1 = w[i__2].r;
		    w[i__1].r = r__1, w[i__1].i = 0.f;
		}

		/* rowmax / jmax: magnitude and row index of the entry */
		/* picked among the off-diagonal rows of column k+1, */
		/* first from rows k..imax-1, then from rows imax+1..n. */
		if (imax != k) {
		    i__1 = imax - k;
		    jmax = k - 1 + icamax_(&i__1, &w[k + (k + 1) * w_dim1], &
			    c__1);
		    i__1 = jmax + (k + 1) * w_dim1;
		    rowmax = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&
			    w[jmax + (k + 1) * w_dim1]), dabs(r__2));
		} else {
		    rowmax = 0.f;
		}
		if (imax < *n) {
		    i__1 = *n - imax;
		    itemp = imax + icamax_(&i__1, &w[imax + 1 + (k + 1) *
			    w_dim1], &c__1);
		    i__1 = itemp + (k + 1) * w_dim1;
		    stemp = (r__1 = w[i__1].r, dabs(r__1)) + (r__2 = r_imag(&
			    w[itemp + (k + 1) * w_dim1]), dabs(r__2));
		    if (stemp > rowmax) {
			rowmax = stemp;
			jmax = itemp;
		    }
		}
		i__1 = imax + (k + 1) * w_dim1;
		if (! ((r__1 = w[i__1].r, dabs(r__1)) < alpha * rowmax)) {

		    /* Outcome 1: diagonal entry imax passes the alpha */
		    /* test.  1-by-1 step with kp = imax; column k+1 of W */
		    /* replaces column k. */
		    kp = imax;
		    i__1 = *n - k + 1;
		    ccopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
			    w_dim1], &c__1);
		    done = TRUE_;
		} else if (p == jmax || rowmax <= colmax) {

		    /* Outcome 2: 2-by-2 step with kp = imax. */
		    kp = imax;
		    kstep = 2;
		    done = TRUE_;
		} else {

		    /* Outcome 3: keep searching from row jmax; column */
		    /* k+1 of W replaces column k. */
		    p = imax;
		    colmax = rowmax;
		    imax = jmax;
		    i__1 = *n - k + 1;
		    ccopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k *
			    w_dim1], &c__1);
		}
		if (! done) {
		    goto L72;
		}
	    }

	    /* kk is the column that receives row/column kp. */
	    kk = k + kstep - 1;
	    if (kstep == 2 && p != k) {

		/* 2-by-2 step, first interchange (k with p): the parts of */
		/* column k of A are copied into row/column p (nothing is */
		/* copied back into column k here), then rows k and p are */
		/* swapped in columns 1..k-1 of A and in columns 1..kk of W. */
		i__1 = p + p * a_dim1;
		i__2 = k + k * a_dim1;
		r__1 = a[i__2].r;
		a[i__1].r = r__1, a[i__1].i = 0.f;
		i__1 = p - k - 1;
		ccopy_(&i__1, &a[k + 1 + k * a_dim1], &c__1, &a[p + (k + 1) *
			a_dim1], lda);
		i__1 = p - k - 1;
		clacgv_(&i__1, &a[p + (k + 1) * a_dim1], lda);
		if (p < *n) {
		    i__1 = *n - p;
		    ccopy_(&i__1, &a[p + 1 + k * a_dim1], &c__1, &a[p + 1 + p
			    * a_dim1], &c__1);
		}
		if (k > 1) {
		    i__1 = k - 1;
		    cswap_(&i__1, &a[k + a_dim1], lda, &a[p + a_dim1], lda);
		}
		cswap_(&kk, &w[k + w_dim1], ldw, &w[p + w_dim1], ldw);
	    }
	    if (kp != kk) {

		/* Interchange kk with kp, following the same pattern as */
		/* the k / p interchange above. */
		i__1 = kp + kp * a_dim1;
		i__2 = kk + kk * a_dim1;
		r__1 = a[i__2].r;
		a[i__1].r = r__1, a[i__1].i = 0.f;
		i__1 = kp - kk - 1;
		ccopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk +
			1) * a_dim1], lda);
		i__1 = kp - kk - 1;
		clacgv_(&i__1, &a[kp + (kk + 1) * a_dim1], lda);
		if (kp < *n) {
		    i__1 = *n - kp;
		    ccopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1
			    + kp * a_dim1], &c__1);
		}
		if (k > 1) {
		    i__1 = k - 1;
		    cswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
		}
		cswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
	    }
	    if (kstep == 1) {

		/* 1-by-1 step: store rows k..n of column k of W as column */
		/* k of A, then divide rows k+1..n by the real diagonal t */
		/* -- via csscal_ with 1/t when |t| >= sfmin, entry by */
		/* entry otherwise.  Finally clacgv_ is applied to rows */
		/* k+1..n of column k of W. */
		i__1 = *n - k + 1;
		ccopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], &
			c__1);
		if (k < *n) {
		    i__1 = k + k * a_dim1;
		    t = a[i__1].r;
		    if (dabs(t) >= sfmin) {
			r1 = 1.f / t;
			i__1 = *n - k;
			csscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
		    } else {
			i__1 = *n;
			for (ii = k + 1; ii <= i__1; ++ii) {
			    i__2 = ii + k * a_dim1;
			    i__3 = ii + k * a_dim1;
			    q__1.r = a[i__3].r / t, q__1.i = a[i__3].i / t;
			    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
/* L74: */
			}
		    }
		    i__1 = *n - k;
		    clacgv_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
		}
	    } else {

		/* 2-by-2 step: columns k and k+1 of A are computed from */
		/* columns k and k+1 of W.  With */
		/*   d21 = W(k+1,k), */
		/*   d11 = W(k+1,k+1) / d21, */
		/*   d22 = W(k,k) / conj(d21), */
		/*   t   = 1 / (Re(d11*d22) - 1), */
		/* rows j = k+2..n get */
		/*   A(j,k)   = t * ((d11*W(j,k) - W(j,k+1)) / conj(d21)), */
		/*   A(j,k+1) = t * ((d22*W(j,k+1) - W(j,k)) / d21). */
		if (k < *n - 1) {
		    i__1 = k + 1 + k * w_dim1;
		    d21.r = w[i__1].r, d21.i = w[i__1].i;
		    c_div(&q__1, &w[k + 1 + (k + 1) * w_dim1], &d21);
		    d11.r = q__1.r, d11.i = q__1.i;
		    r_cnjg(&q__2, &d21);
		    c_div(&q__1, &w[k + k * w_dim1], &q__2);
		    d22.r = q__1.r, d22.i = q__1.i;
		    q__1.r = d11.r * d22.r - d11.i * d22.i, q__1.i = d11.r *
			    d22.i + d11.i * d22.r;
		    t = 1.f / (q__1.r - 1.f);
		    i__1 = *n;
		    for (j = k + 2; j <= i__1; ++j) {
			i__2 = j + k * a_dim1;
			i__3 = j + k * w_dim1;
			q__4.r = d11.r * w[i__3].r - d11.i * w[i__3].i,
				q__4.i = d11.r * w[i__3].i + d11.i * w[i__3]
				.r;
			i__4 = j + (k + 1) * w_dim1;
			q__3.r = q__4.r - w[i__4].r, q__3.i = q__4.i - w[i__4]
				.i;
			r_cnjg(&q__5, &d21);
			c_div(&q__2, &q__3, &q__5);
			q__1.r = t * q__2.r, q__1.i = t * q__2.i;
			a[i__2].r = q__1.r, a[i__2].i = q__1.i;
			i__2 = j + (k + 1) * a_dim1;
			i__3 = j + (k + 1) * w_dim1;
			q__4.r = d22.r * w[i__3].r - d22.i * w[i__3].i,
				q__4.i = d22.r * w[i__3].i + d22.i * w[i__3]
				.r;
			i__4 = j + k * w_dim1;
			q__3.r = q__4.r - w[i__4].r, q__3.i = q__4.i - w[i__4]
				.i;
			c_div(&q__2, &q__3, &d21);
			q__1.r = t * q__2.r, q__1.i = t * q__2.i;
			a[i__2].r = q__1.r, a[i__2].i = q__1.i;
/* L80: */
		    }
		}

		/* Copy the three stored entries of the 2-by-2 block from */
		/* W into A, then apply clacgv_ to the off-diagonal parts */
		/* of columns k and k+1 of W. */
		i__1 = k + k * a_dim1;
		i__2 = k + k * w_dim1;
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
		i__1 = k + 1 + k * a_dim1;
		i__2 = k + 1 + k * w_dim1;
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
		i__1 = k + 1 + (k + 1) * a_dim1;
		i__2 = k + 1 + (k + 1) * w_dim1;
		a[i__1].r = w[i__2].r, a[i__1].i = w[i__2].i;
		i__1 = *n - k;
		clacgv_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
		i__1 = *n - k - 1;
		clacgv_(&i__1, &w[k + 2 + (k + 1) * w_dim1], &c__1);
	    }
	}

	/* Record the interchanges; a 2-by-2 step is marked by negative */
	/* entries in both positions. */
	if (kstep == 1) {
	    ipiv[k] = kp;
	} else {
	    ipiv[k] = -p;
	    ipiv[k + 1] = -kp;
	}

	/* Move on to the next column (or skip two after a 2-by-2 step). */
	k += kstep;
	goto L70;
L90:

	/* Walk the pivot blocks of the processed columns k-1..1 in */
	/* decreasing order.  For each block, swap the recorded row pairs */
	/* (jp2 with jj, and for a 2-by-2 block also jp1 with jj-1) in the */
	/* columns to the left of the block, i.e. columns 1..j. */
	/* NOTE(review): if the loop above exits before processing any */
	/* column (n < 1, or nb <= 1 with nb < n), j starts at 0 and */
	/* ipiv[0] is read, which lies before the caller's array -- */
	/* presumably callers never do this; confirm. */
	j = k - 1;
L120:
	kstep = 1;
	jp1 = 1;
	jj = j;
	jp2 = ipiv[j];
	if (jp2 < 0) {
	    jp2 = -jp2;
	    --j;
	    jp1 = -ipiv[j];
	    kstep = 2;
	}
	--j;
	if (jp2 != jj && j >= 1) {
	    cswap_(&j, &a[jp2 + a_dim1], lda, &a[jj + a_dim1], lda);
	}
	--jj;
	if (kstep == 2 && jp1 != jj && j >= 1) {
	    cswap_(&j, &a[jp1 + a_dim1], lda, &a[jj + a_dim1], lda);
	}
	if (j > 1) {
	    goto L120;
	}

	/* Number of columns processed in this call. */
	*kb = k - 1;
    }
    return;
}
Пример #22
0
/* Subroutine */ int cla_gerfsx_extended__(integer *prec_type__, integer *
	trans_type__, integer *n, integer *nrhs, complex *a, integer *lda, 
	complex *af, integer *ldaf, integer *ipiv, logical *colequ, real *c__,
	 complex *b, integer *ldb, complex *y, integer *ldy, real *berr_out__,
	 integer *n_norms__, real *errs_n__, real *errs_c__, complex *res, 
	real *ayb, complex *dy, complex *y_tail__, real *rcond, integer *
	ithresh, real *rthresh, real *dz_ub__, logical *ignore_cwise__, 
	integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, y_dim1, 
	    y_offset, errs_n_dim1, errs_n_offset, errs_c_dim1, errs_c_offset, 
	    i__1, i__2, i__3, i__4;
    real r__1, r__2;
    char ch__1[1];

    /* Builtin functions */
    double r_imag(complex *);

    /* Local variables */
    real dxratmax, dzratmax;
    integer i__, j;
    extern /* Subroutine */ int cla_geamv__(integer *, integer *, integer *, 
	    real *, complex *, integer *, complex *, integer *, real *, real *
	    , integer *);
    logical incr_prec__;
    real prev_dz_z__, yk, final_dx_x__;
    extern /* Subroutine */ int cla_wwaddw__(integer *, complex *, complex *, 
	    complex *);
    real final_dz_z__, prevnormdx;
    integer cnt;
    real dyk, eps, incr_thresh__, dx_x__, dz_z__;
    extern /* Subroutine */ int cla_lin_berr__(integer *, integer *, integer *
	    , complex *, real *, real *);
    real ymin;
    extern /* Subroutine */ int blas_cgemv_x__(integer *, integer *, integer *
	    , complex *, complex *, integer *, complex *, integer *, complex *
	    , complex *, integer *, integer *);
    integer y_prec_state__;
    extern /* Subroutine */ int blas_cgemv2_x__(integer *, integer *, integer 
	    *, complex *, complex *, integer *, complex *, complex *, integer 
	    *, complex *, complex *, integer *, integer *), cgemv_(char *, 
	    integer *, integer *, complex *, complex *, integer *, complex *, 
	    integer *, complex *, complex *, integer *), ccopy_(
	    integer *, complex *, integer *, complex *, integer *);
    real dxrat, dzrat;
    extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *);
    char trans[1];
    real normx, normy;
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int cgetrs_(char *, integer *, integer *, complex 
	    *, integer *, integer *, complex *, integer *, integer *);
    real normdx;
    extern /* Character */ VOID chla_transtype__(char *, ftnlen, integer *);
    real hugeval;
    integer x_state__, z_state__;


/*     -- LAPACK routine (version 3.2.1)                                 -- */
/*     -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */
/*     -- Jason Riedy of Univ. of California Berkeley.                 -- */
/*     -- April 2009                                                   -- */

/*     -- LAPACK is a software package provided by Univ. of Tennessee, -- */
/*     -- Univ. of California Berkeley and NAG Ltd.                    -- */

/*     .. */
/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CLA_GERFSX_EXTENDED improves the computed solution to a system of */
/*  linear equations by performing extra-precise iterative refinement */
/*  and provides error bounds and backward error estimates for the solution. */
/*  This subroutine is called by CGERFSX to perform iterative refinement. */
/*  In addition to normwise error bound, the code provides maximum */
/*  componentwise error bound if possible. See comments for ERR_BNDS_NORM */
/*  and ERR_BNDS_COMP for details of the error bounds. Note that this */
/*  subroutine is only responsible for setting the second fields of */
/*  ERR_BNDS_NORM and ERR_BNDS_COMP. */

/*  Arguments */
/*  ========= */

/*     PREC_TYPE      (input) INTEGER */
/*     Specifies the intermediate precision to be used in refinement. */
/*     The value is defined by ILAPREC(P) where P is a CHARACTER and */
/*     P    = 'S':  Single */
/*          = 'D':  Double */
/*          = 'I':  Indigenous */
/*          = 'X', 'E':  Extra */

/*     TRANS_TYPE     (input) INTEGER */
/*     Specifies the transposition operation on A. */
/*     The value is defined by ILATRANS(T) where T is a CHARACTER and */
/*     T    = 'N':  No transpose */
/*          = 'T':  Transpose */
/*          = 'C':  Conjugate transpose */

/*     N              (input) INTEGER */
/*     The number of linear equations, i.e., the order of the */
/*     matrix A.  N >= 0. */

/*     NRHS           (input) INTEGER */
/*     The number of right-hand-sides, i.e., the number of columns of the */
/*     matrix B. */

/*     A              (input) COMPLEX array, dimension (LDA,N) */
/*     On entry, the N-by-N matrix A. */

/*     LDA            (input) INTEGER */
/*     The leading dimension of the array A.  LDA >= max(1,N). */

/*     AF             (input) COMPLEX array, dimension (LDAF,N) */
/*     The factors L and U from the factorization */
/*     A = P*L*U as computed by CGETRF. */

/*     LDAF           (input) INTEGER */
/*     The leading dimension of the array AF.  LDAF >= max(1,N). */

/*     IPIV           (input) INTEGER array, dimension (N) */
/*     The pivot indices from the factorization A = P*L*U */
/*     as computed by CGETRF; row i of the matrix was interchanged */
/*     with row IPIV(i). */

/*     COLEQU         (input) LOGICAL */
/*     If .TRUE. then column equilibration was done to A before calling */
/*     this routine. This is needed to compute the solution and error */
/*     bounds correctly. */

/*     C              (input) REAL array, dimension (N) */
/*     The column scale factors for A. If COLEQU = .FALSE., C */
/*     is not accessed. If C is input, each element of C should be a power */
/*     of the radix to ensure a reliable solution and error estimates. */
/*     Scaling by powers of the radix does not cause rounding errors unless */
/*     the result underflows or overflows. Rounding errors during scaling */
/*     lead to refining with a matrix that is not equivalent to the */
/*     input matrix, producing error estimates that may not be */
/*     reliable. */

/*     B              (input) COMPLEX array, dimension (LDB,NRHS) */
/*     The right-hand-side matrix B. */

/*     LDB            (input) INTEGER */
/*     The leading dimension of the array B.  LDB >= max(1,N). */

/*     Y              (input/output) COMPLEX array, dimension (LDY,NRHS) */
/*     On entry, the solution matrix X, as computed by CGETRS. */
/*     On exit, the improved solution matrix Y. */

/*     LDY            (input) INTEGER */
/*     The leading dimension of the array Y.  LDY >= max(1,N). */

/*     BERR_OUT       (output) REAL array, dimension (NRHS) */
/*     On exit, BERR_OUT(j) contains the componentwise relative backward */
/*     error for right-hand-side j from the formula */
/*         max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */
/*     where abs(Z) is the componentwise absolute value of the matrix */
/*     or vector Z. This is computed by CLA_LIN_BERR. */

/*     N_NORMS        (input) INTEGER */
/*     Determines which error bounds to return (see ERR_BNDS_NORM */
/*     and ERR_BNDS_COMP). */
/*     If N_NORMS >= 1 return normwise error bounds. */
/*     If N_NORMS >= 2 return componentwise error bounds. */

/*     ERR_BNDS_NORM  (input/output) REAL array, dimension (NRHS, N_ERR_BNDS) */
/*     For each right-hand side, this array contains information about */
/*     various error bounds and condition numbers corresponding to the */
/*     normwise relative error, which is defined as follows: */

/*     Normwise relative error in the ith solution vector: */
/*             max_j (abs(XTRUE(j,i) - X(j,i))) */
/*            ------------------------------ */
/*                  max_j abs(X(j,i)) */

/*     The array is indexed by the type of error information as described */
/*     below. There currently are up to three pieces of information */
/*     returned. */

/*     The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */
/*     right-hand side. */

/*     The second index in ERR_BNDS_NORM(:,err) contains the following */
/*     three fields: */
/*     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
/*              reciprocal condition number is less than the threshold */
/*              sqrt(n) * slamch('Epsilon'). */

/*     err = 2 "Guaranteed" error bound: The estimated forward error, */
/*              almost certainly within a factor of 10 of the true error */
/*              so long as the next entry is greater than the threshold */
/*              sqrt(n) * slamch('Epsilon'). This error bound should only */
/*              be trusted if the previous boolean is true. */

/*     err = 3  Reciprocal condition number: Estimated normwise */
/*              reciprocal condition number.  Compared with the threshold */
/*              sqrt(n) * slamch('Epsilon') to determine if the error */
/*              estimate is "guaranteed". These reciprocal condition */
/*              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
/*              appropriately scaled matrix Z. */
/*              Let Z = S*A, where S scales each row by a power of the */
/*              radix so all absolute row sums of Z are approximately 1. */

/*     This subroutine is only responsible for setting the second field */
/*     above. */
/*     See Lapack Working Note 165 for further details and extra */
/*     cautions. */

/*     ERR_BNDS_COMP  (input/output) REAL array, dimension (NRHS, N_ERR_BNDS) */
/*     For each right-hand side, this array contains information about */
/*     various error bounds and condition numbers corresponding to the */
/*     componentwise relative error, which is defined as follows: */

/*     Componentwise relative error in the ith solution vector: */
/*                    abs(XTRUE(j,i) - X(j,i)) */
/*             max_j ---------------------- */
/*                         abs(X(j,i)) */

/*     The array is indexed by the right-hand side i (on which the */
/*     componentwise relative error depends), and the type of error */
/*     information as described below. There currently are up to three */
/*     pieces of information returned for each right-hand side. If */
/*     componentwise accuracy is not requested (PARAMS(3) = 0.0), then */
/*     ERR_BNDS_COMP is not accessed.  If N_ERR_BNDS .LT. 3, then at most */
/*     the first (:,N_ERR_BNDS) entries are returned. */

/*     The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */
/*     right-hand side. */

/*     The second index in ERR_BNDS_COMP(:,err) contains the following */
/*     three fields: */
/*     err = 1 "Trust/don't trust" boolean. Trust the answer if the */
/*              reciprocal condition number is less than the threshold */
/*              sqrt(n) * slamch('Epsilon'). */

/*     err = 2 "Guaranteed" error bound: The estimated forward error, */
/*              almost certainly within a factor of 10 of the true error */
/*              so long as the next entry is greater than the threshold */
/*              sqrt(n) * slamch('Epsilon'). This error bound should only */
/*              be trusted if the previous boolean is true. */

/*     err = 3  Reciprocal condition number: Estimated componentwise */
/*              reciprocal condition number.  Compared with the threshold */
/*              sqrt(n) * slamch('Epsilon') to determine if the error */
/*              estimate is "guaranteed". These reciprocal condition */
/*              numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */
/*              appropriately scaled matrix Z. */
/*              Let Z = S*(A*diag(x)), where x is the solution for the */
/*              current right-hand side and S scales each row of */
/*              A*diag(x) by a power of the radix so all absolute row */
/*              sums of Z are approximately 1. */

/*     This subroutine is only responsible for setting the second field */
/*     above. */
/*     See Lapack Working Note 165 for further details and extra */
/*     cautions. */

/*     RES            (input) COMPLEX array, dimension (N) */
/*     Workspace to hold the intermediate residual. */

/*     AYB            (input) REAL array, dimension (N) */
/*     Workspace. */

/*     DY             (input) COMPLEX array, dimension (N) */
/*     Workspace to hold the intermediate solution. */

/*     Y_TAIL         (input) COMPLEX array, dimension (N) */
/*     Workspace to hold the trailing bits of the intermediate solution. */

/*     RCOND          (input) REAL */
/*     Reciprocal scaled condition number.  This is an estimate of the */
/*     reciprocal Skeel condition number of the matrix A after */
/*     equilibration (if done).  If this is less than the machine */
/*     precision (in particular, if it is zero), the matrix is singular */
/*     to working precision.  Note that the error may still be small even */
/*     if this number is very small and the matrix appears ill- */
/*     conditioned. */

/*     ITHRESH        (input) INTEGER */
/*     The maximum number of residual computations allowed for */
/*     refinement. The default is 10. For 'aggressive' set to 100 to */
/*     permit convergence using approximate factorizations or */
/*     factorizations other than LU. If the factorization uses a */
/*     technique other than Gaussian elimination, the guarantees in */
/*     ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. */

/*     RTHRESH        (input) REAL */
/*     Determines when to stop refinement if the error estimate stops */
/*     decreasing. Refinement will stop when the next solution no longer */
/*     satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */
/*     the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */
/*     default value is 0.5. For 'aggressive' set to 0.9 to permit */
/*     convergence on extremely ill-conditioned matrices. See LAWN 165 */
/*     for more details. */

/*     DZ_UB          (input) REAL */
/*     Determines when to start considering componentwise convergence. */
/*     Componentwise convergence is only considered after each component */
/*     of the solution Y is stable, which we define as the relative */
/*     change in each component being less than DZ_UB. The default value */
/*     is 0.25, requiring the first bit to be stable. See LAWN 165 for */
/*     more details. */

/*     IGNORE_CWISE   (input) LOGICAL */
/*     If .TRUE. then ignore componentwise convergence. Default value */
/*     is .FALSE.. */

/*     INFO           (output) INTEGER */
/*       = 0:  Successful exit. */
/*       < 0:  if INFO = -i, the ith argument to CGETRS had an illegal */
/*             value */

/*  ===================================================================== */

/*     .. Local Scalars .. */
/*     .. */
/*     .. Parameters .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Statement Functions .. */
/*     .. */
/*     .. Statement Function Definitions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    errs_c_dim1 = *nrhs;
    errs_c_offset = 1 + errs_c_dim1;
    errs_c__ -= errs_c_offset;
    errs_n_dim1 = *nrhs;
    errs_n_offset = 1 + errs_n_dim1;
    errs_n__ -= errs_n_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    af_dim1 = *ldaf;
    af_offset = 1 + af_dim1;
    af -= af_offset;
    --ipiv;
    --c__;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    y_dim1 = *ldy;
    y_offset = 1 + y_dim1;
    y -= y_offset;
    --berr_out__;
    --res;
    --ayb;
    --dy;
    --y_tail__;

    /* Function Body */
    if (*info != 0) {
	return 0;
    }
    chla_transtype__(ch__1, (ftnlen)1, trans_type__);
    *(unsigned char *)trans = *(unsigned char *)&ch__1[0];
    eps = slamch_("Epsilon");
    hugeval = slamch_("Overflow");
/*     Force HUGEVAL to Inf */
    hugeval *= hugeval;
/*     Using HUGEVAL may lead to spurious underflows. */
    incr_thresh__ = (real) (*n) * eps;

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
	y_prec_state__ = 1;
	if (y_prec_state__ == 2) {
	    i__2 = *n;
	    for (i__ = 1; i__ <= i__2; ++i__) {
		i__3 = i__;
		y_tail__[i__3].r = 0.f, y_tail__[i__3].i = 0.f;
	    }
	}
	dxrat = 0.f;
	dxratmax = 0.f;
	dzrat = 0.f;
	dzratmax = 0.f;
	final_dx_x__ = hugeval;
	final_dz_z__ = hugeval;
	prevnormdx = hugeval;
	prev_dz_z__ = hugeval;
	dz_z__ = hugeval;
	dx_x__ = hugeval;
	x_state__ = 1;
	z_state__ = 0;
	incr_prec__ = FALSE_;
	i__2 = *ithresh;
	for (cnt = 1; cnt <= i__2; ++cnt) {

/*         Compute residual RES = B_s - op(A_s) * Y, */
/*             op(A) = A, A**T, or A**H depending on TRANS (and type). */

	    ccopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
	    if (y_prec_state__ == 0) {
		cgemv_(trans, n, n, &c_b6, &a[a_offset], lda, &y[j * y_dim1 + 
			1], &c__1, &c_b8, &res[1], &c__1);
	    } else if (y_prec_state__ == 1) {
		blas_cgemv_x__(trans_type__, n, n, &c_b6, &a[a_offset], lda, &
			y[j * y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1, 
			prec_type__);
	    } else {
		blas_cgemv2_x__(trans_type__, n, n, &c_b6, &a[a_offset], lda, 
			&y[j * y_dim1 + 1], &y_tail__[1], &c__1, &c_b8, &res[
			1], &c__1, prec_type__);
	    }
/*         XXX: RES is no longer needed. */
	    ccopy_(n, &res[1], &c__1, &dy[1], &c__1);
	    cgetrs_(trans, n, &c__1, &af[af_offset], ldaf, &ipiv[1], &dy[1], 
		    n, info);

/*         Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */

	    normx = 0.f;
	    normy = 0.f;
	    normdx = 0.f;
	    dz_z__ = 0.f;
	    ymin = hugeval;

	    i__3 = *n;
	    for (i__ = 1; i__ <= i__3; ++i__) {
		i__4 = i__ + j * y_dim1;
		yk = (r__1 = y[i__4].r, dabs(r__1)) + (r__2 = r_imag(&y[i__ + 
			j * y_dim1]), dabs(r__2));
		i__4 = i__;
		dyk = (r__1 = dy[i__4].r, dabs(r__1)) + (r__2 = r_imag(&dy[
			i__]), dabs(r__2));
		if (yk != 0.f) {
/* Computing MAX */
		    r__1 = dz_z__, r__2 = dyk / yk;
		    dz_z__ = dmax(r__1,r__2);
		} else if (dyk != 0.f) {
		    dz_z__ = hugeval;
		}
		ymin = dmin(ymin,yk);
		normy = dmax(normy,yk);
		if (*colequ) {
/* Computing MAX */
		    r__1 = normx, r__2 = yk * c__[i__];
		    normx = dmax(r__1,r__2);
/* Computing MAX */
		    r__1 = normdx, r__2 = dyk * c__[i__];
		    normdx = dmax(r__1,r__2);
		} else {
		    normx = normy;
		    normdx = dmax(normdx,dyk);
		}
	    }
	    if (normx != 0.f) {
		dx_x__ = normdx / normx;
	    } else if (normdx == 0.f) {
		dx_x__ = 0.f;
	    } else {
		dx_x__ = hugeval;
	    }
	    dxrat = normdx / prevnormdx;
	    dzrat = dz_z__ / prev_dz_z__;

/*         Check termination criteria */

	    if (! (*ignore_cwise__) && ymin * *rcond < incr_thresh__ * normy 
		    && y_prec_state__ < 2) {
		incr_prec__ = TRUE_;
	    }
	    if (x_state__ == 3 && dxrat <= *rthresh) {
		x_state__ = 1;
	    }
	    if (x_state__ == 1) {
		if (dx_x__ <= eps) {
		    x_state__ = 2;
		} else if (dxrat > *rthresh) {
		    if (y_prec_state__ != 2) {
			incr_prec__ = TRUE_;
		    } else {
			x_state__ = 3;
		    }
		} else {
		    if (dxrat > dxratmax) {
			dxratmax = dxrat;
		    }
		}
		if (x_state__ > 1) {
		    final_dx_x__ = dx_x__;
		}
	    }
	    if (z_state__ == 0 && dz_z__ <= *dz_ub__) {
		z_state__ = 1;
	    }
	    if (z_state__ == 3 && dzrat <= *rthresh) {
		z_state__ = 1;
	    }
	    if (z_state__ == 1) {
		if (dz_z__ <= eps) {
		    z_state__ = 2;
		} else if (dz_z__ > *dz_ub__) {
		    z_state__ = 0;
		    dzratmax = 0.f;
		    final_dz_z__ = hugeval;
		} else if (dzrat > *rthresh) {
		    if (y_prec_state__ != 2) {
			incr_prec__ = TRUE_;
		    } else {
			z_state__ = 3;
		    }
		} else {
		    if (dzrat > dzratmax) {
			dzratmax = dzrat;
		    }
		}
		if (z_state__ > 1) {
		    final_dz_z__ = dz_z__;
		}
	    }

/*           Exit if both normwise and componentwise stopped working, */
/*           but if componentwise is unstable, let it go at least two */
/*           iterations. */

	    if (x_state__ != 1) {
		if (*ignore_cwise__) {
		    goto L666;
		}
		if (z_state__ == 3 || z_state__ == 2) {
		    goto L666;
		}
		if (z_state__ == 0 && cnt > 1) {
		    goto L666;
		}
	    }
	    if (incr_prec__) {
		incr_prec__ = FALSE_;
		++y_prec_state__;
		i__3 = *n;
		for (i__ = 1; i__ <= i__3; ++i__) {
		    i__4 = i__;
		    y_tail__[i__4].r = 0.f, y_tail__[i__4].i = 0.f;
		}
	    }
	    prevnormdx = normdx;
	    prev_dz_z__ = dz_z__;

/*           Update solution. */

	    if (y_prec_state__ < 2) {
		caxpy_(n, &c_b8, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1);
	    } else {
		cla_wwaddw__(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]);
	    }
	}
/*        Target of "IF (Z_STOP .AND. X_STOP)".  Sun's f77 won't EXIT. */
L666:

/*     Set final_* when cnt hits ithresh */

	if (x_state__ == 1) {
	    final_dx_x__ = dx_x__;
	}
	if (z_state__ == 1) {
	    final_dz_z__ = dz_z__;
	}

/*     Compute error bounds */

	if (*n_norms__ >= 1) {
	    errs_n__[j + (errs_n_dim1 << 1)] = final_dx_x__ / (1 - dxratmax);
	}
	if (*n_norms__ >= 2) {
	    errs_c__[j + (errs_c_dim1 << 1)] = final_dz_z__ / (1 - dzratmax);
	}

/*     Compute componentwise relative backward error from formula */
/*         max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */
/*     where abs(Z) is the componentwise absolute value of the matrix */
/*     or vector Z. */

/*        Compute residual RES = B_s - op(A_s) * Y, */
/*            op(A) = A, A**T, or A**H depending on TRANS (and type). */

	ccopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1);
	cgemv_(trans, n, n, &c_b6, &a[a_offset], lda, &y[j * y_dim1 + 1], &
		c__1, &c_b8, &res[1], &c__1);
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = i__ + j * b_dim1;
	    ayb[i__] = (r__1 = b[i__3].r, dabs(r__1)) + (r__2 = r_imag(&b[i__ 
		    + j * b_dim1]), dabs(r__2));
	}

/*     Compute abs(op(A_s))*abs(Y) + abs(B_s). */

	cla_geamv__(trans_type__, n, n, &c_b31, &a[a_offset], lda, &y[j * 
		y_dim1 + 1], &c__1, &c_b31, &ayb[1], &c__1);
	cla_lin_berr__(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]);

/*     End of loop for each RHS. */

    }

    return 0;
} /* cla_gerfsx_extended__ */
Пример #23
0
/* Subroutine */ int cbdt02_(integer *m, integer *n, complex *b, integer *ldb, 
	 complex *c__, integer *ldc, complex *u, integer *ldu, complex *work, 
	real *rwork, real *resid)
{
/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  CBDT02 tests the change of basis C = U' * B by computing the residual */

/*     RESID = norm( B - U * C ) / ( max(m,n) * norm(B) * EPS ), */

/*  where B and C are M by N matrices, U is an M by M orthogonal matrix, */
/*  and EPS is the machine precision. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of B and C and the order of U. */

/*  N       (input) INTEGER */
/*          The number of columns of B and C. */

/*  B       (input) COMPLEX array, dimension (LDB,N) */
/*          The m by n matrix B. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= max(1,M). */

/*  C       (input) COMPLEX array, dimension (LDC,N) */
/*          The m by n matrix C, assumed to contain U' * B. */

/*  LDC     (input) INTEGER */
/*          The leading dimension of the array C.  LDC >= max(1,M). */

/*  U       (input) COMPLEX array, dimension (LDU,M) */
/*          The m by m orthogonal matrix U. */

/*  LDU     (input) INTEGER */
/*          The leading dimension of the array U.  LDU >= max(1,M). */

/*  WORK    (workspace) COMPLEX array, dimension (M) */

/*  RWORK   (workspace) REAL array, dimension (M) */

/*  RESID   (output) REAL */
/*          RESID = norm( B - U * C ) / ( max(m,n) * norm(B) * EPS ). */
/*          Set to 0 when M <= 0 or N <= 0. */

/*  Always returns 0. */

/* ====================================================================== */

    /* External BLAS / LAPACK routines */
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
	    complex *, integer *);
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
, complex *, integer *, complex *, integer *, complex *, complex *
, integer *);
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *);
    extern doublereal slamch_(char *);
    extern doublereal scasum_(integer *, complex *, integer *);

    /* Local variables */
    integer col, ncols;
    real eps, realmn, bnorm;
    real colsum, capped, ratio;

/*     Quick return if possible */

    *resid = 0.f;
    if (*m <= 0 || *n <= 0) {
	return 0;
    }
    realmn = (real) max(*m,*n);
    eps = slamch_("Precision");

/*     Largest column 1-norm of B - U * C.  Columns are addressed with */
/*     zero-based offsets from the array bases, so column `col` of B */
/*     starts at b + col*LDB.  c_b7 and c_b10 are file-level constants */
/*     (presumably -1 and 1, matching the formula above -- confirm). */

    ncols = *n;
    for (col = 0; col < ncols; ++col) {
	ccopy_(m, b + col * *ldb, &c__1, work, &c__1);
	cgemv_("No transpose", m, m, &c_b7, u, ldu, c__ + col * *ldc, &c__1, 
		&c_b10, work, &c__1);
	colsum = scasum_(m, work, &c__1);
	*resid = dmax(*resid,colsum);
    }

/*     Compute norm of B. */

    bnorm = clange_("1", m, n, b, ldb, rwork);

/*     Zero (or negative) norm: report 1/EPS for any nonzero residual. */

    if (bnorm <= 0.f) {
	if (*resid != 0.f) {
	    *resid = 1.f / eps;
	}
	return 0;
    }

/*     Scale the residual; the two capped branches bound the quotient */
/*     RESID / BNORM by REALMN before the final division. */

    if (bnorm >= *resid) {
	*resid = *resid / bnorm / (realmn * eps);
    } else if (bnorm < 1.f) {
	capped = realmn * bnorm;
	*resid = dmin(*resid,capped) / bnorm / (realmn * eps);
    } else {
	ratio = *resid / bnorm;
	*resid = dmin(ratio,realmn) / (realmn * eps);
    }
    return 0;

/*     End of CBDT02 */

} /* cbdt02_ */
Пример #24
0
/*
 * Numeric block update of column jcol inside the relaxed supernode whose
 * first column is fsupc.
 *
 * Steps, in order:
 *   1. Gather dense[] (indexed by the row subscripts of the supernode) into
 *      lusup[] starting at xlusup[jcol]; every gathered dense[] entry is
 *      reset to zero.  xlusup_end[jcol] is set to one past the last entry.
 *   2. If jcol is not the first column of the supernode, apply a unit lower
 *      triangular solve with the leading nsupc columns, followed by a
 *      matrix-vector update of the remaining nrow entries of the column.
 *      The flop count of this step is added to Gstat->procstat[pnum].fcops.
 *
 * With USE_VENDOR_BLAS the solve/update go through xTRSV/xGEMV; otherwise
 * through clsolve/cmatvec, using tempv[] as scratch (the entries used are
 * reset to zero before returning).
 *
 * Always returns 0.
 */
int
pcgstrf_snode_bmod(
		   const int  pnum,   /* process number */
		   const int  jcol,   /* in - current column in the s-node */
		   const int  jsupno, /* in - not referenced by this routine */
		   const int  fsupc,  /* in - first column in the s-node */
		   complex     *dense, /* in/out - gathered entries are zeroed */
		   complex     *tempv, /* working array */
		   GlobalLU_t *Glu,   /* modified */
		   Gstat_t *Gstat     /* modified */
		   )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Performs numeric block updates within the relaxed supernode. 
 */

    complex      zero = {0.0, 0.0};

#if ( MACH==CRAY_PVP )
    _fcd ftcs1, ftcs2, ftcs3;
#endif
#ifdef USE_VENDOR_BLAS    
    /* Scalars for the BLAS call: y := alpha*A*x + beta*y with alpha = -1,
       beta = 1. */
    complex        one = {1.0, 0.0};
    complex        none = {-1.0, 0.0};
    int            incx = 1, incy = 1;
    complex        alpha = none, beta = one;
#else
    int            i, iptr;
#endif
    
    int            luptr, nsupc, nsupr, nrow;
    int            isub, irow;
    int            ufirst, nextlu;
    complex        *lusup;
    int            *lsub, *xlsub, *xlsub_end, *xlusup, *xlusup_end;
    float          flopcnt;

    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;
    xlsub_end  = Glu->xlsub_end;
    lusup      = Glu->lusup;
    xlusup     = Glu->xlusup;
    xlusup_end = Glu->xlusup_end;

    nextlu = xlusup[jcol];
    
    /*
     *	Process the supernodal portion of L\U[*,j]: gather the column from
     *	dense[] and clear the entries that were consumed.
     */
    for (isub = xlsub[fsupc]; isub < xlsub_end[fsupc]; isub++) {
  	irow = lsub[isub];
	lusup[nextlu] = dense[irow];
	dense[irow] = zero;
	++nextlu;
    }

    xlusup_end[jcol] = nextlu;
    
    if ( fsupc < jcol ) {

	luptr = xlusup[fsupc];
	nsupr = xlsub_end[fsupc] - xlsub[fsupc];
	nsupc = jcol - fsupc;	/* Excluding jcol */
	ufirst = xlusup[jcol];	/* Points to the beginning of column
				   jcol in supernode L\U(jsupno). */
	nrow = nsupr - nsupc;
	
	/* Flop count: nsupc*(nsupc-1) for the triangular solve plus
	   2*nrow*nsupc for the matrix-vector update.  The products are
	   evaluated in double so they cannot overflow int on large
	   supernodes; whenever the int expression would not have overflowed
	   the float result is the same. */
	flopcnt = (float) ((double) nsupc * (nsupc - 1)
			   + 2.0 * (double) nrow * (double) nsupc);
	Gstat->procstat[pnum].fcops += flopcnt;

#ifdef USE_VENDOR_BLAS
#if ( MACH==CRAY_PVP )
	ftcs1 = _cptofcd("L", strlen("L"));
	ftcs2 = _cptofcd("N", strlen("N"));
	ftcs3 = _cptofcd("U", strlen("U"));
	CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 
	      &lusup[ufirst], &incx );
	CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
	      &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#else
	ctrsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 
	      &lusup[ufirst], &incx );
	cgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
		&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#endif
#else
	clsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
	cmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 
		 &lusup[ufirst], &tempv[0] );

        /* Subtract tempv[*] from the trailing part of the column in
           lusup[*], clearing tempv[*] for the next caller. */
	iptr = ufirst + nsupc;
	for (i = 0; i < nrow; i++) {
            c_sub(&lusup[iptr], &lusup[iptr], &tempv[i]);
            ++iptr;
            tempv[i] = zero;
	}
#endif

    }

    return 0;
}
Пример #25
0
/* Subroutine */ int clarz_(char *side, integer *m, integer *n, integer *l, 
	complex *v, integer *incv, complex *tau, complex *c__, integer *ldc, 
	complex *work)
{
/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  CLARZ applies a complex elementary reflector H to a complex */
/*  M-by-N matrix C, from either the left or the right. H is represented */
/*  in the form */

/*        H = I - tau * v * v' */

/*  where tau is a complex scalar and v is a complex vector. */

/*  If tau = 0, then H is taken to be the unit matrix. */

/*  To apply H' (the conjugate transpose of H), supply conjg(tau) instead */
/*  of tau. */

/*  H is a product of k elementary reflectors as returned by CTZRZF. */

/*  Arguments */
/*  ========= */

/*  SIDE    (input) CHARACTER*1 */
/*          = 'L': form  H * C */
/*          otherwise (documented value 'R'): form  C * H */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix C. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix C. */

/*  L       (input) INTEGER */
/*          The number of entries of the vector V containing */
/*          the meaningful part of the Householder vectors. */
/*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */

/*  V       (input) COMPLEX array, dimension (1+(L-1)*abs(INCV)) */
/*          The vector v in the representation of H as returned by */
/*          CTZRZF. V is not used if TAU = 0. */

/*  INCV    (input) INTEGER */
/*          The increment between elements of v. INCV <> 0. */

/*  TAU     (input) COMPLEX */
/*          The value tau in the representation of H. */

/*  C       (input/output) COMPLEX array, dimension (LDC,N) */
/*          On entry, the M-by-N matrix C. */
/*          On exit, C is overwritten by the matrix H * C if SIDE = 'L', */
/*          or C * H if SIDE = 'R'. */

/*  LDC     (input) INTEGER */
/*          The leading dimension of the array C. LDC >= max(1,M). */

/*  WORK    (workspace) COMPLEX array, dimension */
/*                         (N) if SIDE = 'L' */
/*                      or (M) if SIDE = 'R' */

/*  Always returns 0. */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */

/*  ===================================================================== */

    /* External BLAS / LAPACK routines */
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
	    complex *, integer *);
    extern /* Subroutine */ int caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *);
    extern /* Subroutine */ int clacgv_(integer *, complex *, integer *);
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, 
	    complex *, complex *, integer *, complex *, integer *, complex *, 
	    complex *, integer *);
    extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, 
	    complex *, integer *, complex *, integer *, complex *, integer *);
    extern /* Subroutine */ int cgeru_(integer *, integer *, complex *, 
	    complex *, integer *, complex *, integer *, complex *, integer *);

    /* Local variables */
    logical from_left;
    complex neg_tau;
    complex *c_tail;

    from_left = lsame_(side, "L");

/*     tau = 0: H is the unit matrix, so C is left untouched. */

    if (tau->r == 0.f && tau->i == 0.f) {
	return 0;
    }

/*     Arrays are addressed with zero-based offsets from their bases: */
/*     c__ is C(1,1), v is v(1), work is w(1).  c_b1 is a file-level */
/*     complex constant (presumably 1 -- confirm). */

    if (from_left) {

/*        Form  H * C.  c_tail is C( m-l+1, 1 ). */

	c_tail = c__ + (*m - *l);

/*        w( 1:n ) = conjg( C( 1, 1:n ) ) */

	ccopy_(n, c__, ldc, work, &c__1);
	clacgv_(n, work, &c__1);

/*        w( 1:n ) = conjg( w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l ) ) */

	cgemv_("Conjugate transpose", l, n, &c_b1, c_tail, ldc, v, incv, 
		&c_b1, work, &c__1);
	clacgv_(n, work, &c__1);

/*        C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n ) */

	neg_tau.r = -tau->r;
	neg_tau.i = -tau->i;
	caxpy_(n, &neg_tau, work, &c__1, c__, ldc);

/*        C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... */
/*                            tau * v( 1:l ) * conjg( w( 1:n )' ) */

	neg_tau.r = -tau->r;
	neg_tau.i = -tau->i;
	cgeru_(l, n, &neg_tau, v, incv, work, &c__1, c_tail, ldc);

    } else {

/*        Form  C * H.  c_tail is C( 1, n-l+1 ). */

	c_tail = c__ + (*n - *l) * *ldc;

/*        w( 1:m ) = C( 1:m, 1 ) */

	ccopy_(m, c__, &c__1, work, &c__1);

/*        w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n ) * v( 1:l ) */

	cgemv_("No transpose", m, l, &c_b1, c_tail, ldc, v, incv, &c_b1, 
		work, &c__1);

/*        C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m ) */

	neg_tau.r = -tau->r;
	neg_tau.i = -tau->i;
	caxpy_(m, &neg_tau, work, &c__1, c__, &c__1);

/*        C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... */
/*                            tau * w( 1:m ) * v( 1:l )' */

	neg_tau.r = -tau->r;
	neg_tau.i = -tau->i;
	cgerc_(m, l, &neg_tau, work, &c__1, v, incv, c_tail, ldc);

    }

    return 0;

/*     End of CLARZ */

} /* clarz_ */
Пример #26
0
/* Subroutine */ int ctrevc_(char *side, char *howmny, logical *select, 
	integer *n, complex *t, integer *ldt, complex *vl, integer *ldvl, 
	complex *vr, integer *ldvr, integer *mm, integer *m, complex *work, 
	real *rwork, integer *info)
{
    /* System generated locals */
    integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, 
	    i__2, i__3, i__4, i__5;
    real r__1, r__2, r__3;
    complex q__1, q__2;

    /* Builtin functions */
    double r_imag(complex *);
    void r_cnjg(complex *, complex *);

    /* Local variables */
    integer i__, j, k, ii, ki, is;
    real ulp;
    logical allv;
    real unfl, ovfl, smin;
    logical over;
    real scale;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
, complex *, integer *, complex *, integer *, complex *, complex *
, integer *);
    real remax;
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
	    complex *, integer *);
    logical leftv, bothv, somev;
    extern /* Subroutine */ int slabad_(real *, real *);
    extern integer icamax_(integer *, complex *, integer *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer 
	    *), xerbla_(char *, integer *), clatrs_(char *, char *, 
	    char *, char *, integer *, complex *, integer *, complex *, real *
, real *, integer *);
    extern doublereal scasum_(integer *, complex *, integer *);
    logical rightv;
    real smlnum;


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CTREVC computes some or all of the right and/or left eigenvectors of */
/*  a complex upper triangular matrix T. */
/*  Matrices of this type are produced by the Schur factorization of */
/*  a complex general matrix:  A = Q*T*Q**H, as computed by CHSEQR. */

/*  The right eigenvector x and the left eigenvector y of T corresponding */
/*  to an eigenvalue w are defined by: */

/*               T*x = w*x,     (y**H)*T = w*(y**H) */

/*  where y**H denotes the conjugate transpose of the vector y. */
/*  The eigenvalues are not input to this routine, but are read directly */
/*  from the diagonal of T. */

/*  This routine returns the matrices X and/or Y of right and left */
/*  eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an */
/*  input matrix.  If Q is the unitary factor that reduces a matrix A to */
/*  Schur form T, then Q*X and Q*Y are the matrices of right and left */
/*  eigenvectors of A. */

/*  Arguments */
/*  ========= */

/*  SIDE    (input) CHARACTER*1 */
/*          = 'R':  compute right eigenvectors only; */
/*          = 'L':  compute left eigenvectors only; */
/*          = 'B':  compute both right and left eigenvectors. */

/*  HOWMNY  (input) CHARACTER*1 */
/*          = 'A':  compute all right and/or left eigenvectors; */
/*          = 'B':  compute all right and/or left eigenvectors, */
/*                  backtransformed using the matrices supplied in */
/*                  VR and/or VL; */
/*          = 'S':  compute selected right and/or left eigenvectors, */
/*                  as indicated by the logical array SELECT. */

/*  SELECT  (input) LOGICAL array, dimension (N) */
/*          If HOWMNY = 'S', SELECT specifies the eigenvectors to be */
/*          computed. */
/*          The eigenvector corresponding to the j-th eigenvalue is */
/*          computed if SELECT(j) = .TRUE.. */
/*          Not referenced if HOWMNY = 'A' or 'B'. */

/*  N       (input) INTEGER */
/*          The order of the matrix T. N >= 0. */

/*  T       (input/output) COMPLEX array, dimension (LDT,N) */
/*          The upper triangular matrix T.  T is modified, but restored */
/*          on exit. */

/*  LDT     (input) INTEGER */
/*          The leading dimension of the array T. LDT >= max(1,N). */

/*  VL      (input/output) COMPLEX array, dimension (LDVL,MM) */
/*          On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must */
/*          contain an N-by-N matrix Q (usually the unitary matrix Q of */
/*          Schur vectors returned by CHSEQR). */
/*          On exit, if SIDE = 'L' or 'B', VL contains: */
/*          if HOWMNY = 'A', the matrix Y of left eigenvectors of T; */
/*          if HOWMNY = 'B', the matrix Q*Y; */
/*          if HOWMNY = 'S', the left eigenvectors of T specified by */
/*                           SELECT, stored consecutively in the columns */
/*                           of VL, in the same order as their */
/*                           eigenvalues. */
/*          Not referenced if SIDE = 'R'. */

/*  LDVL    (input) INTEGER */
/*          The leading dimension of the array VL.  LDVL >= 1, and if */
/*          SIDE = 'L' or 'B', LDVL >= N. */

/*  VR      (input/output) COMPLEX array, dimension (LDVR,MM) */
/*          On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must */
/*          contain an N-by-N matrix Q (usually the unitary matrix Q of */
/*          Schur vectors returned by CHSEQR). */
/*          On exit, if SIDE = 'R' or 'B', VR contains: */
/*          if HOWMNY = 'A', the matrix X of right eigenvectors of T; */
/*          if HOWMNY = 'B', the matrix Q*X; */
/*          if HOWMNY = 'S', the right eigenvectors of T specified by */
/*                           SELECT, stored consecutively in the columns */
/*                           of VR, in the same order as their */
/*                           eigenvalues. */
/*          Not referenced if SIDE = 'L'. */

/*  LDVR    (input) INTEGER */
/*          The leading dimension of the array VR.  LDVR >= 1, and if */
/*          SIDE = 'R' or 'B', LDVR >= N. */

/*  MM      (input) INTEGER */
/*          The number of columns in the arrays VL and/or VR. MM >= M. */

/*  M       (output) INTEGER */
/*          The number of columns in the arrays VL and/or VR actually */
/*          used to store the eigenvectors.  If HOWMNY = 'A' or 'B', M */
/*          is set to N.  Each selected eigenvector occupies one */
/*          column. */

/*  WORK    (workspace) COMPLEX array, dimension (2*N) */

/*  RWORK   (workspace) REAL array, dimension (N) */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  Further Details */
/*  =============== */

/*  The algorithm used in this program is basically backward (forward) */
/*  substitution, with scaling to make the code robust against */
/*  possible overflow. */

/*  Each eigenvector is normalized so that the element of largest */
/*  magnitude has magnitude 1; here the magnitude of a complex number */
/*  (x,y) is taken to be |x| + |y|. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Statement Functions .. */
/*     .. */
/*     .. Statement Function definitions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Decode and test the input parameters */

    /* Parameter adjustments */
    --select;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1;
    t -= t_offset;
    vl_dim1 = *ldvl;
    vl_offset = 1 + vl_dim1;
    vl -= vl_offset;
    vr_dim1 = *ldvr;
    vr_offset = 1 + vr_dim1;
    vr -= vr_offset;
    --work;
    --rwork;

    /* Function Body */
    bothv = lsame_(side, "B");
    rightv = lsame_(side, "R") || bothv;
    leftv = lsame_(side, "L") || bothv;

    allv = lsame_(howmny, "A");
    over = lsame_(howmny, "B");
    somev = lsame_(howmny, "S");

/*     Set M to the number of columns required to store the selected */
/*     eigenvectors. */

    if (somev) {
	*m = 0;
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    if (select[j]) {
		++(*m);
	    }
/* L10: */
	}
    } else {
	*m = *n;
    }

    *info = 0;
    if (! rightv && ! leftv) {
	*info = -1;
    } else if (! allv && ! over && ! somev) {
	*info = -2;
    } else if (*n < 0) {
	*info = -4;
    } else if (*ldt < max(1,*n)) {
	*info = -6;
    } else if (*ldvl < 1 || leftv && *ldvl < *n) {
	*info = -8;
    } else if (*ldvr < 1 || rightv && *ldvr < *n) {
	*info = -10;
    } else if (*mm < *m) {
	*info = -11;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CTREVC", &i__1);
	return 0;
    }

/*     Quick return if possible. */

    if (*n == 0) {
	return 0;
    }

/*     Set the constants to control overflow. */

    unfl = slamch_("Safe minimum");
    ovfl = 1.f / unfl;
    slabad_(&unfl, &ovfl);
    ulp = slamch_("Precision");
    smlnum = unfl * (*n / ulp);

/*     Store the diagonal elements of T in working array WORK. */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = i__ + *n;
	i__3 = i__ + i__ * t_dim1;
	work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i;
/* L20: */
    }

/*     Compute 1-norm of each column of strictly upper triangular */
/*     part of T to control overflow in triangular solver. */

    rwork[1] = 0.f;
    i__1 = *n;
    for (j = 2; j <= i__1; ++j) {
	i__2 = j - 1;
	rwork[j] = scasum_(&i__2, &t[j * t_dim1 + 1], &c__1);
/* L30: */
    }

    if (rightv) {

/*        Compute right eigenvectors. */

	is = *m;
	for (ki = *n; ki >= 1; --ki) {

	    if (somev) {
		if (! select[ki]) {
		    goto L80;
		}
	    }
/* Computing MAX */
	    i__1 = ki + ki * t_dim1;
	    r__3 = ulp * ((r__1 = t[i__1].r, dabs(r__1)) + (r__2 = r_imag(&t[
		    ki + ki * t_dim1]), dabs(r__2)));
	    smin = dmax(r__3,smlnum);

	    work[1].r = 1.f, work[1].i = 0.f;

/*           Form right-hand side. */

	    i__1 = ki - 1;
	    for (k = 1; k <= i__1; ++k) {
		i__2 = k;
		i__3 = k + ki * t_dim1;
		q__1.r = -t[i__3].r, q__1.i = -t[i__3].i;
		work[i__2].r = q__1.r, work[i__2].i = q__1.i;
/* L40: */
	    }

/*           Solve the triangular system: */
/*              (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK. */

	    i__1 = ki - 1;
	    for (k = 1; k <= i__1; ++k) {
		i__2 = k + k * t_dim1;
		i__3 = k + k * t_dim1;
		i__4 = ki + ki * t_dim1;
		q__1.r = t[i__3].r - t[i__4].r, q__1.i = t[i__3].i - t[i__4]
			.i;
		t[i__2].r = q__1.r, t[i__2].i = q__1.i;
		i__2 = k + k * t_dim1;
		if ((r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[k + k *
			 t_dim1]), dabs(r__2)) < smin) {
		    i__3 = k + k * t_dim1;
		    t[i__3].r = smin, t[i__3].i = 0.f;
		}
/* L50: */
	    }

	    if (ki > 1) {
		i__1 = ki - 1;
		clatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[
			t_offset], ldt, &work[1], &scale, &rwork[1], info);
		i__1 = ki;
		work[i__1].r = scale, work[i__1].i = 0.f;
	    }

/*           Copy the vector x or Q*x to VR and normalize. */

	    if (! over) {
		ccopy_(&ki, &work[1], &c__1, &vr[is * vr_dim1 + 1], &c__1);

		ii = icamax_(&ki, &vr[is * vr_dim1 + 1], &c__1);
		i__1 = ii + is * vr_dim1;
		remax = 1.f / ((r__1 = vr[i__1].r, dabs(r__1)) + (r__2 = 
			r_imag(&vr[ii + is * vr_dim1]), dabs(r__2)));
		csscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1);

		i__1 = *n;
		for (k = ki + 1; k <= i__1; ++k) {
		    i__2 = k + is * vr_dim1;
		    vr[i__2].r = 0.f, vr[i__2].i = 0.f;
/* L60: */
		}
	    } else {
		if (ki > 1) {
		    i__1 = ki - 1;
		    q__1.r = scale, q__1.i = 0.f;
		    cgemv_("N", n, &i__1, &c_b2, &vr[vr_offset], ldvr, &work[
			    1], &c__1, &q__1, &vr[ki * vr_dim1 + 1], &c__1);
		}

		ii = icamax_(n, &vr[ki * vr_dim1 + 1], &c__1);
		i__1 = ii + ki * vr_dim1;
		remax = 1.f / ((r__1 = vr[i__1].r, dabs(r__1)) + (r__2 = 
			r_imag(&vr[ii + ki * vr_dim1]), dabs(r__2)));
		csscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1);
	    }

/*           Set back the original diagonal elements of T. */

	    i__1 = ki - 1;
	    for (k = 1; k <= i__1; ++k) {
		i__2 = k + k * t_dim1;
		i__3 = k + *n;
		t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i;
/* L70: */
	    }

	    --is;
L80:
	    ;
	}
    }

    if (leftv) {

/*        Compute left eigenvectors. */

	is = 1;
	i__1 = *n;
	for (ki = 1; ki <= i__1; ++ki) {

	    if (somev) {
		if (! select[ki]) {
		    goto L130;
		}
	    }
/* Computing MAX */
	    i__2 = ki + ki * t_dim1;
	    r__3 = ulp * ((r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[
		    ki + ki * t_dim1]), dabs(r__2)));
	    smin = dmax(r__3,smlnum);

	    i__2 = *n;
	    work[i__2].r = 1.f, work[i__2].i = 0.f;

/*           Form right-hand side. */

	    i__2 = *n;
	    for (k = ki + 1; k <= i__2; ++k) {
		i__3 = k;
		r_cnjg(&q__2, &t[ki + k * t_dim1]);
		q__1.r = -q__2.r, q__1.i = -q__2.i;
		work[i__3].r = q__1.r, work[i__3].i = q__1.i;
/* L90: */
	    }

/*           Solve the triangular system: */
/*              (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK. */

	    i__2 = *n;
	    for (k = ki + 1; k <= i__2; ++k) {
		i__3 = k + k * t_dim1;
		i__4 = k + k * t_dim1;
		i__5 = ki + ki * t_dim1;
		q__1.r = t[i__4].r - t[i__5].r, q__1.i = t[i__4].i - t[i__5]
			.i;
		t[i__3].r = q__1.r, t[i__3].i = q__1.i;
		i__3 = k + k * t_dim1;
		if ((r__1 = t[i__3].r, dabs(r__1)) + (r__2 = r_imag(&t[k + k *
			 t_dim1]), dabs(r__2)) < smin) {
		    i__4 = k + k * t_dim1;
		    t[i__4].r = smin, t[i__4].i = 0.f;
		}
/* L100: */
	    }

	    if (ki < *n) {
		i__2 = *n - ki;
		clatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", &
			i__2, &t[ki + 1 + (ki + 1) * t_dim1], ldt, &work[ki + 
			1], &scale, &rwork[1], info);
		i__2 = ki;
		work[i__2].r = scale, work[i__2].i = 0.f;
	    }

/*           Copy the vector x or Q*x to VL and normalize. */

	    if (! over) {
		i__2 = *n - ki + 1;
		ccopy_(&i__2, &work[ki], &c__1, &vl[ki + is * vl_dim1], &c__1)
			;

		i__2 = *n - ki + 1;
		ii = icamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1;
		i__2 = ii + is * vl_dim1;
		remax = 1.f / ((r__1 = vl[i__2].r, dabs(r__1)) + (r__2 = 
			r_imag(&vl[ii + is * vl_dim1]), dabs(r__2)));
		i__2 = *n - ki + 1;
		csscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1);

		i__2 = ki - 1;
		for (k = 1; k <= i__2; ++k) {
		    i__3 = k + is * vl_dim1;
		    vl[i__3].r = 0.f, vl[i__3].i = 0.f;
/* L110: */
		}
	    } else {
		if (ki < *n) {
		    i__2 = *n - ki;
		    q__1.r = scale, q__1.i = 0.f;
		    cgemv_("N", n, &i__2, &c_b2, &vl[(ki + 1) * vl_dim1 + 1], 
			    ldvl, &work[ki + 1], &c__1, &q__1, &vl[ki * 
			    vl_dim1 + 1], &c__1);
		}

		ii = icamax_(n, &vl[ki * vl_dim1 + 1], &c__1);
		i__2 = ii + ki * vl_dim1;
		remax = 1.f / ((r__1 = vl[i__2].r, dabs(r__1)) + (r__2 = 
			r_imag(&vl[ii + ki * vl_dim1]), dabs(r__2)));
		csscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1);
	    }

/*           Set back the original diagonal elements of T. */

	    i__2 = *n;
	    for (k = ki + 1; k <= i__2; ++k) {
		i__3 = k + k * t_dim1;
		i__4 = k + *n;
		t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i;
/* L120: */
	    }

	    ++is;
L130:
	    ;
	}
    }

    return 0;

/*     End of CTREVC */

} /* ctrevc_ */
Пример #27
0
/*
 * pcgstrf_column_bmod -- numeric block update (sup-col) of one column of a
 * panel, single-precision complex version.
 *
 * Arguments are documented next to each parameter below.  The routine
 *   1. walks the nseg supernodal segments recorded in segrep[] from the last
 *      entry to the first and, for every segment that belongs to a supernode
 *      other than the one containing jcol, applies its update to the SPA
 *      dense[];
 *   2. allocates storage for column jcol through Glu_alloc() and gathers the
 *      entries of dense[] into lusup[], zeroing dense[] as it goes;
 *   3. applies the remaining update from the columns fst_col..jcol-1 of
 *      jcol's own supernode directly inside lusup[].
 *
 * Returns 0 on success, or the non-zero value returned by Glu_alloc().
 */
int
pcgstrf_column_bmod(
                    const int  pnum,   /* process number */
                    const int  jcol,   /* current column in the panel */
                    const int  fpanelc,/* first column in the panel */
                    const int  nseg,   /* number of s-nodes to update jcol */
                    int        *segrep,/* in */
                    int        *repfnz,/* in */
                    complex     *dense, /* modified */
                    complex     *tempv, /* working array */
                    pxgstrf_shared_t *pxgstrf_shared, /* modified */
                    Gstat_t *Gstat     /* modified */
                    )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose:
 * ========
 *    Performs numeric block updates (sup-col) in topological order.
 *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
 *    Special processing on the supernodal portion of L\U[*,j].
 *
 * Return value:
 * =============
 *      0 - successful return
 *    > 0 - number of bytes allocated when run out of space
 *
 */
#if ( MACH==CRAY_PVP )
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
         ftcs2 = _cptofcd("N", strlen("N")),
         ftcs3 = _cptofcd("U", strlen("U"));
#endif

#ifdef USE_VENDOR_BLAS
    int         incx = 1, incy = 1;
    complex      alpha, beta;
#endif
    GlobalLU_t *Glu = pxgstrf_shared->Glu;   /* modified */

    /* krep = representative of current k-th supernode
     * fsupc = first supernodal column
     * nsupc = no of columns in supernode
     * nsupr = no of rows in supernode (used as leading dimension)
     * luptr = location of supernodal LU-block in storage
     * kfnz = first nonz in the k-th supernodal segment
     * no_zeros = no of leading zeros in a supernodal U-segment
     */
    complex      ukj, ukj1, ukj2;
    register int lptr, kfnz, isub, irow, i, no_zeros;
    register int luptr, luptr1, luptr2;
    int          fsupc, nsupc, nsupr, segsze;
    int          nrow;    /* No of rows in the matrix of matrix-vector */
    int          jsupno, k, ksub, krep, krep_ind, ksupno;
    int          ufirst, nextlu;
    int          fst_col; /* First column within small LU update */
    int          d_fsupc; /* Distance between the first column of the current
                             panel and the first column of the current snode.*/
    int          *xsup, *supno;
    int          *lsub, *xlsub, *xlsub_end;
    complex       *lusup;
    int          *xlusup, *xlusup_end;
    complex       *tempv1;
    int          mem_error;
    register float flopcnt;

    complex      zero = {0.0, 0.0};
    complex      one = {1.0, 0.0};
    complex      none = {-1.0, 0.0};
    complex      comp_temp, comp_temp1;

    /* Local aliases for the shared LU data structure. */
    xsup       = Glu->xsup;
    supno      = Glu->supno;
    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;
    xlsub_end  = Glu->xlsub_end;
    lusup      = Glu->lusup;
    xlusup     = Glu->xlusup;
    xlusup_end = Glu->xlusup_end;
    jsupno     = supno[jcol];

    /*
     * For each nonz supernode segment of U[*,j] in topological order.
     * segrep[] is consumed from its last entry (nseg-1) down to entry 0.
     */
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) {

        krep = segrep[k];
        k--;
        ksupno = supno[krep];
#if ( DEBUGlevel>=2 )
if (jcol==BADCOL)
printf("(%d) pcgstrf_column_bmod[1]: %d, nseg %d, krep %d, jsupno %d, ksupno %d\n",
       pnum, jcol, nseg, krep, jsupno, ksupno);
#endif
        if ( jsupno != ksupno ) { /* Outside the rectangular supernode */

            fsupc = xsup[ksupno];
            fst_col = SUPERLU_MAX ( fsupc, fpanelc );

            /* Distance from the current supernode to the current panel;
               d_fsupc=0 if fsupc >= fpanelc. */
            d_fsupc = fst_col - fsupc;

            luptr = xlusup[fst_col] + d_fsupc;
            lptr = xlsub[fsupc] + d_fsupc;
            kfnz = repfnz[krep];
            kfnz = SUPERLU_MAX ( kfnz, fpanelc );
            segsze = krep - kfnz + 1;
            nsupc = krep - fst_col + 1;
            nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */
            nrow = nsupr - d_fsupc - nsupc;
            krep_ind = lptr + nsupc - 1;

            /* Accumulate the per-process operation count: triangular-solve
             * term plus matrix-vector term.
             * NOTE(review): no extra factor is applied for complex
             * arithmetic here -- confirm that this is the intended
             * statistic. */
            flopcnt = segsze * (segsze - 1) + 2 * nrow * segsze;
            Gstat->procstat[pnum].fcops += flopcnt;

#if ( DEBUGlevel>=2 )
if (jcol==BADCOL)
printf("(%d) pcgstrf_column_bmod[2]: %d, krep %d, kfnz %d, segsze %d, d_fsupc %d,\
fsupc %d, nsupr %d, nsupc %d\n",
       pnum, jcol, krep, kfnz, segsze, d_fsupc, fsupc, nsupr, nsupc);

#endif

            /*
             * Case 1: Update U-segment of size 1 -- col-col update
             */
            if ( segsze == 1 ) {
                ukj = dense[lsub[krep_ind]];
                /* Advance to the entry just below the diagonal of the
                   last column (krep) of the supernode. */
                luptr += nsupr*(nsupc-1) + nsupc;

                /* dense[irow] -= ukj * L[irow,krep] for every row below
                   the supernode's diagonal block. */
                for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
                    irow = lsub[i];
                    cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                    c_sub(&dense[irow], &dense[irow], &comp_temp);
                    luptr++;
                }

            } else if ( segsze <= 3 ) {
                ukj = dense[lsub[krep_ind]];
                /* luptr  -> diagonal entry of column krep,
                   luptr1 -> same row, one column to the left. */
                luptr += nsupr*(nsupc-1) + nsupc-1;
                ukj1 = dense[lsub[krep_ind - 1]];
                luptr1 = luptr - nsupr;

                if ( segsze == 2 ) { /* Case 2: 2cols-col update */
                    /* Finish the 2x2 unit-lower solve for ukj first. */
                    cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                    c_sub(&ukj, &ukj, &comp_temp);
                    dense[lsub[krep_ind]] = ukj;
                    for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
                        irow = lsub[i];
                        luptr++;
                        luptr1++;
                        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                        cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        c_sub(&dense[irow], &dense[irow], &comp_temp);
                    }
                } else { /* Case 3: 3cols-col update */
                    ukj2 = dense[lsub[krep_ind - 2]];
                    luptr2 = luptr1 - nsupr;
                    /* 3x3 unit-lower solve: update ukj1 with the entry one
                       row above luptr2, then ukj with both columns. */
                    cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
                    c_sub(&ukj1, &ukj1, &comp_temp);

                    cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                    cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                    c_add(&comp_temp, &comp_temp, &comp_temp1);
                    c_sub(&ukj, &ukj, &comp_temp);

                    dense[lsub[krep_ind]] = ukj;
                    dense[lsub[krep_ind-1]] = ukj1;
                    for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
                        irow = lsub[i];
                        luptr++;
                        luptr1++;
                        luptr2++;
                        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                        cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        c_sub(&dense[irow], &dense[irow], &comp_temp);
                    }
                }

            } else {
                /*
                 * Case: sup-col update
                 * Perform a triangular solve and block update,
                 * then scatter the result of sup-col update to dense
                 */
                no_zeros = kfnz - fst_col;

                /* Copy U[*,j] segment from dense[*] to tempv[*] */
                isub = lptr + no_zeros;
                for (i = 0; i < segsze; i++) {
                    irow = lsub[isub];
                    tempv[i] = dense[irow];
                    ++isub;
                }

                /* Dense triangular solve -- start effective triangle */
                luptr += nsupr * no_zeros + no_zeros;
#ifdef USE_VENDOR_BLAS
#if ( MACH==CRAY_PVP )
                CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
                       &nsupr, tempv, &incx );
#else
                ctrsv_( "L", "N", "U", &segsze, &lusup[luptr],
                       &nsupr, tempv, &incx );
#endif

                luptr += segsze;  /* Dense matrix-vector */
                tempv1 = &tempv[segsze];
                alpha = one;
                beta = zero;
#if ( MACH==CRAY_PVP )
                CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
                       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#else
                cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
                       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#endif
#else
                clsolve ( nsupr, segsze, &lusup[luptr], tempv );

                luptr += segsze;  /* Dense matrix-vector */
                tempv1 = &tempv[segsze];
                cmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
#endif
                /* Scatter tempv[] into SPA dense[*]; tempv[] is reset to
                   zero so the work array is clean for the next use. */
                isub = lptr + no_zeros;
                for (i = 0; i < segsze; i++) {
                    irow = lsub[isub];
                    dense[irow] = tempv[i]; /* Scatter */
                    tempv[i] = zero;
                    isub++;
                }

                /* Subtract tempv1[] from SPA dense[*] and reset tempv1[] */
                for (i = 0; i < nrow; i++) {
                    irow = lsub[isub];
                    c_sub(&dense[irow], &dense[irow], &tempv1[i]);
                    tempv1[i] = zero;
                    ++isub;
                }
            } /* else segsze >= 4 */

        } /* if jsupno ... */

    } /* for each segment... */


    /* ------------------------------------------
       Process the supernodal portion of L\U[*,j]
       ------------------------------------------ */

    fsupc = SUPER_FSUPC (jsupno);
    nsupr = xlsub_end[fsupc] - xlsub[fsupc];
    if ( (mem_error = Glu_alloc(pnum, jcol, nsupr, LUSUP, &nextlu,
                               pxgstrf_shared)) )
        return mem_error;
    xlusup[jcol] = nextlu;
    /* Re-read the base pointer after the allocation call. */
    lusup = Glu->lusup;

    /* Gather the nonzeros from SPA dense[*,j] into L\U[*,j] */
    for (isub = xlsub[fsupc]; isub < xlsub_end[fsupc]; ++isub) {
        irow = lsub[isub];
        lusup[nextlu] = dense[irow];
        dense[irow] = zero;
#ifdef DEBUG
if (jcol == -1)
    /* A complex value cannot be passed to a single %e conversion;
       print its real and imaginary parts separately. */
    printf("(%d) pcgstrf_column_bmod[lusup] jcol %d, irow %d, lusup (%.10e, %.10e)\n",
           pnum, jcol, irow, lusup[nextlu].r, lusup[nextlu].i);
#endif
        ++nextlu;
    }
    xlusup_end[jcol] = nextlu; /* close L\U[*,jcol] */

#if ( DEBUGlevel>=2 )
/* NOTE(review): print_double_vec is handed complex data here -- confirm
   whether a complex-aware printer should be used instead. */
if (jcol == -1) {
    nrow = xlusup_end[jcol] - xlusup[jcol];
    print_double_vec("before sup-col update", nrow, &lsub[xlsub[fsupc]],
                     &lusup[xlusup[jcol]]);
}
#endif

    /*
     * For more updates within the panel (also within the current supernode),
     * should start from the first column of the panel, or the first column
     * of the supernode, whichever is bigger. There are 2 cases:
     *    (1) fsupc < fpanelc,  then fst_col := fpanelc
     *    (2) fsupc >= fpanelc, then fst_col := fsupc
     */
    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

    if ( fst_col < jcol ) {

        /* distance between the current supernode and the current panel;
           d_fsupc=0 if fsupc >= fpanelc. */
        d_fsupc = fst_col - fsupc;

        lptr = xlsub[fsupc] + d_fsupc;
        luptr = xlusup[fst_col] + d_fsupc;
        nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */
        nsupc = jcol - fst_col; /* Excluding jcol */
        nrow = nsupr - d_fsupc - nsupc;

        /* points to the beginning of jcol in supernode L\U[*,jsupno] */
        ufirst = xlusup[jcol] + d_fsupc;

#if ( DEBUGlevel>=2 )
if (jcol==BADCOL)
printf("(%d) pcgstrf_column_bmod[3] jcol %d, fsupc %d, nsupr %d, nsupc %d, nrow %d\n",
       pnum, jcol, fsupc, nsupr, nsupc, nrow);
#endif

        /* Same operation-count formula as in the segment loop above. */
        flopcnt = nsupc * (nsupc - 1) + 2 * nrow * nsupc;
        Gstat->procstat[pnum].fcops += flopcnt;

/*      ops[TRSV] += nsupc * (nsupc - 1);
        ops[GEMV] += 2 * nrow * nsupc;    */

#ifdef USE_VENDOR_BLAS
        alpha = none; beta = one; /* y := beta*y + alpha*A*x */
#if ( MACH==CRAY_PVP )
        CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr],
               &nsupr, &lusup[ufirst], &incx );
        CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#else
        ctrsv_( "L", "N", "U", &nsupc, &lusup[luptr],
               &nsupr, &lusup[ufirst], &incx );
        cgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#endif
#else
        clsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );

        cmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
                 &lusup[ufirst], tempv );

        /* Subtract the updates in tempv[*] from lusup[*] and reset
           tempv[*] to zero. */
        isub = ufirst + nsupc;
        for (i = 0; i < nrow; i++) {
            c_sub(&lusup[isub], &lusup[isub], &tempv[i]);
            tempv[i] = zero;
            ++isub;
        }
#endif
    } /* if fst_col < jcol ... */

    return 0;
}
Пример #28
0
/* Subroutine */ int clagsy_(integer *n, integer *k, real *d, complex *a, 
	integer *lda, integer *iseed, complex *work, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, 
	    i__9;
    doublereal d__1;
    complex q__1, q__2, q__3, q__4;

    /* Builtin functions */
    double c_abs(complex *);
    void c_div(complex *, complex *, complex *);

    /* Local variables */
    static integer i, j;
    extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, 
	    complex *, integer *, complex *, integer *, complex *, integer *);
    static complex alpha;
    extern /* Subroutine */ int cscal_(integer *, complex *, complex *, 
	    integer *);
    extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer 
	    *, complex *, integer *);
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
	    , complex *, integer *, complex *, integer *, complex *, complex *
	    , integer *), caxpy_(integer *, complex *, complex *, 
	    integer *, complex *, integer *), csymv_(char *, integer *, 
	    complex *, complex *, integer *, complex *, integer *, complex *, 
	    complex *, integer *);
    extern real scnrm2_(integer *, complex *, integer *);
    static integer ii, jj;
    static complex wa, wb;
    extern /* Subroutine */ int clacgv_(integer *, complex *, integer *);
    static real wn;
    extern /* Subroutine */ int xerbla_(char *, integer *), clarnv_(
	    integer *, integer *, integer *, complex *);
    static complex tau;


/*  -- LAPACK auxiliary test routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CLAGSY generates a complex symmetric matrix A, by pre- and post-   
    multiplying a real diagonal matrix D with a random unitary matrix:   
    A = U*D*U**T. The semi-bandwidth may then be reduced to k by   
    additional unitary transformations.   

    Arguments   
    =========   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    K       (input) INTEGER   
            The number of nonzero subdiagonals within the band of A.   
            0 <= K <= N-1.   

    D       (input) REAL array, dimension (N)   
            The diagonal elements of the diagonal matrix D.   

    A       (output) COMPLEX array, dimension (LDA,N)   
            The generated n by n symmetric matrix A (the full matrix is   
            stored).   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= N.   

    ISEED   (input/output) INTEGER array, dimension (4)   
            On entry, the seed of the random number generator; the array 
  
            elements must be between 0 and 4095, and ISEED(4) must be   
            odd.   
            On exit, the seed is updated.   

    WORK    (workspace) COMPLEX array, dimension (2*N)   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   

    ===================================================================== 
  


       Test the input arguments   

       Parameter adjustments */
    --d;
    a_dim1 = *lda;
    a_offset = a_dim1 + 1;
    a -= a_offset;
    --iseed;
    --work;

    /* Function Body */
    *info = 0;
    if (*n < 0) {
	*info = -1;
    } else if (*k < 0 || *k > *n - 1) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    }
    if (*info < 0) {
	i__1 = -(*info);
	xerbla_("CLAGSY", &i__1);
	return 0;
    }

/*     initialize lower triangle of A to diagonal matrix */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *n;
	for (i = j + 1; i <= i__2; ++i) {
	    i__3 = i + j * a_dim1;
	    a[i__3].r = 0.f, a[i__3].i = 0.f;
/* L10: */
	}
/* L20: */
    }
    i__1 = *n;
    for (i = 1; i <= i__1; ++i) {
	i__2 = i + i * a_dim1;
	i__3 = i;
	a[i__2].r = d[i__3], a[i__2].i = 0.f;
/* L30: */
    }

/*     Generate lower triangle of symmetric matrix */

    for (i = *n - 1; i >= 1; --i) {

/*        generate random reflection */

	i__1 = *n - i + 1;
	clarnv_(&c__3, &iseed[1], &i__1, &work[1]);
	i__1 = *n - i + 1;
	wn = scnrm2_(&i__1, &work[1], &c__1);
	d__1 = wn / c_abs(&work[1]);
	q__1.r = d__1 * work[1].r, q__1.i = d__1 * work[1].i;
	wa.r = q__1.r, wa.i = q__1.i;
	if (wn == 0.f) {
	    tau.r = 0.f, tau.i = 0.f;
	} else {
	    q__1.r = work[1].r + wa.r, q__1.i = work[1].i + wa.i;
	    wb.r = q__1.r, wb.i = q__1.i;
	    i__1 = *n - i;
	    c_div(&q__1, &c_b2, &wb);
	    cscal_(&i__1, &q__1, &work[2], &c__1);
	    work[1].r = 1.f, work[1].i = 0.f;
	    c_div(&q__1, &wb, &wa);
	    d__1 = q__1.r;
	    tau.r = d__1, tau.i = 0.f;
	}

/*        apply random reflection to A(i:n,i:n) from the left   
          and the right   

          compute  y := tau * A * conjg(u) */

	i__1 = *n - i + 1;
	clacgv_(&i__1, &work[1], &c__1);
	i__1 = *n - i + 1;
	csymv_("Lower", &i__1, &tau, &a[i + i * a_dim1], lda, &work[1], &c__1,
		 &c_b1, &work[*n + 1], &c__1);
	i__1 = *n - i + 1;
	clacgv_(&i__1, &work[1], &c__1);

/*        compute  v := y - 1/2 * tau * ( u, y ) * u */

	q__3.r = -.5f, q__3.i = 0.f;
	q__2.r = q__3.r * tau.r - q__3.i * tau.i, q__2.i = q__3.r * tau.i + 
		q__3.i * tau.r;
	i__1 = *n - i + 1;
	cdotc_(&q__4, &i__1, &work[1], &c__1, &work[*n + 1], &c__1);
	q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * q__4.i 
		+ q__2.i * q__4.r;
	alpha.r = q__1.r, alpha.i = q__1.i;
	i__1 = *n - i + 1;
	caxpy_(&i__1, &alpha, &work[1], &c__1, &work[*n + 1], &c__1);

/*        apply the transformation as a rank-2 update to A(i:n,i:n)   

          CALL CSYR2( 'Lower', N-I+1, -ONE, WORK, 1, WORK( N+1 ), 1, 
  
          $               A( I, I ), LDA ) */

	i__1 = *n;
	for (jj = i; jj <= i__1; ++jj) {
	    i__2 = *n;
	    for (ii = jj; ii <= i__2; ++ii) {
		i__3 = ii + jj * a_dim1;
		i__4 = ii + jj * a_dim1;
		i__5 = ii - i + 1;
		i__6 = *n + jj - i + 1;
		q__3.r = work[i__5].r * work[i__6].r - work[i__5].i * work[
			i__6].i, q__3.i = work[i__5].r * work[i__6].i + work[
			i__5].i * work[i__6].r;
		q__2.r = a[i__4].r - q__3.r, q__2.i = a[i__4].i - q__3.i;
		i__7 = *n + ii - i + 1;
		i__8 = jj - i + 1;
		q__4.r = work[i__7].r * work[i__8].r - work[i__7].i * work[
			i__8].i, q__4.i = work[i__7].r * work[i__8].i + work[
			i__7].i * work[i__8].r;
		q__1.r = q__2.r - q__4.r, q__1.i = q__2.i - q__4.i;
		a[i__3].r = q__1.r, a[i__3].i = q__1.i;
/* L40: */
	    }
/* L50: */
	}
/* L60: */
    }

/*     Reduce number of subdiagonals to K */

    i__1 = *n - 1 - *k;
    for (i = 1; i <= i__1; ++i) {

/*        generate reflection to annihilate A(k+i+1:n,i) */

	i__2 = *n - *k - i + 1;
	wn = scnrm2_(&i__2, &a[*k + i + i * a_dim1], &c__1);
	d__1 = wn / c_abs(&a[*k + i + i * a_dim1]);
	i__2 = *k + i + i * a_dim1;
	q__1.r = d__1 * a[i__2].r, q__1.i = d__1 * a[i__2].i;
	wa.r = q__1.r, wa.i = q__1.i;
	if (wn == 0.f) {
	    tau.r = 0.f, tau.i = 0.f;
	} else {
	    i__2 = *k + i + i * a_dim1;
	    q__1.r = a[i__2].r + wa.r, q__1.i = a[i__2].i + wa.i;
	    wb.r = q__1.r, wb.i = q__1.i;
	    i__2 = *n - *k - i;
	    c_div(&q__1, &c_b2, &wb);
	    cscal_(&i__2, &q__1, &a[*k + i + 1 + i * a_dim1], &c__1);
	    i__2 = *k + i + i * a_dim1;
	    a[i__2].r = 1.f, a[i__2].i = 0.f;
	    c_div(&q__1, &wb, &wa);
	    d__1 = q__1.r;
	    tau.r = d__1, tau.i = 0.f;
	}

/*        apply reflection to A(k+i:n,i+1:k+i-1) from the left */

	i__2 = *n - *k - i + 1;
	i__3 = *k - 1;
	cgemv_("Conjugate transpose", &i__2, &i__3, &c_b2, &a[*k + i + (i + 1)
		 * a_dim1], lda, &a[*k + i + i * a_dim1], &c__1, &c_b1, &work[
		1], &c__1);
	i__2 = *n - *k - i + 1;
	i__3 = *k - 1;
	q__1.r = -(doublereal)tau.r, q__1.i = -(doublereal)tau.i;
	cgerc_(&i__2, &i__3, &q__1, &a[*k + i + i * a_dim1], &c__1, &work[1], 
		&c__1, &a[*k + i + (i + 1) * a_dim1], lda);

/*        apply reflection to A(k+i:n,k+i:n) from the left and the rig
ht   

          compute  y := tau * A * conjg(u) */

	i__2 = *n - *k - i + 1;
	clacgv_(&i__2, &a[*k + i + i * a_dim1], &c__1);
	i__2 = *n - *k - i + 1;
	csymv_("Lower", &i__2, &tau, &a[*k + i + (*k + i) * a_dim1], lda, &a[*
		k + i + i * a_dim1], &c__1, &c_b1, &work[1], &c__1);
	i__2 = *n - *k - i + 1;
	clacgv_(&i__2, &a[*k + i + i * a_dim1], &c__1);

/*        compute  v := y - 1/2 * tau * ( u, y ) * u */

	q__3.r = -.5f, q__3.i = 0.f;
	q__2.r = q__3.r * tau.r - q__3.i * tau.i, q__2.i = q__3.r * tau.i + 
		q__3.i * tau.r;
	i__2 = *n - *k - i + 1;
	cdotc_(&q__4, &i__2, &a[*k + i + i * a_dim1], &c__1, &work[1], &c__1);
	q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r * q__4.i 
		+ q__2.i * q__4.r;
	alpha.r = q__1.r, alpha.i = q__1.i;
	i__2 = *n - *k - i + 1;
	caxpy_(&i__2, &alpha, &a[*k + i + i * a_dim1], &c__1, &work[1], &c__1)
		;

/*        apply symmetric rank-2 update to A(k+i:n,k+i:n)   

          CALL CSYR2( 'Lower', N-K-I+1, -ONE, A( K+I, I ), 1, WORK, 1,
   
          $               A( K+I, K+I ), LDA ) */

	i__2 = *n;
	for (jj = *k + i; jj <= i__2; ++jj) {
	    i__3 = *n;
	    for (ii = jj; ii <= i__3; ++ii) {
		i__4 = ii + jj * a_dim1;
		i__5 = ii + jj * a_dim1;
		i__6 = ii + i * a_dim1;
		i__7 = jj - *k - i + 1;
		q__3.r = a[i__6].r * work[i__7].r - a[i__6].i * work[i__7].i, 
			q__3.i = a[i__6].r * work[i__7].i + a[i__6].i * work[
			i__7].r;
		q__2.r = a[i__5].r - q__3.r, q__2.i = a[i__5].i - q__3.i;
		i__8 = ii - *k - i + 1;
		i__9 = jj + i * a_dim1;
		q__4.r = work[i__8].r * a[i__9].r - work[i__8].i * a[i__9].i, 
			q__4.i = work[i__8].r * a[i__9].i + work[i__8].i * a[
			i__9].r;
		q__1.r = q__2.r - q__4.r, q__1.i = q__2.i - q__4.i;
		a[i__4].r = q__1.r, a[i__4].i = q__1.i;
/* L70: */
	    }
/* L80: */
	}

	i__2 = *k + i + i * a_dim1;
	q__1.r = -(doublereal)wa.r, q__1.i = -(doublereal)wa.i;
	a[i__2].r = q__1.r, a[i__2].i = q__1.i;
	i__2 = *n;
	for (j = *k + i + 1; j <= i__2; ++j) {
	    i__3 = j + i * a_dim1;
	    a[i__3].r = 0.f, a[i__3].i = 0.f;
/* L90: */
	}
/* L100: */
    }

/*     Store full symmetric matrix */

    i__1 = *n;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *n;
	for (i = j + 1; i <= i__2; ++i) {
	    i__3 = j + i * a_dim1;
	    i__4 = i + j * a_dim1;
	    a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i;
/* L110: */
	}
/* L120: */
    }
    return 0;

/*     End of CLAGSY */

} /* clagsy_ */
Пример #29
0
/* Subroutine */ int cggglm_(integer *n, integer *m, integer *p, complex *a, 
	integer *lda, complex *b, integer *ldb, complex *d__, complex *x, 
	complex *y, complex *work, integer *lwork, integer *info)
{
/*  -- LAPACK driver routine (version 3.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       June 30, 1999


    Purpose
    =======

    CGGGLM solves a general Gauss-Markov linear model (GLM) problem:

            minimize || y ||_2   subject to   d = A*x + B*y
                x

    where A is an N-by-M matrix, B is an N-by-P matrix, and d is a
    given N-vector. It is assumed that M <= N <= M+P, and

               rank(A) = M    and    rank( A B ) = N.

    Under these assumptions, the constrained equation is always
    consistent, and there is a unique solution x and a minimal 2-norm
    solution y, which is obtained using a generalized QR factorization
    of A and B.

    In particular, if matrix B is square nonsingular, then the problem
    GLM is equivalent to the following weighted linear least squares
    problem

                 minimize || inv(B)*(d-A*x) ||_2
                     x

    where inv(B) denotes the inverse of B.

    Arguments
    =========

    N       (input) INTEGER
            The number of rows of the matrices A and B.  N >= 0.

    M       (input) INTEGER
            The number of columns of the matrix A.  0 <= M <= N.

    P       (input) INTEGER
            The number of columns of the matrix B.  P >= N-M.

    A       (input/output) COMPLEX array, dimension (LDA,M)
            On entry, the N-by-M matrix A.
            On exit, A is destroyed.

    LDA     (input) INTEGER
            The leading dimension of the array A. LDA >= max(1,N).

    B       (input/output) COMPLEX array, dimension (LDB,P)
            On entry, the N-by-P matrix B.
            On exit, B is destroyed.

    LDB     (input) INTEGER
            The leading dimension of the array B. LDB >= max(1,N).

    D       (input/output) COMPLEX array, dimension (N)
            On entry, D is the left hand side of the GLM equation.
            On exit, D is destroyed.

    X       (output) COMPLEX array, dimension (M)
    Y       (output) COMPLEX array, dimension (P)
            On exit, X and Y are the solutions of the GLM problem.

    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

    LWORK   (input) INTEGER
            The dimension of the array WORK. LWORK >= max(1,N+M+P).
            For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB,
            where NB is an upper bound for the optimal blocksizes for
            CGEQRF, CGERQF, CUNMQR and CUNMRQ.

            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    INFO    (output) INTEGER
            = 0:  successful exit.
            < 0:  if INFO = -i, the i-th argument had an illegal value.

    Return value
    ============

    Always 0; status is reported through INFO.

    Implementation notes
    ====================

    All scratch variables have automatic storage.  Each one is assigned
    before it is read, so nothing has to persist between calls and the
    routine keeps no state of its own across invocations.

    NOTE(review): the two triangular solves go through CTRSV, which has
    no INFO argument, so a singular R11 or T22 is not reported by this
    routine.

    =================================================================== */

    /* Table of constant values (passed by address to the callees) */
    static complex c_b2 = {1.f,0.f};
    static integer c__1 = 1;
    static integer c_n1 = -1;

    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
    complex q__1;
    /* Local variables */
    integer lopt, i__;
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
	    , complex *, integer *, complex *, integer *, complex *, complex *
	    , integer *), ccopy_(integer *, complex *, integer *, 
	    complex *, integer *), ctrsv_(char *, char *, char *, integer *, 
	    complex *, integer *, complex *, integer *);
    integer nb, np;
    extern /* Subroutine */ int cggqrf_(integer *, integer *, integer *, 
	    complex *, integer *, complex *, complex *, integer *, complex *, 
	    complex *, integer *, integer *), xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    integer nb1, nb2, nb3, nb4;
    extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, 
	    integer *, complex *, integer *, complex *, complex *, integer *, 
	    complex *, integer *, integer *), cunmrq_(char *, 
	    char *, integer *, integer *, integer *, complex *, integer *, 
	    complex *, complex *, integer *, complex *, integer *, integer *);
    integer lwkopt;
    logical lquery;
#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]


    /* Parameter adjustments: shift every array pointer so that the
       1-based (Fortran-style) subscripts used below address the
       caller's storage. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --d__;
    --x;
    --y;
    --work;

    /* Function Body */

/*     Test the input parameters and compute the optimal workspace size.
       The size is stored in WORK(1) before validation, so a workspace
       query (LWORK = -1) can return right after the checks. */

    *info = 0;
    np = min(*n,*p);
    nb1 = ilaenv_(&c__1, "CGEQRF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);
    nb2 = ilaenv_(&c__1, "CGERQF", " ", n, m, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);
    nb3 = ilaenv_(&c__1, "CUNMQR", " ", n, m, p, &c_n1, (ftnlen)6, (ftnlen)1);
    nb4 = ilaenv_(&c__1, "CUNMRQ", " ", n, m, p, &c_n1, (ftnlen)6, (ftnlen)1);
/* Computing MAX */
    i__1 = max(nb1,nb2), i__1 = max(i__1,nb3);
    nb = max(i__1,nb4);
    lwkopt = *m + np + max(*n,*p) * nb;
    work[1].r = (real) lwkopt, work[1].i = 0.f;
    lquery = *lwork == -1;
    if (*n < 0) {
	*info = -1;
    } else if (*m < 0 || *m > *n) {
	*info = -2;
    } else if (*p < 0 || *p < *n - *m) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -7;
    } else {
/* Computing MAX */
	i__1 = 1, i__2 = *n + *m + *p;
	/* A too-small LWORK is an error only when this is not a query. */
	if (*lwork < max(i__1,i__2) && ! lquery) {
	    *info = -12;
	}
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CGGGLM", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Compute the GQR factorization of matrices A and B:

              Q'*A = ( R11 ) M,    Q'*B*Z' = ( T11   T12 ) M
                     (  0  ) N-M             (  0    T22 ) N-M
                        M                     M+P-N  N-M

       where R11 and T22 are upper triangular, and Q and Z are
       unitary.

       WORK is split into three consecutive pieces for the callees:
       WORK(1:M), WORK(M+1:M+NP), and the remaining LWORK-M-NP entries
       starting at WORK(M+NP+1), which serve as their scratch space. */

    i__1 = *lwork - *m - np;
    cggqrf_(n, m, p, &a[a_offset], lda, &work[1], &b[b_offset], ldb, &work[*m 
	    + 1], &work[*m + np + 1], &i__1, info);
    /* Read back the value the callee left in the first entry of its
       scratch space; the running maximum is reported on exit. */
    i__1 = *m + np + 1;
    lopt = (integer) work[i__1].r;

/*     Update left-hand-side vector d = Q'*d = ( d1 ) M
                                               ( d2 ) N-M */

    i__1 = max(1,*n);
    i__2 = *lwork - *m - np;
    cunmqr_("Left", "Conjugate transpose", n, &c__1, m, &a[a_offset], lda, &
	    work[1], &d__[1], &i__1, &work[*m + np + 1], &i__2, info);
/* Computing MAX */
    i__3 = *m + np + 1;
    i__1 = lopt, i__2 = (integer) work[i__3].r;
    lopt = max(i__1,i__2);

/*     Solve T22*y2 = d2 for y2 */

    i__1 = *n - *m;
    ctrsv_("Upper", "No transpose", "Non unit", &i__1, &b_ref(*m + 1, *m + *p 
	    - *n + 1), ldb, &d__[*m + 1], &c__1);
    i__1 = *n - *m;
    ccopy_(&i__1, &d__[*m + 1], &c__1, &y[*m + *p - *n + 1], &c__1);

/*     Set y1 = 0 */

    i__1 = *m + *p - *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = i__;
	y[i__2].r = 0.f, y[i__2].i = 0.f;
/* L10: */
    }

/*     Update d1 = d1 - T12*y2 */

    i__1 = *n - *m;
    q__1.r = -1.f, q__1.i = 0.f;
    cgemv_("No transpose", m, &i__1, &q__1, &b_ref(1, *m + *p - *n + 1), ldb, 
	    &y[*m + *p - *n + 1], &c__1, &c_b2, &d__[1], &c__1);

/*     Solve triangular system: R11*x = d1 */

    ctrsv_("Upper", "No Transpose", "Non unit", m, &a[a_offset], lda, &d__[1],
	     &c__1);

/*     Copy D to X */

    ccopy_(m, &d__[1], &c__1, &x[1], &c__1);

/*     Backward transformation y = Z'*y

   Computing MAX */
    i__1 = 1, i__2 = *n - *p + 1;
    i__3 = max(1,*p);
    i__4 = *lwork - *m - np;
    cunmrq_("Left", "Conjugate transpose", p, &c__1, &np, &b_ref(max(i__1,
	    i__2), 1), ldb, &work[*m + 1], &y[1], &i__3, &work[*m + np + 1], &
	    i__4, info);

/*     Report M + NP + (largest value read back from the callees) in
       WORK(1). */

/* Computing MAX */
    i__4 = *m + np + 1;
    i__2 = lopt, i__3 = (integer) work[i__4].r;
    i__1 = *m + np + max(i__2,i__3);
    work[1].r = (real) i__1, work[1].i = 0.f;

    return 0;

/*     End of CGGGLM */

} /* cggglm_ */
Пример #30
0
/*
 * By-value convenience wrapper around cgemv_.
 *
 * The character and integer arguments are received by value and handed to
 * cgemv_ by address; the complex pointers (alpha, a, x, beta, y) are
 * forwarded unchanged.  Nothing is returned.
 */
void
cgemv(char transa, int m, int n, complex *alpha, complex *a, int lda, complex *x, int incx, complex *beta, complex *y, int incy)
{
    /* Addresses of the by-value parameters, in the order cgemv_ takes them. */
    char *transa_ref = &transa;
    int *m_ref = &m;
    int *n_ref = &n;
    int *lda_ref = &lda;
    int *incx_ref = &incx;
    int *incy_ref = &incy;

    cgemv_(transa_ref, m_ref, n_ref,
           alpha, a, lda_ref,
           x, incx_ref,
           beta, y, incy_ref);
}