示例#1
0
/* Subroutine */ int cgeqls_(integer *m, integer *n, integer *nrhs, complex *
	a, integer *lda, complex *tau, complex *b, integer *ldb, complex *
	work, integer *lwork, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;

    /* Local variables */
    extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, 
	    integer *, integer *, complex *, complex *, integer *, complex *, 
	    integer *), xerbla_(char *, 
	    integer *), cunmql_(char *, char *, integer *, integer *, 
	    integer *, complex *, integer *, complex *, complex *, integer *, 
	    complex *, integer *, integer *);


/*  -- LAPACK routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  Solve the least squares problem */
/*      min || A*X - B || */
/*  using the QL factorization */
/*      A = Q*L */
/*  computed by CGEQLF. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A.  M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A.  M >= N >= 0. */

/*  NRHS    (input) INTEGER */
/*          The number of columns of B.  NRHS >= 0. */

/*  A       (input) COMPLEX array, dimension (LDA,N) */
/*          Details of the QL factorization of the original matrix A as */
/*          returned by CGEQLF. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= M. */

/*  TAU     (input) COMPLEX array, dimension (N) */
/*          Details of the orthogonal matrix Q. */

/*  B       (input/output) COMPLEX array, dimension (LDB,NRHS) */
/*          On entry, the m-by-nrhs right hand side matrix B. */
/*          On exit, the n-by-nrhs solution matrix X, stored in rows */
/*          m-n+1:m. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B. LDB >= M. */

/*  WORK    (workspace) COMPLEX array, dimension (LWORK) */

/*  LWORK   (input) INTEGER */
/*          The length of the array WORK.  LWORK must be at least NRHS, */
/*          and should be at least NRHS*NB, where NB is the block size */
/*          for this environment. */

/*  INFO    (output) INTEGER */
/*          = 0: successful exit */
/*          < 0: if INFO = -i, the i-th argument had an illegal value */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input arguments. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --tau;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --work;

    /* Function Body */
    *info = 0;
    if (*m < 0) {
	*info = -1;
    } else if (*n < 0 || *n > *m) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*lda < max(1,*m)) {
	*info = -5;
    } else if (*ldb < max(1,*m)) {
	*info = -8;
    } else if (*lwork < 1 || *lwork < *nrhs && *m > 0 && *n > 0) {
	*info = -10;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CGEQLS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0 || *m == 0) {
	return 0;
    }

/*     B := Q' * B */

    cunmql_("Left", "Conjugate transpose", m, nrhs, n, &a[a_offset], lda, &
	    tau[1], &b[b_offset], ldb, &work[1], lwork, info);

/*     Solve L*X = B(m-n+1:m,:) */

    ctrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b1, &a[*m 
	    - *n + 1 + a_dim1], lda, &b[*m - *n + 1 + b_dim1], ldb);

    return 0;

/*     End of CGEQLS */

} /* cgeqls_ */
示例#2
0
void
cgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
        int *perm_c, int *perm_r, SuperMatrix *B,
        SuperLUStat_t *stat, int *info)
{

#ifdef _CRAY
    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
#endif
    int      incx = 1, incy = 1;
#ifdef USE_VENDOR_BLAS
    complex   alpha = {1.0, 0.0}, beta = {1.0, 0.0};
    complex   *work_col;
#endif
    complex   temp_comp;
    DNformat *Bstore;
    complex   *Bmat;
    SCformat *Lstore;
    NCformat *Ustore;
    complex   *Lval, *Uval;
    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
    int      i, j, k, iptr, jcol, n, ldb, nrhs;
    complex   *work, *rhs_work, *soln;
    flops_t  solve_ops;
    void cprint_soln();

    /* Test input parameters ... */
    *info = 0;
    Bstore = B->Store;
    ldb = Bstore->lda;
    nrhs = B->ncol;
    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
    else if ( L->nrow != L->ncol || L->nrow < 0 ||
              L->Stype != SLU_SC || L->Dtype != SLU_C || L->Mtype != SLU_TRLU )
        *info = -2;
    else if ( U->nrow != U->ncol || U->nrow < 0 ||
              U->Stype != SLU_NC || U->Dtype != SLU_C || U->Mtype != SLU_TRU )
        *info = -3;
    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
              B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE )
        *info = -6;
    if ( *info ) {
        i = -(*info);
        xerbla_("cgstrs", &i);
        return;
    }

    n = L->nrow;
    work = complexCalloc(n * nrhs);
    if ( !work ) ABORT("Malloc fails for local work[].");
    soln = complexMalloc(n);
    if ( !soln ) ABORT("Malloc fails for local soln[].");

    Bmat = Bstore->nzval;
    Lstore = L->Store;
    Lval = Lstore->nzval;
    Ustore = U->Store;
    Uval = Ustore->nzval;
    solve_ops = 0;

    if ( trans == NOTRANS ) {
        /* Permute right hand sides to form Pr*B */
        for (i = 0; i < nrhs; i++) {
            rhs_work = &Bmat[i*ldb];
            for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
            for (k = 0; k < n; k++) rhs_work[k] = soln[k];
        }

        /* Forward solve PLy=Pb. */
        for (k = 0; k <= Lstore->nsuper; k++) {
            fsupc = L_FST_SUPC(k);
            istart = L_SUB_START(fsupc);
            nsupr = L_SUB_START(fsupc+1) - istart;
            nsupc = L_FST_SUPC(k+1) - fsupc;
            nrow = nsupr - nsupc;

            solve_ops += 4 * nsupc * (nsupc - 1) * nrhs;
            solve_ops += 8 * nrow * nsupc * nrhs;

            if ( nsupc == 1 ) {
                for (j = 0; j < nrhs; j++) {
                    rhs_work = &Bmat[j*ldb];
                    luptr = L_NZ_START(fsupc);
                    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
                        irow = L_SUB(iptr);
                        ++luptr;
                        cc_mult(&temp_comp, &rhs_work[fsupc], &Lval[luptr]);
                        c_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
                    }
                }
            } else {
                luptr = L_NZ_START(fsupc);
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
                ftcs1 = _cptofcd("L", strlen("L"));
                ftcs2 = _cptofcd("N", strlen("N"));
                ftcs3 = _cptofcd("U", strlen("U"));
                CTRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
                       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);

                CGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha,
                        &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
                        &beta, &work[0], &n );
#else
                ctrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
                       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);

                cgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha,
                        &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
                        &beta, &work[0], &n );
#endif
                for (j = 0; j < nrhs; j++) {
                    rhs_work = &Bmat[j*ldb];
                    work_col = &work[j*n];
                    iptr = istart + nsupc;
                    for (i = 0; i < nrow; i++) {
                        irow = L_SUB(iptr);
                        c_sub(&rhs_work[irow], &rhs_work[irow], &work_col[i]);
                        work_col[i].r = 0.0;
                        work_col[i].i = 0.0;
                        iptr++;
                    }
                }
#else
                for (j = 0; j < nrhs; j++) {
                    rhs_work = &Bmat[j*ldb];
                    clsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
                    cmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
                            &rhs_work[fsupc], &work[0] );

                    iptr = istart + nsupc;
                    for (i = 0; i < nrow; i++) {
                        irow = L_SUB(iptr);
                        c_sub(&rhs_work[irow], &rhs_work[irow], &work[i]);
                        work[i].r = 0.;
                        work[i].i = 0.;
                        iptr++;
                    }
                }
#endif
            } /* else ... */
        } /* for L-solve */

#ifdef DEBUG
        printf("After L-solve: y=\n");
        cprint_soln(n, nrhs, Bmat);
#endif

        /*
         * Back solve Ux=y.
         */
        for (k = Lstore->nsuper; k >= 0; k--) {
            fsupc = L_FST_SUPC(k);
            istart = L_SUB_START(fsupc);
            nsupr = L_SUB_START(fsupc+1) - istart;
            nsupc = L_FST_SUPC(k+1) - fsupc;
            luptr = L_NZ_START(fsupc);

            solve_ops += 4 * nsupc * (nsupc + 1) * nrhs;

            if ( nsupc == 1 ) {
                rhs_work = &Bmat[0];
                for (j = 0; j < nrhs; j++) {
                    c_div(&rhs_work[fsupc], &rhs_work[fsupc], &Lval[luptr]);
                    rhs_work += ldb;
                }
            } else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
                ftcs1 = _cptofcd("L", strlen("L"));
                ftcs2 = _cptofcd("U", strlen("U"));
                ftcs3 = _cptofcd("N", strlen("N"));
                CTRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
                       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
#else
                ctrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
                       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
#endif
#else
                for (j = 0; j < nrhs; j++)
                    cusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
#endif
            }

            for (j = 0; j < nrhs; ++j) {
                rhs_work = &Bmat[j*ldb];
                for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
                    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
                    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
                        irow = U_SUB(i);
                        cc_mult(&temp_comp, &rhs_work[jcol], &Uval[i]);
                        c_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
                    }
                }
            }

        } /* for U-solve */

#ifdef DEBUG
        printf("After U-solve: x=\n");
        cprint_soln(n, nrhs, Bmat);
#endif

        /* Compute the final solution X := Pc*X. */
        for (i = 0; i < nrhs; i++) {
            rhs_work = &Bmat[i*ldb];
            for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
            for (k = 0; k < n; k++) rhs_work[k] = soln[k];
        }

        stat->ops[SOLVE] = solve_ops;

    } else { /* Solve A'*X=B or CONJ(A)*X=B */
        /* Permute right hand sides to form Pc'*B. */
        for (i = 0; i < nrhs; i++) {
            rhs_work = &Bmat[i*ldb];
            for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
            for (k = 0; k < n; k++) rhs_work[k] = soln[k];
        }

        stat->ops[SOLVE] = 0;
        if (trans == TRANS) {
            for (k = 0; k < nrhs; ++k) {
                /* Multiply by inv(U'). */
                sp_ctrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);

                /* Multiply by inv(L'). */
                sp_ctrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
            }
         } else { /* trans == CONJ */
            for (k = 0; k < nrhs; ++k) {
                /* Multiply by conj(inv(U')). */
                sp_ctrsv("U", "C", "N", L, U, &Bmat[k*ldb], stat, info);

                /* Multiply by conj(inv(L')). */
                sp_ctrsv("L", "C", "U", L, U, &Bmat[k*ldb], stat, info);
            }
         }
        /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
        for (i = 0; i < nrhs; i++) {
            rhs_work = &Bmat[i*ldb];
            for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
            for (k = 0; k < n; k++) rhs_work[k] = soln[k];
        }

    }

    SUPERLU_FREE(work);
    SUPERLU_FREE(soln);
}
示例#3
0
文件: cpbtrf.c 项目: csapng/libflame
/* Subroutine */
int cpbtrf_(char *uplo, integer *n, integer *kd, complex *ab, integer *ldab, integer *info)
{
    /* System generated locals */
    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6;
    complex q__1;
    /* Local variables */
    integer i__, j, i2, i3, ib, nb, ii, jj;
    complex work[1056] /* was [33][32] */
    ;
    extern /* Subroutine */
    int cgemm_(char *, char *, integer *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *), cherk_(char *, char *, integer *, integer *, real *, complex *, integer *, real * , complex *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */
    int ctrsm_(char *, char *, char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *), cpbtf2_(char *, integer *, integer *, complex *, integer *, integer *), cpotf2_(char *, integer *, complex *, integer *, integer *), xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *);
    /* -- LAPACK computational routine (version 3.4.0) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* November 2011 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. Local Arrays .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Intrinsic Functions .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Test the input parameters. */
    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1;
    ab -= ab_offset;
    /* Function Body */
    *info = 0;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L"))
    {
        *info = -1;
    }
    else if (*n < 0)
    {
        *info = -2;
    }
    else if (*kd < 0)
    {
        *info = -3;
    }
    else if (*ldab < *kd + 1)
    {
        *info = -5;
    }
    if (*info != 0)
    {
        i__1 = -(*info);
        xerbla_("CPBTRF", &i__1);
        return 0;
    }
    /* Quick return if possible */
    if (*n == 0)
    {
        return 0;
    }
    /* Determine the block size for this environment */
    nb = ilaenv_(&c__1, "CPBTRF", uplo, n, kd, &c_n1, &c_n1);
    /* The block size must not exceed the semi-bandwidth KD, and must not */
    /* exceed the limit set by the size of the local array WORK. */
    nb = min(nb,32);
    if (nb <= 1 || nb > *kd)
    {
        /* Use unblocked code */
        cpbtf2_(uplo, n, kd, &ab[ab_offset], ldab, info);
    }
    else
    {
        /* Use blocked code */
        if (lsame_(uplo, "U"))
        {
            /* Compute the Cholesky factorization of a Hermitian band */
            /* matrix, given the upper triangle of the matrix in band */
            /* storage. */
            /* Zero the upper triangle of the work array. */
            i__1 = nb;
            for (j = 1;
                    j <= i__1;
                    ++j)
            {
                i__2 = j - 1;
                for (i__ = 1;
                        i__ <= i__2;
                        ++i__)
                {
                    i__3 = i__ + j * 33 - 34;
                    work[i__3].r = 0.f;
                    work[i__3].i = 0.f; // , expr subst
                    /* L10: */
                }
                /* L20: */
            }
            /* Process the band matrix one diagonal block at a time. */
            i__1 = *n;
            i__2 = nb;
            for (i__ = 1;
                    i__2 < 0 ? i__ >= i__1 : i__ <= i__1;
                    i__ += i__2)
            {
                /* Computing MIN */
                i__3 = nb;
                i__4 = *n - i__ + 1; // , expr subst
                ib = min(i__3,i__4);
                /* Factorize the diagonal block */
                i__3 = *ldab - 1;
                cpotf2_(uplo, &ib, &ab[*kd + 1 + i__ * ab_dim1], &i__3, &ii);
                if (ii != 0)
                {
                    *info = i__ + ii - 1;
                    goto L150;
                }
                if (i__ + ib <= *n)
                {
                    /* Update the relevant part of the trailing submatrix. */
                    /* If A11 denotes the diagonal block which has just been */
                    /* factorized, then we need to update the remaining */
                    /* blocks in the diagram: */
                    /* A11 A12 A13 */
                    /* A22 A23 */
                    /* A33 */
                    /* The numbers of rows and columns in the partitioning */
                    /* are IB, I2, I3 respectively. The blocks A12, A22 and */
                    /* A23 are empty if IB = KD. The upper triangle of A13 */
                    /* lies outside the band. */
                    /* Computing MIN */
                    i__3 = *kd - ib;
                    i__4 = *n - i__ - ib + 1; // , expr subst
                    i2 = min(i__3,i__4);
                    /* Computing MIN */
                    i__3 = ib;
                    i__4 = *n - i__ - *kd + 1; // , expr subst
                    i3 = min(i__3,i__4);
                    if (i2 > 0)
                    {
                        /* Update A12 */
                        i__3 = *ldab - 1;
                        i__4 = *ldab - 1;
                        ctrsm_("Left", "Upper", "Conjugate transpose", "Non-" "unit", &ib, &i2, &c_b1, &ab[*kd + 1 + i__ * ab_dim1], &i__3, &ab[*kd + 1 - ib + (i__ + ib) * ab_dim1], &i__4);
                        /* Update A22 */
                        i__3 = *ldab - 1;
                        i__4 = *ldab - 1;
                        cherk_("Upper", "Conjugate transpose", &i2, &ib, & c_b21, &ab[*kd + 1 - ib + (i__ + ib) * ab_dim1], &i__3, &c_b22, &ab[*kd + 1 + (i__ + ib) * ab_dim1], &i__4);
                    }
                    if (i3 > 0)
                    {
                        /* Copy the lower triangle of A13 into the work array. */
                        i__3 = i3;
                        for (jj = 1;
                                jj <= i__3;
                                ++jj)
                        {
                            i__4 = ib;
                            for (ii = jj;
                                    ii <= i__4;
                                    ++ii)
                            {
                                i__5 = ii + jj * 33 - 34;
                                i__6 = ii - jj + 1 + (jj + i__ + *kd - 1) * ab_dim1;
                                work[i__5].r = ab[i__6].r;
                                work[i__5].i = ab[ i__6].i; // , expr subst
                                /* L30: */
                            }
                            /* L40: */
                        }
                        /* Update A13 (in the work array). */
                        i__3 = *ldab - 1;
                        ctrsm_("Left", "Upper", "Conjugate transpose", "Non-" "unit", &ib, &i3, &c_b1, &ab[*kd + 1 + i__ * ab_dim1], &i__3, work, &c__33);
                        /* Update A23 */
                        if (i2 > 0)
                        {
                            q__1.r = -1.f;
                            q__1.i = -0.f; // , expr subst
                            i__3 = *ldab - 1;
                            i__4 = *ldab - 1;
                            cgemm_("Conjugate transpose", "No transpose", &i2, &i3, &ib, &q__1, &ab[*kd + 1 - ib + (i__ + ib) * ab_dim1], &i__3, work, &c__33, & c_b1, &ab[ib + 1 + (i__ + *kd) * ab_dim1], &i__4);
                        }
                        /* Update A33 */
                        i__3 = *ldab - 1;
                        cherk_("Upper", "Conjugate transpose", &i3, &ib, & c_b21, work, &c__33, &c_b22, &ab[*kd + 1 + ( i__ + *kd) * ab_dim1], &i__3);
                        /* Copy the lower triangle of A13 back into place. */
                        i__3 = i3;
                        for (jj = 1;
                                jj <= i__3;
                                ++jj)
                        {
                            i__4 = ib;
                            for (ii = jj;
                                    ii <= i__4;
                                    ++ii)
                            {
                                i__5 = ii - jj + 1 + (jj + i__ + *kd - 1) * ab_dim1;
                                i__6 = ii + jj * 33 - 34;
                                ab[i__5].r = work[i__6].r;
                                ab[i__5].i = work[ i__6].i; // , expr subst
                                /* L50: */
                            }
                            /* L60: */
                        }
                    }
                }
                /* L70: */
            }
        }
        else
        {
            /* Compute the Cholesky factorization of a Hermitian band */
            /* matrix, given the lower triangle of the matrix in band */
            /* storage. */
            /* Zero the lower triangle of the work array. */
            i__2 = nb;
            for (j = 1;
                    j <= i__2;
                    ++j)
            {
                i__1 = nb;
                for (i__ = j + 1;
                        i__ <= i__1;
                        ++i__)
                {
                    i__3 = i__ + j * 33 - 34;
                    work[i__3].r = 0.f;
                    work[i__3].i = 0.f; // , expr subst
                    /* L80: */
                }
                /* L90: */
            }
            /* Process the band matrix one diagonal block at a time. */
            i__2 = *n;
            i__1 = nb;
            for (i__ = 1;
                    i__1 < 0 ? i__ >= i__2 : i__ <= i__2;
                    i__ += i__1)
            {
                /* Computing MIN */
                i__3 = nb;
                i__4 = *n - i__ + 1; // , expr subst
                ib = min(i__3,i__4);
                /* Factorize the diagonal block */
                i__3 = *ldab - 1;
                cpotf2_(uplo, &ib, &ab[i__ * ab_dim1 + 1], &i__3, &ii);
                if (ii != 0)
                {
                    *info = i__ + ii - 1;
                    goto L150;
                }
                if (i__ + ib <= *n)
                {
                    /* Update the relevant part of the trailing submatrix. */
                    /* If A11 denotes the diagonal block which has just been */
                    /* factorized, then we need to update the remaining */
                    /* blocks in the diagram: */
                    /* A11 */
                    /* A21 A22 */
                    /* A31 A32 A33 */
                    /* The numbers of rows and columns in the partitioning */
                    /* are IB, I2, I3 respectively. The blocks A21, A22 and */
                    /* A32 are empty if IB = KD. The lower triangle of A31 */
                    /* lies outside the band. */
                    /* Computing MIN */
                    i__3 = *kd - ib;
                    i__4 = *n - i__ - ib + 1; // , expr subst
                    i2 = min(i__3,i__4);
                    /* Computing MIN */
                    i__3 = ib;
                    i__4 = *n - i__ - *kd + 1; // , expr subst
                    i3 = min(i__3,i__4);
                    if (i2 > 0)
                    {
                        /* Update A21 */
                        i__3 = *ldab - 1;
                        i__4 = *ldab - 1;
                        ctrsm_("Right", "Lower", "Conjugate transpose", "Non" "-unit", &i2, &ib, &c_b1, &ab[i__ * ab_dim1 + 1], &i__3, &ab[ib + 1 + i__ * ab_dim1], &i__4);
                        /* Update A22 */
                        i__3 = *ldab - 1;
                        i__4 = *ldab - 1;
                        cherk_("Lower", "No transpose", &i2, &ib, &c_b21, &ab[ ib + 1 + i__ * ab_dim1], &i__3, &c_b22, &ab[( i__ + ib) * ab_dim1 + 1], &i__4);
                    }
                    if (i3 > 0)
                    {
                        /* Copy the upper triangle of A31 into the work array. */
                        i__3 = ib;
                        for (jj = 1;
                                jj <= i__3;
                                ++jj)
                        {
                            i__4 = min(jj,i3);
                            for (ii = 1;
                                    ii <= i__4;
                                    ++ii)
                            {
                                i__5 = ii + jj * 33 - 34;
                                i__6 = *kd + 1 - jj + ii + (jj + i__ - 1) * ab_dim1;
                                work[i__5].r = ab[i__6].r;
                                work[i__5].i = ab[ i__6].i; // , expr subst
                                /* L100: */
                            }
                            /* L110: */
                        }
                        /* Update A31 (in the work array). */
                        i__3 = *ldab - 1;
                        ctrsm_("Right", "Lower", "Conjugate transpose", "Non" "-unit", &i3, &ib, &c_b1, &ab[i__ * ab_dim1 + 1], &i__3, work, &c__33);
                        /* Update A32 */
                        if (i2 > 0)
                        {
                            q__1.r = -1.f;
                            q__1.i = -0.f; // , expr subst
                            i__3 = *ldab - 1;
                            i__4 = *ldab - 1;
                            cgemm_("No transpose", "Conjugate transpose", &i3, &i2, &ib, &q__1, work, &c__33, &ab[ib + 1 + i__ * ab_dim1], &i__3, &c_b1, &ab[*kd + 1 - ib + (i__ + ib) * ab_dim1], &i__4);
                        }
                        /* Update A33 */
                        i__3 = *ldab - 1;
                        cherk_("Lower", "No transpose", &i3, &ib, &c_b21, work, &c__33, &c_b22, &ab[(i__ + *kd) * ab_dim1 + 1], &i__3);
                        /* Copy the upper triangle of A31 back into place. */
                        i__3 = ib;
                        for (jj = 1;
                                jj <= i__3;
                                ++jj)
                        {
                            i__4 = min(jj,i3);
                            for (ii = 1;
                                    ii <= i__4;
                                    ++ii)
                            {
                                i__5 = *kd + 1 - jj + ii + (jj + i__ - 1) * ab_dim1;
                                i__6 = ii + jj * 33 - 34;
                                ab[i__5].r = work[i__6].r;
                                ab[i__5].i = work[ i__6].i; // , expr subst
                                /* L120: */
                            }
                            /* L130: */
                        }
                    }
                }
                /* L140: */
            }
        }
    }
    return 0;
L150:
    return 0;
    /* End of CPBTRF */
}
示例#4
0
/* Subroutine */ int cgelsy_(integer *m, integer *n, integer *nrhs, complex *
	a, integer *lda, complex *b, integer *ldb, integer *jpvt, real *rcond, 
	 integer *rank, complex *work, integer *lwork, real *rwork, integer *
	info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
    real r__1, r__2;
    complex q__1;

    /* Builtin functions */
    double c_abs(complex *);

    /* Local variables */
    integer i__, j;
    complex c1, c2, s1, s2;
    integer nb, mn, nb1, nb2, nb3, nb4;
    real anrm, bnrm, smin, smax;
    integer iascl, ibscl;
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
	    complex *, integer *);
    integer ismin, ismax;
    extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, 
	    integer *, integer *, complex *, complex *, integer *, complex *, 
	    integer *), claic1_(integer *, 
	    integer *, complex *, real *, complex *, complex *, real *, 
	    complex *, complex *);
    real wsize;
    extern /* Subroutine */ int cgeqp3_(integer *, integer *, complex *, 
	    integer *, integer *, complex *, complex *, integer *, real *, 
	    integer *), slabad_(real *, real *);
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *);
    extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, 
	    real *, integer *, integer *, complex *, integer *, integer *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int claset_(char *, integer *, integer *, complex 
	    *, complex *, complex *, integer *), xerbla_(char *, 
	    integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *);
    real bignum;
    extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, 
	    integer *, complex *, integer *, complex *, complex *, integer *, 
	    complex *, integer *, integer *);
    real sminpr, smaxpr, smlnum;
    extern /* Subroutine */ int cunmrz_(char *, char *, integer *, integer *, 
	    integer *, integer *, complex *, integer *, complex *, complex *, 
	    integer *, complex *, integer *, integer *);
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int ctzrzf_(integer *, integer *, complex *, 
	    integer *, complex *, complex *, integer *, integer *);


/*  -- LAPACK driver routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CGELSY computes the minimum-norm solution to a complex linear least */
/*  squares problem: */
/*      minimize || A * X - B || */
/*  using a complete orthogonal factorization of A.  A is an M-by-N */
/*  matrix which may be rank-deficient. */

/*  Several right hand side vectors b and solution vectors x can be */
/*  handled in a single call; they are stored as the columns of the */
/*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
/*  matrix X. */

/*  The routine first computes a QR factorization with column pivoting: */
/*      A * P = Q * [ R11 R12 ] */
/*                  [  0  R22 ] */
/*  with R11 defined as the largest leading submatrix whose estimated */
/*  condition number is less than 1/RCOND.  The order of R11, RANK, */
/*  is the effective rank of A. */

/*  Then, R22 is considered to be negligible, and R12 is annihilated */
/*  by unitary transformations from the right, arriving at the */
/*  complete orthogonal factorization: */
/*     A * P = Q * [ T11 0 ] * Z */
/*                 [  0  0 ] */
/*  The minimum-norm solution is then */
/*     X = P * Z' [ inv(T11)*Q1'*B ] */
/*                [        0       ] */
/*  where Q1 consists of the first RANK columns of Q. */

/*  This routine is basically identical to the original xGELSX except */
/*  three differences: */
/*    o The permutation of matrix B (the right hand side) is faster and */
/*      more simple. */
/*    o The call to the subroutine xGEQPF has been substituted by the */
/*      the call to the subroutine xGEQP3. This subroutine is a Blas-3 */
/*      version of the QR factorization with column pivoting. */
/*    o Matrix B (the right hand side) is updated with Blas-3. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A.  M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A.  N >= 0. */

/*  NRHS    (input) INTEGER */
/*          The number of right hand sides, i.e., the number of */
/*          columns of matrices B and X. NRHS >= 0. */

/*  A       (input/output) COMPLEX array, dimension (LDA,N) */
/*          On entry, the M-by-N matrix A. */
/*          On exit, A has been overwritten by details of its */
/*          complete orthogonal factorization. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,M). */

/*  B       (input/output) COMPLEX array, dimension (LDB,NRHS) */
/*          On entry, the M-by-NRHS right hand side matrix B. */
/*          On exit, the N-by-NRHS solution matrix X. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B. LDB >= max(1,M,N). */

/*  JPVT    (input/output) INTEGER array, dimension (N) */
/*          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted */
/*          to the front of AP, otherwise column i is a free column. */
/*          On exit, if JPVT(i) = k, then the i-th column of A*P */
/*          was the k-th column of A. */

/*  RCOND   (input) REAL */
/*          RCOND is used to determine the effective rank of A, which */
/*          is defined as the order of the largest leading triangular */
/*          submatrix R11 in the QR factorization with pivoting of A, */
/*          whose estimated condition number < 1/RCOND. */

/*  RANK    (output) INTEGER */
/*          The effective rank of A, i.e., the order of the submatrix */
/*          R11.  This is the same as the order of the submatrix T11 */
/*          in the complete orthogonal factorization of A. */

/*  WORK    (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) */
/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK. */
/*          The unblocked strategy requires that: */
/*            LWORK >= MN + MAX( 2*MN, N+1, MN+NRHS ) */
/*          where MN = min(M,N). */
/*          The block algorithm requires that: */
/*            LWORK >= MN + MAX( 2*MN, NB*(N+1), MN+MN*NB, MN+NB*NRHS ) */
/*          where NB is an upper bound on the blocksize returned */
/*          by ILAENV for the routines CGEQP3, CTZRZF, CTZRQF, CUNMQR, */
/*          and CUNMRZ. */

/*          If LWORK = -1, then a workspace query is assumed; the routine */
/*          only calculates the optimal size of the WORK array, returns */
/*          this value as the first entry of the WORK array, and no error */
/*          message related to LWORK is issued by XERBLA. */

/*  RWORK   (workspace) REAL array, dimension (2*N) */

/*  INFO    (output) INTEGER */
/*          = 0: successful exit */
/*          < 0: if INFO = -i, the i-th argument had an illegal value */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */
/*    E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */
/*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --jpvt;
    --work;
    --rwork;

    /* Function Body */
    mn = min(*m,*n);
    ismin = mn + 1;
    ismax = (mn << 1) + 1;

/*     Test the input arguments. */

    *info = 0;
    nb1 = ilaenv_(&c__1, "CGEQRF", " ", m, n, &c_n1, &c_n1);
    nb2 = ilaenv_(&c__1, "CGERQF", " ", m, n, &c_n1, &c_n1);
    nb3 = ilaenv_(&c__1, "CUNMQR", " ", m, n, nrhs, &c_n1);
    nb4 = ilaenv_(&c__1, "CUNMRQ", " ", m, n, nrhs, &c_n1);
/* Computing MAX */
    i__1 = max(nb1,nb2), i__1 = max(i__1,nb3);
    nb = max(i__1,nb4);
/* Computing MAX */
    i__1 = 1, i__2 = mn + (*n << 1) + nb * (*n + 1), i__1 = max(i__1,i__2), 
	    i__2 = (mn << 1) + nb * *nrhs;
    lwkopt = max(i__1,i__2);
    q__1.r = (real) lwkopt, q__1.i = 0.f;
    work[1].r = q__1.r, work[1].i = q__1.i;
    lquery = *lwork == -1;
    if (*m < 0) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*lda < max(1,*m)) {
	*info = -5;
    } else /* if(complicated condition) */ {
/* Computing MAX */
	i__1 = max(1,*m);
	if (*ldb < max(i__1,*n)) {
	    *info = -7;
	} else /* if(complicated condition) */ {
/* Computing MAX */
	    i__1 = mn << 1, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = mn + 
		    *nrhs;
	    if (*lwork < mn + max(i__1,i__2) && ! lquery) {
		*info = -12;
	    }
	}
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CGELSY", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

/* Computing MIN */
    i__1 = min(*m,*n);
    if (min(i__1,*nrhs) == 0) {
	*rank = 0;
	return 0;
    }

/*     Get machine parameters */

    smlnum = slamch_("S") / slamch_("P");
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);

/*     Scale A, B if max entries outside range [SMLNUM,BIGNUM] */

    anrm = clange_("M", m, n, &a[a_offset], lda, &rwork[1]);
    iascl = 0;
    if (anrm > 0.f && anrm < smlnum) {

/*        Scale matrix norm up to SMLNUM */

	clascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
		info);
	iascl = 1;
    } else if (anrm > bignum) {

/*        Scale matrix norm down to BIGNUM */

	clascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
		info);
	iascl = 2;
    } else if (anrm == 0.f) {

/*        Matrix all zero. Return zero solution. */

	i__1 = max(*m,*n);
	claset_("F", &i__1, nrhs, &c_b1, &c_b1, &b[b_offset], ldb);
	*rank = 0;
	goto L70;
    }

    bnrm = clange_("M", m, nrhs, &b[b_offset], ldb, &rwork[1]);
    ibscl = 0;
    if (bnrm > 0.f && bnrm < smlnum) {

/*        Scale matrix norm up to SMLNUM */

	clascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb, 
		 info);
	ibscl = 1;
    } else if (bnrm > bignum) {

/*        Scale matrix norm down to BIGNUM */

	clascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb, 
		 info);
	ibscl = 2;
    }

/*     Compute QR factorization with column pivoting of A: */
/*        A * P = Q * R */

    i__1 = *lwork - mn;
    cgeqp3_(m, n, &a[a_offset], lda, &jpvt[1], &work[1], &work[mn + 1], &i__1, 
	     &rwork[1], info);
    i__1 = mn + 1;
    wsize = mn + work[i__1].r;

/*     complex workspace: MN+NB*(N+1). real workspace 2*N. */
/*     Details of Householder rotations stored in WORK(1:MN). */

/*     Determine RANK using incremental condition estimation */

    i__1 = ismin;
    work[i__1].r = 1.f, work[i__1].i = 0.f;
    i__1 = ismax;
    work[i__1].r = 1.f, work[i__1].i = 0.f;
    smax = c_abs(&a[a_dim1 + 1]);
    smin = smax;
    if (c_abs(&a[a_dim1 + 1]) == 0.f) {
	*rank = 0;
	i__1 = max(*m,*n);
	claset_("F", &i__1, nrhs, &c_b1, &c_b1, &b[b_offset], ldb);
	goto L70;
    } else {
	*rank = 1;
    }

L10:
    if (*rank < mn) {
	i__ = *rank + 1;
	claic1_(&c__2, rank, &work[ismin], &smin, &a[i__ * a_dim1 + 1], &a[
		i__ + i__ * a_dim1], &sminpr, &s1, &c1);
	claic1_(&c__1, rank, &work[ismax], &smax, &a[i__ * a_dim1 + 1], &a[
		i__ + i__ * a_dim1], &smaxpr, &s2, &c2);

	if (smaxpr * *rcond <= sminpr) {
	    i__1 = *rank;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		i__2 = ismin + i__ - 1;
		i__3 = ismin + i__ - 1;
		q__1.r = s1.r * work[i__3].r - s1.i * work[i__3].i, q__1.i = 
			s1.r * work[i__3].i + s1.i * work[i__3].r;
		work[i__2].r = q__1.r, work[i__2].i = q__1.i;
		i__2 = ismax + i__ - 1;
		i__3 = ismax + i__ - 1;
		q__1.r = s2.r * work[i__3].r - s2.i * work[i__3].i, q__1.i = 
			s2.r * work[i__3].i + s2.i * work[i__3].r;
		work[i__2].r = q__1.r, work[i__2].i = q__1.i;
/* L20: */
	    }
	    i__1 = ismin + *rank;
	    work[i__1].r = c1.r, work[i__1].i = c1.i;
	    i__1 = ismax + *rank;
	    work[i__1].r = c2.r, work[i__1].i = c2.i;
	    smin = sminpr;
	    smax = smaxpr;
	    ++(*rank);
	    goto L10;
	}
    }

/*     complex workspace: 3*MN. */

/*     Logically partition R = [ R11 R12 ] */
/*                             [  0  R22 ] */
/*     where R11 = R(1:RANK,1:RANK) */

/*     [R11,R12] = [ T11, 0 ] * Y */

    if (*rank < *n) {
	i__1 = *lwork - (mn << 1);
	ctzrzf_(rank, n, &a[a_offset], lda, &work[mn + 1], &work[(mn << 1) + 
		1], &i__1, info);
    }

/*     complex workspace: 2*MN. */
/*     Details of Householder rotations stored in WORK(MN+1:2*MN) */

/*     B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) */

    i__1 = *lwork - (mn << 1);
    cunmqr_("Left", "Conjugate transpose", m, nrhs, &mn, &a[a_offset], lda, &
	    work[1], &b[b_offset], ldb, &work[(mn << 1) + 1], &i__1, info);
/* Computing MAX */
    i__1 = (mn << 1) + 1;
    r__1 = wsize, r__2 = (mn << 1) + work[i__1].r;
    wsize = dmax(r__1,r__2);

/*     complex workspace: 2*MN+NB*NRHS. */

/*     B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) */

    ctrsm_("Left", "Upper", "No transpose", "Non-unit", rank, nrhs, &c_b2, &a[
	    a_offset], lda, &b[b_offset], ldb);

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *n;
	for (i__ = *rank + 1; i__ <= i__2; ++i__) {
	    i__3 = i__ + j * b_dim1;
	    b[i__3].r = 0.f, b[i__3].i = 0.f;
/* L30: */
	}
/* L40: */
    }

/*     B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) */

    if (*rank < *n) {
	i__1 = *n - *rank;
	i__2 = *lwork - (mn << 1);
	cunmrz_("Left", "Conjugate transpose", n, nrhs, rank, &i__1, &a[
		a_offset], lda, &work[mn + 1], &b[b_offset], ldb, &work[(mn <<
		 1) + 1], &i__2, info);
    }

/*     complex workspace: 2*MN+NRHS. */

/*     B(1:N,1:NRHS) := P * B(1:N,1:NRHS) */

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = jpvt[i__];
	    i__4 = i__ + j * b_dim1;
	    work[i__3].r = b[i__4].r, work[i__3].i = b[i__4].i;
/* L50: */
	}
	ccopy_(n, &work[1], &c__1, &b[j * b_dim1 + 1], &c__1);
/* L60: */
    }

/*     complex workspace: N. */

/*     Undo scaling */

    if (iascl == 1) {
	clascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb, 
		 info);
	clascl_("U", &c__0, &c__0, &smlnum, &anrm, rank, rank, &a[a_offset], 
		lda, info);
    } else if (iascl == 2) {
	clascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb, 
		 info);
	clascl_("U", &c__0, &c__0, &bignum, &anrm, rank, rank, &a[a_offset], 
		lda, info);
    }
    if (ibscl == 1) {
	clascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb, 
		 info);
    } else if (ibscl == 2) {
	clascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb, 
		 info);
    }

L70:
    q__1.r = (real) lwkopt, q__1.i = 0.f;
    work[1].r = q__1.r, work[1].i = q__1.i;

    return 0;

/*     End of CGELSY */

} /* cgelsy_ */
示例#5
0
/* Subroutine */ int chegst_(integer *itype, char *uplo, integer *n, complex *
	a, integer *lda, complex *b, integer *ldb, integer *info, ftnlen 
	uplo_len)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
    complex q__1;

    /* Local variables */
    static integer k, kb, nb;
    extern /* Subroutine */ int chemm_(char *, char *, integer *, integer *, 
	    complex *, complex *, integer *, complex *, integer *, complex *, 
	    complex *, integer *, ftnlen, ftnlen);
    extern logical lsame_(char *, char *, ftnlen, ftnlen);
    extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, 
	    integer *, integer *, complex *, complex *, integer *, complex *, 
	    integer *, ftnlen, ftnlen, ftnlen, ftnlen), ctrsm_(char *, char *,
	     char *, char *, integer *, integer *, complex *, complex *, 
	    integer *, complex *, integer *, ftnlen, ftnlen, ftnlen, ftnlen);
    static logical upper;
    extern /* Subroutine */ int chegs2_(integer *, char *, integer *, complex 
	    *, integer *, complex *, integer *, integer *, ftnlen), cher2k_(
	    char *, char *, integer *, integer *, complex *, complex *, 
	    integer *, complex *, integer *, real *, complex *, integer *, 
	    ftnlen, ftnlen), xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);


/*  -- LAPACK routine (version 3.0) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
/*     Courant Institute, Argonne National Lab, and Rice University */
/*     September 30, 1994 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CHEGST reduces a complex Hermitian-definite generalized */
/*  eigenproblem to standard form. */

/*  If ITYPE = 1, the problem is A*x = lambda*B*x, */
/*  and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H) */

/*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */
/*  B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L. */

/*  B must have been previously factorized as U**H*U or L*L**H by CPOTRF. */

/*  Arguments */
/*  ========= */

/*  ITYPE   (input) INTEGER */
/*          = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H); */
/*          = 2 or 3: compute U*A*U**H or L**H*A*L. */

/*  UPLO    (input) CHARACTER */
/*          = 'U':  Upper triangle of A is stored and B is factored as */
/*                  U**H*U; */
/*          = 'L':  Lower triangle of A is stored and B is factored as */
/*                  L*L**H. */

/*  N       (input) INTEGER */
/*          The order of the matrices A and B.  N >= 0. */

/*  A       (input/output) COMPLEX array, dimension (LDA,N) */
/*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
/*          N-by-N upper triangular part of A contains the upper */
/*          triangular part of the matrix A, and the strictly lower */
/*          triangular part of A is not referenced.  If UPLO = 'L', the */
/*          leading N-by-N lower triangular part of A contains the lower */
/*          triangular part of the matrix A, and the strictly upper */
/*          triangular part of A is not referenced. */

/*          On exit, if INFO = 0, the transformed matrix, stored in the */
/*          same format as A. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  B       (input) COMPLEX array, dimension (LDB,N) */
/*          The triangular factor from the Cholesky factorization of B, */
/*          as returned by CPOTRF. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= max(1,N). */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
    if (*itype < 1 || *itype > 3) {
	*info = -1;
    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -7;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CHEGST", &i__1, (ftnlen)6);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "CHEGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);

    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code */

	chegs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info, (
		ftnlen)1);
    } else {

/*        Use blocked code */

	if (*itype == 1) {
	    if (upper) {

/*              Compute inv(U')*A*inv(U) */

		i__1 = *n;
		i__2 = nb;
		for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
/* Computing MIN */
		    i__3 = *n - k + 1;
		    kb = min(i__3,nb);

/*                 Update the upper triangle of A(k:n,k:n) */

		    chegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + 
			    k * b_dim1], ldb, info, (ftnlen)1);
		    if (k + kb <= *n) {
			i__3 = *n - k - kb + 1;
			ctrsm_("Left", uplo, "Conjugate transpose", "Non-unit"
				, &kb, &i__3, &c_b1, &b[k + k * b_dim1], ldb, 
				&a[k + (k + kb) * a_dim1], lda, (ftnlen)4, (
				ftnlen)1, (ftnlen)19, (ftnlen)8);
			i__3 = *n - k - kb + 1;
			q__1.r = -.5f, q__1.i = -0.f;
			chemm_("Left", uplo, &kb, &i__3, &q__1, &a[k + k * 
				a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, 
				&c_b1, &a[k + (k + kb) * a_dim1], lda, (
				ftnlen)4, (ftnlen)1);
			i__3 = *n - k - kb + 1;
			q__1.r = -1.f, q__1.i = -0.f;
			cher2k_(uplo, "Conjugate transpose", &i__3, &kb, &
				q__1, &a[k + (k + kb) * a_dim1], lda, &b[k + (
				k + kb) * b_dim1], ldb, &c_b18, &a[k + kb + (
				k + kb) * a_dim1], lda, (ftnlen)1, (ftnlen)19)
				;
			i__3 = *n - k - kb + 1;
			q__1.r = -.5f, q__1.i = -0.f;
			chemm_("Left", uplo, &kb, &i__3, &q__1, &a[k + k * 
				a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, 
				&c_b1, &a[k + (k + kb) * a_dim1], lda, (
				ftnlen)4, (ftnlen)1);
			i__3 = *n - k - kb + 1;
			ctrsm_("Right", uplo, "No transpose", "Non-unit", &kb,
				 &i__3, &c_b1, &b[k + kb + (k + kb) * b_dim1],
				 ldb, &a[k + (k + kb) * a_dim1], lda, (ftnlen)
				5, (ftnlen)1, (ftnlen)12, (ftnlen)8);
		    }
/* L10: */
		}
	    } else {

/*              Compute inv(L)*A*inv(L') */

		i__2 = *n;
		i__1 = nb;
		for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
/* Computing MIN */
		    i__3 = *n - k + 1;
		    kb = min(i__3,nb);

/*                 Update the lower triangle of A(k:n,k:n) */

		    chegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + 
			    k * b_dim1], ldb, info, (ftnlen)1);
		    if (k + kb <= *n) {
			i__3 = *n - k - kb + 1;
			ctrsm_("Right", uplo, "Conjugate transpose", "Non-un"
				"it", &i__3, &kb, &c_b1, &b[k + k * b_dim1], 
				ldb, &a[k + kb + k * a_dim1], lda, (ftnlen)5, 
				(ftnlen)1, (ftnlen)19, (ftnlen)8);
			i__3 = *n - k - kb + 1;
			q__1.r = -.5f, q__1.i = -0.f;
			chemm_("Right", uplo, &i__3, &kb, &q__1, &a[k + k * 
				a_dim1], lda, &b[k + kb + k * b_dim1], ldb, &
				c_b1, &a[k + kb + k * a_dim1], lda, (ftnlen)5,
				 (ftnlen)1);
			i__3 = *n - k - kb + 1;
			q__1.r = -1.f, q__1.i = -0.f;
			cher2k_(uplo, "No transpose", &i__3, &kb, &q__1, &a[k 
				+ kb + k * a_dim1], lda, &b[k + kb + k * 
				b_dim1], ldb, &c_b18, &a[k + kb + (k + kb) * 
				a_dim1], lda, (ftnlen)1, (ftnlen)12);
			i__3 = *n - k - kb + 1;
			q__1.r = -.5f, q__1.i = -0.f;
			chemm_("Right", uplo, &i__3, &kb, &q__1, &a[k + k * 
				a_dim1], lda, &b[k + kb + k * b_dim1], ldb, &
				c_b1, &a[k + kb + k * a_dim1], lda, (ftnlen)5,
				 (ftnlen)1);
			i__3 = *n - k - kb + 1;
			ctrsm_("Left", uplo, "No transpose", "Non-unit", &
				i__3, &kb, &c_b1, &b[k + kb + (k + kb) * 
				b_dim1], ldb, &a[k + kb + k * a_dim1], lda, (
				ftnlen)4, (ftnlen)1, (ftnlen)12, (ftnlen)8);
		    }
/* L20: */
		}
	    }
	} else {
	    if (upper) {

/*              Compute U*A*U' */

		i__1 = *n;
		i__2 = nb;
		for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
/* Computing MIN */
		    i__3 = *n - k + 1;
		    kb = min(i__3,nb);

/*                 Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */

		    i__3 = k - 1;
		    ctrmm_("Left", uplo, "No transpose", "Non-unit", &i__3, &
			    kb, &c_b1, &b[b_offset], ldb, &a[k * a_dim1 + 1], 
			    lda, (ftnlen)4, (ftnlen)1, (ftnlen)12, (ftnlen)8);
		    i__3 = k - 1;
		    chemm_("Right", uplo, &i__3, &kb, &c_b2, &a[k + k * 
			    a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b1, &a[
			    k * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)1);
		    i__3 = k - 1;
		    cher2k_(uplo, "No transpose", &i__3, &kb, &c_b1, &a[k * 
			    a_dim1 + 1], lda, &b[k * b_dim1 + 1], ldb, &c_b18,
			     &a[a_offset], lda, (ftnlen)1, (ftnlen)12);
		    i__3 = k - 1;
		    chemm_("Right", uplo, &i__3, &kb, &c_b2, &a[k + k * 
			    a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b1, &a[
			    k * a_dim1 + 1], lda, (ftnlen)5, (ftnlen)1);
		    i__3 = k - 1;
		    ctrmm_("Right", uplo, "Conjugate transpose", "Non-unit", &
			    i__3, &kb, &c_b1, &b[k + k * b_dim1], ldb, &a[k * 
			    a_dim1 + 1], lda, (ftnlen)5, (ftnlen)1, (ftnlen)
			    19, (ftnlen)8);
		    chegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + 
			    k * b_dim1], ldb, info, (ftnlen)1);
/* L30: */
		}
	    } else {

/*              Compute L'*A*L */

		i__2 = *n;
		i__1 = nb;
		for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
/* Computing MIN */
		    i__3 = *n - k + 1;
		    kb = min(i__3,nb);

/*                 Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */

		    i__3 = k - 1;
		    ctrmm_("Right", uplo, "No transpose", "Non-unit", &kb, &
			    i__3, &c_b1, &b[b_offset], ldb, &a[k + a_dim1], 
			    lda, (ftnlen)5, (ftnlen)1, (ftnlen)12, (ftnlen)8);
		    i__3 = k - 1;
		    chemm_("Left", uplo, &kb, &i__3, &c_b2, &a[k + k * a_dim1]
			    , lda, &b[k + b_dim1], ldb, &c_b1, &a[k + a_dim1],
			     lda, (ftnlen)4, (ftnlen)1);
		    i__3 = k - 1;
		    cher2k_(uplo, "Conjugate transpose", &i__3, &kb, &c_b1, &
			    a[k + a_dim1], lda, &b[k + b_dim1], ldb, &c_b18, &
			    a[a_offset], lda, (ftnlen)1, (ftnlen)19);
		    i__3 = k - 1;
		    chemm_("Left", uplo, &kb, &i__3, &c_b2, &a[k + k * a_dim1]
			    , lda, &b[k + b_dim1], ldb, &c_b1, &a[k + a_dim1],
			     lda, (ftnlen)4, (ftnlen)1);
		    i__3 = k - 1;
		    ctrmm_("Left", uplo, "Conjugate transpose", "Non-unit", &
			    kb, &i__3, &c_b1, &b[k + k * b_dim1], ldb, &a[k + 
			    a_dim1], lda, (ftnlen)4, (ftnlen)1, (ftnlen)19, (
			    ftnlen)8);
		    chegs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + 
			    k * b_dim1], ldb, info, (ftnlen)1);
/* L40: */
		}
	    }
	}
    }
    return 0;

/*     End of CHEGST */

} /* chegst_ */
示例#6
0
/* Subroutine */ int chegvx_(integer *itype, char *jobz, char *range, char *
	uplo, integer *n, complex *a, integer *lda, complex *b, integer *ldb, 
	real *vl, real *vu, integer *il, integer *iu, real *abstol, integer *
	m, real *w, complex *z__, integer *ldz, complex *work, integer *lwork,
	 real *rwork, integer *iwork, integer *ifail, integer *info)
{
/*  -- LAPACK driver routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    CHEGVX computes selected eigenvalues, and optionally, eigenvectors   
    of a complex generalized Hermitian-definite eigenproblem, of the form   
    A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and   
    B are assumed to be Hermitian and B is also positive definite.   
    Eigenvalues and eigenvectors can be selected by specifying either a   
    range of values or a range of indices for the desired eigenvalues.   

    Arguments   
    =========   

    ITYPE   (input) INTEGER   
            Specifies the problem type to be solved:   
            = 1:  A*x = (lambda)*B*x   
            = 2:  A*B*x = (lambda)*x   
            = 3:  B*A*x = (lambda)*x   

    JOBZ    (input) CHARACTER*1   
            = 'N':  Compute eigenvalues only;   
            = 'V':  Compute eigenvalues and eigenvectors.   

    RANGE   (input) CHARACTER*1   
            = 'A': all eigenvalues will be found.   
            = 'V': all eigenvalues in the half-open interval (VL,VU]   
                   will be found.   
            = 'I': the IL-th through IU-th eigenvalues will be found.   
   *   
    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangles of A and B are stored;   
            = 'L':  Lower triangles of A and B are stored.   

    N       (input) INTEGER   
            The order of the matrices A and B.  N >= 0.   

    A       (input/output) COMPLEX array, dimension (LDA, N)   
            On entry, the Hermitian matrix A.  If UPLO = 'U', the   
            leading N-by-N upper triangular part of A contains the   
            upper triangular part of the matrix A.  If UPLO = 'L',   
            the leading N-by-N lower triangular part of A contains   
            the lower triangular part of the matrix A.   

            On exit,  the lower triangle (if UPLO='L') or the upper   
            triangle (if UPLO='U') of A, including the diagonal, is   
            destroyed.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    B       (input/output) COMPLEX array, dimension (LDB, N)   
            On entry, the Hermitian matrix B.  If UPLO = 'U', the   
            leading N-by-N upper triangular part of B contains the   
            upper triangular part of the matrix B.  If UPLO = 'L',   
            the leading N-by-N lower triangular part of B contains   
            the lower triangular part of the matrix B.   

            On exit, if INFO <= N, the part of B containing the matrix is   
            overwritten by the triangular factor U or L from the Cholesky   
            factorization B = U**H*U or B = L*L**H.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    VL      (input) REAL   
    VU      (input) REAL   
            If RANGE='V', the lower and upper bounds of the interval to   
            be searched for eigenvalues. VL < VU.   
            Not referenced if RANGE = 'A' or 'I'.   

    IL      (input) INTEGER   
    IU      (input) INTEGER   
            If RANGE='I', the indices (in ascending order) of the   
            smallest and largest eigenvalues to be returned.   
            1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.   
            Not referenced if RANGE = 'A' or 'V'.   

    ABSTOL  (input) REAL   
            The absolute error tolerance for the eigenvalues.   
            An approximate eigenvalue is accepted as converged   
            when it is determined to lie in an interval [a,b]   
            of width less than or equal to   

                    ABSTOL + EPS *   max( |a|,|b| ) ,   

            where EPS is the machine precision.  If ABSTOL is less than   
            or equal to zero, then  EPS*|T|  will be used in its place,   
            where |T| is the 1-norm of the tridiagonal matrix obtained   
            by reducing A to tridiagonal form.   

            Eigenvalues will be computed most accurately when ABSTOL is   
            set to twice the underflow threshold 2*SLAMCH('S'), not zero.   
            If this routine returns with INFO>0, indicating that some   
            eigenvectors did not converge, try setting ABSTOL to   
            2*SLAMCH('S').   

    M       (output) INTEGER   
            The total number of eigenvalues found.  0 <= M <= N.   
            If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.   

    W       (output) REAL array, dimension (N)   
            The first M elements contain the selected   
            eigenvalues in ascending order.   

    Z       (output) COMPLEX array, dimension (LDZ, max(1,M))   
            If JOBZ = 'N', then Z is not referenced.   
            If JOBZ = 'V', then if INFO = 0, the first M columns of Z   
            contain the orthonormal eigenvectors of the matrix A   
            corresponding to the selected eigenvalues, with the i-th   
            column of Z holding the eigenvector associated with W(i).   
            The eigenvectors are normalized as follows:   
            if ITYPE = 1 or 2, Z**T*B*Z = I;   
            if ITYPE = 3, Z**T*inv(B)*Z = I.   

            If an eigenvector fails to converge, then that column of Z   
            contains the latest approximation to the eigenvector, and the   
            index of the eigenvector is returned in IFAIL.   
            Note: the user must ensure that at least max(1,M) columns are   
            supplied in the array Z; if RANGE = 'V', the exact value of M   
            is not known in advance and an upper bound must be used.   

    LDZ     (input) INTEGER   
            The leading dimension of the array Z.  LDZ >= 1, and if   
            JOBZ = 'V', LDZ >= max(1,N).   

    WORK    (workspace/output) COMPLEX array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The length of the array WORK.  LWORK >= max(1,2*N-1).   
            For optimal efficiency, LWORK >= (NB+1)*N,   
            where NB is the blocksize for CHETRD returned by ILAENV.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    RWORK   (workspace) REAL array, dimension (7*N)   

    IWORK   (workspace) INTEGER array, dimension (5*N)   

    IFAIL   (output) INTEGER array, dimension (N)   
            If JOBZ = 'V', then if INFO = 0, the first M elements of   
            IFAIL are zero.  If INFO > 0, then IFAIL contains the   
            indices of the eigenvectors that failed to converge.   
            If JOBZ = 'N', then IFAIL is not referenced.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  CPOTRF or CHEEVX returned an error code:   
               <= N:  if INFO = i, CHEEVX failed to converge;   
                      i eigenvectors failed to converge.  Their indices   
                      are stored in array IFAIL.   
               > N:   if INFO = N + i, for 1 <= i <= N, then the leading   
                      minor of order i of B is not positive definite.   
                      The factorization of B could not be completed and   
                      no eigenvalues or eigenvectors were computed.   

    Further Details   
    ===============   

    Based on contributions by   
       Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static complex c_b1 = {1.f,0.f};
    static integer c__1 = 1;
    static integer c_n1 = -1;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, z_dim1, z_offset, i__1, i__2;
    /* Local variables */
    static integer lopt;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, 
	    integer *, integer *, complex *, complex *, integer *, complex *, 
	    integer *);
    static char trans[1];
    extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, 
	    integer *, integer *, complex *, complex *, integer *, complex *, 
	    integer *);
    static logical upper, wantz;
    static integer nb;
    static logical alleig, indeig, valeig;
    extern /* Subroutine */ int chegst_(integer *, char *, integer *, complex 
	    *, integer *, complex *, integer *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int xerbla_(char *, integer *), cheevx_(
	    char *, char *, char *, integer *, complex *, integer *, real *, 
	    real *, integer *, integer *, real *, integer *, real *, complex *
	    , integer *, complex *, integer *, real *, integer *, integer *, 
	    integer *), cpotrf_(char *, integer *, 
	    complex *, integer *, integer *);
    static integer lwkopt;
    static logical lquery;


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --work;
    --rwork;
    --iwork;
    --ifail;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    alleig = lsame_(range, "A");
    valeig = lsame_(range, "V");
    indeig = lsame_(range, "I");
    lquery = *lwork == -1;

    *info = 0;
    if (*itype < 0 || *itype > 3) {
	*info = -1;
    } else if (! (wantz || lsame_(jobz, "N"))) {
	*info = -2;
    } else if (! (alleig || valeig || indeig)) {
	*info = -3;
    } else if (! (upper || lsame_(uplo, "L"))) {
	*info = -4;
    } else if (*n < 0) {
	*info = -5;
    } else if (*lda < max(1,*n)) {
	*info = -7;
    } else if (*ldb < max(1,*n)) {
	*info = -9;
    } else if (valeig && *n > 0) {
	if (*vu <= *vl) {
	    *info = -11;
	}
    } else if (indeig && *il < 1) {
	*info = -12;
    } else if (indeig && (*iu < min(*n,*il) || *iu > *n)) {
	*info = -13;
    } else if (*ldz < 1 || wantz && *ldz < *n) {
	*info = -18;
    } else /* if(complicated condition) */ {
/* Computing MAX */
	i__1 = 1, i__2 = (*n << 1) - 1;
	if (*lwork < max(i__1,i__2) && ! lquery) {
	    *info = -20;
	}
    }

    if (*info == 0) {
	nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
		 (ftnlen)1);
	lwkopt = (nb + 1) * *n;
	work[1].r = (real) lwkopt, work[1].i = 0.f;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CHEGVX", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    *m = 0;
    if (*n == 0) {
	work[1].r = 1.f, work[1].i = 0.f;
	return 0;
    }

/*     Form a Cholesky factorization of B. */

    cpotrf_(uplo, n, &b[b_offset], ldb, info);
    if (*info != 0) {
	*info = *n + *info;
	return 0;
    }

/*     Transform problem to standard eigenvalue problem and solve. */

    chegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    cheevx_(jobz, range, uplo, n, &a[a_offset], lda, vl, vu, il, iu, abstol, 
	    m, &w[1], &z__[z_offset], ldz, &work[1], lwork, &rwork[1], &iwork[
	    1], &ifail[1], info);
    lopt = work[1].r;

    if (wantz) {

/*        Backtransform eigenvectors to the original problem. */

	if (*info > 0) {
	    *m = *info - 1;
	}
	if (*itype == 1 || *itype == 2) {

/*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;   
             backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */

	    if (upper) {
		*(unsigned char *)trans = 'N';
	    } else {
		*(unsigned char *)trans = 'C';
	    }

	    ctrsm_("Left", uplo, trans, "Non-unit", n, m, &c_b1, &b[b_offset],
		     ldb, &z__[z_offset], ldz);

	} else if (*itype == 3) {

/*           For B*A*x=(lambda)*x;   
             backtransform eigenvectors: x = L*y or U'*y */

	    if (upper) {
		*(unsigned char *)trans = 'C';
	    } else {
		*(unsigned char *)trans = 'N';
	    }

	    ctrmm_("Left", uplo, trans, "Non-unit", n, m, &c_b1, &b[b_offset],
		     ldb, &z__[z_offset], ldz);
	}
    }

/*     Set WORK(1) to optimal complex workspace size. */

    work[1].r = (real) lwkopt, work[1].i = 0.f;

    return 0;

/*     End of CHEGVX */

} /* chegvx_ */
示例#7
0
/* Subroutine */ int cgelsx_(integer *m, integer *n, integer *nrhs, complex *
	a, integer *lda, complex *b, integer *ldb, integer *jpvt, real *rcond,
	 integer *rank, complex *work, real *rwork, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
    complex q__1;

    /* Builtin functions */
    double c_abs(complex *);
    void r_cnjg(complex *, complex *);

    /* Local variables */
    static integer i__, j, k;
    static complex c1, c2, s1, s2, t1, t2;
    static integer mn;
    static real anrm, bnrm, smin, smax;
    static integer iascl, ibscl, ismin, ismax;
    extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, 
	    integer *, integer *, complex *, complex *, integer *, complex *, 
	    integer *, ftnlen, ftnlen, ftnlen, ftnlen), claic1_(integer *, 
	    integer *, complex *, real *, complex *, complex *, real *, 
	    complex *, complex *), cunm2r_(char *, char *, integer *, integer 
	    *, integer *, complex *, integer *, complex *, complex *, integer 
	    *, complex *, integer *, ftnlen, ftnlen), slabad_(real *, real *);
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *, ftnlen);
    extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, 
	    real *, integer *, integer *, complex *, integer *, integer *, 
	    ftnlen), cgeqpf_(integer *, integer *, complex *, integer *, 
	    integer *, complex *, complex *, real *, integer *);
    extern doublereal slamch_(char *, ftnlen);
    extern /* Subroutine */ int claset_(char *, integer *, integer *, complex 
	    *, complex *, complex *, integer *, ftnlen), xerbla_(char *, 
	    integer *, ftnlen);
    static real bignum;
    extern /* Subroutine */ int clatzm_(char *, integer *, integer *, complex 
	    *, integer *, complex *, complex *, complex *, integer *, complex 
	    *, ftnlen);
    static real sminpr;
    extern /* Subroutine */ int ctzrqf_(integer *, integer *, complex *, 
	    integer *, complex *, integer *);
    static real smaxpr, smlnum;


/*  -- LAPACK driver routine (version 3.0) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
/*     Courant Institute, Argonne National Lab, and Rice University */
/*     September 30, 1994 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  This routine is deprecated and has been replaced by routine CGELSY. */

/*  CGELSX computes the minimum-norm solution to a complex linear least */
/*  squares problem: */
/*      minimize || A * X - B || */
/*  using a complete orthogonal factorization of A.  A is an M-by-N */
/*  matrix which may be rank-deficient. */

/*  Several right hand side vectors b and solution vectors x can be */
/*  handled in a single call; they are stored as the columns of the */
/*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
/*  matrix X. */

/*  The routine first computes a QR factorization with column pivoting: */
/*      A * P = Q * [ R11 R12 ] */
/*                  [  0  R22 ] */
/*  with R11 defined as the largest leading submatrix whose estimated */
/*  condition number is less than 1/RCOND.  The order of R11, RANK, */
/*  is the effective rank of A. */

/*  Then, R22 is considered to be negligible, and R12 is annihilated */
/*  by unitary transformations from the right, arriving at the */
/*  complete orthogonal factorization: */
/*     A * P = Q * [ T11 0 ] * Z */
/*                 [  0  0 ] */
/*  The minimum-norm solution is then */
/*     X = P * Z' [ inv(T11)*Q1'*B ] */
/*                [        0       ] */
/*  where Q1 consists of the first RANK columns of Q. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A.  M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A.  N >= 0. */

/*  NRHS    (input) INTEGER */
/*          The number of right hand sides, i.e., the number of */
/*          columns of matrices B and X. NRHS >= 0. */

/*  A       (input/output) COMPLEX array, dimension (LDA,N) */
/*          On entry, the M-by-N matrix A. */
/*          On exit, A has been overwritten by details of its */
/*          complete orthogonal factorization. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,M). */

/*  B       (input/output) COMPLEX array, dimension (LDB,NRHS) */
/*          On entry, the M-by-NRHS right hand side matrix B. */
/*          On exit, the N-by-NRHS solution matrix X. */
/*          If m >= n and RANK = n, the residual sum-of-squares for */
/*          the solution in the i-th column is given by the sum of */
/*          squares of elements N+1:M in that column. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B. LDB >= max(1,M,N). */

/*  JPVT    (input/output) INTEGER array, dimension (N) */
/*          On entry, if JPVT(i) .ne. 0, the i-th column of A is an */
/*          initial column, otherwise it is a free column.  Before */
/*          the QR factorization of A, all initial columns are */
/*          permuted to the leading positions; only the remaining */
/*          free columns are moved as a result of column pivoting */
/*          during the factorization. */
/*          On exit, if JPVT(i) = k, then the i-th column of A*P */
/*          was the k-th column of A. */

/*  RCOND   (input) REAL */
/*          RCOND is used to determine the effective rank of A, which */
/*          is defined as the order of the largest leading triangular */
/*          submatrix R11 in the QR factorization with pivoting of A, */
/*          whose estimated condition number < 1/RCOND. */

/*  RANK    (output) INTEGER */
/*          The effective rank of A, i.e., the order of the submatrix */
/*          R11.  This is the same as the order of the submatrix T11 */
/*          in the complete orthogonal factorization of A. */

/*  WORK    (workspace) COMPLEX array, dimension */
/*                      (min(M,N) + max( N, 2*min(M,N)+NRHS )), */

/*  RWORK   (workspace) REAL array, dimension (2*N) */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --jpvt;
    --work;
    --rwork;

    /* Function Body */
    mn = min(*m,*n);
    ismin = mn + 1;
    ismax = (mn << 1) + 1;

/*     Test the input arguments. */

    *info = 0;
    if (*m < 0) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*lda < max(1,*m)) {
	*info = -5;
    } else /* if(complicated condition) */ {
/* Computing MAX */
	i__1 = max(1,*m);
	if (*ldb < max(i__1,*n)) {
	    *info = -7;
	}
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CGELSX", &i__1, (ftnlen)6);
	return 0;
    }

/*     Quick return if possible */

/* Computing MIN */
    i__1 = min(*m,*n);
    if (min(i__1,*nrhs) == 0) {
	*rank = 0;
	return 0;
    }

/*     Get machine parameters */

    smlnum = slamch_("S", (ftnlen)1) / slamch_("P", (ftnlen)1);
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);

/*     Scale A, B if max elements outside range [SMLNUM,BIGNUM] */

    anrm = clange_("M", m, n, &a[a_offset], lda, &rwork[1], (ftnlen)1);
    iascl = 0;
    if (anrm > 0.f && anrm < smlnum) {

/*        Scale matrix norm up to SMLNUM */

	clascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, 
		info, (ftnlen)1);
	iascl = 1;
    } else if (anrm > bignum) {

/*        Scale matrix norm down to BIGNUM */

	clascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, 
		info, (ftnlen)1);
	iascl = 2;
    } else if (anrm == 0.f) {

/*        Matrix all zero. Return zero solution. */

	i__1 = max(*m,*n);
	claset_("F", &i__1, nrhs, &c_b1, &c_b1, &b[b_offset], ldb, (ftnlen)1);
	*rank = 0;
	goto L100;
    }

    bnrm = clange_("M", m, nrhs, &b[b_offset], ldb, &rwork[1], (ftnlen)1);
    ibscl = 0;
    if (bnrm > 0.f && bnrm < smlnum) {

/*        Scale matrix norm up to SMLNUM */

	clascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb,
		 info, (ftnlen)1);
	ibscl = 1;
    } else if (bnrm > bignum) {

/*        Scale matrix norm down to BIGNUM */

	clascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb,
		 info, (ftnlen)1);
	ibscl = 2;
    }

/*     Compute QR factorization with column pivoting of A: */
/*        A * P = Q * R */

    cgeqpf_(m, n, &a[a_offset], lda, &jpvt[1], &work[1], &work[mn + 1], &
	    rwork[1], info);

/*     complex workspace MN+N. Real workspace 2*N. Details of Householder */
/*     rotations stored in WORK(1:MN). */

/*     Determine RANK using incremental condition estimation */

    i__1 = ismin;
    work[i__1].r = 1.f, work[i__1].i = 0.f;
    i__1 = ismax;
    work[i__1].r = 1.f, work[i__1].i = 0.f;
    smax = c_abs(&a[a_dim1 + 1]);
    smin = smax;
    if (c_abs(&a[a_dim1 + 1]) == 0.f) {
	*rank = 0;
	i__1 = max(*m,*n);
	claset_("F", &i__1, nrhs, &c_b1, &c_b1, &b[b_offset], ldb, (ftnlen)1);
	goto L100;
    } else {
	*rank = 1;
    }

L10:
    if (*rank < mn) {
	i__ = *rank + 1;
	claic1_(&c__2, rank, &work[ismin], &smin, &a[i__ * a_dim1 + 1], &a[
		i__ + i__ * a_dim1], &sminpr, &s1, &c1);
	claic1_(&c__1, rank, &work[ismax], &smax, &a[i__ * a_dim1 + 1], &a[
		i__ + i__ * a_dim1], &smaxpr, &s2, &c2);

	if (smaxpr * *rcond <= sminpr) {
	    i__1 = *rank;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		i__2 = ismin + i__ - 1;
		i__3 = ismin + i__ - 1;
		q__1.r = s1.r * work[i__3].r - s1.i * work[i__3].i, q__1.i = 
			s1.r * work[i__3].i + s1.i * work[i__3].r;
		work[i__2].r = q__1.r, work[i__2].i = q__1.i;
		i__2 = ismax + i__ - 1;
		i__3 = ismax + i__ - 1;
		q__1.r = s2.r * work[i__3].r - s2.i * work[i__3].i, q__1.i = 
			s2.r * work[i__3].i + s2.i * work[i__3].r;
		work[i__2].r = q__1.r, work[i__2].i = q__1.i;
/* L20: */
	    }
	    i__1 = ismin + *rank;
	    work[i__1].r = c1.r, work[i__1].i = c1.i;
	    i__1 = ismax + *rank;
	    work[i__1].r = c2.r, work[i__1].i = c2.i;
	    smin = sminpr;
	    smax = smaxpr;
	    ++(*rank);
	    goto L10;
	}
    }

/*     Logically partition R = [ R11 R12 ] */
/*                             [  0  R22 ] */
/*     where R11 = R(1:RANK,1:RANK) */

/*     [R11,R12] = [ T11, 0 ] * Y */

    if (*rank < *n) {
	ctzrqf_(rank, n, &a[a_offset], lda, &work[mn + 1], info);
    }

/*     Details of Householder rotations stored in WORK(MN+1:2*MN) */

/*     B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) */

    cunm2r_("Left", "Conjugate transpose", m, nrhs, &mn, &a[a_offset], lda, &
	    work[1], &b[b_offset], ldb, &work[(mn << 1) + 1], info, (ftnlen)4,
	     (ftnlen)19);

/*     workspace NRHS */

/*      B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS) */

    ctrsm_("Left", "Upper", "No transpose", "Non-unit", rank, nrhs, &c_b2, &a[
	    a_offset], lda, &b[b_offset], ldb, (ftnlen)4, (ftnlen)5, (ftnlen)
	    12, (ftnlen)8);

    i__1 = *n;
    for (i__ = *rank + 1; i__ <= i__1; ++i__) {
	i__2 = *nrhs;
	for (j = 1; j <= i__2; ++j) {
	    i__3 = i__ + j * b_dim1;
	    b[i__3].r = 0.f, b[i__3].i = 0.f;
/* L30: */
	}
/* L40: */
    }

/*     B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS) */

    if (*rank < *n) {
	i__1 = *rank;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    i__2 = *n - *rank + 1;
	    r_cnjg(&q__1, &work[mn + i__]);
	    clatzm_("Left", &i__2, nrhs, &a[i__ + (*rank + 1) * a_dim1], lda, 
		    &q__1, &b[i__ + b_dim1], &b[*rank + 1 + b_dim1], ldb, &
		    work[(mn << 1) + 1], (ftnlen)4);
/* L50: */
	}
    }

/*     workspace NRHS */

/*     B(1:N,1:NRHS) := P * B(1:N,1:NRHS) */

    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = (mn << 1) + i__;
	    work[i__3].r = 1.f, work[i__3].i = 0.f;
/* L60: */
	}
	i__2 = *n;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    i__3 = (mn << 1) + i__;
	    if (work[i__3].r == 1.f && work[i__3].i == 0.f) {
		if (jpvt[i__] != i__) {
		    k = i__;
		    i__3 = k + j * b_dim1;
		    t1.r = b[i__3].r, t1.i = b[i__3].i;
		    i__3 = jpvt[k] + j * b_dim1;
		    t2.r = b[i__3].r, t2.i = b[i__3].i;
L70:
		    i__3 = jpvt[k] + j * b_dim1;
		    b[i__3].r = t1.r, b[i__3].i = t1.i;
		    i__3 = (mn << 1) + k;
		    work[i__3].r = 0.f, work[i__3].i = 0.f;
		    t1.r = t2.r, t1.i = t2.i;
		    k = jpvt[k];
		    i__3 = jpvt[k] + j * b_dim1;
		    t2.r = b[i__3].r, t2.i = b[i__3].i;
		    if (jpvt[k] != i__) {
			goto L70;
		    }
		    i__3 = i__ + j * b_dim1;
		    b[i__3].r = t1.r, b[i__3].i = t1.i;
		    i__3 = (mn << 1) + k;
		    work[i__3].r = 0.f, work[i__3].i = 0.f;
		}
	    }
/* L80: */
	}
/* L90: */
    }

/*     Undo scaling */

    if (iascl == 1) {
	clascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb,
		 info, (ftnlen)1);
	clascl_("U", &c__0, &c__0, &smlnum, &anrm, rank, rank, &a[a_offset], 
		lda, info, (ftnlen)1);
    } else if (iascl == 2) {
	clascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb,
		 info, (ftnlen)1);
	clascl_("U", &c__0, &c__0, &bignum, &anrm, rank, rank, &a[a_offset], 
		lda, info, (ftnlen)1);
    }
    if (ibscl == 1) {
	clascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb,
		 info, (ftnlen)1);
    } else if (ibscl == 2) {
	clascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb,
		 info, (ftnlen)1);
    }

L100:

    return 0;

/*     End of CGELSX */

} /* cgelsx_ */
示例#8
0
 int chegvd_(int *itype, char *jobz, char *uplo, int *
	n, complex *a, int *lda, complex *b, int *ldb, float *w, 
	complex *work, int *lwork, float *rwork, int *lrwork, int *
	iwork, int *liwork, int *info)
{
    /* System generated locals */
    int a_dim1, a_offset, b_dim1, b_offset, i__1;
    float r__1, r__2;

    /* Local variables */
    int lopt;
    extern int lsame_(char *, char *);
    extern  int ctrmm_(char *, char *, char *, char *, 
	    int *, int *, complex *, complex *, int *, complex *, 
	    int *);
    int lwmin;
    char trans[1];
    int liopt;
    extern  int ctrsm_(char *, char *, char *, char *, 
	    int *, int *, complex *, complex *, int *, complex *, 
	    int *);
    int upper;
    int lropt;
    int wantz;
    extern  int cheevd_(char *, char *, int *, complex *, 
	    int *, float *, complex *, int *, float *, int *, 
	    int *, int *, int *), chegst_(int 
	    *, char *, int *, complex *, int *, complex *, int *, 
	    int *), xerbla_(char *, int *), cpotrf_(
	    char *, int *, complex *, int *, int *);
    int liwmin, lrwmin;
    int lquery;


/*  -- LAPACK driver routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CHEGVD computes all the eigenvalues, and optionally, the eigenvectors */
/*  of a complex generalized Hermitian-definite eigenproblem, of the form */
/*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and */
/*  B are assumed to be Hermitian and B is also positive definite. */
/*  If eigenvectors are desired, it uses a divide and conquer algorithm. */

/*  The divide and conquer algorithm makes very mild assumptions about */
/*  floating point arithmetic. It will work on machines with a guard */
/*  digit in add/subtract, or on those binary machines without guard */
/*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
/*  Cray-2. It could conceivably fail on hexadecimal or decimal machines */
/*  without guard digits, but we know of none. */

/*  Arguments */
/*  ========= */

/*  ITYPE   (input) INTEGER */
/*          Specifies the problem type to be solved: */
/*          = 1:  A*x = (lambda)*B*x */
/*          = 2:  A*B*x = (lambda)*x */
/*          = 3:  B*A*x = (lambda)*x */

/*  JOBZ    (input) CHARACTER*1 */
/*          = 'N':  Compute eigenvalues only; */
/*          = 'V':  Compute eigenvalues and eigenvectors. */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  Upper triangles of A and B are stored; */
/*          = 'L':  Lower triangles of A and B are stored. */

/*  N       (input) INTEGER */
/*          The order of the matrices A and B.  N >= 0. */

/*  A       (input/output) COMPLEX array, dimension (LDA, N) */
/*          On entry, the Hermitian matrix A.  If UPLO = 'U', the */
/*          leading N-by-N upper triangular part of A contains the */
/*          upper triangular part of the matrix A.  If UPLO = 'L', */
/*          the leading N-by-N lower triangular part of A contains */
/*          the lower triangular part of the matrix A. */

/*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
/*          matrix Z of eigenvectors.  The eigenvectors are normalized */
/*          as follows: */
/*          if ITYPE = 1 or 2, Z**H*B*Z = I; */
/*          if ITYPE = 3, Z**H*inv(B)*Z = I. */
/*          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */
/*          or the lower triangle (if UPLO='L') of A, including the */
/*          diagonal, is destroyed. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= MAX(1,N). */

/*  B       (input/output) COMPLEX array, dimension (LDB, N) */
/*          On entry, the Hermitian matrix B.  If UPLO = 'U', the */
/*          leading N-by-N upper triangular part of B contains the */
/*          upper triangular part of the matrix B.  If UPLO = 'L', */
/*          the leading N-by-N lower triangular part of B contains */
/*          the lower triangular part of the matrix B. */

/*          On exit, if INFO <= N, the part of B containing the matrix is */
/*          overwritten by the triangular factor U or L from the Cholesky */
/*          factorization B = U**H*U or B = L*L**H. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= MAX(1,N). */

/*  W       (output) REAL array, dimension (N) */
/*          If INFO = 0, the eigenvalues in ascending order. */

/*  WORK    (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) */
/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */

/*  LWORK   (input) INTEGER */
/*          The length of the array WORK. */
/*          If N <= 1,                LWORK >= 1. */
/*          If JOBZ  = 'N' and N > 1, LWORK >= N + 1. */
/*          If JOBZ  = 'V' and N > 1, LWORK >= 2*N + N**2. */

/*          If LWORK = -1, then a workspace query is assumed; the routine */
/*          only calculates the optimal sizes of the WORK, RWORK and */
/*          IWORK arrays, returns these values as the first entries of */
/*          the WORK, RWORK and IWORK arrays, and no error message */
/*          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */

/*  RWORK   (workspace/output) REAL array, dimension (MAX(1,LRWORK)) */
/*          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. */

/*  LRWORK  (input) INTEGER */
/*          The dimension of the array RWORK. */
/*          If N <= 1,                LRWORK >= 1. */
/*          If JOBZ  = 'N' and N > 1, LRWORK >= N. */
/*          If JOBZ  = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. */

/*          If LRWORK = -1, then a workspace query is assumed; the */
/*          routine only calculates the optimal sizes of the WORK, RWORK */
/*          and IWORK arrays, returns these values as the first entries */
/*          of the WORK, RWORK and IWORK arrays, and no error message */
/*          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */

/*  IWORK   (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */
/*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */

/*  LIWORK  (input) INTEGER */
/*          The dimension of the array IWORK. */
/*          If N <= 1,                LIWORK >= 1. */
/*          If JOBZ  = 'N' and N > 1, LIWORK >= 1. */
/*          If JOBZ  = 'V' and N > 1, LIWORK >= 3 + 5*N. */

/*          If LIWORK = -1, then a workspace query is assumed; the */
/*          routine only calculates the optimal sizes of the WORK, RWORK */
/*          and IWORK arrays, returns these values as the first entries */
/*          of the WORK, RWORK and IWORK arrays, and no error message */
/*          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
/*          > 0:  CPOTRF or CHEEVD returned an error code: */
/*             <= N:  if INFO = i and JOBZ = 'N', then the algorithm */
/*                    failed to converge; i off-diagonal elements of an */
/*                    intermediate tridiagonal form did not converge to */
/*                    zero; */
/*                    if INFO = i and JOBZ = 'V', then the algorithm */
/*                    failed to compute an eigenvalue while working on */
/*                    the submatrix lying in rows and columns INFO/(N+1) */
/*                    through mod(INFO,N+1); */
/*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading */
/*                    minor of order i of B is not positive definite. */
/*                    The factorization of B could not be completed and */
/*                    no eigenvalues or eigenvectors were computed. */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */

/*  Modified so that no backsubstitution is performed if CHEEVD fails to */
/*  converge (NEIG in old code could be greater than N causing out of */
/*  bounds reference to A - reported by Ralf Meyer).  Also corrected the */
/*  description of INFO and the test on ITYPE. Sven, 16 Feb 05. */
/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --w;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1;

    *info = 0;
    if (*n <= 1) {
	lwmin = 1;
	lrwmin = 1;
	liwmin = 1;
    } else if (wantz) {
	lwmin = (*n << 1) + *n * *n;
	lrwmin = *n * 5 + 1 + (*n << 1) * *n;
	liwmin = *n * 5 + 3;
    } else {
	lwmin = *n + 1;
	lrwmin = *n;
	liwmin = 1;
    }
    lopt = lwmin;
    lropt = lrwmin;
    liopt = liwmin;
    if (*itype < 1 || *itype > 3) {
	*info = -1;
    } else if (! (wantz || lsame_(jobz, "N"))) {
	*info = -2;
    } else if (! (upper || lsame_(uplo, "L"))) {
	*info = -3;
    } else if (*n < 0) {
	*info = -4;
    } else if (*lda < MAX(1,*n)) {
	*info = -6;
    } else if (*ldb < MAX(1,*n)) {
	*info = -8;
    }

    if (*info == 0) {
	work[1].r = (float) lopt, work[1].i = 0.f;
	rwork[1] = (float) lropt;
	iwork[1] = liopt;

	if (*lwork < lwmin && ! lquery) {
	    *info = -11;
	} else if (*lrwork < lrwmin && ! lquery) {
	    *info = -13;
	} else if (*liwork < liwmin && ! lquery) {
	    *info = -15;
	}
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CHEGVD", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Form a Cholesky factorization of B. */

    cpotrf_(uplo, n, &b[b_offset], ldb, info);
    if (*info != 0) {
	*info = *n + *info;
	return 0;
    }

/*     Transform problem to standard eigenvalue problem and solve. */

    chegst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    cheevd_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &rwork[
	    1], lrwork, &iwork[1], liwork, info);
/* Computing MAX */
    r__1 = (float) lopt, r__2 = work[1].r;
    lopt = MAX(r__1,r__2);
/* Computing MAX */
    r__1 = (float) lropt;
    lropt = MAX(r__1,rwork[1]);
/* Computing MAX */
    r__1 = (float) liopt, r__2 = (float) iwork[1];
    liopt = MAX(r__1,r__2);

    if (wantz && *info == 0) {

/*        Backtransform eigenvectors to the original problem. */

	if (*itype == 1 || *itype == 2) {

/*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */
/*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */

	    if (upper) {
		*(unsigned char *)trans = 'N';
	    } else {
		*(unsigned char *)trans = 'C';
	    }

	    ctrsm_("Left", uplo, trans, "Non-unit", n, n, &c_b1, &b[b_offset], 
		     ldb, &a[a_offset], lda);

	} else if (*itype == 3) {

/*           For B*A*x=(lambda)*x; */
/*           backtransform eigenvectors: x = L*y or U'*y */

	    if (upper) {
		*(unsigned char *)trans = 'C';
	    } else {
		*(unsigned char *)trans = 'N';
	    }

	    ctrmm_("Left", uplo, trans, "Non-unit", n, n, &c_b1, &b[b_offset], 
		     ldb, &a[a_offset], lda);
	}
    }

    work[1].r = (float) lopt, work[1].i = 0.f;
    rwork[1] = (float) lropt;
    iwork[1] = liopt;

    return 0;

/*     End of CHEGVD */

} /* chegvd_ */
示例#9
0
/* Subroutine */ int ctrtri_(char *uplo, char *diag, integer *n, complex *a, 
	integer *lda, integer *info)
{
    /* System generated locals */
    address a__1[2];
    integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5;
    complex q__1;
    char ch__1[2];

    /* Builtin functions */
    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);

    /* Local variables */
    integer j, jb, nb, nn;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, 
	    integer *, integer *, complex *, complex *, integer *, complex *, 
	    integer *), ctrsm_(char *, char *, 
	     char *, char *, integer *, integer *, complex *, complex *, 
	    integer *, complex *, integer *);
    logical upper;
    extern /* Subroutine */ int ctrti2_(char *, char *, integer *, complex *, 
	    integer *, integer *), xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *);
    logical nounit;


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CTRTRI computes the inverse of a complex upper or lower triangular */
/*  matrix A. */

/*  This is the Level 3 BLAS version of the algorithm. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  A is upper triangular; */
/*          = 'L':  A is lower triangular. */

/*  DIAG    (input) CHARACTER*1 */
/*          = 'N':  A is non-unit triangular; */
/*          = 'U':  A is unit triangular. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  A       (input/output) COMPLEX array, dimension (LDA,N) */
/*          On entry, the triangular matrix A.  If UPLO = 'U', the */
/*          leading N-by-N upper triangular part of the array A contains */
/*          the upper triangular matrix, and the strictly lower */
/*          triangular part of A is not referenced.  If UPLO = 'L', the */
/*          leading N-by-N lower triangular part of the array A contains */
/*          the lower triangular matrix, and the strictly upper */
/*          triangular part of A is not referenced.  If DIAG = 'U', the */
/*          diagonal elements of A are also not referenced and are */
/*          assumed to be 1. */
/*          On exit, the (triangular) inverse of the original matrix, in */
/*          the same storage format. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  INFO    (output) INTEGER */
/*          = 0: successful exit */
/*          < 0: if INFO = -i, the i-th argument had an illegal value */
/*          > 0: if INFO = i, A(i,i) is exactly zero.  The triangular */
/*               matrix is singular and its inverse can not be computed. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    nounit = lsame_(diag, "N");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (! nounit && ! lsame_(diag, "U")) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CTRTRI", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Check for singularity if non-unit. */

    if (nounit) {
	i__1 = *n;
	for (*info = 1; *info <= i__1; ++(*info)) {
	    i__2 = *info + *info * a_dim1;
	    if (a[i__2].r == 0.f && a[i__2].i == 0.f) {
		return 0;
	    }
/* L10: */
	}
	*info = 0;
    }

/*     Determine the block size for this environment. */

/* Writing concatenation */
    i__3[0] = 1, a__1[0] = uplo;
    i__3[1] = 1, a__1[1] = diag;
    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
    nb = ilaenv_(&c__1, "CTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1);
    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code */

	ctrti2_(uplo, diag, n, &a[a_offset], lda, info);
    } else {

/*        Use blocked code */

	if (upper) {

/*           Compute inverse of upper triangular matrix */

	    i__1 = *n;
	    i__2 = nb;
	    for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
/* Computing MIN */
		i__4 = nb, i__5 = *n - j + 1;
		jb = min(i__4,i__5);

/*              Compute rows 1:j-1 of current block column */

		i__4 = j - 1;
		ctrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, &
			c_b1, &a[a_offset], lda, &a[j * a_dim1 + 1], lda);
		i__4 = j - 1;
		q__1.r = -1.f, q__1.i = -0.f;
		ctrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, &
			q__1, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], 
			lda);

/*              Compute inverse of current diagonal block */

		ctrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info);
/* L20: */
	    }
	} else {

/*           Compute inverse of lower triangular matrix */

	    nn = (*n - 1) / nb * nb + 1;
	    i__2 = -nb;
	    for (j = nn; i__2 < 0 ? j >= 1 : j <= 1; j += i__2) {
/* Computing MIN */
		i__1 = nb, i__4 = *n - j + 1;
		jb = min(i__1,i__4);
		if (j + jb <= *n) {

/*                 Compute rows j+jb:n of current block column */

		    i__1 = *n - j - jb + 1;
		    ctrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, 
			    &c_b1, &a[j + jb + (j + jb) * a_dim1], lda, &a[j 
			    + jb + j * a_dim1], lda);
		    i__1 = *n - j - jb + 1;
		    q__1.r = -1.f, q__1.i = -0.f;
		    ctrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb, 
			     &q__1, &a[j + j * a_dim1], lda, &a[j + jb + j * 
			    a_dim1], lda);
		}

/*              Compute inverse of current diagonal block */

		ctrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info);
/* L30: */
	    }
	}
    }

    return 0;

/*     End of CTRTRI */

} /* ctrtri_ */
示例#10
0
int main( int argc, char** argv )
{
	obj_t a, c;
	obj_t c_save;
	obj_t alpha;
	dim_t m, n;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input, n_input;
	num_t dt;
	int   r, n_repeats;
	side_t side;
	uplo_t uploa;
	trans_t transa;
	diag_t diaga;
	f77_char f77_side;
	f77_char f77_uploa;
	f77_char f77_transa;
	f77_char f77_diaga;

	double dtime;
	double dtime_save;
	double gflops;

	//bli_init();

	//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

	n_repeats = 3;

#ifndef PRINT
	p_begin = 200;
	p_end   = 2000;
	p_inc   = 200;

	m_input = -1;
	n_input = -1;
#else
	p_begin = 16;
	p_end   = 16;
	p_inc   = 1;

	m_input = 4;
	n_input = 4;
#endif

#if 1
	//dt = BLIS_FLOAT;
	dt = BLIS_DOUBLE;
#else
	//dt = BLIS_SCOMPLEX;
	dt = BLIS_DCOMPLEX;
#endif

	side = BLIS_LEFT;
	//side = BLIS_RIGHT;

	uploa = BLIS_LOWER;
	//uploa = BLIS_UPPER;

	transa = BLIS_NO_TRANSPOSE;

	diaga = BLIS_NONUNIT_DIAG;

	bli_param_map_blis_to_netlib_side( side, &f77_side );
	bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa );
	bli_param_map_blis_to_netlib_trans( transa, &f77_transa );
	bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga );


	for ( p = p_begin; p <= p_end; p += p_inc )
	{
		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
		else               n =     ( dim_t )    n_input;

		bli_obj_create( dt, 1, 1, 0, 0, &alpha );

		if ( bli_is_left( side ) )
			bli_obj_create( dt, m, m, 0, 0, &a );
		else
			bli_obj_create( dt, n, n, 0, 0, &a );
		bli_obj_create( dt, m, n, 0, 0, &c );
		bli_obj_create( dt, m, n, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &c );

		bli_obj_set_struc( BLIS_TRIANGULAR, &a );
		bli_obj_set_uplo( uploa, &a );
		bli_obj_set_conjtrans( transa, &a );
		bli_obj_set_diag( diaga, &a );

		// Randomize A, make it densely Hermitian, and zero the unstored
		// triangle to ensure the implementation reads only from the stored
		// region.
		bli_randm( &a );
		bli_mkherm( &a );
		bli_mktrim( &a );

		bli_setsc(  (2.0/1.0), 1.0, &alpha );


		bli_copym( &c, &c_save );
	
		dtime_save = DBL_MAX;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );


			dtime = bli_clock();


#ifdef PRINT
			bli_invertd( &a );
			bli_printm( "a", &a, "%4.1f", "" );
			bli_invertd( &a );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS

			bli_trsm( side,
			          &alpha,
			          &a,
			          &c );
#else

		if ( bli_is_float( dt ) )
		{
			f77_int  mm     = bli_obj_length( &c );
			f77_int  nn     = bli_obj_width( &c );
			f77_int  lda    = bli_obj_col_stride( &a );
			f77_int  ldc    = bli_obj_col_stride( &c );
			float*   alphap = bli_obj_buffer( &alpha );
			float*   ap     = bli_obj_buffer( &a );
			float*   cp     = bli_obj_buffer( &c );

			strsm_( &f77_side,
			        &f77_uploa,
			        &f77_transa,
			        &f77_diaga,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        cp, &ldc );
		}
		else if ( bli_is_double( dt ) )
		{
			f77_int  mm     = bli_obj_length( &c );
			f77_int  nn     = bli_obj_width( &c );
			f77_int  lda    = bli_obj_col_stride( &a );
			f77_int  ldc    = bli_obj_col_stride( &c );
			double*  alphap = bli_obj_buffer( &alpha );
			double*  ap     = bli_obj_buffer( &a );
			double*  cp     = bli_obj_buffer( &c );

			dtrsm_( &f77_side,
			        &f77_uploa,
			        &f77_transa,
			        &f77_diaga,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        cp, &ldc );
		}
		else if ( bli_is_scomplex( dt ) )
		{
			f77_int  mm     = bli_obj_length( &c );
			f77_int  nn     = bli_obj_width( &c );
			f77_int  lda    = bli_obj_col_stride( &a );
			f77_int  ldc    = bli_obj_col_stride( &c );
			scomplex*  alphap = bli_obj_buffer( &alpha );
			scomplex*  ap     = bli_obj_buffer( &a );
			scomplex*  cp     = bli_obj_buffer( &c );

			ctrsm_( &f77_side,
			        &f77_uploa,
			        &f77_transa,
			        &f77_diaga,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        cp, &ldc );
		}
		else if ( bli_is_dcomplex( dt ) )
		{
			f77_int  mm     = bli_obj_length( &c );
			f77_int  nn     = bli_obj_width( &c );
			f77_int  lda    = bli_obj_col_stride( &a );
			f77_int  ldc    = bli_obj_col_stride( &c );
			dcomplex*  alphap = bli_obj_buffer( &alpha );
			dcomplex*  ap     = bli_obj_buffer( &a );
			dcomplex*  cp     = bli_obj_buffer( &c );

			ztrsm_( &f77_side,
			        &f77_uploa,
			        &f77_transa,
			        &f77_diaga,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        cp, &ldc );
		}
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%9.5f", "" );
			exit(1);
#endif

			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		if ( bli_is_left( side ) )
			gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 );
		else
			gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 );

		if ( bli_is_complex( dt ) ) gflops *= 4.0;

#ifdef BLIS
		printf( "data_trsm_blis" );
#else
		printf( "data_trsm_%s", BLAS );
#endif
		printf( "( %2lu, 1:4 ) = [ %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )n, dtime_save, gflops );

		bli_obj_free( &alpha );

		bli_obj_free( &a );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	//bli_finalize();

	return 0;
}