Example #1
0
/***************************************************************************//**
 *  Parallel forward substitution for tile LU - dynamic scheduling
 **/
void plasma_pztrsmpl_quark(PLASMA_desc A, PLASMA_desc B, PLASMA_desc L, int *IPIV,
                           PLASMA_sequence *sequence, PLASMA_request *request)
{
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    int k, m, n;
    int ldak, ldam, ldbk, ldbm;
    int tempkm, tempnn, tempkmin, tempmm, tempkn;
    int ib;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    ib = PLASMA_IB;
    for (k = 0; k < min(A.mt, A.nt); k++) {
        tempkm   = k == A.mt-1 ? A.m-k*A.mb : A.mb;
        tempkn   = k == A.nt-1 ? A.n-k*A.nb : A.nb;
        tempkmin = k == min(A.mt, A.nt)-1 ? min(A.m, A.n)-k*A.mb : A.mb;
        ldak = BLKLDD(A, k);
        ldbk = BLKLDD(B, k);
        for (n = 0; n < B.nt; n++) {
            tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
            QUARK_CORE_zgessm(
                plasma->quark, &task_flags,
                tempkm, tempnn, tempkmin, ib, L.nb,
                IPIV(k, k),
                A(k, k), ldak,
                B(k, n), ldbk);
        }
        for (m = k+1; m < A.mt; m++) {
            tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
            ldam = BLKLDD(A, m);
            ldbm = BLKLDD(B, m);
            for (n = 0; n < B.nt; n++) {
                tempnn  = n == B.nt-1 ? B.n-n*B.nb : B.nb;
                QUARK_CORE_zssssm(
                    plasma->quark, &task_flags,
                    A.nb, tempnn, tempmm, tempnn, tempkn, ib, L.nb,
                    B(k, n), ldbk,
                    B(m, n), ldbm,
                    L(m, k), L.mb,
                    A(m, k), ldam,
                    IPIV(m, k));
            }
        }
    }
}
Example #2
0
/***************************************************************************//**
 *  Parallel tile row interchanges - dynamic scheduling
 **/
void plasma_pclaswp_quark(PLASMA_desc B, int *IPIV, int inc,
                          PLASMA_sequence *sequence, PLASMA_request *request)
{
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    int m, n;
    int tempi, tempm, tempmm, tempnn;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    if ( inc > 0 ) 
    {
        for (m = 0; m < B.mt; m++) {
            tempi = m * B.mb;
            tempm = B.m - tempi;
            tempmm = m == B.mt-1 ? tempm : B.mb;

            for (n = 0; n < B.nt; n++) {
                tempnn = n == B.nt-1 ? B.n - n * B.nb : B.nb;
                
                QUARK_CORE_claswp_ontile(
                    plasma->quark, &task_flags,
                    plasma_desc_submatrix(B, tempi, n*B.nb, tempm, tempnn),
                    B(m, n), 1, tempmm, IPIV(m), inc, B(B.mt-1, n) );
            }
        }
    } 
    else 
    {
        for (m = B.mt-1; m > -1; m--) {
            tempi = m * B.mb;
            tempm = B.m - tempi;
            tempmm = m == B.mt-1 ? tempm : B.mb;

            for (n = 0; n < B.nt; n++) {
                tempnn = n == B.nt-1 ? B.n - n * B.nb : B.nb;
                
                QUARK_CORE_claswp_ontile(
                    plasma->quark, &task_flags,
                    plasma_desc_submatrix(B, tempi, n*B.nb, tempm, tempnn),
                    B(m, n), 1, tempmm, IPIV(m), inc, B(0, n) );
            }
        }
    } 
}
Example #3
0
  void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) {
    Ppattern_ = Ppattern;

    const RCP<const Map> uniqueMap    = Ppattern_->getDomainMap();
    const RCP<const Map> nonUniqueMap = Ppattern_->getColMap();
    RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);

    const size_t NSDim = Bc.getNumVectors();
    X_ = MultiVectorFactory::Build(nonUniqueMap, NSDim);
    X_->doImport(Bc, *importer, Xpetra::INSERT);

    size_t numRows = Ppattern_->getNodeNumRows();
    XXtInv_.resize(numRows);
    Teuchos::SerialDenseVector<LO,SC> BcRow(NSDim, false);
    for (size_t i = 0; i < numRows; i++) {
      Teuchos::ArrayView<const LO> indices;
      Ppattern_->getLocalRowView(i, indices);

      size_t nnz = indices.size();

      Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false);
      for (size_t j = 0; j < nnz; j++) {
        for (size_t k = 0; k < NSDim; k++)
          BcRow[k] = X_->getData(k)[indices[j]];

        Teuchos::setCol(BcRow, (LO)j, locX);
      }

      XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false);

      Teuchos::BLAS<LO,SC> blas;
      blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz,
                Teuchos::ScalarTraits<SC>::one(), locX.values(), locX.stride(),
                locX.values(), locX.stride(), Teuchos::ScalarTraits<SC>::zero(),
                XXtInv_[i].values(), XXtInv_[i].stride());

      Teuchos::LAPACK<LO,SC> lapack;
      LO info, lwork = 3*NSDim;
      ArrayRCP<LO> IPIV(NSDim);
      ArrayRCP<SC> WORK(lwork);
      lapack.GETRF(NSDim, NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), &info);
      lapack.GETRI(NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), WORK.get(), lwork, &info);
    }
  }
Example #4
0
/* Subroutine */ int dgtrfs_(char *trans, integer *n, integer *nrhs, 
	doublereal *dl, doublereal *d, doublereal *du, doublereal *dlf, 
	doublereal *df, doublereal *duf, doublereal *du2, integer *ipiv, 
	doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *
	ferr, doublereal *berr, doublereal *work, integer *iwork, integer *
	info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    DGTRFS improves the computed solution to a system of linear   
    equations when the coefficient matrix is tridiagonal, and provides   
    error bounds and backward error estimates for the solution.   

    Arguments   
    =========   

    TRANS   (input) CHARACTER*1   
            Specifies the form of the system of equations:   
            = 'N':  A * X = B     (No transpose)   
            = 'T':  A**T * X = B  (Transpose)   
            = 'C':  A**H * X = B  (Conjugate transpose = Transpose)   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrix B.  NRHS >= 0.   

    DL      (input) DOUBLE PRECISION array, dimension (N-1)   
            The (n-1) subdiagonal elements of A.   

    D       (input) DOUBLE PRECISION array, dimension (N)   
            The diagonal elements of A.   

    DU      (input) DOUBLE PRECISION array, dimension (N-1)   
            The (n-1) superdiagonal elements of A.   

    DLF     (input) DOUBLE PRECISION array, dimension (N-1)   
            The (n-1) multipliers that define the matrix L from the   
            LU factorization of A as computed by DGTTRF.   

    DF      (input) DOUBLE PRECISION array, dimension (N)   
            The n diagonal elements of the upper triangular matrix U from 
  
            the LU factorization of A.   

    DUF     (input) DOUBLE PRECISION array, dimension (N-1)   
            The (n-1) elements of the first superdiagonal of U.   

    DU2     (input) DOUBLE PRECISION array, dimension (N-2)   
            The (n-2) elements of the second superdiagonal of U.   

    IPIV    (input) INTEGER array, dimension (N)   
            The pivot indices; for 1 <= i <= n, row i of the matrix was   
            interchanged with row IPIV(i).  IPIV(i) will always be either 
  
            i or i+1; IPIV(i) = i indicates a row interchange was not   
            required.   

    B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)   
            The right hand side matrix B.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    X       (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS)   
            On entry, the solution matrix X, as computed by DGTTRS.   
            On exit, the improved solution matrix X.   

    LDX     (input) INTEGER   
            The leading dimension of the array X.  LDX >= max(1,N).   

    FERR    (output) DOUBLE PRECISION array, dimension (NRHS)   
            The estimated forward error bound for each solution vector   
            X(j) (the j-th column of the solution matrix X).   
            If XTRUE is the true solution corresponding to X(j), FERR(j) 
  
            is an estimated upper bound for the magnitude of the largest 
  
            element in (X(j) - XTRUE) divided by the magnitude of the   
            largest element in X(j).  The estimate is as reliable as   
            the estimate for RCOND, and is almost always a slight   
            overestimate of the true error.   

    BERR    (output) DOUBLE PRECISION array, dimension (NRHS)   
            The componentwise relative backward error of each solution   
            vector X(j) (i.e., the smallest relative change in   
            any element of A or B that makes X(j) an exact solution).   

    WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)   

    IWORK   (workspace) INTEGER array, dimension (N)   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    Internal Parameters   
    ===================   

    ITMAX is the maximum number of steps of iterative refinement.   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    static doublereal c_b18 = -1.;
    static doublereal c_b19 = 1.;
    
    /* System generated locals */
    integer b_dim1, b_offset, x_dim1, x_offset, i__1, i__2;
    doublereal d__1, d__2, d__3, d__4;
    /* Local variables */
    static integer kase;
    static doublereal safe1, safe2;
    static integer i, j;
    static doublereal s;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
	    doublereal *, integer *), daxpy_(integer *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *);
    static integer count;
    extern doublereal dlamch_(char *);
    extern /* Subroutine */ int dlacon_(integer *, doublereal *, doublereal *,
	     integer *, doublereal *, integer *);
    static integer nz;
    extern /* Subroutine */ int dlagtm_(char *, integer *, integer *, 
	    doublereal *, doublereal *, doublereal *, doublereal *, 
	    doublereal *, integer *, doublereal *, doublereal *, integer *);
    static doublereal safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static logical notran;
    static char transn[1];
    extern /* Subroutine */ int dgttrs_(char *, integer *, integer *, 
	    doublereal *, doublereal *, doublereal *, doublereal *, integer *,
	     doublereal *, integer *, integer *);
    static char transt[1];
    static doublereal lstres, eps;



#define DL(I) dl[(I)-1]
#define D(I) d[(I)-1]
#define DU(I) du[(I)-1]
#define DLF(I) dlf[(I)-1]
#define DF(I) df[(I)-1]
#define DUF(I) duf[(I)-1]
#define DU2(I) du2[(I)-1]
#define IPIV(I) ipiv[(I)-1]
#define FERR(I) ferr[(I)-1]
#define BERR(I) berr[(I)-1]
#define WORK(I) work[(I)-1]
#define IWORK(I) iwork[(I)-1]

#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]
#define X(I,J) x[(I)-1 + ((J)-1)* ( *ldx)]

    *info = 0;
    notran = lsame_(trans, "N");
    if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, "C")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*ldb < max(1,*n)) {
	*info = -13;
    } else if (*ldx < max(1,*n)) {
	*info = -15;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DGTRFS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
	i__1 = *nrhs;
	for (j = 1; j <= *nrhs; ++j) {
	    FERR(j) = 0.;
	    BERR(j) = 0.;
/* L10: */
	}
	return 0;
    }

    if (notran) {
	*(unsigned char *)transn = 'N';
	*(unsigned char *)transt = 'T';
    } else {
	*(unsigned char *)transn = 'T';
	*(unsigned char *)transt = 'N';
    }

/*     NZ = maximum number of nonzero elements in each row of A, plus 1 */

    nz = 4;
    eps = dlamch_("Epsilon");
    safmin = dlamch_("Safe minimum");
    safe1 = nz * safmin;
    safe2 = safe1 / eps;

/*     Do for each right hand side */

    i__1 = *nrhs;
    for (j = 1; j <= *nrhs; ++j) {

	count = 1;
	lstres = 3.;
L20:

/*        Loop until stopping criterion is satisfied.   

          Compute residual R = B - op(A) * X,   
          where op(A) = A, A**T, or A**H, depending on TRANS. */

	dcopy_(n, &B(1,j), &c__1, &WORK(*n + 1), &c__1);
	dlagtm_(trans, n, &c__1, &c_b18, &DL(1), &D(1), &DU(1), &X(1,j), ldx, &c_b19, &WORK(*n + 1), n);

/*        Compute abs(op(A))*abs(x) + abs(b) for use in the backward 
  
          error bound. */

	if (notran) {
	    if (*n == 1) {
		WORK(1) = (d__1 = B(1,j), abs(d__1)) + (d__2 = D(1)
			 * X(1,j), abs(d__2));
	    } else {
		WORK(1) = (d__1 = B(1,j), abs(d__1)) + (d__2 = D(1)
			 * X(1,j), abs(d__2)) + (d__3 = DU(1) * X(2,j), abs(d__3));
		i__2 = *n - 1;
		for (i = 2; i <= *n-1; ++i) {
		    WORK(i) = (d__1 = B(i,j), abs(d__1)) + (d__2 = 
			    DL(i - 1) * X(i-1,j), abs(d__2)) + (
			    d__3 = D(i) * X(i,j), abs(d__3)) + (
			    d__4 = DU(i) * X(i+1,j), abs(d__4));
/* L30: */
		}
		WORK(*n) = (d__1 = B(*n,j), abs(d__1)) + (d__2 = 
			DL(*n - 1) * X(*n-1,j), abs(d__2)) + (
			d__3 = D(*n) * X(*n,j), abs(d__3));
	    }
	} else {
	    if (*n == 1) {
		WORK(1) = (d__1 = B(1,j), abs(d__1)) + (d__2 = D(1)
			 * X(1,j), abs(d__2));
	    } else {
		WORK(1) = (d__1 = B(1,j), abs(d__1)) + (d__2 = D(1)
			 * X(1,j), abs(d__2)) + (d__3 = DL(1) * X(2,j), abs(d__3));
		i__2 = *n - 1;
		for (i = 2; i <= *n-1; ++i) {
		    WORK(i) = (d__1 = B(i,j), abs(d__1)) + (d__2 = 
			    DU(i - 1) * X(i-1,j), abs(d__2)) + (
			    d__3 = D(i) * X(i,j), abs(d__3)) + (
			    d__4 = DL(i) * X(i+1,j), abs(d__4));
/* L40: */
		}
		WORK(*n) = (d__1 = B(*n,j), abs(d__1)) + (d__2 = 
			DU(*n - 1) * X(*n-1,j), abs(d__2)) + (
			d__3 = D(*n) * X(*n,j), abs(d__3));
	    }
	}

/*        Compute componentwise relative backward error from formula 
  

          max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   

          where abs(Z) is the componentwise absolute value of the matr
ix   
          or vector Z.  If the i-th component of the denominator is le
ss   
          than SAFE2, then SAFE1 is added to the i-th components of th
e   
          numerator and denominator before dividing. */

	s = 0.;
	i__2 = *n;
	for (i = 1; i <= *n; ++i) {
	    if (WORK(i) > safe2) {
/* Computing MAX */
		d__2 = s, d__3 = (d__1 = WORK(*n + i), abs(d__1)) / WORK(i);
		s = max(d__2,d__3);
	    } else {
/* Computing MAX */
		d__2 = s, d__3 = ((d__1 = WORK(*n + i), abs(d__1)) + safe1) / 
			(WORK(i) + safe1);
		s = max(d__2,d__3);
	    }
/* L50: */
	}
	BERR(j) = s;

/*        Test stopping criterion. Continue iterating if   
             1) The residual BERR(J) is larger than machine epsilon, a
nd   
             2) BERR(J) decreased by at least a factor of 2 during the
   
                last iteration, and   
             3) At most ITMAX iterations tried. */

	if (BERR(j) > eps && BERR(j) * 2. <= lstres && count <= 5) {

/*           Update solution and try again. */

	    dgttrs_(trans, n, &c__1, &DLF(1), &DF(1), &DUF(1), &DU2(1), &IPIV(
		    1), &WORK(*n + 1), n, info);
	    daxpy_(n, &c_b19, &WORK(*n + 1), &c__1, &X(1,j), &c__1)
		    ;
	    lstres = BERR(j);
	    ++count;
	    goto L20;
	}

/*        Bound error from formula   

          norm(X - XTRUE) / norm(X) .le. FERR =   
          norm( abs(inv(op(A)))*   
             ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X
)   

          where   
            norm(Z) is the magnitude of the largest component of Z   
            inv(op(A)) is the inverse of op(A)   
            abs(Z) is the componentwise absolute value of the matrix o
r   
               vector Z   
            NZ is the maximum number of nonzeros in any row of A, plus
 1   
            EPS is machine epsilon   

          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B
))   
          is incremented by SAFE1 if the i-th component of   
          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   

          Use DLACON to estimate the infinity-norm of the matrix   
             inv(op(A)) * diag(W),   
          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */

	i__2 = *n;
	for (i = 1; i <= *n; ++i) {
	    if (WORK(i) > safe2) {
		WORK(i) = (d__1 = WORK(*n + i), abs(d__1)) + nz * eps * WORK(
			i);
	    } else {
		WORK(i) = (d__1 = WORK(*n + i), abs(d__1)) + nz * eps * WORK(
			i) + safe1;
	    }
/* L60: */
	}

	kase = 0;
L70:
	dlacon_(n, &WORK((*n << 1) + 1), &WORK(*n + 1), &IWORK(1), &FERR(j), &
		kase);
	if (kase != 0) {
	    if (kase == 1) {

/*              Multiply by diag(W)*inv(op(A)**T). */

		dgttrs_(transt, n, &c__1, &DLF(1), &DF(1), &DUF(1), &DU2(1), &
			IPIV(1), &WORK(*n + 1), n, info);
		i__2 = *n;
		for (i = 1; i <= *n; ++i) {
		    WORK(*n + i) = WORK(i) * WORK(*n + i);
/* L80: */
		}
	    } else {

/*              Multiply by inv(op(A))*diag(W). */

		i__2 = *n;
		for (i = 1; i <= *n; ++i) {
		    WORK(*n + i) = WORK(i) * WORK(*n + i);
/* L90: */
		}
		dgttrs_(transn, n, &c__1, &DLF(1), &DF(1), &DUF(1), &DU2(1), &
			IPIV(1), &WORK(*n + 1), n, info);
	    }
	    goto L70;
	}

/*        Normalize error. */

	lstres = 0.;
	i__2 = *n;
	for (i = 1; i <= *n; ++i) {
/* Computing MAX */
	    d__2 = lstres, d__3 = (d__1 = X(i,j), abs(d__1));
	    lstres = max(d__2,d__3);
/* L100: */
	}
	if (lstres != 0.) {
	    FERR(j) /= lstres;
	}

/* L110: */
    }

    return 0;

/*     End of DGTRFS */

} /* dgtrfs_ */
Example #5
0
/* Subroutine */ int cgtcon_(char *norm, integer *n, complex *dl, complex *d, 
	complex *du, complex *du2, integer *ipiv, real *anorm, real *rcond, 
	complex *work, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CGTCON estimates the reciprocal of the condition number of a complex 
  
    tridiagonal matrix A using the LU factorization as computed by   
    CGTTRF.   

    An estimate is obtained for norm(inv(A)), and the reciprocal of the   
    condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).   

    Arguments   
    =========   

    NORM    (input) CHARACTER*1   
            Specifies whether the 1-norm condition number or the   
            infinity-norm condition number is required:   
            = '1' or 'O':  1-norm;   
            = 'I':         Infinity-norm.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    DL      (input) COMPLEX array, dimension (N-1)   
            The (n-1) multipliers that define the matrix L from the   
            LU factorization of A as computed by CGTTRF.   

    D       (input) COMPLEX array, dimension (N)   
            The n diagonal elements of the upper triangular matrix U from 
  
            the LU factorization of A.   

    DU      (input) COMPLEX array, dimension (N-1)   
            The (n-1) elements of the first superdiagonal of U.   

    DU2     (input) COMPLEX array, dimension (N-2)   
            The (n-2) elements of the second superdiagonal of U.   

    IPIV    (input) INTEGER array, dimension (N)   
            The pivot indices; for 1 <= i <= n, row i of the matrix was   
            interchanged with row IPIV(i).  IPIV(i) will always be either 
  
            i or i+1; IPIV(i) = i indicates a row interchange was not   
            required.   

    ANORM   (input) REAL   
            If NORM = '1' or 'O', the 1-norm of the original matrix A.   
            If NORM = 'I', the infinity-norm of the original matrix A.   

    RCOND   (output) REAL   
            The reciprocal of the condition number of the matrix A,   
            computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an   
            estimate of the 1-norm of inv(A) computed in this routine.   

    WORK    (workspace) COMPLEX array, dimension (2*N)   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    ===================================================================== 
  


       Test the input arguments.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    
    /* System generated locals */
    integer i__1, i__2;
    /* Local variables */
    static integer kase, kase1, i;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int clacon_(integer *, complex *, complex *, real 
	    *, integer *), xerbla_(char *, integer *);
    static real ainvnm;
    static logical onenrm;
    extern /* Subroutine */ int cgttrs_(char *, integer *, integer *, complex 
	    *, complex *, complex *, complex *, integer *, complex *, integer 
	    *, integer *);



#define WORK(I) work[(I)-1]
#define IPIV(I) ipiv[(I)-1]
#define DU2(I) du2[(I)-1]
#define DU(I) du[(I)-1]
#define D(I) d[(I)-1]
#define DL(I) dl[(I)-1]


    *info = 0;
    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
    if (! onenrm && ! lsame_(norm, "I")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*anorm < 0.f) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CGTCON", &i__1);
	return 0;
    }

/*     Quick return if possible */

    *rcond = 0.f;
    if (*n == 0) {
	*rcond = 1.f;
	return 0;
    } else if (*anorm == 0.f) {
	return 0;
    }

/*     Check that D(1:N) is non-zero. */

    i__1 = *n;
    for (i = 1; i <= *n; ++i) {
	i__2 = i;
	if (D(i).r == 0.f && D(i).i == 0.f) {
	    return 0;
	}
/* L10: */
    }

    ainvnm = 0.f;
    if (onenrm) {
	kase1 = 1;
    } else {
	kase1 = 2;
    }
    kase = 0;
L20:
    clacon_(n, &WORK(*n + 1), &WORK(1), &ainvnm, &kase);
    if (kase != 0) {
	if (kase == kase1) {

/*           Multiply by inv(U)*inv(L). */

	    cgttrs_("No transpose", n, &c__1, &DL(1), &D(1), &DU(1), &DU2(1), 
		    &IPIV(1), &WORK(1), n, info);
	} else {

/*           Multiply by inv(L')*inv(U'). */

	    cgttrs_("Conjugate transpose", n, &c__1, &DL(1), &D(1), &DU(1), &
		    DU2(1), &IPIV(1), &WORK(1), n, info);
	}
	goto L20;
    }

/*     Compute the estimate of the reciprocal condition number. */

    if (ainvnm != 0.f) {
	*rcond = 1.f / ainvnm / *anorm;
    }

    return 0;

/*     End of CGTCON */

} /* cgtcon_ */
Example #6
0
/* Subroutine */ int sgetf2_(integer *m, integer *n, real *a, integer *lda, 
	integer *ipiv, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1992   


    Purpose   
    =======   

    SGETF2 computes an LU factorization of a general m-by-n matrix A   
    using partial pivoting with row interchanges.   

    The factorization has the form   
       A = P * L * U   
    where P is a permutation matrix, L is lower triangular with unit   
    diagonal elements (lower trapezoidal if m > n), and U is upper   
    triangular (upper trapezoidal if m < n).   

    This is the right-looking Level 2 BLAS version of the algorithm.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the m by n matrix to be factored.   
            On exit, the factors L and U from the factorization   
            A = P*L*U; the unit diagonal elements of L are not stored.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    IPIV    (output) INTEGER array, dimension (min(M,N))   
            The pivot indices; for 1 <= i <= min(M,N), row i of the   
            matrix was interchanged with row IPIV(i).   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -k, the k-th argument had an illegal value   
            > 0: if INFO = k, U(k,k) is exactly zero. The factorization   
                 has been completed, but the factor U is exactly   
                 singular, and division by zero will occur if it is used 
  
                 to solve a system of equations.   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    static real c_b6 = -1.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    real r__1;
    /* Local variables */
    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, integer *);
    static integer j;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), 
	    sswap_(integer *, real *, integer *, real *, integer *);
    static integer jp;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer isamax_(integer *, real *, integer *);



#define IPIV(I) ipiv[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    *info = 0;
    if (*m < 0) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*m)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SGETF2", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*m == 0 || *n == 0) {
	return 0;
    }

    i__1 = min(*m,*n);
    for (j = 1; j <= min(*m,*n); ++j) {

/*        Find pivot and test for singularity. */

	i__2 = *m - j + 1;
	jp = j - 1 + isamax_(&i__2, &A(j,j), &c__1);
	IPIV(j) = jp;
	if (A(jp,j) != 0.f) {

/*           Apply the interchange to columns 1:N. */

	    if (jp != j) {
		sswap_(n, &A(j,1), lda, &A(jp,1), lda);
	    }

/*           Compute elements J+1:M of J-th column. */

	    if (j < *m) {
		i__2 = *m - j;
		r__1 = 1.f / A(j,j);
		sscal_(&i__2, &r__1, &A(j+1,j), &c__1);
	    }

	} else if (*info == 0) {

	    *info = j;
	}

	if (j < min(*m,*n)) {

/*           Update trailing submatrix. */

	    i__2 = *m - j;
	    i__3 = *n - j;
	    sger_(&i__2, &i__3, &c_b6, &A(j+1,j), &c__1, &A(j,j+1), lda, &A(j+1,j+1), lda);
	}
/* L10: */
    }
    return 0;

/*     End of SGETF2 */

} /* sgetf2_ */
Example #7
0
/* Subroutine */ int dsyrfs_(char *uplo, integer *n, integer *nrhs, 
	doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer *
	ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, 
	doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, 
	integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    DSYRFS improves the computed solution to a system of linear   
    equations when the coefficient matrix is symmetric indefinite, and   
    provides error bounds and backward error estimates for the solution. 
  

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangle of A is stored;   
            = 'L':  Lower triangle of A is stored.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrices B and X.  NRHS >= 0.   

    A       (input) DOUBLE PRECISION array, dimension (LDA,N)   
            The symmetric matrix A.  If UPLO = 'U', the leading N-by-N   
            upper triangular part of A contains the upper triangular part 
  
            of the matrix A, and the strictly lower triangular part of A 
  
            is not referenced.  If UPLO = 'L', the leading N-by-N lower   
            triangular part of A contains the lower triangular part of   
            the matrix A, and the strictly upper triangular part of A is 
  
            not referenced.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    AF      (input) DOUBLE PRECISION array, dimension (LDAF,N)   
            The factored form of the matrix A.  AF contains the block   
            diagonal matrix D and the multipliers used to obtain the   
            factor U or L from the factorization A = U*D*U**T or   
            A = L*D*L**T as computed by DSYTRF.   

    LDAF    (input) INTEGER   
            The leading dimension of the array AF.  LDAF >= max(1,N).   

    IPIV    (input) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D   
            as determined by DSYTRF.   

    B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)   
            The right hand side matrix B.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    X       (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS)   
            On entry, the solution matrix X, as computed by DSYTRS.   
            On exit, the improved solution matrix X.   

    LDX     (input) INTEGER   
            The leading dimension of the array X.  LDX >= max(1,N).   

    FERR    (output) DOUBLE PRECISION array, dimension (NRHS)   
            The estimated forward error bound for each solution vector   
            X(j) (the j-th column of the solution matrix X).   
            If XTRUE is the true solution corresponding to X(j), FERR(j) 
  
            is an estimated upper bound for the magnitude of the largest 
  
            element in (X(j) - XTRUE) divided by the magnitude of the   
            largest element in X(j).  The estimate is as reliable as   
            the estimate for RCOND, and is almost always a slight   
            overestimate of the true error.   

    BERR    (output) DOUBLE PRECISION array, dimension (NRHS)   
            The componentwise relative backward error of each solution   
            vector X(j) (i.e., the smallest relative change in   
            any element of A or B that makes X(j) an exact solution).   

    WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)   

    IWORK   (workspace) INTEGER array, dimension (N)   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    Internal Parameters   
    ===================   

    ITMAX is the maximum number of steps of iterative refinement.   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    static doublereal c_b12 = -1.;
    static doublereal c_b14 = 1.;
    
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
	    x_offset, i__1, i__2, i__3;
    doublereal d__1, d__2, d__3;
    /* Local variables */
    static integer kase;
    static doublereal safe1, safe2;
    static integer i, j, k;
    static doublereal s;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, 
	    doublereal *, integer *), daxpy_(integer *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *);
    static integer count;
    static logical upper;
    extern /* Subroutine */ int dsymv_(char *, integer *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
	    doublereal *, integer *);
    extern doublereal dlamch_(char *);
    extern /* Subroutine */ int dlacon_(integer *, doublereal *, doublereal *,
	     integer *, doublereal *, integer *);
    static doublereal xk;
    static integer nz;
    static doublereal safmin;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static doublereal lstres;
    extern /* Subroutine */ int dsytrs_(char *, integer *, integer *, 
	    doublereal *, integer *, integer *, doublereal *, integer *, 
	    integer *);
    static doublereal eps;



#define IPIV(I) ipiv[(I)-1]
#define FERR(I) ferr[(I)-1]
#define BERR(I) berr[(I)-1]
#define WORK(I) work[(I)-1]
#define IWORK(I) iwork[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define AF(I,J) af[(I)-1 + ((J)-1)* ( *ldaf)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]
#define X(I,J) x[(I)-1 + ((J)-1)* ( *ldx)]

    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldaf < max(1,*n)) {
	*info = -7;
    } else if (*ldb < max(1,*n)) {
	*info = -10;
    } else if (*ldx < max(1,*n)) {
	*info = -12;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DSYRFS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
	i__1 = *nrhs;
	for (j = 1; j <= *nrhs; ++j) {
	    FERR(j) = 0.;
	    BERR(j) = 0.;
/* L10: */
	}
	return 0;
    }

/*     NZ = maximum number of nonzero elements in each row of A, plus 1 */

    nz = *n + 1;
    eps = dlamch_("Epsilon");
    safmin = dlamch_("Safe minimum");
    safe1 = nz * safmin;
    safe2 = safe1 / eps;

/*     Do for each right hand side */

    i__1 = *nrhs;
    for (j = 1; j <= *nrhs; ++j) {

	count = 1;
	lstres = 3.;
L20:

/*        Loop until stopping criterion is satisfied.   

          Compute residual R = B - A * X */

	dcopy_(n, &B(1,j), &c__1, &WORK(*n + 1), &c__1);
	dsymv_(uplo, n, &c_b12, &A(1,1), lda, &X(1,j), &c__1, 
		&c_b14, &WORK(*n + 1), &c__1);

/*        Compute componentwise relative backward error from formula 
  

          max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )   

          where abs(Z) is the componentwise absolute value of the matr
ix   
          or vector Z.  If the i-th component of the denominator is le
ss   
          than SAFE2, then SAFE1 is added to the i-th components of th
e   
          numerator and denominator before dividing. */

	i__2 = *n;
	for (i = 1; i <= *n; ++i) {
	    WORK(i) = (d__1 = B(i,j), abs(d__1));
/* L30: */
	}

/*        Compute abs(A)*abs(X) + abs(B). */

	if (upper) {
	    i__2 = *n;
	    for (k = 1; k <= *n; ++k) {
		s = 0.;
		xk = (d__1 = X(k,j), abs(d__1));
		i__3 = k - 1;
		for (i = 1; i <= k-1; ++i) {
		    WORK(i) += (d__1 = A(i,k), abs(d__1)) * xk;
		    s += (d__1 = A(i,k), abs(d__1)) * (d__2 = X(i,j), abs(d__2));
/* L40: */
		}
		WORK(k) = WORK(k) + (d__1 = A(k,k), abs(d__1)) * 
			xk + s;
/* L50: */
	    }
	} else {
	    i__2 = *n;
	    for (k = 1; k <= *n; ++k) {
		s = 0.;
		xk = (d__1 = X(k,j), abs(d__1));
		WORK(k) += (d__1 = A(k,k), abs(d__1)) * xk;
		i__3 = *n;
		for (i = k + 1; i <= *n; ++i) {
		    WORK(i) += (d__1 = A(i,k), abs(d__1)) * xk;
		    s += (d__1 = A(i,k), abs(d__1)) * (d__2 = X(i,j), abs(d__2));
/* L60: */
		}
		WORK(k) += s;
/* L70: */
	    }
	}
	s = 0.;
	i__2 = *n;
	for (i = 1; i <= *n; ++i) {
	    if (WORK(i) > safe2) {
/* Computing MAX */
		d__2 = s, d__3 = (d__1 = WORK(*n + i), abs(d__1)) / WORK(i);
		s = max(d__2,d__3);
	    } else {
/* Computing MAX */
		d__2 = s, d__3 = ((d__1 = WORK(*n + i), abs(d__1)) + safe1) / 
			(WORK(i) + safe1);
		s = max(d__2,d__3);
	    }
/* L80: */
	}
	BERR(j) = s;

/*        Test stopping criterion. Continue iterating if   
             1) The residual BERR(J) is larger than machine epsilon, a
nd   
             2) BERR(J) decreased by at least a factor of 2 during the
   
                last iteration, and   
             3) At most ITMAX iterations tried. */

	if (BERR(j) > eps && BERR(j) * 2. <= lstres && count <= 5) {

/*           Update solution and try again. */

	    dsytrs_(uplo, n, &c__1, &AF(1,1), ldaf, &IPIV(1), &WORK(*n 
		    + 1), n, info);
	    daxpy_(n, &c_b14, &WORK(*n + 1), &c__1, &X(1,j), &c__1)
		    ;
	    lstres = BERR(j);
	    ++count;
	    goto L20;
	}

/*        Bound error from formula   

          norm(X - XTRUE) / norm(X) .le. FERR =   
          norm( abs(inv(A))*   
             ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)   

          where   
            norm(Z) is the magnitude of the largest component of Z   
            inv(A) is the inverse of A   
            abs(Z) is the componentwise absolute value of the matrix o
r   
               vector Z   
            NZ is the maximum number of nonzeros in any row of A, plus
 1   
            EPS is machine epsilon   

          The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) 
  
          is incremented by SAFE1 if the i-th component of   
          abs(A)*abs(X) + abs(B) is less than SAFE2.   

          Use DLACON to estimate the infinity-norm of the matrix   
             inv(A) * diag(W),   
          where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */

	i__2 = *n;
	for (i = 1; i <= *n; ++i) {
	    if (WORK(i) > safe2) {
		WORK(i) = (d__1 = WORK(*n + i), abs(d__1)) + nz * eps * WORK(
			i);
	    } else {
		WORK(i) = (d__1 = WORK(*n + i), abs(d__1)) + nz * eps * WORK(
			i) + safe1;
	    }
/* L90: */
	}

	kase = 0;
L100:
	dlacon_(n, &WORK((*n << 1) + 1), &WORK(*n + 1), &IWORK(1), &FERR(j), &
		kase);
	if (kase != 0) {
	    if (kase == 1) {

/*              Multiply by diag(W)*inv(A'). */

		dsytrs_(uplo, n, &c__1, &AF(1,1), ldaf, &IPIV(1), &WORK(
			*n + 1), n, info);
		i__2 = *n;
		for (i = 1; i <= *n; ++i) {
		    WORK(*n + i) = WORK(i) * WORK(*n + i);
/* L110: */
		}
	    } else if (kase == 2) {

/*              Multiply by inv(A)*diag(W). */

		i__2 = *n;
		for (i = 1; i <= *n; ++i) {
		    WORK(*n + i) = WORK(i) * WORK(*n + i);
/* L120: */
		}
		dsytrs_(uplo, n, &c__1, &AF(1,1), ldaf, &IPIV(1), &WORK(
			*n + 1), n, info);
	    }
	    goto L100;
	}

/*        Normalize error. */

	lstres = 0.;
	i__2 = *n;
	for (i = 1; i <= *n; ++i) {
/* Computing MAX */
	    d__2 = lstres, d__3 = (d__1 = X(i,j), abs(d__1));
	    lstres = max(d__2,d__3);
/* L130: */
	}
	if (lstres != 0.) {
	    FERR(j) /= lstres;
	}

/* L140: */
    }

    return 0;

/*     End of DSYRFS */

} /* dsyrfs_ */
Example #8
0
/* Subroutine */ int cgbsv_(integer *n, integer *kl, integer *ku, integer *
	nrhs, complex *ab, integer *ldab, integer *ipiv, complex *b, integer *
	ldb, integer *info)
{
/*  -- LAPACK driver routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    CGBSV computes the solution to a complex system of linear equations   
    A * X = B, where A is a band matrix of order N with KL subdiagonals   
    and KU superdiagonals, and X and B are N-by-NRHS matrices.   

    The LU decomposition with partial pivoting and row interchanges is   
    used to factor A as A = L * U, where L is a product of permutation   
    and unit lower triangular matrices with KL subdiagonals, and U is   
    upper triangular with KL+KU superdiagonals.  The factored form of A   
    is then used to solve the system of equations A * X = B.   

    Arguments   
    =========   

    N       (input) INTEGER   
            The number of linear equations, i.e., the order of the   
            matrix A.  N >= 0.   

    KL      (input) INTEGER   
            The number of subdiagonals within the band of A.  KL >= 0.   

    KU      (input) INTEGER   
            The number of superdiagonals within the band of A.  KU >= 0. 
  

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrix B.  NRHS >= 0.   

    AB      (input/output) COMPLEX array, dimension (LDAB,N)   
            On entry, the matrix A in band storage, in rows KL+1 to   
            2*KL+KU+1; rows 1 to KL of the array need not be set.   
            The j-th column of A is stored in the j-th column of the   
            array AB as follows:   
            AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL)   
            On exit, details of the factorization: U is stored as an   
            upper triangular band matrix with KL+KU superdiagonals in   
            rows 1 to KL+KU+1, and the multipliers used during the   
            factorization are stored in rows KL+KU+2 to 2*KL+KU+1.   
            See below for further details.   

    LDAB    (input) INTEGER   
            The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.   

    IPIV    (output) INTEGER array, dimension (N)   
            The pivot indices that define the permutation matrix P;   
            row i of the matrix was interchanged with row IPIV(i).   

    B       (input/output) COMPLEX array, dimension (LDB,NRHS)   
            On entry, the N-by-NRHS right hand side matrix B.   
            On exit, if INFO = 0, the N-by-NRHS solution matrix X.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization 
  
                  has been completed, but the factor U is exactly   
                  singular, and the solution has not been computed.   

    Further Details   
    ===============   

    The band storage scheme is illustrated by the following example, when 
  
    M = N = 6, KL = 2, KU = 1:   

    On entry:                       On exit:   

        *    *    *    +    +    +       *    *    *   u14  u25  u36   
        *    *    +    +    +    +       *    *   u13  u24  u35  u46   
        *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56   
       a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66   
       a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *   
       a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *   

    VISArray elements marked * are not used by the routine; elements marked 
  
    + need not be set on entry, but are required by the routine to store 
  
    elements of U because of fill-in resulting from the row interchanges. 
  

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* System generated locals */
    integer ab_dim1, ab_offset, b_dim1, b_offset, i__1;
    /* Local variables */
    extern /* Subroutine */ int cgbtrf_(integer *, integer *, integer *, 
	    integer *, complex *, integer *, integer *, integer *), xerbla_(
	    char *, integer *), cgbtrs_(char *, integer *, integer *, 
	    integer *, integer *, complex *, integer *, integer *, complex *, 
	    integer *, integer *);


#define IPIV(I) ipiv[(I)-1]

#define AB(I,J) ab[(I)-1 + ((J)-1)* ( *ldab)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]

    *info = 0;
    if (*n < 0) {
	*info = -1;
    } else if (*kl < 0) {
	*info = -2;
    } else if (*ku < 0) {
	*info = -3;
    } else if (*nrhs < 0) {
	*info = -4;
    } else if (*ldab < (*kl << 1) + *ku + 1) {
	*info = -6;
    } else if (*ldb < max(*n,1)) {
	*info = -9;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CGBSV ", &i__1);
	return 0;
    }

/*     Compute the LU factorization of the band matrix A. */

    cgbtrf_(n, n, kl, ku, &AB(1,1), ldab, &IPIV(1), info);
    if (*info == 0) {

/*        Solve the system A*X = B, overwriting B with X. */

	cgbtrs_("No transpose", n, kl, ku, nrhs, &AB(1,1), ldab, &IPIV(
		1), &B(1,1), ldb, info);
    }
    return 0;

/*     End of CGBSV */

} /* cgbsv_ */
Example #9
0
/* Subroutine */ int dsytrs_(char *uplo, integer *n, integer *nrhs, 
	doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *
	ldb, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       March 31, 1993   


    Purpose   
    =======   

    DSYTRS solves a system of linear equations A*X = B with a real   
    symmetric matrix A using the factorization A = U*D*U**T or   
    A = L*D*L**T computed by DSYTRF.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the details of the factorization are stored 
  
            as an upper or lower triangular matrix.   
            = 'U':  Upper triangular, form is A = U*D*U**T;   
            = 'L':  Lower triangular, form is A = L*D*L**T.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrix B.  NRHS >= 0.   

    A       (input) DOUBLE PRECISION array, dimension (LDA,N)   
            The block diagonal matrix D and the multipliers used to   
            obtain the factor U or L as computed by DSYTRF.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    IPIV    (input) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D   
            as determined by DSYTRF.   

    B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)   
            On entry, the right hand side matrix B.   
            On exit, the solution matrix X.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    ===================================================================== 
  


    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static doublereal c_b7 = -1.;
    static integer c__1 = 1;
    static doublereal c_b19 = 1.;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
    doublereal d__1;
    /* Local variables */
    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
	    integer *);
    static doublereal akm1k;
    static integer j, k;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
	    integer *);
    extern logical lsame_(char *, char *);
    static doublereal denom;
    extern /* Subroutine */ int dgemv_(char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *), dswap_(integer *, 
	    doublereal *, integer *, doublereal *, integer *);
    static logical upper;
    static doublereal ak, bk;
    static integer kp;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static doublereal akm1, bkm1;



#define IPIV(I) ipiv[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]

    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DSYTRS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
	return 0;
    }

    if (upper) {

/*        Solve A*X = B, where A = U*D*U'.   

          First solve U*D*X = B, overwriting B with X.   

          K is the main loop index, decreasing from N to 1 in steps of
   
          1 or 2, depending on the size of the diagonal blocks. */

	k = *n;
L10:

/*        If K < 1, exit from loop. */

	if (k < 1) {
	    goto L30;
	}

	if (IPIV(k) > 0) {

/*           1 x 1 diagonal block   

             Interchange rows K and IPIV(K). */

	    kp = IPIV(k);
	    if (kp != k) {
		dswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb);
	    }

/*           Multiply by inv(U(K)), where U(K) is the transformati
on   
             stored in column K of A. */

	    i__1 = k - 1;
	    dger_(&i__1, nrhs, &c_b7, &A(1,k), &c__1, &B(k,1), ldb, &B(1,1), ldb);

/*           Multiply by the inverse of the diagonal block. */

	    d__1 = 1. / A(k,k);
	    dscal_(nrhs, &d__1, &B(k,1), ldb);
	    --k;
	} else {

/*           2 x 2 diagonal block   

             Interchange rows K-1 and -IPIV(K). */

	    kp = -IPIV(k);
	    if (kp != k - 1) {
		dswap_(nrhs, &B(k-1,1), ldb, &B(kp,1), ldb);
	    }

/*           Multiply by inv(U(K)), where U(K) is the transformati
on   
             stored in columns K-1 and K of A. */

	    i__1 = k - 2;
	    dger_(&i__1, nrhs, &c_b7, &A(1,k), &c__1, &B(k,1), ldb, &B(1,1), ldb);
	    i__1 = k - 2;
	    dger_(&i__1, nrhs, &c_b7, &A(1,k-1), &c__1, &B(k-1,1), ldb, &B(1,1), ldb);

/*           Multiply by the inverse of the diagonal block. */

	    akm1k = A(k-1,k);
	    akm1 = A(k-1,k-1) / akm1k;
	    ak = A(k,k) / akm1k;
	    denom = akm1 * ak - 1.;
	    i__1 = *nrhs;
	    for (j = 1; j <= *nrhs; ++j) {
		bkm1 = B(k-1,j) / akm1k;
		bk = B(k,j) / akm1k;
		B(k-1,j) = (ak * bkm1 - bk) / denom;
		B(k,j) = (akm1 * bk - bkm1) / denom;
/* L20: */
	    }
	    k += -2;
	}

	goto L10;
L30:

/*        Next solve U'*X = B, overwriting B with X.   

          K is the main loop index, increasing from 1 to N in steps of
   
          1 or 2, depending on the size of the diagonal blocks. */

	k = 1;
L40:

/*        If K > N, exit from loop. */

	if (k > *n) {
	    goto L50;
	}

	if (IPIV(k) > 0) {

/*           1 x 1 diagonal block   

             Multiply by inv(U'(K)), where U(K) is the transformat
ion   
             stored in column K of A. */

	    i__1 = k - 1;
	    dgemv_("Transpose", &i__1, nrhs, &c_b7, &B(1,1), ldb, &A(1,k), &c__1, &c_b19, &B(k,1), ldb);

/*           Interchange rows K and IPIV(K). */

	    kp = IPIV(k);
	    if (kp != k) {
		dswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb);
	    }
	    ++k;
	} else {

/*           2 x 2 diagonal block   

             Multiply by inv(U'(K+1)), where U(K+1) is the transfo
rmation   
             stored in columns K and K+1 of A. */

	    i__1 = k - 1;
	    dgemv_("Transpose", &i__1, nrhs, &c_b7, &B(1,1), ldb, &A(1,k), &c__1, &c_b19, &B(k,1), ldb);
	    i__1 = k - 1;
	    dgemv_("Transpose", &i__1, nrhs, &c_b7, &B(1,1), ldb, &A(1,k+1), &c__1, &c_b19, &B(k+1,1), 
		    ldb);

/*           Interchange rows K and -IPIV(K). */

	    kp = -IPIV(k);
	    if (kp != k) {
		dswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb);
	    }
	    k += 2;
	}

	goto L40;
L50:

	;
    } else {

/*        Solve A*X = B, where A = L*D*L'.   

          First solve L*D*X = B, overwriting B with X.   

          K is the main loop index, increasing from 1 to N in steps of
   
          1 or 2, depending on the size of the diagonal blocks. */

	k = 1;
L60:

/*        If K > N, exit from loop. */

	if (k > *n) {
	    goto L80;
	}

	if (IPIV(k) > 0) {

/*           1 x 1 diagonal block   

             Interchange rows K and IPIV(K). */

	    kp = IPIV(k);
	    if (kp != k) {
		dswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb);
	    }

/*           Multiply by inv(L(K)), where L(K) is the transformati
on   
             stored in column K of A. */

	    if (k < *n) {
		i__1 = *n - k;
		dger_(&i__1, nrhs, &c_b7, &A(k+1,k), &c__1, &B(k,1), ldb, &B(k+1,1), ldb);
	    }

/*           Multiply by the inverse of the diagonal block. */

	    d__1 = 1. / A(k,k);
	    dscal_(nrhs, &d__1, &B(k,1), ldb);
	    ++k;
	} else {

/*           2 x 2 diagonal block   

             Interchange rows K+1 and -IPIV(K). */

	    kp = -IPIV(k);
	    if (kp != k + 1) {
		dswap_(nrhs, &B(k+1,1), ldb, &B(kp,1), ldb);
	    }

/*           Multiply by inv(L(K)), where L(K) is the transformati
on   
             stored in columns K and K+1 of A. */

	    if (k < *n - 1) {
		i__1 = *n - k - 1;
		dger_(&i__1, nrhs, &c_b7, &A(k+2,k), &c__1, &B(k,1), ldb, &B(k+2,1), ldb);
		i__1 = *n - k - 1;
		dger_(&i__1, nrhs, &c_b7, &A(k+2,k+1), &c__1,
			 &B(k+1,1), ldb, &B(k+2,1), ldb);
	    }

/*           Multiply by the inverse of the diagonal block. */

	    akm1k = A(k+1,k);
	    akm1 = A(k,k) / akm1k;
	    ak = A(k+1,k+1) / akm1k;
	    denom = akm1 * ak - 1.;
	    i__1 = *nrhs;
	    for (j = 1; j <= *nrhs; ++j) {
		bkm1 = B(k,j) / akm1k;
		bk = B(k+1,j) / akm1k;
		B(k,j) = (ak * bkm1 - bk) / denom;
		B(k+1,j) = (akm1 * bk - bkm1) / denom;
/* L70: */
	    }
	    k += 2;
	}

	goto L60;
L80:

/*        Next solve L'*X = B, overwriting B with X.   

          K is the main loop index, decreasing from N to 1 in steps of
   
          1 or 2, depending on the size of the diagonal blocks. */

	k = *n;
L90:

/*        If K < 1, exit from loop. */

	if (k < 1) {
	    goto L100;
	}

	if (IPIV(k) > 0) {

/*           1 x 1 diagonal block   

             Multiply by inv(L'(K)), where L(K) is the transformat
ion   
             stored in column K of A. */

	    if (k < *n) {
		i__1 = *n - k;
		dgemv_("Transpose", &i__1, nrhs, &c_b7, &B(k+1,1), 
			ldb, &A(k+1,k), &c__1, &c_b19, &B(k,1), ldb);
	    }

/*           Interchange rows K and IPIV(K). */

	    kp = IPIV(k);
	    if (kp != k) {
		dswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb);
	    }
	    --k;
	} else {

/*           2 x 2 diagonal block   

             Multiply by inv(L'(K-1)), where L(K-1) is the transfo
rmation   
             stored in columns K-1 and K of A. */

	    if (k < *n) {
		i__1 = *n - k;
		dgemv_("Transpose", &i__1, nrhs, &c_b7, &B(k+1,1), 
			ldb, &A(k+1,k), &c__1, &c_b19, &B(k,1), ldb);
		i__1 = *n - k;
		dgemv_("Transpose", &i__1, nrhs, &c_b7, &B(k+1,1), 
			ldb, &A(k+1,k-1), &c__1, &c_b19, &B(k-1,1), ldb);
	    }

/*           Interchange rows K and -IPIV(K). */

	    kp = -IPIV(k);
	    if (kp != k) {
		dswap_(nrhs, &B(k,1), ldb, &B(kp,1), ldb);
	    }
	    k += -2;
	}

	goto L90;
L100:
	;
    }

    return 0;

/*     End of DSYTRS */

} /* dsytrs_ */
  void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) {
    const size_t NSDim = Bc.getNumVectors();

    Ppattern_ = Ppattern;

    size_t numRows = Ppattern_->getNodeNumRows();
    XXtInv_.resize(numRows);

    RCP<const Import> importer = Ppattern_->getImporter();

    X_ = MultiVectorFactory::Build(Ppattern_->getColMap(), NSDim);
    if (!importer.is_null())
      X_->doImport(Bc, *importer, Xpetra::INSERT);
    else
      *X_ = Bc;

    std::vector<const SC*> Xval(NSDim);
    for (size_t j = 0; j < NSDim; j++)
      Xval[j] = X_->getData(j).get();

    SC zero = Teuchos::ScalarTraits<SC>::zero();
    SC one  = Teuchos::ScalarTraits<SC>::one();

    Teuchos::BLAS  <LO,SC> blas;
    Teuchos::LAPACK<LO,SC> lapack;
    LO lwork = 3*NSDim;
    ArrayRCP<LO> IPIV(NSDim);
    ArrayRCP<SC> WORK(lwork);

    for (size_t i = 0; i < numRows; i++) {
      Teuchos::ArrayView<const LO> indices;
      Ppattern_->getLocalRowView(i, indices);

      size_t nnz = indices.size();

      XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false/*zeroOut*/);
      Teuchos::SerialDenseMatrix<LO,SC>& XXtInv = XXtInv_[i];

      if (NSDim == 1) {
        SC d = zero;
        for (size_t j = 0; j < nnz; j++)
          d += Xval[0][indices[j]] * Xval[0][indices[j]];
        XXtInv(0,0) = one/d;

      } else {
        Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false/*zeroOut*/);
        for (size_t j = 0; j < nnz; j++)
          for (size_t k = 0; k < NSDim; k++)
            locX(k,j) = Xval[k][indices[j]];

        // XXtInv_ = (locX*locX^T)^{-1}
        blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz,
                   one,   locX.values(),   locX.stride(),
                          locX.values(),   locX.stride(),
                  zero, XXtInv.values(), XXtInv.stride());

        LO info;
        // Compute LU factorization using partial pivoting with row exchanges
        lapack.GETRF(NSDim, NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), &info);
        // Use the computed factorization to compute the inverse
        lapack.GETRI(NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), WORK.get(), lwork, &info);
      }
    }
  }
Example #11
0
/* Subroutine */ int ssytrf_(char *uplo, integer *n, real *a, integer *lda, 
	integer *ipiv, real *work, integer *lwork, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SSYTRF computes the factorization of a real symmetric matrix A using 
  
    the Bunch-Kaufman diagonal pivoting method.  The form of the   
    factorization is   

       A = U*D*U**T  or  A = L*D*L**T   

    where U (or L) is a product of permutation and unit upper (lower)   
    triangular matrices, and D is symmetric and block diagonal with   
    1-by-1 and 2-by-2 diagonal blocks.   

    This is the blocked version of the algorithm, calling Level 3 BLAS.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangle of A is stored;   
            = 'L':  Lower triangle of A is stored.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading 
  
            N-by-N upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading N-by-N lower triangular part of A contains the lower 
  
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   

            On exit, the block diagonal matrix D and the multipliers used 
  
            to obtain the factor U or L (see below for further details). 
  

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    IPIV    (output) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D.   
            If IPIV(k) > 0, then rows and columns k and IPIV(k) were   
            interchanged and D(k,k) is a 1-by-1 diagonal block.   
            If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and   
            columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) 
  
            is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =   
            IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were   
            interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.   

    WORK    (workspace/output) REAL array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The length of WORK.  LWORK >=1.  For best performance   
            LWORK >= N*NB, where NB is the block size returned by ILAENV. 
  

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization 
  
                  has been completed, but the block diagonal matrix D is 
  
                  exactly singular, and division by zero will occur if it 
  
                  is used to solve a system of equations.   

    Further Details   
    ===============   

    If UPLO = 'U', then A = U*D*U', where   
       U = P(n)*U(n)* ... *P(k)U(k)* ...,   
    i.e., U is a product of terms P(k)*U(k), where k decreases from n to 
  
    1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1   
    and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as   
    defined by IPIV(k), and U(k) is a unit upper triangular matrix, such 
  
    that if the diagonal block D(k) is of order s (s = 1 or 2), then   

               (   I    v    0   )   k-s   
       U(k) =  (   0    I    0   )   s   
               (   0    0    I   )   n-k   
                  k-s   s   n-k   

    If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).   
    If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k), 
  
    and A(k,k), and v overwrites A(1:k-2,k-1:k).   

    If UPLO = 'L', then A = L*D*L', where   
       L = P(1)*L(1)* ... *P(k)*L(k)* ...,   
    i.e., L is a product of terms P(k)*L(k), where k increases from 1 to 
  
    n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1   
    and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as   
    defined by IPIV(k), and L(k) is a unit lower triangular matrix, such 
  
    that if the diagonal block D(k) is of order s (s = 1 or 2), then   

               (   I    0     0   )  k-1   
       L(k) =  (   0    I     0   )  s   
               (   0    v     I   )  n-k-s+1   
                  k-1   s  n-k-s+1   

    If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).   
    If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),   
    and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static integer c__2 = 2;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    /* Local variables */
    static integer j, k;
    extern logical lsame_(char *, char *);
    static integer nbmin, iinfo;
    static logical upper;
    static integer kb, nb;
    extern /* Subroutine */ int ssytf2_(char *, integer *, real *, integer *, 
	    integer *, integer *), xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int slasyf_(char *, integer *, integer *, integer 
	    *, real *, integer *, integer *, real *, integer *, integer *);
    static integer ldwork, iws;



#define IPIV(I) ipiv[(I)-1]
#define WORK(I) work[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    } else if (*lwork < 1) {
	*info = -7;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SSYTRF", &i__1);
	return 0;
    }

/*     Determine the block size */

    nb = ilaenv_(&c__1, "SSYTRF", uplo, n, &c_n1, &c_n1, &c_n1, 6L, 1L);
    nbmin = 2;
    ldwork = *n;
    if (nb > 1 && nb < *n) {
	iws = ldwork * nb;
	if (*lwork < iws) {
/* Computing MAX */
	    i__1 = *lwork / ldwork;
	    nb = max(i__1,1);
/* Computing MAX */
	    i__1 = 2, i__2 = ilaenv_(&c__2, "SSYTRF", uplo, n, &c_n1, &c_n1, &
		    c_n1, 6L, 1L);
	    nbmin = max(i__1,i__2);
	}
    } else {
	iws = 1;
    }
    if (nb < nbmin) {
	nb = *n;
    }

    if (upper) {

/*        Factorize A as U*D*U' using the upper triangle of A   

          K is the main loop index, decreasing from N to 1 in steps of
   
          KB, where KB is the number of columns factorized by SLASYF; 
  
          KB is either NB or NB-1, or K for the last block */

	k = *n;
L10:

/*        If K < 1, exit from loop */

	if (k < 1) {
	    goto L40;
	}

	if (k > nb) {

/*           Factorize columns k-kb+1:k of A and use blocked code 
to   
             update columns 1:k-kb */

	    slasyf_(uplo, &k, &nb, &kb, &A(1,1), lda, &IPIV(1), &WORK(1),
		     &ldwork, &iinfo);
	} else {

/*           Use unblocked code to factorize columns 1:k of A */

	    ssytf2_(uplo, &k, &A(1,1), lda, &IPIV(1), &iinfo);
	    kb = k;
	}

/*        Set INFO on the first occurrence of a zero pivot */

	if (*info == 0 && iinfo > 0) {
	    *info = iinfo;
	}

/*        Decrease K and return to the start of the main loop */

	k -= kb;
	goto L10;

    } else {

/*        Factorize A as L*D*L' using the lower triangle of A   

          K is the main loop index, increasing from 1 to N in steps of
   
          KB, where KB is the number of columns factorized by SLASYF; 
  
          KB is either NB or NB-1, or N-K+1 for the last block */

	k = 1;
L20:

/*        If K > N, exit from loop */

	if (k > *n) {
	    goto L40;
	}

	if (k <= *n - nb) {

/*           Factorize columns k:k+kb-1 of A and use blocked code 
to   
             update columns k+kb:n */

	    i__1 = *n - k + 1;
	    slasyf_(uplo, &i__1, &nb, &kb, &A(k,k), lda, &IPIV(k), 
		    &WORK(1), &ldwork, &iinfo);
	} else {

/*           Use unblocked code to factorize columns k:n of A */

	    i__1 = *n - k + 1;
	    ssytf2_(uplo, &i__1, &A(k,k), lda, &IPIV(k), &iinfo);
	    kb = *n - k + 1;
	}

/*        Set INFO on the first occurrence of a zero pivot */

	if (*info == 0 && iinfo > 0) {
	    *info = iinfo + k - 1;
	}

/*        Adjust IPIV */

	i__1 = k + kb - 1;
	for (j = k; j <= k+kb-1; ++j) {
	    if (IPIV(j) > 0) {
		IPIV(j) = IPIV(j) + k - 1;
	    } else {
		IPIV(j) = IPIV(j) - k + 1;
	    }
/* L30: */
	}

/*        Increase K and return to the start of the main loop */

	k += kb;
	goto L20;

    }

L40:
    WORK(1) = (real) iws;
    return 0;

/*     End of SSYTRF */

} /* ssytrf_ */
Example #12
0
/***************************************************************************//**
 *  Parallel tile LU factorization - dynamic scheduling - Right looking
 **/
void plasma_pdgetrf_rectil_quark(PLASMA_desc A, int *IPIV)
{
    int k, m, n;
    int tempk, tempm, tempkm, tempkn, tempmm, tempnn;
    int ldak, ldam;

    double zone  = (double)1.0;
    double mzone = (double)-1.0;

    void * fakedep;
    /* How many threads per panel? Probably needs to be adjusted during factorization. */

    CORE_dgetrf_rectil_init();

    for (k = 0; k < min(A.mt, A.nt); k++)
    {
        tempk  = k * A.mb;
        tempm  = A.m - tempk;
        tempkm = k == A.mt-1 ? tempm      : A.mb;
        tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
        ldak = BLKLDD(A, k);

        double *dA = A(k, k);
        int *dB = IPIV(k);
        PLASMA_desc pDesc = plasma_desc_submatrix(A, tempk, k*A.nb, tempm, tempkn);
hclib_pragma_marker("omp", "task depend(inout:dA[0:A.mb*A.nb]) depend(out:dB[0:pDesc.n])", "pragma59_omp_task");
        {
            int info[3];
            info[1] = 0;
            info[2] = 1;

            CORE_dgetrf_rectil( pDesc, dB, info );
        }

        /*
         * Update the trailing submatrix
         */
        fakedep = (void *)(intptr_t)(k+1);
        for (n = k+1; n < A.nt; n++)
        {
            /*
             * Apply row interchange after the panel (work on the panel)
             */
            tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
            PLASMA_desc descA = plasma_desc_submatrix(A, tempk, n*A.nb, tempm, tempnn);
            double *dA = A(k, n);
            double *dB = A(k, k);
            int *dipiv = IPIV(k);
hclib_pragma_marker("omp", "task depend(inout:dA[0:1]) depend(in:dB[0:ldak], dipiv[0:tempkm])", "pragma82_omp_task");
            CORE_dswptr_ontile(descA, 1, tempkm, dipiv, 1, dB, ldak);

            m = k+1;
            if ( m < A.mt ) {
                tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
                ldam = BLKLDD(A, m);

                double *dA = A(m , k);
                double *dB = A(k , n);
                double *dC = A(m , n);
hclib_pragma_marker("omp", "task depend(in:dA[0:A.mb*A.mb], dB[0:A.mb*A.mb]) depend(inout:dC[0:A.mb*A.mb])", "pragma93_omp_task");
                cblas_dgemm(CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_TRANSPOSE)PlasmaNoTrans,
                        tempmm, tempnn, A.nb,
                        mzone, dA, ldam,
                        dB, ldak,
                        zone, dC, ldam);

                for (m = k+2; m < A.mt; m++)
                {
                    tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
                    ldam = BLKLDD(A, m);

                    double *dA = A(m , k);
                    double *dB = A(k , n);
                    double *dC = A(m , n);
                    double *fake1 = A(k+1, n);
                    double *fake2 = (double *)fakedep;
hclib_pragma_marker("omp", "task depend(in:dA[0:A.mb*A.mb], dB[0:A.mb*A.mb], fake2[0:1]) depend(inout:dC[0:A.mb*A.mb], fake1[0:A.mb*A.nb])", "pragma110_omp_task");
                        cblas_dgemm(CblasColMajor, (CBLAS_TRANSPOSE)PlasmaNoTrans, (CBLAS_TRANSPOSE)PlasmaNoTrans,
                                tempmm, tempnn, A.nb,
                                mzone, dA, ldam,
                                dB, ldak,
                                zone, dC, ldam);
                }
            }
        }
    }

    for (k = 0; k < min(A.mt, A.nt); k++)
    {
        int mintmp;
        tempk  = k * A.mb;
        tempm  = A.m - tempk;
        tempkm = k == A.mt-1 ? tempm : A.mb;
        tempkn = k == A.nt-1 ? A.n - k * A.nb : A.nb;
        mintmp = min(tempkm, tempkn);
        ldak = BLKLDD(A, k);

        /*
         * Apply row interchange behind the panel (work on the panel)
         */
        fakedep = (void*)(intptr_t)k;
        for (n = 0; n < k; n++)
        {
            tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
            double *Aij = A(k, n);
            double *prevSwap = A(k-1, n);
            int *dipiv = IPIV(k);
            PLASMA_desc descA = plasma_desc_submatrix(A, tempk, n*A.nb, tempm, tempnn);
hclib_pragma_marker("omp", "task depend(inout:Aij[0:1],fakedep) depend(in:dipiv[0:mintmp], prevSwap[0:A.lm*A.nb])", "pragma142_omp_task");
            CORE_dlaswp_ontile(descA, 1, mintmp, dipiv, 1);
        }
    }
}
Example #13
0
/* Subroutine */ int dlasyf_(char *uplo, integer *n, integer *nb, integer *kb,
	 doublereal *a, integer *lda, integer *ipiv, doublereal *w, integer *
	ldw, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    DLASYF computes a partial factorization of a real symmetric matrix A 
  
    using the Bunch-Kaufman diagonal pivoting method. The partial   
    factorization has the form:   

    A  =  ( I  U12 ) ( A11  0  ) (  I    0   )  if UPLO = 'U', or:   
          ( 0  U22 ) (  0   D  ) ( U12' U22' )   

    A  =  ( L11  0 ) (  D   0  ) ( L11' L21' )  if UPLO = 'L'   
          ( L21  I ) (  0  A22 ) (  0    I   )   

    where the order of D is at most NB. The actual order is returned in   
    the argument KB, and is either NB or NB-1, or N if N <= NB.   

    DLASYF is an auxiliary routine called by DSYTRF. It uses blocked code 
  
    (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or 
  
    A22 (if UPLO = 'L').   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the upper or lower triangular part of the   
            symmetric matrix A is stored:   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    NB      (input) INTEGER   
            The maximum number of columns of the matrix A that should be 
  
            factored.  NB should be at least 2 to allow for 2-by-2 pivot 
  
            blocks.   

    KB      (output) INTEGER   
            The number of columns of A that were actually factored.   
            KB is either NB-1 or NB, or N if N <= NB.   

    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading 
  
            n-by-n upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading n-by-n lower triangular part of A contains the lower 
  
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   
            On exit, A contains details of the partial factorization.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    IPIV    (output) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D.   
            If UPLO = 'U', only the last KB elements of IPIV are set;   
            if UPLO = 'L', only the first KB elements are set.   

            If IPIV(k) > 0, then rows and columns k and IPIV(k) were   
            interchanged and D(k,k) is a 1-by-1 diagonal block.   
            If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and   
            columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) 
  
            is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =   
            IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were   
            interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.   

    W       (workspace) DOUBLE PRECISION array, dimension (LDW,NB)   

    LDW     (input) INTEGER   
            The leading dimension of the array W.  LDW >= max(1,N).   

    INFO    (output) INTEGER   
            = 0: successful exit   
            > 0: if INFO = k, D(k,k) is exactly zero.  The factorization 
  
                 has been completed, but the block diagonal matrix D is   
                 exactly singular.   

    ===================================================================== 
  


    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    static doublereal c_b8 = -1.;
    static doublereal c_b9 = 1.;
    
    /* System generated locals */
    integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5;
    doublereal d__1, d__2, d__3;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static integer imax, jmax, j, k;
    static doublereal t, alpha;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
	    integer *), dgemm_(char *, char *, integer *, integer *, integer *
	    , doublereal *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dgemv_(char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *), dcopy_(integer *, 
	    doublereal *, integer *, doublereal *, integer *), dswap_(integer 
	    *, doublereal *, integer *, doublereal *, integer *);
    static integer kstep;
    static doublereal r1, d11, d21, d22;
    static integer jb, jj, kk, jp, kp;
    static doublereal absakk;
    static integer kw;
    extern integer idamax_(integer *, doublereal *, integer *);
    static doublereal colmax, rowmax;
    static integer kkw;



#define IPIV(I) ipiv[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define W(I,J) w[(I)-1 + ((J)-1)* ( *ldw)]

    *info = 0;

/*     Initialize ALPHA for use in choosing pivot block size. */

    alpha = (sqrt(17.) + 1.) / 8.;

    if (lsame_(uplo, "U")) {

/*        Factorize the trailing columns of A using the upper triangle
   
          of A and working backwards, and compute the matrix W = U12*D
   
          for use in updating A11   

          K is the main loop index, decreasing from N in steps of 1 or
 2   

          KW is the column of W which corresponds to column K of A */

	k = *n;
L10:
	kw = *nb + k - *n;

/*        Exit from loop */

	if (k <= *n - *nb + 1 && *nb < *n || k < 1) {
	    goto L30;
	}

/*        Copy column K of A to column KW of W and update it */

	dcopy_(&k, &A(1,k), &c__1, &W(1,kw), &c__1);
	if (k < *n) {
	    i__1 = *n - k;
	    dgemv_("No transpose", &k, &i__1, &c_b8, &A(1,k+1),
		     lda, &W(k,kw+1), ldw, &c_b9, &W(1,kw), &c__1);
	}

	kstep = 1;

/*        Determine rows and columns to be interchanged and whether   
          a 1-by-1 or 2-by-2 pivot block will be used */

	absakk = (d__1 = W(k,kw), abs(d__1));

/*        IMAX is the row-index of the largest off-diagonal element in
   
          column K, and COLMAX is its absolute value */

	if (k > 1) {
	    i__1 = k - 1;
	    imax = idamax_(&i__1, &W(1,kw), &c__1);
	    colmax = (d__1 = W(imax,kw), abs(d__1));
	} else {
	    colmax = 0.;
	}

	if (max(absakk,colmax) == 0.) {

/*           Column K is zero: set INFO and continue */

	    if (*info == 0) {
		*info = k;
	    }
	    kp = k;
	} else {
	    if (absakk >= alpha * colmax) {

/*              no interchange, use 1-by-1 pivot block */

		kp = k;
	    } else {

/*              Copy column IMAX to column KW-1 of W and updat
e it */

		dcopy_(&imax, &A(1,imax), &c__1, &W(1,kw-1), &c__1);
		i__1 = k - imax;
		dcopy_(&i__1, &A(imax,imax+1), lda, &W(imax+1,kw-1), &c__1);
		if (k < *n) {
		    i__1 = *n - k;
		    dgemv_("No transpose", &k, &i__1, &c_b8, &A(1,k+1), lda, &W(imax,kw+1), 
			    ldw, &c_b9, &W(1,kw-1), &c__1)
			    ;
		}

/*              JMAX is the column-index of the largest off-di
agonal   
                element in row IMAX, and ROWMAX is its absolut
e value */

		i__1 = k - imax;
		jmax = imax + idamax_(&i__1, &W(imax+1,kw-1),
			 &c__1);
		rowmax = (d__1 = W(jmax,kw-1), abs(d__1));
		if (imax > 1) {
		    i__1 = imax - 1;
		    jmax = idamax_(&i__1, &W(1,kw-1), &c__1);
/* Computing MAX */
		    d__2 = rowmax, d__3 = (d__1 = W(jmax,kw-1),
			     abs(d__1));
		    rowmax = max(d__2,d__3);
		}

		if (absakk >= alpha * colmax * (colmax / rowmax)) {

/*                 no interchange, use 1-by-1 pivot block 
*/

		    kp = k;
		} else if ((d__1 = W(imax,kw-1), abs(d__1)) >= 
			alpha * rowmax) {

/*                 interchange rows and columns K and IMAX
, use 1-by-1   
                   pivot block */

		    kp = imax;

/*                 copy column KW-1 of W to column KW */

		    dcopy_(&k, &W(1,kw-1), &c__1, &W(1,kw), &c__1);
		} else {

/*                 interchange rows and columns K-1 and IM
AX, use 2-by-2   
                   pivot block */

		    kp = imax;
		    kstep = 2;
		}
	    }

	    kk = k - kstep + 1;
	    kkw = *nb + kk - *n;

/*           Updated column KP is already stored in column KKW of 
W */

	    if (kp != kk) {

/*              Copy non-updated column KK to column KP */

		A(kp,k) = A(kk,k);
		i__1 = k - 1 - kp;
		dcopy_(&i__1, &A(kp+1,kk), &c__1, &A(kp,kp+1), lda);
		dcopy_(&kp, &A(1,kk), &c__1, &A(1,kp), &
			c__1);

/*              Interchange rows KK and KP in last KK columns 
of A and W */

		i__1 = *n - kk + 1;
		dswap_(&i__1, &A(kk,kk), lda, &A(kp,kk),
			 lda);
		i__1 = *n - kk + 1;
		dswap_(&i__1, &W(kk,kkw), ldw, &W(kp,kkw), ldw);
	    }

	    if (kstep == 1) {

/*              1-by-1 pivot block D(k): column KW of W now ho
lds   

                W(k) = U(k)*D(k)   

                where U(k) is the k-th column of U   

                Store U(k) in column k of A */

		dcopy_(&k, &W(1,kw), &c__1, &A(1,k), &
			c__1);
		r1 = 1. / A(k,k);
		i__1 = k - 1;
		dscal_(&i__1, &r1, &A(1,k), &c__1);
	    } else {

/*              2-by-2 pivot block D(k): columns KW and KW-1 o
f W now   
                hold   

                ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)   

                where U(k) and U(k-1) are the k-th and (k-1)-t
h columns   
                of U */

		if (k > 2) {

/*                 Store U(k) and U(k-1) in columns k and 
k-1 of A */

		    d21 = W(k-1,kw);
		    d11 = W(k,kw) / d21;
		    d22 = W(k-1,kw-1) / d21;
		    t = 1. / (d11 * d22 - 1.);
		    d21 = t / d21;
		    i__1 = k - 2;
		    for (j = 1; j <= k-2; ++j) {
			A(j,k-1) = d21 * (d11 * W(j,kw-1) - W(j,kw));
			A(j,k) = d21 * (d22 * W(j,kw) - 
				W(j,kw-1));
/* L20: */
		    }
		}

/*              Copy D(k) to A */

		A(k-1,k-1) = W(k-1,kw-1);
		A(k-1,k) = W(k-1,kw);
		A(k,k) = W(k,kw);
	    }
	}

/*        Store details of the interchanges in IPIV */

	if (kstep == 1) {
	    IPIV(k) = kp;
	} else {
	    IPIV(k) = -kp;
	    IPIV(k - 1) = -kp;
	}

/*        Decrease K and return to the start of the main loop */

	k -= kstep;
	goto L10;

L30:

/*        Update the upper triangle of A11 (= A(1:k,1:k)) as   

          A11 := A11 - U12*D*U12' = A11 - U12*W'   

          computing blocks of NB columns at a time */

	i__1 = -(*nb);
	for (j = (k - 1) / *nb * *nb + 1; i__1 < 0 ? j >= 1 : j <= 1; j += 
		i__1) {
/* Computing MIN */
	    i__2 = *nb, i__3 = k - j + 1;
	    jb = min(i__2,i__3);

/*           Update the upper triangle of the diagonal block */

	    i__2 = j + jb - 1;
	    for (jj = j; jj <= j+jb-1; ++jj) {
		i__3 = jj - j + 1;
		i__4 = *n - k;
		dgemv_("No transpose", &i__3, &i__4, &c_b8, &A(j,k+1), lda, &W(jj,kw+1), ldw, &c_b9, 
			&A(j,jj), &c__1);
/* L40: */
	    }

/*           Update the rectangular superdiagonal block */

	    i__2 = j - 1;
	    i__3 = *n - k;
	    dgemm_("No transpose", "Transpose", &i__2, &jb, &i__3, &c_b8, &A(1,k+1), lda, &W(j,kw+1), ldw,
		     &c_b9, &A(1,j), lda);
/* L50: */
	}

/*        Put U12 in standard form by partially undoing the interchang
es   
          in columns k+1:n */

	j = k + 1;
L60:
	jj = j;
	jp = IPIV(j);
	if (jp < 0) {
	    jp = -jp;
	    ++j;
	}
	++j;
	if (jp != jj && j <= *n) {
	    i__1 = *n - j + 1;
	    dswap_(&i__1, &A(jp,j), lda, &A(jj,j), lda);
	}
	if (j <= *n) {
	    goto L60;
	}

/*        Set KB to the number of columns factorized */

	*kb = *n - k;

    } else {

/*        Factorize the leading columns of A using the lower triangle 
  
          of A and working forwards, and compute the matrix W = L21*D 
  
          for use in updating A22   

          K is the main loop index, increasing from 1 in steps of 1 or
 2 */

	k = 1;
L70:

/*        Exit from loop */

	if (k >= *nb && *nb < *n || k > *n) {
	    goto L90;
	}

/*        Copy column K of A to column K of W and update it */

	i__1 = *n - k + 1;
	dcopy_(&i__1, &A(k,k), &c__1, &W(k,k), &c__1);
	i__1 = *n - k + 1;
	i__2 = k - 1;
	dgemv_("No transpose", &i__1, &i__2, &c_b8, &A(k,1), lda, &W(k,1), ldw, &c_b9, &W(k,k), &c__1);

	kstep = 1;

/*        Determine rows and columns to be interchanged and whether   
          a 1-by-1 or 2-by-2 pivot block will be used */

	absakk = (d__1 = W(k,k), abs(d__1));

/*        IMAX is the row-index of the largest off-diagonal element in
   
          column K, and COLMAX is its absolute value */

	if (k < *n) {
	    i__1 = *n - k;
	    imax = k + idamax_(&i__1, &W(k+1,k), &c__1);
	    colmax = (d__1 = W(imax,k), abs(d__1));
	} else {
	    colmax = 0.;
	}

	if (max(absakk,colmax) == 0.) {

/*           Column K is zero: set INFO and continue */

	    if (*info == 0) {
		*info = k;
	    }
	    kp = k;
	} else {
	    if (absakk >= alpha * colmax) {

/*              no interchange, use 1-by-1 pivot block */

		kp = k;
	    } else {

/*              Copy column IMAX to column K+1 of W and update
 it */

		i__1 = imax - k;
		dcopy_(&i__1, &A(imax,k), lda, &W(k,k+1), &c__1);
		i__1 = *n - imax + 1;
		dcopy_(&i__1, &A(imax,imax), &c__1, &W(imax,k+1), &c__1);
		i__1 = *n - k + 1;
		i__2 = k - 1;
		dgemv_("No transpose", &i__1, &i__2, &c_b8, &A(k,1), 
			lda, &W(imax,1), ldw, &c_b9, &W(k,k+1), &c__1);

/*              JMAX is the column-index of the largest off-di
agonal   
                element in row IMAX, and ROWMAX is its absolut
e value */

		i__1 = imax - k;
		jmax = k - 1 + idamax_(&i__1, &W(k,k+1), &c__1)
			;
		rowmax = (d__1 = W(jmax,k+1), abs(d__1));
		if (imax < *n) {
		    i__1 = *n - imax;
		    jmax = imax + idamax_(&i__1, &W(imax+1,k+1), &c__1);
/* Computing MAX */
		    d__2 = rowmax, d__3 = (d__1 = W(jmax,k+1), 
			    abs(d__1));
		    rowmax = max(d__2,d__3);
		}

		if (absakk >= alpha * colmax * (colmax / rowmax)) {

/*                 no interchange, use 1-by-1 pivot block 
*/

		    kp = k;
		} else if ((d__1 = W(imax,k+1), abs(d__1)) >= 
			alpha * rowmax) {

/*                 interchange rows and columns K and IMAX
, use 1-by-1   
                   pivot block */

		    kp = imax;

/*                 copy column K+1 of W to column K */

		    i__1 = *n - k + 1;
		    dcopy_(&i__1, &W(k,k+1), &c__1, &W(k,k), &c__1);
		} else {

/*                 interchange rows and columns K+1 and IM
AX, use 2-by-2   
                   pivot block */

		    kp = imax;
		    kstep = 2;
		}
	    }

	    kk = k + kstep - 1;

/*           Updated column KP is already stored in column KK of W
 */

	    if (kp != kk) {

/*              Copy non-updated column KK to column KP */

		A(kp,k) = A(kk,k);
		i__1 = kp - k - 1;
		dcopy_(&i__1, &A(k+1,kk), &c__1, &A(kp,k+1), lda);
		i__1 = *n - kp + 1;
		dcopy_(&i__1, &A(kp,kk), &c__1, &A(kp,kp), &c__1);

/*              Interchange rows KK and KP in first KK columns
 of A and W */

		dswap_(&kk, &A(kk,1), lda, &A(kp,1), lda);
		dswap_(&kk, &W(kk,1), ldw, &W(kp,1), ldw);
	    }

	    if (kstep == 1) {

/*              1-by-1 pivot block D(k): column k of W now hol
ds   

                W(k) = L(k)*D(k)   

                where L(k) is the k-th column of L   

                Store L(k) in column k of A */

		i__1 = *n - k + 1;
		dcopy_(&i__1, &W(k,k), &c__1, &A(k,k), &
			c__1);
		if (k < *n) {
		    r1 = 1. / A(k,k);
		    i__1 = *n - k;
		    dscal_(&i__1, &r1, &A(k+1,k), &c__1);
		}
	    } else {

/*              2-by-2 pivot block D(k): columns k and k+1 of 
W now hold   

                ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)   

                where L(k) and L(k+1) are the k-th and (k+1)-t
h columns   
                of L */

		if (k < *n - 1) {

/*                 Store L(k) and L(k+1) in columns k and 
k+1 of A */

		    d21 = W(k+1,k);
		    d11 = W(k+1,k+1) / d21;
		    d22 = W(k,k) / d21;
		    t = 1. / (d11 * d22 - 1.);
		    d21 = t / d21;
		    i__1 = *n;
		    for (j = k + 2; j <= *n; ++j) {
			A(j,k) = d21 * (d11 * W(j,k) - 
				W(j,k+1));
			A(j,k+1) = d21 * (d22 * W(j,k+1) - W(j,k));
/* L80: */
		    }
		}

/*              Copy D(k) to A */

		A(k,k) = W(k,k);
		A(k+1,k) = W(k+1,k);
		A(k+1,k+1) = W(k+1,k+1);
	    }
	}

/*        Store details of the interchanges in IPIV */

	if (kstep == 1) {
	    IPIV(k) = kp;
	} else {
	    IPIV(k) = -kp;
	    IPIV(k + 1) = -kp;
	}

/*        Increase K and return to the start of the main loop */

	k += kstep;
	goto L70;

L90:

/*        Update the lower triangle of A22 (= A(k:n,k:n)) as   

          A22 := A22 - L21*D*L21' = A22 - L21*W'   

          computing blocks of NB columns at a time */

	i__1 = *n;
	i__2 = *nb;
	for (j = k; *nb < 0 ? j >= *n : j <= *n; j += *nb) {
/* Computing MIN */
	    i__3 = *nb, i__4 = *n - j + 1;
	    jb = min(i__3,i__4);

/*           Update the lower triangle of the diagonal block */

	    i__3 = j + jb - 1;
	    for (jj = j; jj <= j+jb-1; ++jj) {
		i__4 = j + jb - jj;
		i__5 = k - 1;
		dgemv_("No transpose", &i__4, &i__5, &c_b8, &A(jj,1), 
			lda, &W(jj,1), ldw, &c_b9, &A(jj,jj)
			, &c__1);
/* L100: */
	    }

/*           Update the rectangular subdiagonal block */

	    if (j + jb <= *n) {
		i__3 = *n - j - jb + 1;
		i__4 = k - 1;
		dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &c_b8, 
			&A(j+jb,1), lda, &W(j,1), ldw, &c_b9, 
			&A(j+jb,j), lda);
	    }
/* L110: */
	}

/*        Put L21 in standard form by partially undoing the interchang
es   
          in columns 1:k-1 */

	j = k - 1;
L120:
	jj = j;
	jp = IPIV(j);
	if (jp < 0) {
	    jp = -jp;
	    --j;
	}
	--j;
	if (jp != jj && j >= 1) {
	    dswap_(&j, &A(jp,1), lda, &A(jj,1), lda);
	}
	if (j >= 1) {
	    goto L120;
	}

/*        Set KB to the number of columns factorized */

	*kb = k - 1;

    }
    return 0;

/*     End of DLASYF */

} /* dlasyf_ */
Example #14
0
/* Subroutine */ int dgbtrs_(char *trans, integer *n, integer *kl, integer *
	ku, integer *nrhs, doublereal *ab, integer *ldab, integer *ipiv, 
	doublereal *b, integer *ldb, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       March 31, 1993   


    Purpose   
    =======   

    DGBTRS solves a system of linear equations   
       A * X = B  or  A' * X = B   
    with a general band matrix A using the LU factorization computed   
    by DGBTRF.   

    Arguments   
    =========   

    TRANS   (input) CHARACTER*1   
            Specifies the form of the system of equations.   
            = 'N':  A * X = B  (No transpose)   
            = 'T':  A'* X = B  (Transpose)   
            = 'C':  A'* X = B  (Conjugate transpose = Transpose)   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    KL      (input) INTEGER   
            The number of subdiagonals within the band of A.  KL >= 0.   

    KU      (input) INTEGER   
            The number of superdiagonals within the band of A.  KU >= 0. 
  

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrix B.  NRHS >= 0.   

    AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)   
            Details of the LU factorization of the band matrix A, as   
            computed by DGBTRF.  U is stored as an upper triangular band 
  
            matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and   
            the multipliers used during the factorization are stored in   
            rows KL+KU+2 to 2*KL+KU+1.   

    LDAB    (input) INTEGER   
            The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.   

    IPIV    (input) INTEGER array, dimension (N)   
            The pivot indices; for 1 <= i <= N, row i of the matrix was   
            interchanged with row IPIV(i).   

    B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)   
            On entry, the right hand side matrix B.   
            On exit, the solution matrix X.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static doublereal c_b7 = -1.;
    static integer c__1 = 1;
    static doublereal c_b23 = 1.;
    
    /* System generated locals */
    integer ab_dim1, ab_offset, b_dim1, b_offset, i__1, i__2, i__3;
    /* Local variables */
    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
	    integer *);
    static integer i, j, l;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dgemv_(char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *), dswap_(integer *, 
	    doublereal *, integer *, doublereal *, integer *), dtbsv_(char *, 
	    char *, char *, integer *, integer *, doublereal *, integer *, 
	    doublereal *, integer *);
    static logical lnoti;
    static integer kd, lm;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static logical notran;



#define IPIV(I) ipiv[(I)-1]

#define AB(I,J) ab[(I)-1 + ((J)-1)* ( *ldab)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]

    *info = 0;
    notran = lsame_(trans, "N");
    if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, "C")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*kl < 0) {
	*info = -3;
    } else if (*ku < 0) {
	*info = -4;
    } else if (*nrhs < 0) {
	*info = -5;
    } else if (*ldab < (*kl << 1) + *ku + 1) {
	*info = -7;
    } else if (*ldb < max(1,*n)) {
	*info = -10;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DGBTRS", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0 || *nrhs == 0) {
	return 0;
    }

    kd = *ku + *kl + 1;
    lnoti = *kl > 0;

    if (notran) {

/*        Solve  A*X = B.   

          Solve L*X = B, overwriting B with X.   

          L is represented as a product of permutations and unit lower
   
          triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1),
   
          where each transformation L(i) is a rank-one modification of
   
          the identity matrix. */

	if (lnoti) {
	    i__1 = *n - 1;
	    for (j = 1; j <= *n-1; ++j) {
/* Computing MIN */
		i__2 = *kl, i__3 = *n - j;
		lm = min(i__2,i__3);
		l = IPIV(j);
		if (l != j) {
		    dswap_(nrhs, &B(l,1), ldb, &B(j,1), ldb);
		}
		dger_(&lm, nrhs, &c_b7, &AB(kd+1,j), &c__1, &B(j,1), ldb, &B(j+1,1), ldb);
/* L10: */
	    }
	}

	i__1 = *nrhs;
	for (i = 1; i <= *nrhs; ++i) {

/*           Solve U*X = B, overwriting B with X. */

	    i__2 = *kl + *ku;
	    dtbsv_("Upper", "No transpose", "Non-unit", n, &i__2, &AB(1,1), ldab, &B(1,i), &c__1);
/* L20: */
	}

    } else {

/*        Solve A'*X = B. */

	i__1 = *nrhs;
	for (i = 1; i <= *nrhs; ++i) {

/*           Solve U'*X = B, overwriting B with X. */

	    i__2 = *kl + *ku;
	    dtbsv_("Upper", "Transpose", "Non-unit", n, &i__2, &AB(1,1),
		     ldab, &B(1,i), &c__1);
/* L30: */
	}

/*        Solve L'*X = B, overwriting B with X. */

	if (lnoti) {
	    for (j = *n - 1; j >= 1; --j) {
/* Computing MIN */
		i__1 = *kl, i__2 = *n - j;
		lm = min(i__1,i__2);
		dgemv_("Transpose", &lm, nrhs, &c_b7, &B(j+1,1), ldb,
			 &AB(kd+1,j), &c__1, &c_b23, &B(j,1), ldb);
		l = IPIV(j);
		if (l != j) {
		    dswap_(nrhs, &B(l,1), ldb, &B(j,1), ldb);
		}
/* L40: */
	    }
	}
    }
    return 0;

/*     End of DGBTRS */

} /* dgbtrs_ */
Example #15
0
/* Subroutine */ int chetri_(char *uplo, integer *n, complex *a, integer *lda,
	 integer *ipiv, complex *work, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CHETRI computes the inverse of a complex Hermitian indefinite matrix 
  
    A using the factorization A = U*D*U**H or A = L*D*L**H computed by   
    CHETRF.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the details of the factorization are stored 
  
            as an upper or lower triangular matrix.   
            = 'U':  Upper triangular, form is A = U*D*U**H;   
            = 'L':  Lower triangular, form is A = L*D*L**H.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    A       (input/output) COMPLEX array, dimension (LDA,N)   
            On entry, the block diagonal matrix D and the multipliers   
            used to obtain the factor U or L as computed by CHETRF.   

            On exit, if INFO = 0, the (Hermitian) inverse of the original 
  
            matrix.  If UPLO = 'U', the upper triangular part of the   
            inverse is formed and the part of A below the diagonal is not 
  
            referenced; if UPLO = 'L' the lower triangular part of the   
            inverse is formed and the part of A above the diagonal is   
            not referenced.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    IPIV    (input) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D   
            as determined by CHETRF.   

    WORK    (workspace) COMPLEX array, dimension (N)   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   
            > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its 
  
                 inverse could not be computed.   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static complex c_b2 = {0.f,0.f};
    static integer c__1 = 1;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3;
    doublereal d__1;
    complex q__1, q__2;
    /* Builtin functions */
    double c_abs(complex *);
    void r_cnjg(complex *, complex *);
    /* Local variables */
    static complex temp, akkp1;
    static real d;
    static integer j, k;
    static real t;
    extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer 
	    *, complex *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int chemv_(char *, integer *, complex *, complex *
	    , integer *, complex *, integer *, complex *, complex *, integer *
	    ), ccopy_(integer *, complex *, integer *, complex *, 
	    integer *), cswap_(integer *, complex *, integer *, complex *, 
	    integer *);
    static integer kstep;
    static logical upper;
    static real ak;
    static integer kp;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static real akp1;



#define IPIV(I) ipiv[(I)-1]
#define WORK(I) work[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CHETRI", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Check that the diagonal matrix D is nonsingular. */

    if (upper) {

/*        Upper triangular storage: examine D from bottom to top */

	for (*info = *n; *info >= 1; --(*info)) {
	    i__1 = *info + *info * a_dim1;
	    if (IPIV(*info) > 0 && (A(*info,*info).r == 0.f && A(*info,*info).i == 0.f)) {
		return 0;
	    }
/* L10: */
	}
    } else {

/*        Lower triangular storage: examine D from top to bottom. */

	i__1 = *n;
	for (*info = 1; *info <= i__1; ++(*info)) {
	    i__2 = *info + *info * a_dim1;
	    if (IPIV(*info) > 0 && (A(*info,*info).r == 0.f && A(*info,*info).i == 0.f)) {
		return 0;
	    }
/* L20: */
	}
    }
    *info = 0;

    if (upper) {

/*        Compute inv(A) from the factorization A = U*D*U'.   

          K is the main loop index, increasing from 1 to N in steps of
   
          1 or 2, depending on the size of the diagonal blocks. */

	k = 1;
L30:

/*        If K > N, exit from loop. */

	if (k > *n) {
	    goto L50;
	}

	if (IPIV(k) > 0) {

/*           1 x 1 diagonal block   

             Invert the diagonal block. */

	    i__1 = k + k * a_dim1;
	    i__2 = k + k * a_dim1;
	    d__1 = 1.f / A(k,k).r;
	    A(k,k).r = d__1, A(k,k).i = 0.f;

/*           Compute column K of the inverse. */

	    if (k > 1) {
		i__1 = k - 1;
		ccopy_(&i__1, &A(1,k), &c__1, &WORK(1), &c__1);
		i__1 = k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		chemv_(uplo, &i__1, &q__1, &A(1,1), lda, &WORK(1), &c__1,
			 &c_b2, &A(1,k), &c__1);
		i__1 = k + k * a_dim1;
		i__2 = k + k * a_dim1;
		i__3 = k - 1;
		cdotc_(&q__2, &i__3, &WORK(1), &c__1, &A(1,k), &
			c__1);
		d__1 = q__2.r;
		q__1.r = A(k,k).r - d__1, q__1.i = A(k,k).i;
		A(k,k).r = q__1.r, A(k,k).i = q__1.i;
	    }
	    kstep = 1;
	} else {

/*           2 x 2 diagonal block   

             Invert the diagonal block. */

	    t = c_abs(&A(k,k+1));
	    i__1 = k + k * a_dim1;
	    ak = A(k,k).r / t;
	    i__1 = k + 1 + (k + 1) * a_dim1;
	    akp1 = A(k+1,k+1).r / t;
	    i__1 = k + (k + 1) * a_dim1;
	    q__1.r = A(k,k+1).r / t, q__1.i = A(k,k+1).i / t;
	    akkp1.r = q__1.r, akkp1.i = q__1.i;
	    d = t * (ak * akp1 - 1.f);
	    i__1 = k + k * a_dim1;
	    d__1 = akp1 / d;
	    A(k,k).r = d__1, A(k,k).i = 0.f;
	    i__1 = k + 1 + (k + 1) * a_dim1;
	    d__1 = ak / d;
	    A(k+1,k+1).r = d__1, A(k+1,k+1).i = 0.f;
	    i__1 = k + (k + 1) * a_dim1;
	    q__2.r = -(doublereal)akkp1.r, q__2.i = -(doublereal)akkp1.i;
	    q__1.r = q__2.r / d, q__1.i = q__2.i / d;
	    A(k,k+1).r = q__1.r, A(k,k+1).i = q__1.i;

/*           Compute columns K and K+1 of the inverse. */

	    if (k > 1) {
		i__1 = k - 1;
		ccopy_(&i__1, &A(1,k), &c__1, &WORK(1), &c__1);
		i__1 = k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		chemv_(uplo, &i__1, &q__1, &A(1,1), lda, &WORK(1), &c__1,
			 &c_b2, &A(1,k), &c__1);
		i__1 = k + k * a_dim1;
		i__2 = k + k * a_dim1;
		i__3 = k - 1;
		cdotc_(&q__2, &i__3, &WORK(1), &c__1, &A(1,k), &
			c__1);
		d__1 = q__2.r;
		q__1.r = A(k,k).r - d__1, q__1.i = A(k,k).i;
		A(k,k).r = q__1.r, A(k,k).i = q__1.i;
		i__1 = k + (k + 1) * a_dim1;
		i__2 = k + (k + 1) * a_dim1;
		i__3 = k - 1;
		cdotc_(&q__2, &i__3, &A(1,k), &c__1, &A(1,k+1), &c__1);
		q__1.r = A(k,k+1).r - q__2.r, q__1.i = A(k,k+1).i - q__2.i;
		A(k,k+1).r = q__1.r, A(k,k+1).i = q__1.i;
		i__1 = k - 1;
		ccopy_(&i__1, &A(1,k+1), &c__1, &WORK(1), &
			c__1);
		i__1 = k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		chemv_(uplo, &i__1, &q__1, &A(1,1), lda, &WORK(1), &c__1,
			 &c_b2, &A(1,k+1), &c__1);
		i__1 = k + 1 + (k + 1) * a_dim1;
		i__2 = k + 1 + (k + 1) * a_dim1;
		i__3 = k - 1;
		cdotc_(&q__2, &i__3, &WORK(1), &c__1, &A(1,k+1)
			, &c__1);
		d__1 = q__2.r;
		q__1.r = A(k+1,k+1).r - d__1, q__1.i = A(k+1,k+1).i;
		A(k+1,k+1).r = q__1.r, A(k+1,k+1).i = q__1.i;
	    }
	    kstep = 2;
	}

	kp = (i__1 = IPIV(k), abs(i__1));
	if (kp != k) {

/*           Interchange rows and columns K and KP in the leading 
  
             submatrix A(1:k+1,1:k+1) */

	    i__1 = kp - 1;
	    cswap_(&i__1, &A(1,k), &c__1, &A(1,kp), &
		    c__1);
	    i__1 = k - 1;
	    for (j = kp + 1; j <= k-1; ++j) {
		r_cnjg(&q__1, &A(j,k));
		temp.r = q__1.r, temp.i = q__1.i;
		i__2 = j + k * a_dim1;
		r_cnjg(&q__1, &A(kp,j));
		A(j,k).r = q__1.r, A(j,k).i = q__1.i;
		i__2 = kp + j * a_dim1;
		A(kp,j).r = temp.r, A(kp,j).i = temp.i;
/* L40: */
	    }
	    i__1 = kp + k * a_dim1;
	    r_cnjg(&q__1, &A(kp,k));
	    A(kp,k).r = q__1.r, A(kp,k).i = q__1.i;
	    i__1 = k + k * a_dim1;
	    temp.r = A(k,k).r, temp.i = A(k,k).i;
	    i__1 = k + k * a_dim1;
	    i__2 = kp + kp * a_dim1;
	    A(k,k).r = A(kp,kp).r, A(k,k).i = A(kp,kp).i;
	    i__1 = kp + kp * a_dim1;
	    A(kp,kp).r = temp.r, A(kp,kp).i = temp.i;
	    if (kstep == 2) {
		i__1 = k + (k + 1) * a_dim1;
		temp.r = A(k,k+1).r, temp.i = A(k,k+1).i;
		i__1 = k + (k + 1) * a_dim1;
		i__2 = kp + (k + 1) * a_dim1;
		A(k,k+1).r = A(kp,k+1).r, A(k,k+1).i = A(kp,k+1).i;
		i__1 = kp + (k + 1) * a_dim1;
		A(kp,k+1).r = temp.r, A(kp,k+1).i = temp.i;
	    }
	}

	k += kstep;
	goto L30;
L50:

	;
    } else {

/*        Compute inv(A) from the factorization A = L*D*L'.   

          K is the main loop index, increasing from 1 to N in steps of
   
          1 or 2, depending on the size of the diagonal blocks. */

	k = *n;
L60:

/*        If K < 1, exit from loop. */

	if (k < 1) {
	    goto L80;
	}

	if (IPIV(k) > 0) {

/*           1 x 1 diagonal block   

             Invert the diagonal block. */

	    i__1 = k + k * a_dim1;
	    i__2 = k + k * a_dim1;
	    d__1 = 1.f / A(k,k).r;
	    A(k,k).r = d__1, A(k,k).i = 0.f;

/*           Compute column K of the inverse. */

	    if (k < *n) {
		i__1 = *n - k;
		ccopy_(&i__1, &A(k+1,k), &c__1, &WORK(1), &c__1);
		i__1 = *n - k;
		q__1.r = -1.f, q__1.i = 0.f;
		chemv_(uplo, &i__1, &q__1, &A(k+1,k+1), lda, 
			&WORK(1), &c__1, &c_b2, &A(k+1,k), &c__1);
		i__1 = k + k * a_dim1;
		i__2 = k + k * a_dim1;
		i__3 = *n - k;
		cdotc_(&q__2, &i__3, &WORK(1), &c__1, &A(k+1,k), 
			&c__1);
		d__1 = q__2.r;
		q__1.r = A(k,k).r - d__1, q__1.i = A(k,k).i;
		A(k,k).r = q__1.r, A(k,k).i = q__1.i;
	    }
	    kstep = 1;
	} else {

/*           2 x 2 diagonal block   

             Invert the diagonal block. */

	    t = c_abs(&A(k,k-1));
	    i__1 = k - 1 + (k - 1) * a_dim1;
	    ak = A(k-1,k-1).r / t;
	    i__1 = k + k * a_dim1;
	    akp1 = A(k,k).r / t;
	    i__1 = k + (k - 1) * a_dim1;
	    q__1.r = A(k,k-1).r / t, q__1.i = A(k,k-1).i / t;
	    akkp1.r = q__1.r, akkp1.i = q__1.i;
	    d = t * (ak * akp1 - 1.f);
	    i__1 = k - 1 + (k - 1) * a_dim1;
	    d__1 = akp1 / d;
	    A(k-1,k-1).r = d__1, A(k-1,k-1).i = 0.f;
	    i__1 = k + k * a_dim1;
	    d__1 = ak / d;
	    A(k,k).r = d__1, A(k,k).i = 0.f;
	    i__1 = k + (k - 1) * a_dim1;
	    q__2.r = -(doublereal)akkp1.r, q__2.i = -(doublereal)akkp1.i;
	    q__1.r = q__2.r / d, q__1.i = q__2.i / d;
	    A(k,k-1).r = q__1.r, A(k,k-1).i = q__1.i;

/*           Compute columns K-1 and K of the inverse. */

	    if (k < *n) {
		i__1 = *n - k;
		ccopy_(&i__1, &A(k+1,k), &c__1, &WORK(1), &c__1);
		i__1 = *n - k;
		q__1.r = -1.f, q__1.i = 0.f;
		chemv_(uplo, &i__1, &q__1, &A(k+1,k+1), lda, 
			&WORK(1), &c__1, &c_b2, &A(k+1,k), &c__1);
		i__1 = k + k * a_dim1;
		i__2 = k + k * a_dim1;
		i__3 = *n - k;
		cdotc_(&q__2, &i__3, &WORK(1), &c__1, &A(k+1,k), 
			&c__1);
		d__1 = q__2.r;
		q__1.r = A(k,k).r - d__1, q__1.i = A(k,k).i;
		A(k,k).r = q__1.r, A(k,k).i = q__1.i;
		i__1 = k + (k - 1) * a_dim1;
		i__2 = k + (k - 1) * a_dim1;
		i__3 = *n - k;
		cdotc_(&q__2, &i__3, &A(k+1,k), &c__1, &A(k+1,k-1), &c__1);
		q__1.r = A(k,k-1).r - q__2.r, q__1.i = A(k,k-1).i - q__2.i;
		A(k,k-1).r = q__1.r, A(k,k-1).i = q__1.i;
		i__1 = *n - k;
		ccopy_(&i__1, &A(k+1,k-1), &c__1, &WORK(1), &
			c__1);
		i__1 = *n - k;
		q__1.r = -1.f, q__1.i = 0.f;
		chemv_(uplo, &i__1, &q__1, &A(k+1,k+1), lda, 
			&WORK(1), &c__1, &c_b2, &A(k+1,k-1), 
			&c__1);
		i__1 = k - 1 + (k - 1) * a_dim1;
		i__2 = k - 1 + (k - 1) * a_dim1;
		i__3 = *n - k;
		cdotc_(&q__2, &i__3, &WORK(1), &c__1, &A(k+1,k-1), &c__1);
		d__1 = q__2.r;
		q__1.r = A(k-1,k-1).r - d__1, q__1.i = A(k-1,k-1).i;
		A(k-1,k-1).r = q__1.r, A(k-1,k-1).i = q__1.i;
	    }
	    kstep = 2;
	}

	kp = (i__1 = IPIV(k), abs(i__1));
	if (kp != k) {

/*           Interchange rows and columns K and KP in the trailing
   
             submatrix A(k-1:n,k-1:n) */

	    if (kp < *n) {
		i__1 = *n - kp;
		cswap_(&i__1, &A(kp+1,k), &c__1, &A(kp+1,kp), &c__1);
	    }
	    i__1 = kp - 1;
	    for (j = k + 1; j <= kp-1; ++j) {
		r_cnjg(&q__1, &A(j,k));
		temp.r = q__1.r, temp.i = q__1.i;
		i__2 = j + k * a_dim1;
		r_cnjg(&q__1, &A(kp,j));
		A(j,k).r = q__1.r, A(j,k).i = q__1.i;
		i__2 = kp + j * a_dim1;
		A(kp,j).r = temp.r, A(kp,j).i = temp.i;
/* L70: */
	    }
	    i__1 = kp + k * a_dim1;
	    r_cnjg(&q__1, &A(kp,k));
	    A(kp,k).r = q__1.r, A(kp,k).i = q__1.i;
	    i__1 = k + k * a_dim1;
	    temp.r = A(k,k).r, temp.i = A(k,k).i;
	    i__1 = k + k * a_dim1;
	    i__2 = kp + kp * a_dim1;
	    A(k,k).r = A(kp,kp).r, A(k,k).i = A(kp,kp).i;
	    i__1 = kp + kp * a_dim1;
	    A(kp,kp).r = temp.r, A(kp,kp).i = temp.i;
	    if (kstep == 2) {
		i__1 = k + (k - 1) * a_dim1;
		temp.r = A(k,k-1).r, temp.i = A(k,k-1).i;
		i__1 = k + (k - 1) * a_dim1;
		i__2 = kp + (k - 1) * a_dim1;
		A(k,k-1).r = A(kp,k-1).r, A(k,k-1).i = A(kp,k-1).i;
		i__1 = kp + (k - 1) * a_dim1;
		A(kp,k-1).r = temp.r, A(kp,k-1).i = temp.i;
	    }
	}

	k -= kstep;
	goto L60;
L80:
	;
    }

    return 0;

/*     End of CHETRI */

} /* chetri_ */
Example #16
0
/* Subroutine */ int chptri_(char *uplo, integer *n, complex *ap, integer *
	ipiv, complex *work, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CHPTRI computes the inverse of a complex Hermitian indefinite matrix 
  
    A in packed storage using the factorization A = U*D*U**H or   
    A = L*D*L**H computed by CHPTRF.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the details of the factorization are stored 
  
            as an upper or lower triangular matrix.   
            = 'U':  Upper triangular, form is A = U*D*U**H;   
            = 'L':  Lower triangular, form is A = L*D*L**H.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)   
            On entry, the block diagonal matrix D and the multipliers   
            used to obtain the factor U or L as computed by CHPTRF,   
            stored as a packed triangular matrix.   

            On exit, if INFO = 0, the (Hermitian) inverse of the original 
  
            matrix, stored as a packed triangular matrix. The j-th column 
  
            of inv(A) is stored in the array AP as follows:   
            if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j;   
            if UPLO = 'L',   
               AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n.   

    IPIV    (input) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D   
            as determined by CHPTRF.   

    WORK    (workspace) COMPLEX array, dimension (N)   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   
            > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its 
  
                 inverse could not be computed.   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static complex c_b2 = {0.f,0.f};
    static integer c__1 = 1;
    
    /* System generated locals */
    integer i__1, i__2, i__3;
    doublereal d__1;
    complex q__1, q__2;
    /* Builtin functions */
    double c_abs(complex *);
    void r_cnjg(complex *, complex *);
    /* Local variables */
    static complex temp, akkp1;
    static real d;
    static integer j, k;
    static real t;
    extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer 
	    *, complex *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
	    complex *, integer *), chpmv_(char *, integer *, complex *, 
	    complex *, complex *, integer *, complex *, complex *, integer *), cswap_(integer *, complex *, integer *, complex *, 
	    integer *);
    static integer kstep;
    static logical upper;
    static real ak;
    static integer kc, kp, kx;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static integer kcnext, kpc, npp;
    static real akp1;



#define WORK(I) work[(I)-1]
#define IPIV(I) ipiv[(I)-1]
#define AP(I) ap[(I)-1]


    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CHPTRI", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Check that the diagonal matrix D is nonsingular. */

    if (upper) {

/*        Upper triangular storage: examine D from bottom to top */

	kp = *n * (*n + 1) / 2;
	for (*info = *n; *info >= 1; --(*info)) {
	    i__1 = kp;
	    if (IPIV(*info) > 0 && (AP(kp).r == 0.f && AP(kp).i == 0.f)) {
		return 0;
	    }
	    kp -= *info;
/* L10: */
	}
    } else {

/*        Lower triangular storage: examine D from top to bottom. */

	kp = 1;
	i__1 = *n;
	for (*info = 1; *info <= i__1; ++(*info)) {
	    i__2 = kp;
	    if (IPIV(*info) > 0 && (AP(kp).r == 0.f && AP(kp).i == 0.f)) {
		return 0;
	    }
	    kp = kp + *n - *info + 1;
/* L20: */
	}
    }
    *info = 0;

    if (upper) {

/*        Compute inv(A) from the factorization A = U*D*U'.   

          K is the main loop index, increasing from 1 to N in steps of
   
          1 or 2, depending on the size of the diagonal blocks. */

	k = 1;
	kc = 1;
L30:

/*        If K > N, exit from loop. */

	if (k > *n) {
	    goto L50;
	}

	kcnext = kc + k;
	if (IPIV(k) > 0) {

/*           1 x 1 diagonal block   

             Invert the diagonal block. */

	    i__1 = kc + k - 1;
	    i__2 = kc + k - 1;
	    d__1 = 1.f / AP(kc+k-1).r;
	    AP(kc+k-1).r = d__1, AP(kc+k-1).i = 0.f;

/*           Compute column K of the inverse. */

	    if (k > 1) {
		i__1 = k - 1;
		ccopy_(&i__1, &AP(kc), &c__1, &WORK(1), &c__1);
		i__1 = k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		chpmv_(uplo, &i__1, &q__1, &AP(1), &WORK(1), &c__1, &c_b2, &
			AP(kc), &c__1);
		i__1 = kc + k - 1;
		i__2 = kc + k - 1;
		i__3 = k - 1;
		cdotc_(&q__2, &i__3, &WORK(1), &c__1, &AP(kc), &c__1);
		d__1 = q__2.r;
		q__1.r = AP(kc+k-1).r - d__1, q__1.i = AP(kc+k-1).i;
		AP(kc+k-1).r = q__1.r, AP(kc+k-1).i = q__1.i;
	    }
	    kstep = 1;
	} else {

/*           2 x 2 diagonal block   

             Invert the diagonal block. */

	    t = c_abs(&AP(kcnext + k - 1));
	    i__1 = kc + k - 1;
	    ak = AP(kc+k-1).r / t;
	    i__1 = kcnext + k;
	    akp1 = AP(kcnext+k).r / t;
	    i__1 = kcnext + k - 1;
	    q__1.r = AP(kcnext+k-1).r / t, q__1.i = AP(kcnext+k-1).i / t;
	    akkp1.r = q__1.r, akkp1.i = q__1.i;
	    d = t * (ak * akp1 - 1.f);
	    i__1 = kc + k - 1;
	    d__1 = akp1 / d;
	    AP(kc+k-1).r = d__1, AP(kc+k-1).i = 0.f;
	    i__1 = kcnext + k;
	    d__1 = ak / d;
	    AP(kcnext+k).r = d__1, AP(kcnext+k).i = 0.f;
	    i__1 = kcnext + k - 1;
	    q__2.r = -(doublereal)akkp1.r, q__2.i = -(doublereal)akkp1.i;
	    q__1.r = q__2.r / d, q__1.i = q__2.i / d;
	    AP(kcnext+k-1).r = q__1.r, AP(kcnext+k-1).i = q__1.i;

/*           Compute columns K and K+1 of the inverse. */

	    if (k > 1) {
		i__1 = k - 1;
		ccopy_(&i__1, &AP(kc), &c__1, &WORK(1), &c__1);
		i__1 = k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		chpmv_(uplo, &i__1, &q__1, &AP(1), &WORK(1), &c__1, &c_b2, &
			AP(kc), &c__1);
		i__1 = kc + k - 1;
		i__2 = kc + k - 1;
		i__3 = k - 1;
		cdotc_(&q__2, &i__3, &WORK(1), &c__1, &AP(kc), &c__1);
		d__1 = q__2.r;
		q__1.r = AP(kc+k-1).r - d__1, q__1.i = AP(kc+k-1).i;
		AP(kc+k-1).r = q__1.r, AP(kc+k-1).i = q__1.i;
		i__1 = kcnext + k - 1;
		i__2 = kcnext + k - 1;
		i__3 = k - 1;
		cdotc_(&q__2, &i__3, &AP(kc), &c__1, &AP(kcnext), &c__1);
		q__1.r = AP(kcnext+k-1).r - q__2.r, q__1.i = AP(kcnext+k-1).i - q__2.i;
		AP(kcnext+k-1).r = q__1.r, AP(kcnext+k-1).i = q__1.i;
		i__1 = k - 1;
		ccopy_(&i__1, &AP(kcnext), &c__1, &WORK(1), &c__1);
		i__1 = k - 1;
		q__1.r = -1.f, q__1.i = 0.f;
		chpmv_(uplo, &i__1, &q__1, &AP(1), &WORK(1), &c__1, &c_b2, &
			AP(kcnext), &c__1);
		i__1 = kcnext + k;
		i__2 = kcnext + k;
		i__3 = k - 1;
		cdotc_(&q__2, &i__3, &WORK(1), &c__1, &AP(kcnext), &c__1);
		d__1 = q__2.r;
		q__1.r = AP(kcnext+k).r - d__1, q__1.i = AP(kcnext+k).i;
		AP(kcnext+k).r = q__1.r, AP(kcnext+k).i = q__1.i;
	    }
	    kstep = 2;
	    kcnext = kcnext + k + 1;
	}

	kp = (i__1 = IPIV(k), abs(i__1));
	if (kp != k) {

/*           Interchange rows and columns K and KP in the leading 
  
             submatrix A(1:k+1,1:k+1) */

	    kpc = (kp - 1) * kp / 2 + 1;
	    i__1 = kp - 1;
	    cswap_(&i__1, &AP(kc), &c__1, &AP(kpc), &c__1);
	    kx = kpc + kp - 1;
	    i__1 = k - 1;
	    for (j = kp + 1; j <= k-1; ++j) {
		kx = kx + j - 1;
		r_cnjg(&q__1, &AP(kc + j - 1));
		temp.r = q__1.r, temp.i = q__1.i;
		i__2 = kc + j - 1;
		r_cnjg(&q__1, &AP(kx));
		AP(kc+j-1).r = q__1.r, AP(kc+j-1).i = q__1.i;
		i__2 = kx;
		AP(kx).r = temp.r, AP(kx).i = temp.i;
/* L40: */
	    }
	    i__1 = kc + kp - 1;
	    r_cnjg(&q__1, &AP(kc + kp - 1));
	    AP(kc+kp-1).r = q__1.r, AP(kc+kp-1).i = q__1.i;
	    i__1 = kc + k - 1;
	    temp.r = AP(kc+k-1).r, temp.i = AP(kc+k-1).i;
	    i__1 = kc + k - 1;
	    i__2 = kpc + kp - 1;
	    AP(kc+k-1).r = AP(kpc+kp-1).r, AP(kc+k-1).i = AP(kpc+kp-1).i;
	    i__1 = kpc + kp - 1;
	    AP(kpc+kp-1).r = temp.r, AP(kpc+kp-1).i = temp.i;
	    if (kstep == 2) {
		i__1 = kc + k + k - 1;
		temp.r = AP(kc+k+k-1).r, temp.i = AP(kc+k+k-1).i;
		i__1 = kc + k + k - 1;
		i__2 = kc + k + kp - 1;
		AP(kc+k+k-1).r = AP(kc+k+kp-1).r, AP(kc+k+k-1).i = AP(kc+k+kp-1).i;
		i__1 = kc + k + kp - 1;
		AP(kc+k+kp-1).r = temp.r, AP(kc+k+kp-1).i = temp.i;
	    }
	}

	k += kstep;
	kc = kcnext;
	goto L30;
L50:

	;
    } else {

/*        Compute inv(A) from the factorization A = L*D*L'.   

          K is the main loop index, increasing from 1 to N in steps of
   
          1 or 2, depending on the size of the diagonal blocks. */

	npp = *n * (*n + 1) / 2;
	k = *n;
	kc = npp;
L60:

/*        If K < 1, exit from loop. */

	if (k < 1) {
	    goto L80;
	}

	kcnext = kc - (*n - k + 2);
	if (IPIV(k) > 0) {

/*           1 x 1 diagonal block   

             Invert the diagonal block. */

	    i__1 = kc;
	    i__2 = kc;
	    d__1 = 1.f / AP(kc).r;
	    AP(kc).r = d__1, AP(kc).i = 0.f;

/*           Compute column K of the inverse. */

	    if (k < *n) {
		i__1 = *n - k;
		ccopy_(&i__1, &AP(kc + 1), &c__1, &WORK(1), &c__1);
		i__1 = *n - k;
		q__1.r = -1.f, q__1.i = 0.f;
		chpmv_(uplo, &i__1, &q__1, &AP(kc + *n - k + 1), &WORK(1), &
			c__1, &c_b2, &AP(kc + 1), &c__1);
		i__1 = kc;
		i__2 = kc;
		i__3 = *n - k;
		cdotc_(&q__2, &i__3, &WORK(1), &c__1, &AP(kc + 1), &c__1);
		d__1 = q__2.r;
		q__1.r = AP(kc).r - d__1, q__1.i = AP(kc).i;
		AP(kc).r = q__1.r, AP(kc).i = q__1.i;
	    }
	    kstep = 1;
	} else {

/*           2 x 2 diagonal block   

             Invert the diagonal block. */

	    t = c_abs(&AP(kcnext + 1));
	    i__1 = kcnext;
	    ak = AP(kcnext).r / t;
	    i__1 = kc;
	    akp1 = AP(kc).r / t;
	    i__1 = kcnext + 1;
	    q__1.r = AP(kcnext+1).r / t, q__1.i = AP(kcnext+1).i / t;
	    akkp1.r = q__1.r, akkp1.i = q__1.i;
	    d = t * (ak * akp1 - 1.f);
	    i__1 = kcnext;
	    d__1 = akp1 / d;
	    AP(kcnext).r = d__1, AP(kcnext).i = 0.f;
	    i__1 = kc;
	    d__1 = ak / d;
	    AP(kc).r = d__1, AP(kc).i = 0.f;
	    i__1 = kcnext + 1;
	    q__2.r = -(doublereal)akkp1.r, q__2.i = -(doublereal)akkp1.i;
	    q__1.r = q__2.r / d, q__1.i = q__2.i / d;
	    AP(kcnext+1).r = q__1.r, AP(kcnext+1).i = q__1.i;

/*           Compute columns K-1 and K of the inverse. */

	    if (k < *n) {
		i__1 = *n - k;
		ccopy_(&i__1, &AP(kc + 1), &c__1, &WORK(1), &c__1);
		i__1 = *n - k;
		q__1.r = -1.f, q__1.i = 0.f;
		chpmv_(uplo, &i__1, &q__1, &AP(kc + (*n - k + 1)), &WORK(1), &
			c__1, &c_b2, &AP(kc + 1), &c__1);
		i__1 = kc;
		i__2 = kc;
		i__3 = *n - k;
		cdotc_(&q__2, &i__3, &WORK(1), &c__1, &AP(kc + 1), &c__1);
		d__1 = q__2.r;
		q__1.r = AP(kc).r - d__1, q__1.i = AP(kc).i;
		AP(kc).r = q__1.r, AP(kc).i = q__1.i;
		i__1 = kcnext + 1;
		i__2 = kcnext + 1;
		i__3 = *n - k;
		cdotc_(&q__2, &i__3, &AP(kc + 1), &c__1, &AP(kcnext + 2), &
			c__1);
		q__1.r = AP(kcnext+1).r - q__2.r, q__1.i = AP(kcnext+1).i - q__2.i;
		AP(kcnext+1).r = q__1.r, AP(kcnext+1).i = q__1.i;
		i__1 = *n - k;
		ccopy_(&i__1, &AP(kcnext + 2), &c__1, &WORK(1), &c__1);
		i__1 = *n - k;
		q__1.r = -1.f, q__1.i = 0.f;
		chpmv_(uplo, &i__1, &q__1, &AP(kc + (*n - k + 1)), &WORK(1), &
			c__1, &c_b2, &AP(kcnext + 2), &c__1);
		i__1 = kcnext;
		i__2 = kcnext;
		i__3 = *n - k;
		cdotc_(&q__2, &i__3, &WORK(1), &c__1, &AP(kcnext + 2), &c__1);
		d__1 = q__2.r;
		q__1.r = AP(kcnext).r - d__1, q__1.i = AP(kcnext).i;
		AP(kcnext).r = q__1.r, AP(kcnext).i = q__1.i;
	    }
	    kstep = 2;
	    kcnext -= *n - k + 3;
	}

	kp = (i__1 = IPIV(k), abs(i__1));
	if (kp != k) {

/*           Interchange rows and columns K and KP in the trailing
   
             submatrix A(k-1:n,k-1:n) */

	    kpc = npp - (*n - kp + 1) * (*n - kp + 2) / 2 + 1;
	    if (kp < *n) {
		i__1 = *n - kp;
		cswap_(&i__1, &AP(kc + kp - k + 1), &c__1, &AP(kpc + 1), &
			c__1);
	    }
	    kx = kc + kp - k;
	    i__1 = kp - 1;
	    for (j = k + 1; j <= kp-1; ++j) {
		kx = kx + *n - j + 1;
		r_cnjg(&q__1, &AP(kc + j - k));
		temp.r = q__1.r, temp.i = q__1.i;
		i__2 = kc + j - k;
		r_cnjg(&q__1, &AP(kx));
		AP(kc+j-k).r = q__1.r, AP(kc+j-k).i = q__1.i;
		i__2 = kx;
		AP(kx).r = temp.r, AP(kx).i = temp.i;
/* L70: */
	    }
	    i__1 = kc + kp - k;
	    r_cnjg(&q__1, &AP(kc + kp - k));
	    AP(kc+kp-k).r = q__1.r, AP(kc+kp-k).i = q__1.i;
	    i__1 = kc;
	    temp.r = AP(kc).r, temp.i = AP(kc).i;
	    i__1 = kc;
	    i__2 = kpc;
	    AP(kc).r = AP(kpc).r, AP(kc).i = AP(kpc).i;
	    i__1 = kpc;
	    AP(kpc).r = temp.r, AP(kpc).i = temp.i;
	    if (kstep == 2) {
		i__1 = kc - *n + k - 1;
		temp.r = AP(kc-*n+k-1).r, temp.i = AP(kc-*n+k-1).i;
		i__1 = kc - *n + k - 1;
		i__2 = kc - *n + kp - 1;
		AP(kc-*n+k-1).r = AP(kc-*n+kp-1).r, AP(kc-*n+k-1).i = AP(kc-*n+kp-1).i;
		i__1 = kc - *n + kp - 1;
		AP(kc-*n+kp-1).r = temp.r, AP(kc-*n+kp-1).i = temp.i;
	    }
	}

	k -= kstep;
	kc = kcnext;
	goto L60;
L80:
	;
    }

    return 0;

/*     End of CHPTRI */

} /* chptri_ */
Example #17
0
void setup (int N, const Parameter &param, Array<double, 1> &WR, Array<double,2> &ev, Array<double,2> &evInv)
{
   int Nm1 = N;
   int i;
   Array<double, 1> x;
   Array<double, 2> D;
   Array<double, 1> r;
   Array<double, 2> Dsec;
   Array<double, 1> XX;
   Array<double, 1> YY;
   Array<double, 2> A(N,N);
   Array<double, 2> B(N,N);
   Array<int, 1> IPIV(Nm1);
    char BALANC[1];
    char JOBVL[1];
    char JOBVR[1];
    char SENSE[1];
    int LDA;
    int LDVL;
    int LDVR;
    int NRHS;
    int LDB;
    int INFO;

	//resize output arrays
	WR.resize(N);
	ev.resize(N, N);
	evInv.resize(N, N);


// parameters for DGEEVX
    Array<double, 1>  WI(Nm1); // WR(Nm1),
           // The real and imaginary part of the eig.values
	Array<double, 2> VL(N, N);
    Array<double, 2> VR(Nm1,Nm1); //VR(Nm1,Nm1); 
           // The left and rigth eigenvectors
    int ILO, IHI;        // Info on the balanced output matrix
    Array<double, 1> SCALE(Nm1);     // Scaling factors applied for balancing
    double ABNRM;        // 1-Norm of the balanced matrix
    Array<double, 1> RCONDE(Nm1);  
           // the reciprocal cond. numb of the respective eig.val
    Array<double, 1> RCONDV(Nm1); 
           // the reciprocal cond. numb of the respective eig.vec
    int LWORK = (N+1)*(N+7); // Depending on SENSE            
    Array<double, 1> WORK(LWORK);
    Array<int, 1> IWORK(2*(N+1)-2);


// Compute the Chebyshev differensiation matrix and D*D

//   cheb(N, x, D);
   cheb(N, x, D);
   Dsec.resize(D.shape());
   MatrixMatrixMultiply(D, D, Dsec);


// Compute the 1. and 2. derivatives of the transformations

   XYmat(N, param, XX, YY, r);

   // Set up the full timepropagation matrix A
   // dy/dt = - i A y
   Range range(1, N); //Dsec and D have range 0, N+1. 
   					  //We don't want the edge points in A
   A = XX(tensor::i) * Dsec(range, range) + YY(tensor::i) * D(range, range);
   //Transpose A
   for (int i=0; i<A.extent(0); i++)
   {
		for (int j=0; j<i; j++)
		{
			double t = A(i,j);
			A(i,j) = A(j, i);
			A(j,i) = t;
		}
   }


// Add radialpart of non-time dependent potential here
/* 2D radial
	for (int i=0; i<A.extent(0); i++)
   {
		A(i, i) += 0.25 / (r(i)*r(i));
   }
*/

// Compute eigen decomposition

   BALANC[0] ='B';
   JOBVL[0]  ='V';
   JOBVR[0]  ='V';
   SENSE[0]  ='B';
   LDA = Nm1;
   LDVL = Nm1;
   LDVR = Nm1;

   FORTRAN_NAME(dgeevx)(BALANC, JOBVL, JOBVR, SENSE, &Nm1,
            A.data(), &LDA, WR.data(), WI.data(),
            VL.data(), &LDVL, VR.data(), &LDVR, &ILO, &IHI,
            SCALE.data(), &ABNRM,
            RCONDE.data(), RCONDV.data(), WORK.data(), &LWORK,
            IWORK.data(), &INFO);

// Compute the inverse of the eigen vector matrix

    NRHS = Nm1;
	
    evInv = VR ;// VL;
    LDB = LDA;
    B = 0.0;
    for (i=0; i<Nm1; i++) B(i,i) = 1.0; 

    FORTRAN_NAME(dgesv)(&Nm1, &NRHS, evInv.data(), &LDA, IPIV.data(), B.data(), &LDB, &INFO);

	ev = VR(tensor::j, tensor::i);   //Transpose
	evInv = B(tensor::j, tensor::i); //Transpose
	
	//cout << "Eigenvectors (right): " << ev << endl;
	//cout << "Eigenvectors (inv): " << evInv << endl;
    //printf(" Done  inverse, INFO = %d \n", INFO);
} // done
Example #18
0
/* Subroutine */ int ssptri_(char *uplo, integer *n, real *ap, integer *ipiv, 
	real *work, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       March 31, 1993   


    Purpose   
    =======   

    SSPTRI computes the inverse of a real symmetric indefinite matrix   
    A in packed storage using the factorization A = U*D*U**T or   
    A = L*D*L**T computed by SSPTRF.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the details of the factorization are stored 
  
            as an upper or lower triangular matrix.   
            = 'U':  Upper triangular, form is A = U*D*U**T;   
            = 'L':  Lower triangular, form is A = L*D*L**T.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    AP      (input/output) REAL array, dimension (N*(N+1)/2)   
            On entry, the block diagonal matrix D and the multipliers   
            used to obtain the factor U or L as computed by SSPTRF,   
            stored as a packed triangular matrix.   

            On exit, if INFO = 0, the (symmetric) inverse of the original 
  
            matrix, stored as a packed triangular matrix. The j-th column 
  
            of inv(A) is stored in the array AP as follows:   
            if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j;   
            if UPLO = 'L',   
               AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n.   

    IPIV    (input) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D   
            as determined by SSPTRF.   

    WORK    (workspace) REAL array, dimension (N)   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   
            > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its 
  
                 inverse could not be computed.   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    static real c_b11 = -1.f;
    static real c_b13 = 0.f;
    
    /* System generated locals */
    integer i__1;
    real r__1;
    /* Local variables */
    static real temp;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    static real akkp1, d;
    static integer j, k;
    static real t;
    extern logical lsame_(char *, char *);
    static integer kstep;
    static logical upper;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *), sswap_(integer *, real *, integer *, real *, integer *
	    ), sspmv_(char *, integer *, real *, real *, real *, integer *, 
	    real *, real *, integer *);
    static real ak;
    static integer kc, kp, kx;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    static integer kcnext, kpc, npp;
    static real akp1;



#define WORK(I) work[(I)-1]
#define IPIV(I) ipiv[(I)-1]
#define AP(I) ap[(I)-1]


    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SSPTRI", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Check that the diagonal matrix D is nonsingular. */

    if (upper) {

/*        Upper triangular storage: examine D from bottom to top */

	kp = *n * (*n + 1) / 2;
	for (*info = *n; *info >= 1; --(*info)) {
	    if (IPIV(*info) > 0 && AP(kp) == 0.f) {
		return 0;
	    }
	    kp -= *info;
/* L10: */
	}
    } else {

/*        Lower triangular storage: examine D from top to bottom. */

	kp = 1;
	i__1 = *n;
	for (*info = 1; *info <= i__1; ++(*info)) {
	    if (IPIV(*info) > 0 && AP(kp) == 0.f) {
		return 0;
	    }
	    kp = kp + *n - *info + 1;
/* L20: */
	}
    }
    *info = 0;

    if (upper) {

/*        Compute inv(A) from the factorization A = U*D*U'.   

          K is the main loop index, increasing from 1 to N in steps of
   
          1 or 2, depending on the size of the diagonal blocks. */

	k = 1;
	kc = 1;
L30:

/*        If K > N, exit from loop. */

	if (k > *n) {
	    goto L50;
	}

	kcnext = kc + k;
	if (IPIV(k) > 0) {

/*           1 x 1 diagonal block   

             Invert the diagonal block. */

	    AP(kc + k - 1) = 1.f / AP(kc + k - 1);

/*           Compute column K of the inverse. */

	    if (k > 1) {
		i__1 = k - 1;
		scopy_(&i__1, &AP(kc), &c__1, &WORK(1), &c__1);
		i__1 = k - 1;
		sspmv_(uplo, &i__1, &c_b11, &AP(1), &WORK(1), &c__1, &c_b13, &
			AP(kc), &c__1);
		i__1 = k - 1;
		AP(kc + k - 1) -= sdot_(&i__1, &WORK(1), &c__1, &AP(kc), &
			c__1);
	    }
	    kstep = 1;
	} else {

/*           2 x 2 diagonal block   

             Invert the diagonal block. */

	    t = (r__1 = AP(kcnext + k - 1), dabs(r__1));
	    ak = AP(kc + k - 1) / t;
	    akp1 = AP(kcnext + k) / t;
	    akkp1 = AP(kcnext + k - 1) / t;
	    d = t * (ak * akp1 - 1.f);
	    AP(kc + k - 1) = akp1 / d;
	    AP(kcnext + k) = ak / d;
	    AP(kcnext + k - 1) = -(doublereal)akkp1 / d;

/*           Compute columns K and K+1 of the inverse. */

	    if (k > 1) {
		i__1 = k - 1;
		scopy_(&i__1, &AP(kc), &c__1, &WORK(1), &c__1);
		i__1 = k - 1;
		sspmv_(uplo, &i__1, &c_b11, &AP(1), &WORK(1), &c__1, &c_b13, &
			AP(kc), &c__1);
		i__1 = k - 1;
		AP(kc + k - 1) -= sdot_(&i__1, &WORK(1), &c__1, &AP(kc), &
			c__1);
		i__1 = k - 1;
		AP(kcnext + k - 1) -= sdot_(&i__1, &AP(kc), &c__1, &AP(kcnext)
			, &c__1);
		i__1 = k - 1;
		scopy_(&i__1, &AP(kcnext), &c__1, &WORK(1), &c__1);
		i__1 = k - 1;
		sspmv_(uplo, &i__1, &c_b11, &AP(1), &WORK(1), &c__1, &c_b13, &
			AP(kcnext), &c__1);
		i__1 = k - 1;
		AP(kcnext + k) -= sdot_(&i__1, &WORK(1), &c__1, &AP(kcnext), &
			c__1);
	    }
	    kstep = 2;
	    kcnext = kcnext + k + 1;
	}

	kp = (i__1 = IPIV(k), abs(i__1));
	if (kp != k) {

/*           Interchange rows and columns K and KP in the leading 
  
             submatrix A(1:k+1,1:k+1) */

	    kpc = (kp - 1) * kp / 2 + 1;
	    i__1 = kp - 1;
	    sswap_(&i__1, &AP(kc), &c__1, &AP(kpc), &c__1);
	    kx = kpc + kp - 1;
	    i__1 = k - 1;
	    for (j = kp + 1; j <= k-1; ++j) {
		kx = kx + j - 1;
		temp = AP(kc + j - 1);
		AP(kc + j - 1) = AP(kx);
		AP(kx) = temp;
/* L40: */
	    }
	    temp = AP(kc + k - 1);
	    AP(kc + k - 1) = AP(kpc + kp - 1);
	    AP(kpc + kp - 1) = temp;
	    if (kstep == 2) {
		temp = AP(kc + k + k - 1);
		AP(kc + k + k - 1) = AP(kc + k + kp - 1);
		AP(kc + k + kp - 1) = temp;
	    }
	}

	k += kstep;
	kc = kcnext;
	goto L30;
L50:

	;
    } else {

/*        Compute inv(A) from the factorization A = L*D*L'.   

          K is the main loop index, increasing from 1 to N in steps of
   
          1 or 2, depending on the size of the diagonal blocks. */

	npp = *n * (*n + 1) / 2;
	k = *n;
	kc = npp;
L60:

/*        If K < 1, exit from loop. */

	if (k < 1) {
	    goto L80;
	}

	kcnext = kc - (*n - k + 2);
	if (IPIV(k) > 0) {

/*           1 x 1 diagonal block   

             Invert the diagonal block. */

	    AP(kc) = 1.f / AP(kc);

/*           Compute column K of the inverse. */

	    if (k < *n) {
		i__1 = *n - k;
		scopy_(&i__1, &AP(kc + 1), &c__1, &WORK(1), &c__1);
		i__1 = *n - k;
		sspmv_(uplo, &i__1, &c_b11, &AP(kc + *n - k + 1), &WORK(1), &
			c__1, &c_b13, &AP(kc + 1), &c__1);
		i__1 = *n - k;
		AP(kc) -= sdot_(&i__1, &WORK(1), &c__1, &AP(kc + 1), &c__1);
	    }
	    kstep = 1;
	} else {

/*           2 x 2 diagonal block   

             Invert the diagonal block. */

	    t = (r__1 = AP(kcnext + 1), dabs(r__1));
	    ak = AP(kcnext) / t;
	    akp1 = AP(kc) / t;
	    akkp1 = AP(kcnext + 1) / t;
	    d = t * (ak * akp1 - 1.f);
	    AP(kcnext) = akp1 / d;
	    AP(kc) = ak / d;
	    AP(kcnext + 1) = -(doublereal)akkp1 / d;

/*           Compute columns K-1 and K of the inverse. */

	    if (k < *n) {
		i__1 = *n - k;
		scopy_(&i__1, &AP(kc + 1), &c__1, &WORK(1), &c__1);
		i__1 = *n - k;
		sspmv_(uplo, &i__1, &c_b11, &AP(kc + (*n - k + 1)), &WORK(1), 
			&c__1, &c_b13, &AP(kc + 1), &c__1);
		i__1 = *n - k;
		AP(kc) -= sdot_(&i__1, &WORK(1), &c__1, &AP(kc + 1), &c__1);
		i__1 = *n - k;
		AP(kcnext + 1) -= sdot_(&i__1, &AP(kc + 1), &c__1, &AP(kcnext 
			+ 2), &c__1);
		i__1 = *n - k;
		scopy_(&i__1, &AP(kcnext + 2), &c__1, &WORK(1), &c__1);
		i__1 = *n - k;
		sspmv_(uplo, &i__1, &c_b11, &AP(kc + (*n - k + 1)), &WORK(1), 
			&c__1, &c_b13, &AP(kcnext + 2), &c__1);
		i__1 = *n - k;
		AP(kcnext) -= sdot_(&i__1, &WORK(1), &c__1, &AP(kcnext + 2), &
			c__1);
	    }
	    kstep = 2;
	    kcnext -= *n - k + 3;
	}

	kp = (i__1 = IPIV(k), abs(i__1));
	if (kp != k) {

/*           Interchange rows and columns K and KP in the trailing
   
             submatrix A(k-1:n,k-1:n) */

	    kpc = npp - (*n - kp + 1) * (*n - kp + 2) / 2 + 1;
	    if (kp < *n) {
		i__1 = *n - kp;
		sswap_(&i__1, &AP(kc + kp - k + 1), &c__1, &AP(kpc + 1), &
			c__1);
	    }
	    kx = kc + kp - k;
	    i__1 = kp - 1;
	    for (j = k + 1; j <= kp-1; ++j) {
		kx = kx + *n - j + 1;
		temp = AP(kc + j - k);
		AP(kc + j - k) = AP(kx);
		AP(kx) = temp;
/* L70: */
	    }
	    temp = AP(kc);
	    AP(kc) = AP(kpc);
	    AP(kpc) = temp;
	    if (kstep == 2) {
		temp = AP(kc - *n + k - 1);
		AP(kc - *n + k - 1) = AP(kc - *n + kp - 1);
		AP(kc - *n + kp - 1) = temp;
	    }
	}

	k -= kstep;
	kc = kcnext;
	goto L60;
L80:
	;
    }

    return 0;

/*     End of SSPTRI */

} /* ssptri_ */
Example #19
0
/* Subroutine */ int dgbtrf_(integer *m, integer *n, integer *kl, integer *ku,
	 doublereal *ab, integer *ldab, integer *ipiv, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    DGBTRF computes an LU factorization of a real m-by-n band matrix A   
    using partial pivoting with row interchanges.   

    This is the blocked version of the algorithm, calling Level 3 BLAS.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A.  N >= 0.   

    KL      (input) INTEGER   
            The number of subdiagonals within the band of A.  KL >= 0.   

    KU      (input) INTEGER   
            The number of superdiagonals within the band of A.  KU >= 0. 
  

    AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)   
            On entry, the matrix A in band storage, in rows KL+1 to   
            2*KL+KU+1; rows 1 to KL of the array need not be set.   
            The j-th column of A is stored in the j-th column of the   
            array AB as follows:   
            AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)   

            On exit, details of the factorization: U is stored as an   
            upper triangular band matrix with KL+KU superdiagonals in   
            rows 1 to KL+KU+1, and the multipliers used during the   
            factorization are stored in rows KL+KU+2 to 2*KL+KU+1.   
            See below for further details.   

    LDAB    (input) INTEGER   
            The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.   

    IPIV    (output) INTEGER array, dimension (min(M,N))   
            The pivot indices; for 1 <= i <= min(M,N), row i of the   
            matrix was interchanged with row IPIV(i).   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   
            > 0: if INFO = +i, U(i,i) is exactly zero. The factorization 
  
                 has been completed, but the factor U is exactly   
                 singular, and division by zero will occur if it is used 
  
                 to solve a system of equations.   

    Further Details   
    ===============   

    The band storage scheme is illustrated by the following example, when 
  
    M = N = 6, KL = 2, KU = 1:   

    On entry:                       On exit:   

        *    *    *    +    +    +       *    *    *   u14  u25  u36   
        *    *    +    +    +    +       *    *   u13  u24  u35  u46   
        *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56   
       a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66   
       a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *   
       a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *   

    VISArray elements marked * are not used by the routine; elements marked 
  
    + need not be set on entry, but are required by the routine to store 
  
    elements of U because of fill-in resulting from the row interchanges. 
  

    ===================================================================== 
  


       KV is the number of superdiagonals in the factor U, allowing for   
       fill-in   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c__65 = 65;
    static doublereal c_b18 = -1.;
    static doublereal c_b31 = 1.;
    
    /* System generated locals */
    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6;
    doublereal d__1;
    /* Local variables */
    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *, doublereal *, 
	    integer *);
    static doublereal temp;
    static integer i, j;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
	    integer *), dgemm_(char *, char *, integer *, integer *, integer *
	    , doublereal *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *), dcopy_(
	    integer *, doublereal *, integer *, doublereal *, integer *), 
	    dswap_(integer *, doublereal *, integer *, doublereal *, integer *
	    );
    static doublereal work13[4160]	/* was [65][64] */, work31[4160]	
	    /* was [65][64] */;
    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, 
	    integer *, integer *, doublereal *, doublereal *, integer *, 
	    doublereal *, integer *);
    static integer i2, i3, j2, j3, k2;
    extern /* Subroutine */ int dgbtf2_(integer *, integer *, integer *, 
	    integer *, doublereal *, integer *, integer *, integer *);
    static integer jb, nb, ii, jj, jm, ip, jp, km, ju, kv;
    extern integer idamax_(integer *, doublereal *, integer *);
    static integer nw;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *, 
	    integer *, integer *, integer *, integer *);



#define WORK13(I) work13[(I)]
#define WAS(I) was[(I)]
#define IPIV(I) ipiv[(I)-1]

#define AB(I,J) ab[(I)-1 + ((J)-1)* ( *ldab)]

    kv = *ku + *kl;

/*     Test the input parameters. */

    *info = 0;
    if (*m < 0) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*kl < 0) {
	*info = -3;
    } else if (*ku < 0) {
	*info = -4;
    } else if (*ldab < *kl + kv + 1) {
	*info = -6;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DGBTRF", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*m == 0 || *n == 0) {
	return 0;
    }

/*     Determine the block size for this environment */

    nb = ilaenv_(&c__1, "DGBTRF", " ", m, n, kl, ku, 6L, 1L);

/*     The block size must not exceed the limit set by the size of the   
       local arrays WORK13 and WORK31. */

    nb = min(nb,64);

    if (nb <= 1 || nb > *kl) {

/*        Use unblocked code */

	dgbtf2_(m, n, kl, ku, &AB(1,1), ldab, &IPIV(1), info);
    } else {

/*        Use blocked code   

          Zero the superdiagonal elements of the work array WORK13 */

	i__1 = nb;
	for (j = 1; j <= nb; ++j) {
	    i__2 = j - 1;
	    for (i = 1; i <= j-1; ++i) {
		WORK13(i + j * 65 - 66) = 0.;
/* L10: */
	    }
/* L20: */
	}

/*        Zero the subdiagonal elements of the work array WORK31 */

	i__1 = nb;
	for (j = 1; j <= nb; ++j) {
	    i__2 = nb;
	    for (i = j + 1; i <= nb; ++i) {
		work31[i + j * 65 - 66] = 0.;
/* L30: */
	    }
/* L40: */
	}

/*        Gaussian elimination with partial pivoting   

          Set fill-in elements in columns KU+2 to KV to zero */

	i__1 = min(kv,*n);
	for (j = *ku + 2; j <= min(kv,*n); ++j) {
	    i__2 = *kl;
	    for (i = kv - j + 2; i <= *kl; ++i) {
		AB(i,j) = 0.;
/* L50: */
	    }
/* L60: */
	}

/*        JU is the index of the last column affected by the current 
  
          stage of the factorization */

	ju = 1;

	i__1 = min(*m,*n);
	i__2 = nb;
	for (j = 1; nb < 0 ? j >= min(*m,*n) : j <= min(*m,*n); j += nb) {
/* Computing MIN */
	    i__3 = nb, i__4 = min(*m,*n) - j + 1;
	    jb = min(i__3,i__4);

/*           The active part of the matrix is partitioned   

                A11   A12   A13   
                A21   A22   A23   
                A31   A32   A33   

             Here A11, A21 and A31 denote the current block of JB 
columns   
             which is about to be factorized. The number of rows i
n the   
             partitioning are JB, I2, I3 respectively, and the num
bers   
             of columns are JB, J2, J3. The superdiagonal elements
 of A13   
             and the subdiagonal elements of A31 lie outside the b
and.   

   Computing MIN */
	    i__3 = *kl - jb, i__4 = *m - j - jb + 1;
	    i2 = min(i__3,i__4);
/* Computing MIN */
	    i__3 = jb, i__4 = *m - j - *kl + 1;
	    i3 = min(i__3,i__4);

/*           J2 and J3 are computed after JU has been updated.   

             Factorize the current block of JB columns */

	    i__3 = j + jb - 1;
	    for (jj = j; jj <= j+jb-1; ++jj) {

/*              Set fill-in elements in column JJ+KV to zero 
*/

		if (jj + kv <= *n) {
		    i__4 = *kl;
		    for (i = 1; i <= *kl; ++i) {
			AB(i,jj+kv) = 0.;
/* L70: */
		    }
		}

/*              Find pivot and test for singularity. KM is the
 number of   
                subdiagonal elements in the current column.   

   Computing MIN */
		i__4 = *kl, i__5 = *m - jj;
		km = min(i__4,i__5);
		i__4 = km + 1;
		jp = idamax_(&i__4, &AB(kv+1,jj), &c__1);
		IPIV(jj) = jp + jj - j;
		if (AB(kv+jp,jj) != 0.) {
/* Computing MAX   
   Computing MIN */
		    i__6 = jj + *ku + jp - 1;
		    i__4 = ju, i__5 = min(i__6,*n);
		    ju = max(i__4,i__5);
		    if (jp != 1) {

/*                    Apply interchange to columns J t
o J+JB-1 */

			if (jp + jj - 1 < j + *kl) {

			    i__4 = *ldab - 1;
			    i__5 = *ldab - 1;
			    dswap_(&jb, &AB(kv+1+jj-j,j), &
				    i__4, &AB(kv+jp+jj-j,j),
				     &i__5);
			} else {

/*                       The interchange affects c
olumns J to JJ-1 of A31   
                         which are stored in the w
ork array WORK31 */

			    i__4 = jj - j;
			    i__5 = *ldab - 1;
			    dswap_(&i__4, &AB(kv+1+jj-j,j), 
				    &i__5, &work31[jp + jj - j - *kl - 1], &
				    c__65);
			    i__4 = j + jb - jj;
			    i__5 = *ldab - 1;
			    i__6 = *ldab - 1;
			    dswap_(&i__4, &AB(kv+1,jj), &i__5, &
				    AB(kv+jp,jj), &i__6);
			}
		    }

/*                 Compute multipliers */

		    d__1 = 1. / AB(kv+1,jj);
		    dscal_(&km, &d__1, &AB(kv+2,jj), &c__1);

/*                 Update trailing submatrix within the ba
nd and within   
                   the current block. JM is the index of t
he last column   
                   which needs to be updated.   

   Computing MIN */
		    i__4 = ju, i__5 = j + jb - 1;
		    jm = min(i__4,i__5);
		    if (jm > jj) {
			i__4 = jm - jj;
			i__5 = *ldab - 1;
			i__6 = *ldab - 1;
			dger_(&km, &i__4, &c_b18, &AB(kv+2,jj), 
				&c__1, &AB(kv,jj+1), &i__5, &
				AB(kv+1,jj+1), &i__6);
		    }
		} else {

/*                 If pivot is zero, set INFO to the index
 of the pivot   
                   unless a zero pivot has already been fo
und. */

		    if (*info == 0) {
			*info = jj;
		    }
		}

/*              Copy current column of A31 into the work array
 WORK31   

   Computing MIN */
		i__4 = jj - j + 1;
		nw = min(i__4,i3);
		if (nw > 0) {
		    dcopy_(&nw, &AB(kv+*kl+1-jj+j,jj), &
			    c__1, &work31[(jj - j + 1) * 65 - 65], &c__1);
		}
/* L80: */
	    }
	    if (j + jb <= *n) {

/*              Apply the row interchanges to the other blocks
.   

   Computing MIN */
		i__3 = ju - j + 1;
		j2 = min(i__3,kv) - jb;
/* Computing MAX */
		i__3 = 0, i__4 = ju - j - kv + 1;
		j3 = max(i__3,i__4);

/*              Use DLASWP to apply the row interchanges to A1
2, A22, and   
                A32. */

		i__3 = *ldab - 1;
		dlaswp_(&j2, &AB(kv+1-jb,j+jb), &i__3, &
			c__1, &jb, &IPIV(j), &c__1);

/*              Adjust the pivot indices. */

		i__3 = j + jb - 1;
		for (i = j; i <= j+jb-1; ++i) {
		    IPIV(i) = IPIV(i) + j - 1;
/* L90: */
		}

/*              Apply the row interchanges to A13, A23, and A3
3   
                columnwise. */

		k2 = j - 1 + jb + j2;
		i__3 = j3;
		for (i = 1; i <= j3; ++i) {
		    jj = k2 + i;
		    i__4 = j + jb - 1;
		    for (ii = j + i - 1; ii <= j+jb-1; ++ii) {
			ip = IPIV(ii);
			if (ip != ii) {
			    temp = AB(kv+1+ii-jj,jj);
			    AB(kv+1+ii-jj,jj) = AB(kv+1+ip-jj,jj);
			    AB(kv+1+ip-jj,jj) = temp;
			}
/* L100: */
		    }
/* L110: */
		}

/*              Update the relevant part of the trailing subma
trix */

		if (j2 > 0) {

/*                 Update A12 */

		    i__3 = *ldab - 1;
		    i__4 = *ldab - 1;
		    dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j2, 
			    &c_b31, &AB(kv+1,j), &i__3, &AB(kv+1-jb,j+jb), &i__4);

		    if (i2 > 0) {

/*                    Update A22 */

			i__3 = *ldab - 1;
			i__4 = *ldab - 1;
			i__5 = *ldab - 1;
			dgemm_("No transpose", "No transpose", &i2, &j2, &jb, 
				&c_b18, &AB(kv+1+jb,j), &i__3,
				 &AB(kv+1-jb,j+jb), &i__4,
				 &c_b31, &AB(kv+1,j+jb), &
				i__5);
		    }

		    if (i3 > 0) {

/*                    Update A32 */

			i__3 = *ldab - 1;
			i__4 = *ldab - 1;
			dgemm_("No transpose", "No transpose", &i3, &j2, &jb, 
				&c_b18, work31, &c__65, &AB(kv+1-jb,j+jb), &i__3, &c_b31, &AB(kv+*kl+1-jb,j+jb), &i__4);
		    }
		}

		if (j3 > 0) {

/*                 Copy the lower triangle of A13 into the
 work array   
                   WORK13 */

		    i__3 = j3;
		    for (jj = 1; jj <= j3; ++jj) {
			i__4 = jb;
			for (ii = jj; ii <= jb; ++ii) {
			    WORK13(ii + jj * 65 - 66) = AB(ii-jj+1,jj+j+kv-1);
/* L120: */
			}
/* L130: */
		    }

/*                 Update A13 in the work array */

		    i__3 = *ldab - 1;
		    dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &j3, 
			    &c_b31, &AB(kv+1,j), &i__3, work13, 
			    &c__65);

		    if (i2 > 0) {

/*                    Update A23 */

			i__3 = *ldab - 1;
			i__4 = *ldab - 1;
			dgemm_("No transpose", "No transpose", &i2, &j3, &jb, 
				&c_b18, &AB(kv+1+jb,j), &i__3,
				 work13, &c__65, &c_b31, &AB(jb+1,j+kv), &i__4);
		    }

		    if (i3 > 0) {

/*                    Update A33 */

			i__3 = *ldab - 1;
			dgemm_("No transpose", "No transpose", &i3, &j3, &jb, 
				&c_b18, work31, &c__65, work13, &c__65, &
				c_b31, &AB(*kl+1,j+kv), &
				i__3);
		    }

/*                 Copy the lower triangle of A13 back int
o place */

		    i__3 = j3;
		    for (jj = 1; jj <= j3; ++jj) {
			i__4 = jb;
			for (ii = jj; ii <= jb; ++ii) {
			    AB(ii-jj+1,jj+j+kv-1) = 
				    WORK13(ii + jj * 65 - 66);
/* L140: */
			}
/* L150: */
		    }
		}
	    } else {

/*              Adjust the pivot indices. */

		i__3 = j + jb - 1;
		for (i = j; i <= j+jb-1; ++i) {
		    IPIV(i) = IPIV(i) + j - 1;
/* L160: */
		}
	    }

/*           Partially undo the interchanges in the current block 
to   
             restore the upper triangular form of A31 and copy the
 upper   
             triangle of A31 back into place */

	    i__3 = j;
	    for (jj = j + jb - 1; jj >= j; --jj) {
		jp = IPIV(jj) - jj + 1;
		if (jp != 1) {

/*                 Apply interchange to columns J to JJ-1 
*/

		    if (jp + jj - 1 < j + *kl) {

/*                    The interchange does not affect 
A31 */

			i__4 = jj - j;
			i__5 = *ldab - 1;
			i__6 = *ldab - 1;
			dswap_(&i__4, &AB(kv+1+jj-j,j), &
				i__5, &AB(kv+jp+jj-j,j), &
				i__6);
		    } else {

/*                    The interchange does affect A31 
*/

			i__4 = jj - j;
			i__5 = *ldab - 1;
			dswap_(&i__4, &AB(kv+1+jj-j,j), &
				i__5, &work31[jp + jj - j - *kl - 1], &c__65);
		    }
		}

/*              Copy the current column of A31 back into place
   

   Computing MIN */
		i__4 = i3, i__5 = jj - j + 1;
		nw = min(i__4,i__5);
		if (nw > 0) {
		    dcopy_(&nw, &work31[(jj - j + 1) * 65 - 65], &c__1, &AB(kv+*kl+1-jj+j,jj), &c__1);
		}
/* L170: */
	    }
/* L180: */
	}
    }

    return 0;

/*     End of DGBTRF */

} /* dgbtrf_ */
Example #20
0
/* Subroutine */ int dsysv_(char *uplo, integer *n, integer *nrhs, doublereal 
	*a, integer *lda, integer *ipiv, doublereal *b, integer *ldb, 
	doublereal *work, integer *lwork, integer *info)
{
/*  -- LAPACK driver routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    DSYSV computes the solution to a real system of linear equations   
       A * X = B,   
    where A is an N-by-N symmetric matrix and X and B are N-by-NRHS   
    matrices.   

    The diagonal pivoting method is used to factor A as   
       A = U * D * U**T,  if UPLO = 'U', or   
       A = L * D * L**T,  if UPLO = 'L',   
    where U (or L) is a product of permutation and unit upper (lower)   
    triangular matrices, and D is symmetric and block diagonal with   
    1-by-1 and 2-by-2 diagonal blocks.  The factored form of A is then   
    used to solve the system of equations A * X = B.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangle of A is stored;   
            = 'L':  Lower triangle of A is stored.   

    N       (input) INTEGER   
            The number of linear equations, i.e., the order of the   
            matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrix B.  NRHS >= 0.   

    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading 
  
            N-by-N upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading N-by-N lower triangular part of A contains the lower 
  
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   

            On exit, if INFO = 0, the block diagonal matrix D and the   
            multipliers used to obtain the factor U or L from the   
            factorization A = U*D*U**T or A = L*D*L**T as computed by   
            DSYTRF.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    IPIV    (output) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D, as 
  
            determined by DSYTRF.  If IPIV(k) > 0, then rows and columns 
  
            k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1   
            diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0,   
            then rows and columns k-1 and -IPIV(k) were interchanged and 
  
            D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and 
  
            IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and   
            -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2   
            diagonal block.   

    B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)   
            On entry, the N-by-NRHS right hand side matrix B.   
            On exit, if INFO = 0, the N-by-NRHS solution matrix X.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK) 
  
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The length of WORK.  LWORK >= 1, and for best performance   
            LWORK >= N*NB, where NB is the optimal blocksize for   
            DSYTRF.   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   
            > 0: if INFO = i, D(i,i) is exactly zero.  The factorization 
  
                 has been completed, but the block diagonal matrix D is   
                 exactly singular, so the solution could not be computed. 
  

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
    /* Local variables */
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int xerbla_(char *, integer *), dsytrf_(
	    char *, integer *, doublereal *, integer *, integer *, doublereal 
	    *, integer *, integer *), dsytrs_(char *, integer *, 
	    integer *, doublereal *, integer *, integer *, doublereal *, 
	    integer *, integer *);


#define IPIV(I) ipiv[(I)-1]
#define WORK(I) work[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]

    *info = 0;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    } else if (*lwork < 1) {
	*info = -10;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DSYSV ", &i__1);
	return 0;
    }

/*     Compute the factorization A = U*D*U' or A = L*D*L'. */

    dsytrf_(uplo, n, &A(1,1), lda, &IPIV(1), &WORK(1), lwork, info);
    if (*info == 0) {

/*        Solve the system A*X = B, overwriting B with X. */

	dsytrs_(uplo, n, nrhs, &A(1,1), lda, &IPIV(1), &B(1,1), ldb,
		 info);

    }
    return 0;

/*     End of DSYSV */

} /* dsysv_ */
Example #21
0
/* Subroutine */ int dgesv_(integer *n, integer *nrhs, doublereal *a, integer 
	*lda, integer *ipiv, doublereal *b, integer *ldb, integer *info)
{
/*  -- LAPACK driver routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       March 31, 1993   


    Purpose   
    =======   

    DGESV computes the solution to a real system of linear equations   
       A * X = B,   
    where A is an N-by-N matrix and X and B are N-by-NRHS matrices.   

    The LU decomposition with partial pivoting and row interchanges is   
    used to factor A as   
       A = P * L * U,   
    where P is a permutation matrix, L is unit lower triangular, and U is 
  
    upper triangular.  The factored form of A is then used to solve the   
    system of equations A * X = B.   

    Arguments   
    =========   

    N       (input) INTEGER   
            The number of linear equations, i.e., the order of the   
            matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrix B.  NRHS >= 0.   

    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
            On entry, the N-by-N coefficient matrix A.   
            On exit, the factors L and U from the factorization   
            A = P*L*U; the unit diagonal elements of L are not stored.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    IPIV    (output) INTEGER array, dimension (N)   
            The pivot indices that define the permutation matrix P;   
            row i of the matrix was interchanged with row IPIV(i).   

    B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)   
            On entry, the N-by-NRHS matrix of right hand side matrix B.   
            On exit, if INFO = 0, the N-by-NRHS solution matrix X.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization 
  
                  has been completed, but the factor U is exactly   
                  singular, so the solution could not be computed.   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
    /* Local variables */
    extern /* Subroutine */ int dgetrf_(integer *, integer *, doublereal *, 
	    integer *, integer *, integer *), xerbla_(char *, integer *), dgetrs_(char *, integer *, integer *, doublereal *, 
	    integer *, integer *, doublereal *, integer *, integer *);


#define IPIV(I) ipiv[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]

    *info = 0;
    if (*n < 0) {
	*info = -1;
    } else if (*nrhs < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    } else if (*ldb < max(1,*n)) {
	*info = -7;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DGESV ", &i__1);
	return 0;
    }

/*     Compute the LU factorization of A. */

    dgetrf_(n, n, &A(1,1), lda, &IPIV(1), info);
    if (*info == 0) {

/*        Solve the system A*X = B, overwriting B with X. */

	dgetrs_("No transpose", n, nrhs, &A(1,1), lda, &IPIV(1), &B(1,1), ldb, info);
    }
    return 0;

/*     End of DGESV */

} /* dgesv_ */
Example #22
0
/***************************************************************************//**
 *  Parallel tile LU factorization - static scheduling
 **/
void plasma_pzgetrf_incpiv(plasma_context_t *plasma)
{
    PLASMA_desc A;
    PLASMA_desc L;
    int *IPIV;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    int k, m, n;
    int next_k;
    int next_m;
    int next_n;
    int ldak, ldam;
    int info;
    int tempkn, tempkm, tempmm, tempnn;
    int ib = PLASMA_IB;
    PLASMA_Complex64_t *work;

    plasma_unpack_args_5(A, L, IPIV, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;
    work = (PLASMA_Complex64_t*)plasma_private_alloc(plasma, ib*L.nb, L.dtyp);
    ss_init(A.mt, A.nt, -1);

    k = 0;
    n = PLASMA_RANK;
    while (n >= A.nt) {
        k++;
        n = n-A.nt+k;
    }
    m = k;

    while (k < min(A.mt, A.nt) && n < A.nt && !ss_aborted()) {
        next_n = n;
        next_m = m;
        next_k = k;

        next_m++;
        if (next_m == A.mt) {
            next_n += PLASMA_SIZE;
            while (next_n >= A.nt && next_k < min(A.mt, A.nt)) {
                next_k++;
                next_n = next_n-A.nt+next_k;
            }
            next_m = next_k;
        }

        tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
        tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
        tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
        tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;

        ldak = BLKLDD(A, k);
        ldam = BLKLDD(A, m);

        if (n == k) {
            if (m == k) {
                ss_cond_wait(k, k, k-1);
                CORE_zgetrf_incpiv(
                    tempkm, tempkn, ib,
                    A(k, k), ldak,
                    IPIV(k, k), &info);
                if (info != 0 && m == A.mt-1) {
                    plasma_request_fail(sequence, request, info + A.nb*k);
                    ss_abort();
                }
                ss_cond_set(k, k, k);
            }
            else {
                ss_cond_wait(m, k, k-1);
                CORE_ztstrf(
                    tempmm, tempkn, ib, A.nb,
                    A(k, k), ldak,
                    A(m, k), ldam,
                    L(m, k), L.mb,
                    IPIV(m, k),
                    work, L.nb, &info);
                if (info != 0 && m == A.mt-1) {
                    plasma_request_fail(sequence, request, info + A.nb*k);
                    ss_abort();
                }
                ss_cond_set(m, k, k);
            }
        }
        else {
            if (m == k) {
                ss_cond_wait(k, k, k);
                ss_cond_wait(k, n, k-1);
                CORE_zgessm(
                    tempkm, tempnn, tempkm, ib,
                    IPIV(k, k),
                    A(k, k), ldak,
                    A(k, n), ldak);
            }
            else {
                ss_cond_wait(m, k, k);
                ss_cond_wait(m, n, k-1);
                CORE_zssssm(
                    A.nb, tempnn, tempmm, tempnn, A.nb, ib,
                    A(k, n), ldak,
                    A(m, n), ldam,
                    L(m, k), L.mb,
                    A(m, k), ldam,
                    IPIV(m, k));
                ss_cond_set(m, n, k);
            }
        }
        n = next_n;
        m = next_m;
        k = next_k;
    }
    plasma_private_free(plasma, work);
    ss_finalize();
}
Example #23
0
/* Subroutine */ int zsysvx_(char *fact, char *uplo, integer *n, integer *
	nrhs, doublecomplex *a, integer *lda, doublecomplex *af, integer *
	ldaf, integer *ipiv, doublecomplex *b, integer *ldb, doublecomplex *x,
	 integer *ldx, doublereal *rcond, doublereal *ferr, doublereal *berr, 
	doublecomplex *work, integer *lwork, doublereal *rwork, integer *info)
{
/*  -- LAPACK driver routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    ZSYSVX uses the diagonal pivoting factorization to compute the   
    solution to a complex system of linear equations A * X = B,   
    where A is an N-by-N symmetric matrix and X and B are N-by-NRHS   
    matrices.   

    Error bounds on the solution and a condition estimate are also   
    provided.   

    Description   
    ===========   

    The following steps are performed:   

    1. If FACT = 'N', the diagonal pivoting method is used to factor A.   
       The form of the factorization is   
          A = U * D * U**T,  if UPLO = 'U', or   
          A = L * D * L**T,  if UPLO = 'L',   
       where U (or L) is a product of permutation and unit upper (lower) 
  
       triangular matrices, and D is symmetric and block diagonal with   
       1-by-1 and 2-by-2 diagonal blocks.   

    2. The factored form of A is used to estimate the condition number   
       of the matrix A.  If the reciprocal of the condition number is   
       less than machine precision, steps 3 and 4 are skipped.   

    3. The system of equations is solved for X using the factored form   
       of A.   

    4. Iterative refinement is applied to improve the computed solution   
       matrix and calculate error bounds and backward error estimates   
       for it.   

    Arguments   
    =========   

    FACT    (input) CHARACTER*1   
            Specifies whether or not the factored form of A has been   
            supplied on entry.   
            = 'F':  On entry, AF and IPIV contain the factored form   
                    of A.  A, AF and IPIV will not be modified.   
            = 'N':  The matrix A will be copied to AF and factored.   

    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangle of A is stored;   
            = 'L':  Lower triangle of A is stored.   

    N       (input) INTEGER   
            The number of linear equations, i.e., the order of the   
            matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrices B and X.  NRHS >= 0.   

    A       (input) COMPLEX*16 array, dimension (LDA,N)   
            The symmetric matrix A.  If UPLO = 'U', the leading N-by-N   
            upper triangular part of A contains the upper triangular part 
  
            of the matrix A, and the strictly lower triangular part of A 
  
            is not referenced.  If UPLO = 'L', the leading N-by-N lower   
            triangular part of A contains the lower triangular part of   
            the matrix A, and the strictly upper triangular part of A is 
  
            not referenced.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    AF      (input or output) COMPLEX*16 array, dimension (LDAF,N)   
            If FACT = 'F', then AF is an input argument and on entry   
            contains the block diagonal matrix D and the multipliers used 
  
            to obtain the factor U or L from the factorization   
            A = U*D*U**T or A = L*D*L**T as computed by ZSYTRF.   

            If FACT = 'N', then AF is an output argument and on exit   
            returns the block diagonal matrix D and the multipliers used 
  
            to obtain the factor U or L from the factorization   
            A = U*D*U**T or A = L*D*L**T.   

    LDAF    (input) INTEGER   
            The leading dimension of the array AF.  LDAF >= max(1,N).   

    IPIV    (input or output) INTEGER array, dimension (N)   
            If FACT = 'F', then IPIV is an input argument and on entry   
            contains details of the interchanges and the block structure 
  
            of D, as determined by ZSYTRF.   
            If IPIV(k) > 0, then rows and columns k and IPIV(k) were   
            interchanged and D(k,k) is a 1-by-1 diagonal block.   
            If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and   
            columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k) 
  
            is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =   
            IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were   
            interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.   

            If FACT = 'N', then IPIV is an output argument and on exit   
            contains details of the interchanges and the block structure 
  
            of D, as determined by ZSYTRF.   

    B       (input) COMPLEX*16 array, dimension (LDB,NRHS)   
            The N-by-NRHS right hand side matrix B.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    X       (output) COMPLEX*16 array, dimension (LDX,NRHS)   
            If INFO = 0, the N-by-NRHS solution matrix X.   

    LDX     (input) INTEGER   
            The leading dimension of the array X.  LDX >= max(1,N).   

    RCOND   (output) DOUBLE PRECISION   
            The estimate of the reciprocal condition number of the matrix 
  
            A.  If RCOND is less than the machine precision (in   
            particular, if RCOND = 0), the matrix is singular to working 
  
            precision.  This condition is indicated by a return code of   
            INFO > 0, and the solution and error bounds are not computed. 
  

    FERR    (output) DOUBLE PRECISION array, dimension (NRHS)   
            The estimated forward error bound for each solution vector   
            X(j) (the j-th column of the solution matrix X).   
            If XTRUE is the true solution corresponding to X(j), FERR(j) 
  
            is an estimated upper bound for the magnitude of the largest 
  
            element in (X(j) - XTRUE) divided by the magnitude of the   
            largest element in X(j).  The estimate is as reliable as   
            the estimate for RCOND, and is almost always a slight   
            overestimate of the true error.   

    BERR    (output) DOUBLE PRECISION array, dimension (NRHS)   
            The componentwise relative backward error of each solution   
            vector X(j) (i.e., the smallest relative change in   
            any element of A or B that makes X(j) an exact solution).   

    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The length of WORK.  LWORK >= 2*N, and for best performance   
            LWORK >= N*NB, where NB is the optimal blocksize for   
            ZSYTRF.   

    RWORK   (workspace) DOUBLE PRECISION array, dimension (N)   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   
            > 0: if INFO = i, and i is   
                 <= N: D(i,i) is exactly zero.  The factorization   
                       has been completed, but the block diagonal   
                       matrix D is exactly singular, so the solution and 
  
                       error bounds could not be computed.   
                 = N+1: the block diagonal matrix D is nonsingular, but   
                       RCOND is less than machine precision.  The   
                       factorization has been completed, but the matrix   
                       is singular to working precision, so the solution 
  
                       and error bounds have not been computed.   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, 
	    x_offset, i__1;
    /* Local variables */
    extern logical lsame_(char *, char *);
    static doublereal anorm;
    extern doublereal dlamch_(char *);
    static logical nofact;
    extern /* Subroutine */ int xerbla_(char *, integer *), zlacpy_(
	    char *, integer *, integer *, doublecomplex *, integer *, 
	    doublecomplex *, integer *);
    extern doublereal zlansy_(char *, char *, integer *, doublecomplex *, 
	    integer *, doublereal *);
    extern /* Subroutine */ int zsycon_(char *, integer *, doublecomplex *, 
	    integer *, integer *, doublereal *, doublereal *, doublecomplex *,
	     integer *), zsyrfs_(char *, integer *, integer *, 
	    doublecomplex *, integer *, doublecomplex *, integer *, integer *,
	     doublecomplex *, integer *, doublecomplex *, integer *, 
	    doublereal *, doublereal *, doublecomplex *, doublereal *, 
	    integer *), zsytrf_(char *, integer *, doublecomplex *, 
	    integer *, integer *, doublecomplex *, integer *, integer *), zsytrs_(char *, integer *, integer *, doublecomplex *, 
	    integer *, integer *, doublecomplex *, integer *, integer *);


#define IPIV(I) ipiv[(I)-1]
#define FERR(I) ferr[(I)-1]
#define BERR(I) berr[(I)-1]
#define WORK(I) work[(I)-1]
#define RWORK(I) rwork[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define AF(I,J) af[(I)-1 + ((J)-1)* ( *ldaf)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]
#define X(I,J) x[(I)-1 + ((J)-1)* ( *ldx)]

    *info = 0;
    nofact = lsame_(fact, "N");
    if (! nofact && ! lsame_(fact, "F")) {
	*info = -1;
    } else if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*nrhs < 0) {
	*info = -4;
    } else if (*lda < max(1,*n)) {
	*info = -6;
    } else if (*ldaf < max(1,*n)) {
	*info = -8;
    } else if (*ldb < max(1,*n)) {
	*info = -11;
    } else if (*ldx < max(1,*n)) {
	*info = -13;
    } else if (*lwork < *n << 1) {
	*info = -18;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("ZSYSVX", &i__1);
	return 0;
    }

    if (nofact) {

/*        Compute the factorization A = U*D*U' or A = L*D*L'. */

	zlacpy_(uplo, n, n, &A(1,1), lda, &AF(1,1), ldaf);
	zsytrf_(uplo, n, &AF(1,1), ldaf, &IPIV(1), &WORK(1), lwork, 
		info);

/*        Return if INFO is non-zero. */

	if (*info != 0) {
	    if (*info > 0) {
		*rcond = 0.;
	    }
	    return 0;
	}
    }

/*     Compute the norm of the matrix A. */

    anorm = zlansy_("I", uplo, n, &A(1,1), lda, &RWORK(1));

/*     Compute the reciprocal of the condition number of A. */

    zsycon_(uplo, n, &AF(1,1), ldaf, &IPIV(1), &anorm, rcond, &WORK(1), 
	    info);

/*     Return if the matrix is singular to working precision. */

    if (*rcond < dlamch_("Epsilon")) {
	*info = *n + 1;
	return 0;
    }

/*     Compute the solution vectors X. */

    zlacpy_("Full", n, nrhs, &B(1,1), ldb, &X(1,1), ldx);
    zsytrs_(uplo, n, nrhs, &AF(1,1), ldaf, &IPIV(1), &X(1,1), ldx, 
	    info);

/*     Use iterative refinement to improve the computed solutions and   
       compute error bounds and backward error estimates for them. */

    zsyrfs_(uplo, n, nrhs, &A(1,1), lda, &AF(1,1), ldaf, &IPIV(1), 
	    &B(1,1), ldb, &X(1,1), ldx, &FERR(1), &BERR(1), &WORK(1)
	    , &RWORK(1), info);

    return 0;

/*     End of ZSYSVX */

} /* zsysvx_ */
Example #24
0
/* Subroutine */ int sgtsvx_(char *fact, char *trans, integer *n, integer *
	nrhs, real *dl, real *d, real *du, real *dlf, real *df, real *duf, 
	real *du2, integer *ipiv, real *b, integer *ldb, real *x, integer *
	ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, 
	integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SGTSVX uses the LU factorization to compute the solution to a real   
    system of linear equations A * X = B or A**T * X = B,   
    where A is a tridiagonal matrix of order N and X and B are N-by-NRHS 
  
    matrices.   

    Error bounds on the solution and a condition estimate are also   
    provided.   

    Description   
    ===========   

    The following steps are performed:   

    1. If FACT = 'N', the LU decomposition is used to factor the matrix A 
  
       as A = L * U, where L is a product of permutation and unit lower   
       bidiagonal matrices and U is upper triangular with nonzeros in   
       only the main diagonal and first two superdiagonals.   

    2. The factored form of A is used to estimate the condition number   
       of the matrix A.  If the reciprocal of the condition number is   
       less than machine precision, steps 3 and 4 are skipped.   

    3. The system of equations is solved for X using the factored form   
       of A.   

    4. Iterative refinement is applied to improve the computed solution   
       matrix and calculate error bounds and backward error estimates   
       for it.   

    Arguments   
    =========   

    FACT    (input) CHARACTER*1   
            Specifies whether or not the factored form of A has been   
            supplied on entry.   
            = 'F':  DLF, DF, DUF, DU2, and IPIV contain the factored   
                    form of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV   
                    will not be modified.   
            = 'N':  The matrix will be copied to DLF, DF, and DUF   
                    and factored.   

    TRANS   (input) CHARACTER*1   
            Specifies the form of the system of equations:   
            = 'N':  A * X = B     (No transpose)   
            = 'T':  A**T * X = B  (Transpose)   
            = 'C':  A**H * X = B  (Conjugate transpose = Transpose)   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrix B.  NRHS >= 0.   

    DL      (input) REAL array, dimension (N-1)   
            The (n-1) subdiagonal elements of A.   

    D       (input) REAL array, dimension (N)   
            The n diagonal elements of A.   

    DU      (input) REAL array, dimension (N-1)   
            The (n-1) superdiagonal elements of A.   

    DLF     (input or output) REAL array, dimension (N-1)   
            If FACT = 'F', then DLF is an input argument and on entry   
            contains the (n-1) multipliers that define the matrix L from 
  
            the LU factorization of A as computed by SGTTRF.   

            If FACT = 'N', then DLF is an output argument and on exit   
            contains the (n-1) multipliers that define the matrix L from 
  
            the LU factorization of A.   

    DF      (input or output) REAL array, dimension (N)   
            If FACT = 'F', then DF is an input argument and on entry   
            contains the n diagonal elements of the upper triangular   
            matrix U from the LU factorization of A.   

            If FACT = 'N', then DF is an output argument and on exit   
            contains the n diagonal elements of the upper triangular   
            matrix U from the LU factorization of A.   

    DUF     (input or output) REAL array, dimension (N-1)   
            If FACT = 'F', then DUF is an input argument and on entry   
            contains the (n-1) elements of the first superdiagonal of U. 
  

            If FACT = 'N', then DUF is an output argument and on exit   
            contains the (n-1) elements of the first superdiagonal of U. 
  

    DU2     (input or output) REAL array, dimension (N-2)   
            If FACT = 'F', then DU2 is an input argument and on entry   
            contains the (n-2) elements of the second superdiagonal of   
            U.   

            If FACT = 'N', then DU2 is an output argument and on exit   
            contains the (n-2) elements of the second superdiagonal of   
            U.   

    IPIV    (input or output) INTEGER array, dimension (N)   
            If FACT = 'F', then IPIV is an input argument and on entry   
            contains the pivot indices from the LU factorization of A as 
  
            computed by SGTTRF.   

            If FACT = 'N', then IPIV is an output argument and on exit   
            contains the pivot indices from the LU factorization of A;   
            row i of the matrix was interchanged with row IPIV(i).   
            IPIV(i) will always be either i or i+1; IPIV(i) = i indicates 
  
            a row interchange was not required.   

    B       (input) REAL array, dimension (LDB,NRHS)   
            The N-by-NRHS right hand side matrix B.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    X       (output) REAL array, dimension (LDX,NRHS)   
            If INFO = 0, the N-by-NRHS solution matrix X.   

    LDX     (input) INTEGER   
            The leading dimension of the array X.  LDX >= max(1,N).   

    RCOND   (output) REAL   
            The estimate of the reciprocal condition number of the matrix 
  
            A.  If RCOND is less than the machine precision (in   
            particular, if RCOND = 0), the matrix is singular to working 
  
            precision.  This condition is indicated by a return code of   
            INFO > 0, and the solution and error bounds are not computed. 
  

    FERR    (output) REAL array, dimension (NRHS)   
            The estimated forward error bound for each solution vector   
            X(j) (the j-th column of the solution matrix X).   
            If XTRUE is the true solution corresponding to X(j), FERR(j) 
  
            is an estimated upper bound for the magnitude of the largest 
  
            element in (X(j) - XTRUE) divided by the magnitude of the   
            largest element in X(j).  The estimate is as reliable as   
            the estimate for RCOND, and is almost always a slight   
            overestimate of the true error.   

    BERR    (output) REAL array, dimension (NRHS)   
            The componentwise relative backward error of each solution   
            vector X(j) (i.e., the smallest relative change in   
            any element of A or B that makes X(j) an exact solution).   

    WORK    (workspace) REAL array, dimension (3*N)   

    IWORK   (workspace) INTEGER array, dimension (N)   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  if INFO = i, and i is   
                  <= N:  U(i,i) is exactly zero.  The factorization   
                         has not been completed unless i = N, but the   
                         factor U is exactly singular, so the solution   
                         and error bounds could not be computed.   
                 = N+1:  RCOND is less than machine precision.  The   
                         factorization has been completed, but the   
                         matrix is singular to working precision, and   
                         the solution and error bounds have not been   
                         computed.   

    ===================================================================== 
  


    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    
    /* System generated locals */
    integer b_dim1, b_offset, x_dim1, x_offset, i__1;
    /* Local variables */
    static char norm[1];
    extern logical lsame_(char *, char *);
    static real anorm;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    extern doublereal slamch_(char *);
    static logical nofact;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern doublereal slangt_(char *, integer *, real *, real *, real *);
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *), sgtcon_(char *, integer *, 
	    real *, real *, real *, real *, integer *, real *, real *, real *,
	     integer *, integer *);
    static logical notran;
    extern /* Subroutine */ int sgtrfs_(char *, integer *, integer *, real *, 
	    real *, real *, real *, real *, real *, real *, integer *, real *,
	     integer *, real *, integer *, real *, real *, real *, integer *, 
	    integer *), sgttrf_(integer *, real *, real *, real *, 
	    real *, integer *, integer *), sgttrs_(char *, integer *, integer 
	    *, real *, real *, real *, real *, integer *, real *, integer *, 
	    integer *);



#define DL(I) dl[(I)-1]
#define D(I) d[(I)-1]
#define DU(I) du[(I)-1]
#define DLF(I) dlf[(I)-1]
#define DF(I) df[(I)-1]
#define DUF(I) duf[(I)-1]
#define DU2(I) du2[(I)-1]
#define IPIV(I) ipiv[(I)-1]
#define FERR(I) ferr[(I)-1]
#define BERR(I) berr[(I)-1]
#define WORK(I) work[(I)-1]
#define IWORK(I) iwork[(I)-1]

#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]
#define X(I,J) x[(I)-1 + ((J)-1)* ( *ldx)]

    *info = 0;
    nofact = lsame_(fact, "N");
    notran = lsame_(trans, "N");
    if (! nofact && ! lsame_(fact, "F")) {
	*info = -1;
    } else if (! notran && ! lsame_(trans, "T") && ! lsame_(trans, 
	    "C")) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*nrhs < 0) {
	*info = -4;
    } else if (*ldb < max(1,*n)) {
	*info = -14;
    } else if (*ldx < max(1,*n)) {
	*info = -16;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SGTSVX", &i__1);
	return 0;
    }

    if (nofact) {

/*        Compute the LU factorization of A. */

	scopy_(n, &D(1), &c__1, &DF(1), &c__1);
	if (*n > 1) {
	    i__1 = *n - 1;
	    scopy_(&i__1, &DL(1), &c__1, &DLF(1), &c__1);
	    i__1 = *n - 1;
	    scopy_(&i__1, &DU(1), &c__1, &DUF(1), &c__1);
	}
	sgttrf_(n, &DLF(1), &DF(1), &DUF(1), &DU2(1), &IPIV(1), info);

/*        Return if INFO is non-zero. */

	if (*info != 0) {
	    if (*info > 0) {
		*rcond = 0.f;
	    }
	    return 0;
	}
    }

/*     Compute the norm of the matrix A. */

    if (notran) {
	*(unsigned char *)norm = '1';
    } else {
	*(unsigned char *)norm = 'I';
    }
    anorm = slangt_(norm, n, &DL(1), &D(1), &DU(1));

/*     Compute the reciprocal of the condition number of A. */

    sgtcon_(norm, n, &DLF(1), &DF(1), &DUF(1), &DU2(1), &IPIV(1), &anorm, 
	    rcond, &WORK(1), &IWORK(1), info);

/*     Return if the matrix is singular to working precision. */

    if (*rcond < slamch_("Epsilon")) {
	*info = *n + 1;
	return 0;
    }

/*     Compute the solution vectors X. */

    slacpy_("Full", n, nrhs, &B(1,1), ldb, &X(1,1), ldx);
    sgttrs_(trans, n, nrhs, &DLF(1), &DF(1), &DUF(1), &DU2(1), &IPIV(1), &X(1,1), ldx, info);

/*     Use iterative refinement to improve the computed solutions and   
       compute error bounds and backward error estimates for them. */

    sgtrfs_(trans, n, nrhs, &DL(1), &D(1), &DU(1), &DLF(1), &DF(1), &DUF(1), &
	    DU2(1), &IPIV(1), &B(1,1), ldb, &X(1,1), ldx, &FERR(1), 
	    &BERR(1), &WORK(1), &IWORK(1), info);

    return 0;

/*     End of SGTSVX */

} /* sgtsvx_ */
Example #25
0
/***************************************************************************//**
 *  Parallel forward substitution for tile LU - static scheduling
 **/
void plasma_pztrsmpl(plasma_context_t *plasma)
{
    PLASMA_desc A;
    PLASMA_desc B;
    PLASMA_desc L;
    int *IPIV;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    int k, m, n;
    int next_k;
    int next_m;
    int next_n;
    int ldak, ldbk, ldam, ldbm;
    int tempkm, tempnn, tempkmin, tempmm, tempkn;
    int ib;

    plasma_unpack_args_6(A, B, L, IPIV, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;
    ss_init(B.mt, B.nt, -1);

    ib = PLASMA_IB;
    k = 0;
    n = PLASMA_RANK;
    while (n >= B.nt) {
        k++;
        n = n-B.nt;
    }
    m = k;

    while (k < min(A.mt, A.nt) && n < B.nt) {
        next_n = n;
        next_m = m;
        next_k = k;

        next_m++;
        if (next_m == A.mt) {
            next_n += PLASMA_SIZE;
            while (next_n >= B.nt && next_k < min(A.mt, A.nt)) {
                next_k++;
                next_n = next_n-B.nt;
            }
            next_m = next_k;
        }

        tempkm   = k == A.mt-1 ? A.m-k*A.mb : A.mb;
        tempkn   = k == A.nt-1 ? A.n-k*A.nb : A.nb;
        tempkmin = k == min(A.mt, A.nt)-1 ? min(A.m, A.n)-k*A.mb : A.mb;
        tempnn   = n == B.nt-1 ? B.n-n*B.nb : B.nb;
        tempmm   = m == A.mt-1 ? A.m-m*A.mb : A.mb;

        ldak = BLKLDD(A, k);
        ldbk = BLKLDD(B, k);
        ldam = BLKLDD(A, m);
        ldbm = BLKLDD(B, m);

        if (m == k) {
            ss_cond_wait(k, n, k-1);
            CORE_zgessm(
                tempkm, tempnn, tempkmin, ib,
                IPIV(k, k),
                A(k, k), ldak,
                B(k, n), ldbk);
            ss_cond_set(k, n, k);
        }
        else {
            ss_cond_wait(m, n, k-1);
            CORE_zssssm(
                A.nb, tempnn, tempmm, tempnn, tempkn, ib,
                B(k, n), ldbk,
                B(m, n), ldbm,
                L(m, k), L.mb,
                A(m, k), ldam,
                IPIV(m, k));
            ss_cond_set(m, n, k);
        }
        n = next_n;
        m = next_m;
        k = next_k;
    }
    ss_finalize();
}
Example #26
0
/***************************************************************************//**
 *  Parallel tile LU factorization - dynamic scheduling
 **/
void plasma_psgetrf_incpiv_quark(PLASMA_desc A, PLASMA_desc L, int *IPIV,
                          PLASMA_sequence *sequence, PLASMA_request *request)
{
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;

    int k, m, n;
    int ldak, ldam;
    int tempkm, tempkn, tempmm, tempnn;
    int ib;

    plasma = plasma_context_self();
    if (sequence->status != PLASMA_SUCCESS)
        return;
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    ib = PLASMA_IB;
    for (k = 0; k < min(A.mt, A.nt); k++) {
        tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
        tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
        ldak = BLKLDD(A, k);
        QUARK_CORE_sgetrf_incpiv(
            plasma->quark, &task_flags,
            tempkm, tempkn, ib, L.nb,
            A(k, k), ldak, IPIV(k, k),
            sequence, request,
            k == A.mt-1, A.nb*k);

        for (n = k+1; n < A.nt; n++) {
            tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
            QUARK_CORE_sgessm(
                plasma->quark, &task_flags,
                tempkm, tempnn, tempkm, ib, L.nb,
                IPIV(k, k),
                A(k, k), ldak,
                A(k, n), ldak);
        }
        for (m = k+1; m < A.mt; m++) {
            tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
            ldam = BLKLDD(A, m);
            QUARK_CORE_ststrf(
                plasma->quark, &task_flags,
                tempmm, tempkn, ib, L.nb,
                A(k, k), ldak,
                A(m, k), ldam,
                L(m, k), L.mb,
                IPIV(m, k),
                sequence, request,
                m == A.mt-1, A.nb*k);

            for (n = k+1; n < A.nt; n++) {
                tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
                QUARK_CORE_sssssm(
                    plasma->quark, &task_flags,
                    A.nb, tempnn, tempmm, tempnn, A.nb, ib, L.nb,
                    A(k, n), ldak,
                    A(m, n), ldam,
                    L(m, k), L.mb,
                    A(m, k), ldam,
                    IPIV(m, k));
            }
        }
    }
}
Example #27
0
/* Subroutine */ int cspsv_(char *uplo, integer *n, integer *nrhs, complex *
	ap, integer *ipiv, complex *b, integer *ldb, integer *info)
{
/*  -- LAPACK driver routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       March 31, 1993   


    Purpose   
    =======   

    CSPSV computes the solution to a complex system of linear equations   
       A * X = B,   
    where A is an N-by-N symmetric matrix stored in packed format and X   
    and B are N-by-NRHS matrices.   

    The diagonal pivoting method is used to factor A as   
       A = U * D * U**T,  if UPLO = 'U', or   
       A = L * D * L**T,  if UPLO = 'L',   
    where U (or L) is a product of permutation and unit upper (lower)   
    triangular matrices, D is symmetric and block diagonal with 1-by-1   
    and 2-by-2 diagonal blocks.  The factored form of A is then used to   
    solve the system of equations A * X = B.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangle of A is stored;   
            = 'L':  Lower triangle of A is stored.   

    N       (input) INTEGER   
            The number of linear equations, i.e., the order of the   
            matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of right hand sides, i.e., the number of columns   
            of the matrix B.  NRHS >= 0.   

    AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)   
            On entry, the upper or lower triangle of the symmetric matrix 
  
            A, packed columnwise in a linear array.  The j-th column of A 
  
            is stored in the array AP as follows:   
            if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;   
            if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.   
            See below for further details.   

            On exit, the block diagonal matrix D and the multipliers used 
  
            to obtain the factor U or L from the factorization   
            A = U*D*U**T or A = L*D*L**T as computed by CSPTRF, stored as 
  
            a packed triangular matrix in the same storage format as A.   

    IPIV    (output) INTEGER array, dimension (N)   
            Details of the interchanges and the block structure of D, as 
  
            determined by CSPTRF.  If IPIV(k) > 0, then rows and columns 
  
            k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1   
            diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0,   
            then rows and columns k-1 and -IPIV(k) were interchanged and 
  
            D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and 
  
            IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and   
            -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2   
            diagonal block.   

    B       (input/output) COMPLEX array, dimension (LDB,NRHS)   
            On entry, the N-by-NRHS right hand side matrix B.   
            On exit, if INFO = 0, the N-by-NRHS solution matrix X.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization 
  
                  has been completed, but the block diagonal matrix D is 
  
                  exactly singular, so the solution could not be   
                  computed.   

    Further Details   
    ===============   

    The packed storage scheme is illustrated by the following example   
    when N = 4, UPLO = 'U':   

    Two-dimensional storage of the symmetric matrix A:   

       a11 a12 a13 a14   
           a22 a23 a24   
               a33 a34     (aij = aji)   
                   a44   

    Packed storage of the upper triangle of A:   

    AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* System generated locals */
    integer b_dim1, b_offset, i__1;
    /* Local variables */
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int xerbla_(char *, integer *), csptrf_(
	    char *, integer *, complex *, integer *, integer *), 
	    csptrs_(char *, integer *, integer *, complex *, integer *, 
	    complex *, integer *, integer *);


#define AP(I) ap[(I)-1]
#define IPIV(I) ipiv[(I)-1]

#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]

    *info = 0;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*nrhs < 0) {
	*info = -3;
    } else if (*ldb < max(1,*n)) {
	*info = -7;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CSPSV ", &i__1);
	return 0;
    }

/*     Compute the factorization A = U*D*U' or A = L*D*L'. */

    csptrf_(uplo, n, &AP(1), &IPIV(1), info);
    if (*info == 0) {

/*        Solve the system A*X = B, overwriting B with X. */

	csptrs_(uplo, n, nrhs, &AP(1), &IPIV(1), &B(1,1), ldb, info);

    }
    return 0;

/*     End of CSPSV */

} /* cspsv_ */