void STARPU_SSYRK (const char *uplo, const char *trans, const int n,
                   const int k, const float alpha, const float *A,
                   const int lda, const float beta, float *C,
                   const int ldc)
{
	ssyrk_(uplo, trans, &n, &k, &alpha, A, &lda, &beta, C, &ldc); 
}
Beispiel #2
0
float *fmat_new_covariance (int d, int n, const float *v, float *avg, int assume_centered)
{
  
  long i, j;

  float *cov = fvec_new_0 (d * d);
  
  if(!assume_centered) {

    float *sums = avg ? avg : fvec_new(d);
    fvec_0(sums,d);
    
    for (i = 0; i < n; i++)
      for (j = 0; j < d; j++)
        sums[j] += v[i * d + j];
    
    
    for (i = 0; i < d; i++)
      for (j = 0; j < d; j++)
        cov[i + j * d] = sums[i] * sums[j];
    
    
    if(avg)
      for(i=0;i<d;i++) avg[i]/=n;
    else
      free (sums);

  } 

  FINTEGER di=d,ni=n;

  if(0)  {
    float alpha = 1.0 / n, beta = -1.0 / (n * n);
    sgemm_ ("N", "T", &di, &di, &ni, &alpha, v, &di, v, &di, &beta, cov, &di);
  } else if(1) {
    /* transpose input matrix */
    float *vt=fvec_new(n*d);
    for(i=0;i<d;i++) 
      for(j=0;j<n;j++) 
        vt[i*n+j]=v[j*d+i];
    float alpha = 1.0 / n, beta = -1.0 / (n * n);
    
    sgemm_ ("T", "N", &di, &di, &ni, &alpha, vt, &ni, vt, &ni, &beta, cov, &di);
    
    free(vt);
  } else {
    float alpha = 1.0 / n, beta = -1.0 / (n * n);
    ssyrk_("L","N", &di, &ni, &alpha,(float*)v,&di,&beta,cov,&di);

    /* copy lower triangle to upper */

    for(i=0;i<d;i++)
      for(j=i+1;j<d;j++) 
        cov[i+j*d]=cov[j+i*d];

  }

  return cov;
}
Beispiel #3
0
int
f2c_ssyrk(char* uplo, char* trans, integer* N, integer* K,
          real* alpha,
          real* A, integer* lda,
          real* beta,
          real* C, integer* ldc)
{
    ssyrk_(uplo, trans, N, K,
           alpha, A, lda, beta, C, ldc);
    return 0;
}
Beispiel #4
0
/* Subroutine */ int slqt02_(integer *m, integer *n, integer *k, real *a, 
	real *af, real *q, real *l, integer *lda, real *tau, real *work, 
	integer *lwork, real *rwork, real *result)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, l_dim1, l_offset, q_dim1, 
	    q_offset, i__1;

    /* Builtin functions   
       Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);

    /* Local variables */
    static integer info;
    static real resid;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    static real anorm;
    extern /* Subroutine */ int ssyrk_(char *, char *, integer *, integer *, 
	    real *, real *, integer *, real *, real *, integer *);
    extern doublereal slamch_(char *), slange_(char *, integer *, 
	    integer *, real *, integer *, real *);
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *), slaset_(char *, integer *, 
	    integer *, real *, real *, real *, integer *), sorglq_(
	    integer *, integer *, integer *, real *, integer *, real *, real *
	    , integer *, integer *);
    extern doublereal slansy_(char *, char *, integer *, real *, integer *, 
	    real *);
    static real eps;


#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
#define af_ref(a_1,a_2) af[(a_2)*af_dim1 + a_1]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SLQT02 tests SORGLQ, which generates an m-by-n matrix Q with   
    orthonornmal rows that is defined as the product of k elementary   
    reflectors.   

    Given the LQ factorization of an m-by-n matrix A, SLQT02 generates   
    the orthogonal matrix Q defined by the factorization of the first k   
    rows of A; it compares L(1:k,1:m) with A(1:k,1:n)*Q(1:m,1:n)', and   
    checks that the rows of Q are orthonormal.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix Q to be generated.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix Q to be generated.   
            N >= M >= 0.   

    K       (input) INTEGER   
            The number of elementary reflectors whose product defines the   
            matrix Q. M >= K >= 0.   

    A       (input) REAL array, dimension (LDA,N)   
            The m-by-n matrix A which was factorized by SLQT01.   

    AF      (input) REAL array, dimension (LDA,N)   
            Details of the LQ factorization of A, as returned by SGELQF.   
            See SGELQF for further details.   

    Q       (workspace) REAL array, dimension (LDA,N)   

    L       (workspace) REAL array, dimension (LDA,M)   

    LDA     (input) INTEGER   
            The leading dimension of the arrays A, AF, Q and L. LDA >= N.   

    TAU     (input) REAL array, dimension (M)   
            The scalar factors of the elementary reflectors corresponding   
            to the LQ factorization in AF.   

    WORK    (workspace) REAL array, dimension (LWORK)   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.   

    RWORK   (workspace) REAL array, dimension (M)   

    RESULT  (output) REAL array, dimension (2)   
            The test ratios:   
            RESULT(1) = norm( L - A*Q' ) / ( N * norm(A) * EPS )   
            RESULT(2) = norm( I - Q*Q' ) / ( N * EPS )   

    =====================================================================   


       Parameter adjustments */
    l_dim1 = *lda;
    l_offset = 1 + l_dim1 * 1;
    l -= l_offset;
    q_dim1 = *lda;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    af_dim1 = *lda;
    af_offset = 1 + af_dim1 * 1;
    af -= af_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;
    --rwork;
    --result;

    /* Function Body */
    eps = slamch_("Epsilon");

/*     Copy the first k rows of the factorization to the array Q */

    slaset_("Full", m, n, &c_b4, &c_b4, &q[q_offset], lda);
    i__1 = *n - 1;
    slacpy_("Upper", k, &i__1, &af_ref(1, 2), lda, &q_ref(1, 2), lda);

/*     Generate the first n columns of the matrix Q */

    s_copy(srnamc_1.srnamt, "SORGLQ", (ftnlen)6, (ftnlen)6);
    sorglq_(m, n, k, &q[q_offset], lda, &tau[1], &work[1], lwork, &info);

/*     Copy L(1:k,1:m) */

    slaset_("Full", k, m, &c_b9, &c_b9, &l[l_offset], lda);
    slacpy_("Lower", k, m, &af[af_offset], lda, &l[l_offset], lda);

/*     Compute L(1:k,1:m) - A(1:k,1:n) * Q(1:m,1:n)' */

    sgemm_("No transpose", "Transpose", k, m, n, &c_b14, &a[a_offset], lda, &
	    q[q_offset], lda, &c_b15, &l[l_offset], lda);

/*     Compute norm( L - A*Q' ) / ( N * norm(A) * EPS ) . */

    anorm = slange_("1", k, n, &a[a_offset], lda, &rwork[1]);
    resid = slange_("1", k, m, &l[l_offset], lda, &rwork[1]);
    if (anorm > 0.f) {
	result[1] = resid / (real) max(1,*n) / anorm / eps;
    } else {
	result[1] = 0.f;
    }

/*     Compute I - Q*Q' */

    slaset_("Full", m, m, &c_b9, &c_b15, &l[l_offset], lda);
    ssyrk_("Upper", "No transpose", m, n, &c_b14, &q[q_offset], lda, &c_b15, &
	    l[l_offset], lda);

/*     Compute norm( I - Q*Q' ) / ( N * EPS ) . */

    resid = slansy_("1", "Upper", m, &l[l_offset], lda, &rwork[1]);

    result[2] = resid / (real) max(1,*n) / eps;

    return 0;

/*     End of SLQT02 */

} /* slqt02_ */
Beispiel #5
0
/* Subroutine */ int sort01_(char *rowcol, integer *m, integer *n, real *u, 
	integer *ldu, real *work, integer *lwork, real *resid)
{
    /* System generated locals */
    integer u_dim1, u_offset, i__1, i__2;
    real r__1, r__2;

    /* Local variables */
    integer i__, j, k;
    real eps, tmp;
    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    integer mnmin;
    extern /* Subroutine */ int ssyrk_(char *, char *, integer *, integer *, 
	    real *, real *, integer *, real *, real *, integer *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *, 
	    real *, real *, integer *);
    integer ldwork;
    extern doublereal slansy_(char *, char *, integer *, real *, integer *, 
	    real *);
    char transu[1];


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SORT01 checks that the matrix U is orthogonal by computing the ratio */

/*     RESID = norm( I - U*U' ) / ( n * EPS ), if ROWCOL = 'R', */
/*  or */
/*     RESID = norm( I - U'*U ) / ( m * EPS ), if ROWCOL = 'C'. */

/*  Alternatively, if there isn't sufficient workspace to form */
/*  I - U*U' or I - U'*U, the ratio is computed as */

/*     RESID = abs( I - U*U' ) / ( n * EPS ), if ROWCOL = 'R', */
/*  or */
/*     RESID = abs( I - U'*U ) / ( m * EPS ), if ROWCOL = 'C'. */

/*  where EPS is the machine precision.  ROWCOL is used only if m = n; */
/*  if m > n, ROWCOL is assumed to be 'C', and if m < n, ROWCOL is */
/*  assumed to be 'R'. */

/*  Arguments */
/*  ========= */

/*  ROWCOL  (input) CHARACTER */
/*          Specifies whether the rows or columns of U should be checked */
/*          for orthogonality.  Used only if M = N. */
/*          = 'R':  Check for orthogonal rows of U */
/*          = 'C':  Check for orthogonal columns of U */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix U. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix U. */

/*  U       (input) REAL array, dimension (LDU,N) */
/*          The orthogonal matrix U.  U is checked for orthogonal columns */
/*          if m > n or if m = n and ROWCOL = 'C'.  U is checked for */
/*          orthogonal rows if m < n or if m = n and ROWCOL = 'R'. */

/*  LDU     (input) INTEGER */
/*          The leading dimension of the array U.  LDU >= max(1,M). */

/*  WORK    (workspace) REAL array, dimension (LWORK) */

/*  LWORK   (input) INTEGER */
/*          The length of the array WORK.  For best performance, LWORK */
/*          should be at least N*(N+1) if ROWCOL = 'C' or M*(M+1) if */
/*          ROWCOL = 'R', but the test will be done even if LWORK is 0. */

/*  RESID   (output) REAL */
/*          RESID = norm( I - U * U' ) / ( n * EPS ), if ROWCOL = 'R', or */
/*          RESID = norm( I - U' * U ) / ( m * EPS ), if ROWCOL = 'C'. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1;
    u -= u_offset;
    --work;

    /* Function Body */
    *resid = 0.f;

/*     Quick return if possible */

    if (*m <= 0 || *n <= 0) {
	return 0;
    }

    eps = slamch_("Precision");
    if (*m < *n || *m == *n && lsame_(rowcol, "R")) {
	*(unsigned char *)transu = 'N';
	k = *n;
    } else {
	*(unsigned char *)transu = 'T';
	k = *m;
    }
    mnmin = min(*m,*n);

    if ((mnmin + 1) * mnmin <= *lwork) {
	ldwork = mnmin;
    } else {
	ldwork = 0;
    }
    if (ldwork > 0) {

/*        Compute I - U*U' or I - U'*U. */

	slaset_("Upper", &mnmin, &mnmin, &c_b7, &c_b8, &work[1], &ldwork);
	ssyrk_("Upper", transu, &mnmin, &k, &c_b10, &u[u_offset], ldu, &c_b8, 
		&work[1], &ldwork);

/*        Compute norm( I - U*U' ) / ( K * EPS ) . */

	*resid = slansy_("1", "Upper", &mnmin, &work[1], &ldwork, &work[
		ldwork * mnmin + 1]);
	*resid = *resid / (real) k / eps;
    } else if (*(unsigned char *)transu == 'T') {

/*        Find the maximum element in abs( I - U'*U ) / ( m * EPS ) */

	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    i__2 = j;
	    for (i__ = 1; i__ <= i__2; ++i__) {
		if (i__ != j) {
		    tmp = 0.f;
		} else {
		    tmp = 1.f;
		}
		tmp -= sdot_(m, &u[i__ * u_dim1 + 1], &c__1, &u[j * u_dim1 + 
			1], &c__1);
/* Computing MAX */
		r__1 = *resid, r__2 = dabs(tmp);
		*resid = dmax(r__1,r__2);
/* L10: */
	    }
/* L20: */
	}
	*resid = *resid / (real) (*m) / eps;
    } else {

/*        Find the maximum element in abs( I - U*U' ) / ( n * EPS ) */

	i__1 = *m;
	for (j = 1; j <= i__1; ++j) {
	    i__2 = j;
	    for (i__ = 1; i__ <= i__2; ++i__) {
		if (i__ != j) {
		    tmp = 0.f;
		} else {
		    tmp = 1.f;
		}
		tmp -= sdot_(n, &u[j + u_dim1], ldu, &u[i__ + u_dim1], ldu);
/* Computing MAX */
		r__1 = *resid, r__2 = dabs(tmp);
		*resid = dmax(r__1,r__2);
/* L30: */
	    }
/* L40: */
	}
	*resid = *resid / (real) (*n) / eps;
    }
    return 0;

/*     End of SORT01 */

} /* sort01_ */
Beispiel #6
0
/* Subroutine */ int sgqrts_(integer *n, integer *m, integer *p, real *a, 
	real *af, real *q, real *r__, integer *lda, real *taua, real *b, real 
	*bf, real *z__, real *t, real *bwk, integer *ldb, real *taub, real *
	work, integer *lwork, real *rwork, real *result)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, r_dim1, r_offset, q_dim1, 
	    q_offset, b_dim1, b_offset, bf_dim1, bf_offset, t_dim1, t_offset, 
	    z_dim1, z_offset, bwk_dim1, bwk_offset, i__1, i__2;
    real r__1;

    /* Local variables */
    real ulp;
    integer info;
    real unfl, resid;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    real anorm, bnorm;
    extern /* Subroutine */ int ssyrk_(char *, char *, integer *, integer *, 
	    real *, real *, integer *, real *, real *, integer *);
    extern doublereal slamch_(char *), slange_(char *, integer *, 
	    integer *, real *, integer *, real *);
    extern /* Subroutine */ int sggqrf_(integer *, integer *, integer *, real 
	    *, integer *, real *, real *, integer *, real *, real *, integer *
, integer *), slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *), slaset_(char *, integer *, 
	    integer *, real *, real *, real *, integer *);
    extern doublereal slansy_(char *, char *, integer *, real *, integer *, 
	    real *);
    extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real 
	    *, integer *, real *, real *, integer *, integer *), sorgrq_(
	    integer *, integer *, integer *, real *, integer *, real *, real *
, integer *, integer *);


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SGQRTS tests SGGQRF, which computes the GQR factorization of an */
/*  N-by-M matrix A and a N-by-P matrix B: A = Q*R and B = Q*T*Z. */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The number of rows of the matrices A and B.  N >= 0. */

/*  M       (input) INTEGER */
/*          The number of columns of the matrix A.  M >= 0. */

/*  P       (input) INTEGER */
/*          The number of columns of the matrix B.  P >= 0. */

/*  A       (input) REAL array, dimension (LDA,M) */
/*          The N-by-M matrix A. */

/*  AF      (output) REAL array, dimension (LDA,N) */
/*          Details of the GQR factorization of A and B, as returned */
/*          by SGGQRF, see SGGQRF for further details. */

/*  Q       (output) REAL array, dimension (LDA,N) */
/*          The M-by-M orthogonal matrix Q. */

/*  R       (workspace) REAL array, dimension (LDA,MAX(M,N)) */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the arrays A, AF, R and Q. */
/*          LDA >= max(M,N). */

/*  TAUA    (output) REAL array, dimension (min(M,N)) */
/*          The scalar factors of the elementary reflectors, as returned */
/*          by SGGQRF. */

/*  B       (input) REAL array, dimension (LDB,P) */
/*          On entry, the N-by-P matrix A. */

/*  BF      (output) REAL array, dimension (LDB,N) */
/*          Details of the GQR factorization of A and B, as returned */
/*          by SGGQRF, see SGGQRF for further details. */

/*  Z       (output) REAL array, dimension (LDB,P) */
/*          The P-by-P orthogonal matrix Z. */

/*  T       (workspace) REAL array, dimension (LDB,max(P,N)) */

/*  BWK     (workspace) REAL array, dimension (LDB,N) */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the arrays B, BF, Z and T. */
/*          LDB >= max(P,N). */

/*  TAUB    (output) REAL array, dimension (min(P,N)) */
/*          The scalar factors of the elementary reflectors, as returned */
/*          by SGGRQF. */

/*  WORK    (workspace) REAL array, dimension (LWORK) */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK, LWORK >= max(N,M,P)**2. */

/*  RWORK   (workspace) REAL array, dimension (max(N,M,P)) */

/*  RESULT  (output) REAL array, dimension (4) */
/*          The test ratios: */
/*            RESULT(1) = norm( R - Q'*A ) / ( MAX(M,N)*norm(A)*ULP) */
/*            RESULT(2) = norm( T*Z - Q'*B ) / (MAX(P,N)*norm(B)*ULP) */
/*            RESULT(3) = norm( I - Q'*Q ) / ( M*ULP ) */
/*            RESULT(4) = norm( I - Z'*Z ) / ( P*ULP ) */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    r_dim1 = *lda;
    r_offset = 1 + r_dim1;
    r__ -= r_offset;
    q_dim1 = *lda;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    af_dim1 = *lda;
    af_offset = 1 + af_dim1;
    af -= af_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --taua;
    bwk_dim1 = *ldb;
    bwk_offset = 1 + bwk_dim1;
    bwk -= bwk_offset;
    t_dim1 = *ldb;
    t_offset = 1 + t_dim1;
    t -= t_offset;
    z_dim1 = *ldb;
    z_offset = 1 + z_dim1;
    z__ -= z_offset;
    bf_dim1 = *ldb;
    bf_offset = 1 + bf_dim1;
    bf -= bf_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --taub;
    --work;
    --rwork;
    --result;

    /* Function Body */
    ulp = slamch_("Precision");
    unfl = slamch_("Safe minimum");

/*     Copy the matrix A to the array AF. */

    slacpy_("Full", n, m, &a[a_offset], lda, &af[af_offset], lda);
    slacpy_("Full", n, p, &b[b_offset], ldb, &bf[bf_offset], ldb);

/* Computing MAX */
    r__1 = slange_("1", n, m, &a[a_offset], lda, &rwork[1]);
    anorm = dmax(r__1,unfl);
/* Computing MAX */
    r__1 = slange_("1", n, p, &b[b_offset], ldb, &rwork[1]);
    bnorm = dmax(r__1,unfl);

/*     Factorize the matrices A and B in the arrays AF and BF. */

    sggqrf_(n, m, p, &af[af_offset], lda, &taua[1], &bf[bf_offset], ldb, &
	    taub[1], &work[1], lwork, &info);

/*     Generate the N-by-N matrix Q */

    slaset_("Full", n, n, &c_b9, &c_b9, &q[q_offset], lda);
    i__1 = *n - 1;
    slacpy_("Lower", &i__1, m, &af[af_dim1 + 2], lda, &q[q_dim1 + 2], lda);
    i__1 = min(*n,*m);
    sorgqr_(n, n, &i__1, &q[q_offset], lda, &taua[1], &work[1], lwork, &info);

/*     Generate the P-by-P matrix Z */

    slaset_("Full", p, p, &c_b9, &c_b9, &z__[z_offset], ldb);
    if (*n <= *p) {
	if (*n > 0 && *n < *p) {
	    i__1 = *p - *n;
	    slacpy_("Full", n, &i__1, &bf[bf_offset], ldb, &z__[*p - *n + 1 + 
		    z_dim1], ldb);
	}
	if (*n > 1) {
	    i__1 = *n - 1;
	    i__2 = *n - 1;
	    slacpy_("Lower", &i__1, &i__2, &bf[(*p - *n + 1) * bf_dim1 + 2], 
		    ldb, &z__[*p - *n + 2 + (*p - *n + 1) * z_dim1], ldb);
	}
    } else {
	if (*p > 1) {
	    i__1 = *p - 1;
	    i__2 = *p - 1;
	    slacpy_("Lower", &i__1, &i__2, &bf[*n - *p + 2 + bf_dim1], ldb, &
		    z__[z_dim1 + 2], ldb);
	}
    }
    i__1 = min(*n,*p);
    sorgrq_(p, p, &i__1, &z__[z_offset], ldb, &taub[1], &work[1], lwork, &
	    info);

/*     Copy R */

    slaset_("Full", n, m, &c_b19, &c_b19, &r__[r_offset], lda);
    slacpy_("Upper", n, m, &af[af_offset], lda, &r__[r_offset], lda);

/*     Copy T */

    slaset_("Full", n, p, &c_b19, &c_b19, &t[t_offset], ldb);
    if (*n <= *p) {
	slacpy_("Upper", n, n, &bf[(*p - *n + 1) * bf_dim1 + 1], ldb, &t[(*p 
		- *n + 1) * t_dim1 + 1], ldb);
    } else {
	i__1 = *n - *p;
	slacpy_("Full", &i__1, p, &bf[bf_offset], ldb, &t[t_offset], ldb);
	slacpy_("Upper", p, p, &bf[*n - *p + 1 + bf_dim1], ldb, &t[*n - *p + 
		1 + t_dim1], ldb);
    }

/*     Compute R - Q'*A */

    sgemm_("Transpose", "No transpose", n, m, n, &c_b30, &q[q_offset], lda, &
	    a[a_offset], lda, &c_b31, &r__[r_offset], lda);

/*     Compute norm( R - Q'*A ) / ( MAX(M,N)*norm(A)*ULP ) . */

    resid = slange_("1", n, m, &r__[r_offset], lda, &rwork[1]);
    if (anorm > 0.f) {
/* Computing MAX */
	i__1 = max(1,*m);
	result[1] = resid / (real) max(i__1,*n) / anorm / ulp;
    } else {
	result[1] = 0.f;
    }

/*     Compute T*Z - Q'*B */

    sgemm_("No Transpose", "No transpose", n, p, p, &c_b31, &t[t_offset], ldb, 
	     &z__[z_offset], ldb, &c_b19, &bwk[bwk_offset], ldb);
    sgemm_("Transpose", "No transpose", n, p, n, &c_b30, &q[q_offset], lda, &
	    b[b_offset], ldb, &c_b31, &bwk[bwk_offset], ldb);

/*     Compute norm( T*Z - Q'*B ) / ( MAX(P,N)*norm(A)*ULP ) . */

    resid = slange_("1", n, p, &bwk[bwk_offset], ldb, &rwork[1]);
    if (bnorm > 0.f) {
/* Computing MAX */
	i__1 = max(1,*p);
	result[2] = resid / (real) max(i__1,*n) / bnorm / ulp;
    } else {
	result[2] = 0.f;
    }

/*     Compute I - Q'*Q */

    slaset_("Full", n, n, &c_b19, &c_b31, &r__[r_offset], lda);
    ssyrk_("Upper", "Transpose", n, n, &c_b30, &q[q_offset], lda, &c_b31, &
	    r__[r_offset], lda);

/*     Compute norm( I - Q'*Q ) / ( N * ULP ) . */

    resid = slansy_("1", "Upper", n, &r__[r_offset], lda, &rwork[1]);
    result[3] = resid / (real) max(1,*n) / ulp;

/*     Compute I - Z'*Z */

    slaset_("Full", p, p, &c_b19, &c_b31, &t[t_offset], ldb);
    ssyrk_("Upper", "Transpose", p, p, &c_b30, &z__[z_offset], ldb, &c_b31, &
	    t[t_offset], ldb);

/*     Compute norm( I - Z'*Z ) / ( P*ULP ) . */

    resid = slansy_("1", "Upper", p, &t[t_offset], ldb, &rwork[1]);
    result[4] = resid / (real) max(1,*p) / ulp;

    return 0;

/*     End of SGQRTS */

} /* sgqrts_ */
Beispiel #7
0
/* Subroutine */
int spftri_(char *transr, char *uplo, integer *n, real *a, integer *info)
{
    /* System generated locals */
    integer i__1, i__2;
    /* Local variables */
    integer k, n1, n2;
    logical normaltransr;
    extern logical lsame_(char *, char *);
    logical lower;
    extern /* Subroutine */
    int strmm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), ssyrk_(char *, char *, integer *, integer *, real *, real *, integer *, real *, real *, integer * ), xerbla_(char *, integer *);
    logical nisodd;
    extern /* Subroutine */
    int slauum_(char *, integer *, real *, integer *, integer *), stftri_(char *, char *, char *, integer *, real *, integer *);
    /* -- LAPACK computational routine (version 3.4.0) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* November 2011 */
    /* .. Scalar Arguments .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Intrinsic Functions .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Test the input parameters. */
    *info = 0;
    normaltransr = lsame_(transr, "N");
    lower = lsame_(uplo, "L");
    if (! normaltransr && ! lsame_(transr, "T"))
    {
        *info = -1;
    }
    else if (! lower && ! lsame_(uplo, "U"))
    {
        *info = -2;
    }
    else if (*n < 0)
    {
        *info = -3;
    }
    if (*info != 0)
    {
        i__1 = -(*info);
        xerbla_("SPFTRI", &i__1);
        return 0;
    }
    /* Quick return if possible */
    if (*n == 0)
    {
        return 0;
    }
    /* Invert the triangular Cholesky factor U or L. */
    stftri_(transr, uplo, "N", n, a, info);
    if (*info > 0)
    {
        return 0;
    }
    /* If N is odd, set NISODD = .TRUE. */
    /* If N is even, set K = N/2 and NISODD = .FALSE. */
    if (*n % 2 == 0)
    {
        k = *n / 2;
        nisodd = FALSE_;
    }
    else
    {
        nisodd = TRUE_;
    }
    /* Set N1 and N2 depending on LOWER */
    if (lower)
    {
        n2 = *n / 2;
        n1 = *n - n2;
    }
    else
    {
        n1 = *n / 2;
        n2 = *n - n1;
    }
    /* Start execution of triangular matrix multiply: inv(U)*inv(U)^C or */
    /* inv(L)^C*inv(L). There are eight cases. */
    if (nisodd)
    {
        /* N is odd */
        if (normaltransr)
        {
            /* N is odd and TRANSR = 'N' */
            if (lower)
            {
                /* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:N1-1) ) */
                /* T1 -> a(0,0), T2 -> a(0,1), S -> a(N1,0) */
                /* T1 -> a(0), T2 -> a(n), S -> a(N1) */
                slauum_("L", &n1, a, n, info);
                ssyrk_("L", "T", &n1, &n2, &c_b11, &a[n1], n, &c_b11, a, n);
                strmm_("L", "U", "N", "N", &n2, &n1, &c_b11, &a[*n], n, &a[n1] , n);
                slauum_("U", &n2, &a[*n], n, info);
            }
            else
            {
                /* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:N2-1) */
                /* T1 -> a(N1+1,0), T2 -> a(N1,0), S -> a(0,0) */
                /* T1 -> a(N2), T2 -> a(N1), S -> a(0) */
                slauum_("L", &n1, &a[n2], n, info);
                ssyrk_("L", "N", &n1, &n2, &c_b11, a, n, &c_b11, &a[n2], n);
                strmm_("R", "U", "T", "N", &n1, &n2, &c_b11, &a[n1], n, a, n);
                slauum_("U", &n2, &a[n1], n, info);
            }
        }
        else
        {
            /* N is odd and TRANSR = 'T' */
            if (lower)
            {
                /* SRPA for LOWER, TRANSPOSE, and N is odd */
                /* T1 -> a(0), T2 -> a(1), S -> a(0+N1*N1) */
                slauum_("U", &n1, a, &n1, info);
                ssyrk_("U", "N", &n1, &n2, &c_b11, &a[n1 * n1], &n1, &c_b11, a, &n1);
                strmm_("R", "L", "N", "N", &n1, &n2, &c_b11, &a[1], &n1, &a[ n1 * n1], &n1);
                slauum_("L", &n2, &a[1], &n1, info);
            }
            else
            {
                /* SRPA for UPPER, TRANSPOSE, and N is odd */
                /* T1 -> a(0+N2*N2), T2 -> a(0+N1*N2), S -> a(0) */
                slauum_("U", &n1, &a[n2 * n2], &n2, info);
                ssyrk_("U", "T", &n1, &n2, &c_b11, a, &n2, &c_b11, &a[n2 * n2] , &n2);
                strmm_("L", "L", "T", "N", &n2, &n1, &c_b11, &a[n1 * n2], &n2, a, &n2);
                slauum_("L", &n2, &a[n1 * n2], &n2, info);
            }
        }
    }
    else
    {
        /* N is even */
        if (normaltransr)
        {
            /* N is even and TRANSR = 'N' */
            if (lower)
            {
                /* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) */
                /* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) */
                /* T1 -> a(1), T2 -> a(0), S -> a(k+1) */
                i__1 = *n + 1;
                slauum_("L", &k, &a[1], &i__1, info);
                i__1 = *n + 1;
                i__2 = *n + 1;
                ssyrk_("L", "T", &k, &k, &c_b11, &a[k + 1], &i__1, &c_b11, &a[ 1], &i__2);
                i__1 = *n + 1;
                i__2 = *n + 1;
                strmm_("L", "U", "N", "N", &k, &k, &c_b11, a, &i__1, &a[k + 1] , &i__2);
                i__1 = *n + 1;
                slauum_("U", &k, a, &i__1, info);
            }
            else
            {
                /* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) */
                /* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) */
                /* T1 -> a(k+1), T2 -> a(k), S -> a(0) */
                i__1 = *n + 1;
                slauum_("L", &k, &a[k + 1], &i__1, info);
                i__1 = *n + 1;
                i__2 = *n + 1;
                ssyrk_("L", "N", &k, &k, &c_b11, a, &i__1, &c_b11, &a[k + 1], &i__2);
                i__1 = *n + 1;
                i__2 = *n + 1;
                strmm_("R", "U", "T", "N", &k, &k, &c_b11, &a[k], &i__1, a, & i__2);
                i__1 = *n + 1;
                slauum_("U", &k, &a[k], &i__1, info);
            }
        }
        else
        {
            /* N is even and TRANSR = 'T' */
            if (lower)
            {
                /* SRPA for LOWER, TRANSPOSE, and N is even (see paper) */
                /* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1), */
                /* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1));
                lda=k */
                slauum_("U", &k, &a[k], &k, info);
                ssyrk_("U", "N", &k, &k, &c_b11, &a[k * (k + 1)], &k, &c_b11, &a[k], &k);
                strmm_("R", "L", "N", "N", &k, &k, &c_b11, a, &k, &a[k * (k + 1)], &k);
                slauum_("L", &k, a, &k, info);
            }
            else
            {
                /* SRPA for UPPER, TRANSPOSE, and N is even (see paper) */
                /* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0), */
                /* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0));
                lda=k */
                slauum_("U", &k, &a[k * (k + 1)], &k, info);
                ssyrk_("U", "T", &k, &k, &c_b11, a, &k, &c_b11, &a[k * (k + 1) ], &k);
                strmm_("L", "L", "T", "N", &k, &k, &c_b11, &a[k * k], &k, a, & k);
                slauum_("L", &k, &a[k * k], &k, info);
            }
        }
    }
    return 0;
    /* End of SPFTRI */
}
Beispiel #8
0
/* Subroutine */ int spotrf_(char *uplo, integer *n, real *a, integer *lda, 
	integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       March 31, 1993   


    Purpose   
    =======   

    SPOTRF computes the Cholesky factorization of a real symmetric   
    positive definite matrix A.   

    The factorization has the form   
       A = U**T * U,  if UPLO = 'U', or   
       A = L  * L**T,  if UPLO = 'L',   
    where U is an upper triangular matrix and L is lower triangular.   

    This is the block version of the algorithm, calling Level 3 BLAS.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangle of A is stored;   
            = 'L':  Lower triangle of A is stored.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading   
            N-by-N upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading N-by-N lower triangular part of A contains the lower   
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   

            On exit, if INFO = 0, the factor U or L from the Cholesky   
            factorization A = U**T*U or A = L*L**T.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  if INFO = i, the leading minor of order i is not   
                  positive definite, and the factorization could not be   
                  completed.   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static real c_b13 = -1.f;
    static real c_b14 = 1.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    static integer j;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    static logical upper;
    extern /* Subroutine */ int strsm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
	    ), ssyrk_(char *, char *, integer 
	    *, integer *, real *, real *, integer *, real *, real *, integer *
	    );
    static integer jb;
    extern /* Subroutine */ int spotf2_(char *, integer *, real *, integer *, 
	    integer *);
    static integer nb;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SPOTRF", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "SPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);
    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code. */

	spotf2_(uplo, n, &a[a_offset], lda, info);
    } else {

/*        Use blocked code. */

	if (upper) {

/*           Compute the Cholesky factorization A = U'*U. */

	    i__1 = *n;
	    i__2 = nb;
	    for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Update and factorize the current diagonal block and test   
                for non-positive-definiteness.   

   Computing MIN */
		i__3 = nb, i__4 = *n - j + 1;
		jb = min(i__3,i__4);
		i__3 = j - 1;
		ssyrk_("Upper", "Transpose", &jb, &i__3, &c_b13, &a_ref(1, j),
			 lda, &c_b14, &a_ref(j, j), lda)
			;
		spotf2_("Upper", &jb, &a_ref(j, j), lda, info);
		if (*info != 0) {
		    goto L30;
		}
		if (j + jb <= *n) {

/*                 Compute the current block row. */

		    i__3 = *n - j - jb + 1;
		    i__4 = j - 1;
		    sgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, &
			    c_b13, &a_ref(1, j), lda, &a_ref(1, j + jb), lda, 
			    &c_b14, &a_ref(j, j + jb), lda);
		    i__3 = *n - j - jb + 1;
		    strsm_("Left", "Upper", "Transpose", "Non-unit", &jb, &
			    i__3, &c_b14, &a_ref(j, j), lda, &a_ref(j, j + jb)
			    , lda)
			    ;
		}
/* L10: */
	    }

	} else {

/*           Compute the Cholesky factorization A = L*L'. */

	    i__2 = *n;
	    i__1 = nb;
	    for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

/*              Update and factorize the current diagonal block and test   
                for non-positive-definiteness.   

   Computing MIN */
		i__3 = nb, i__4 = *n - j + 1;
		jb = min(i__3,i__4);
		i__3 = j - 1;
		ssyrk_("Lower", "No transpose", &jb, &i__3, &c_b13, &a_ref(j, 
			1), lda, &c_b14, &a_ref(j, j), lda);
		spotf2_("Lower", &jb, &a_ref(j, j), lda, info);
		if (*info != 0) {
		    goto L30;
		}
		if (j + jb <= *n) {

/*                 Compute the current block column. */

		    i__3 = *n - j - jb + 1;
		    i__4 = j - 1;
		    sgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &
			    c_b13, &a_ref(j + jb, 1), lda, &a_ref(j, 1), lda, 
			    &c_b14, &a_ref(j + jb, j), lda);
		    i__3 = *n - j - jb + 1;
		    strsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, &
			    jb, &c_b14, &a_ref(j, j), lda, &a_ref(j + jb, j), 
			    lda);
		}
/* L20: */
	    }
	}
    }
    goto L40;

L30:
    *info = *info + j - 1;

L40:
    return 0;

/*     End of SPOTRF */

} /* spotrf_ */
Beispiel #9
0
int main( int argc, char** argv )
{
	obj_t a, c;
	obj_t c_save;
	obj_t alpha, beta;
	dim_t m, k;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input, k_input;
	num_t dt;
	int   r, n_repeats;
	uplo_t uploc;
	trans_t transa;
	f77_char f77_uploc;
	f77_char f77_transa;

	double dtime;
	double dtime_save;
	double gflops;

	bli_init();

	//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

	n_repeats = 3;

#ifndef PRINT
	p_begin = 200;
	p_end   = 2000;
	p_inc   = 200;

	m_input = -1;
	k_input = -1;
#else
	p_begin = 16;
	p_end   = 16;
	p_inc   = 1;

	m_input = 3;
	k_input = 1;
#endif

#if 1
	//dt = BLIS_FLOAT;
	dt = BLIS_DOUBLE;
#else
	//dt = BLIS_SCOMPLEX;
	dt = BLIS_DCOMPLEX;
#endif

	uploc = BLIS_LOWER;
	//uploc = BLIS_UPPER;

	transa = BLIS_NO_TRANSPOSE;

	bli_param_map_blis_to_netlib_uplo( uploc, &f77_uploc );
	bli_param_map_blis_to_netlib_trans( transa, &f77_transa );


	for ( p = p_begin; p <= p_end; p += p_inc )
	{
		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( k_input < 0 ) k = p * ( dim_t )abs(k_input);
		else               k =     ( dim_t )    k_input;

		bli_obj_create( dt, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt, 1, 1, 0, 0, &beta );

		if ( bli_does_trans( transa ) )
			bli_obj_create( dt, k, m, 0, 0, &a );
		else
			bli_obj_create( dt, m, k, 0, 0, &a );
		bli_obj_create( dt, m, m, 0, 0, &c );
		bli_obj_create( dt, m, m, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &c );

		bli_obj_set_struc( BLIS_HERMITIAN, c );
		bli_obj_set_uplo( uploc, c );

		bli_obj_set_conjtrans( transa, a );


		bli_setsc(  (2.0/1.0), 0.0, &alpha );
		bli_setsc( -(1.0/1.0), 0.0, &beta );


		bli_copym( &c, &c_save );
	
		dtime_save = 1.0e9;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );


			dtime = bli_clock();


#ifdef PRINT
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS

			bli_herk( &alpha,
			          &a,
			          &beta,
			          &c );

#else
		if ( bli_is_float( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldc    = bli_obj_col_stride( c );
			float*   alphap = bli_obj_buffer( alpha );
			float*   ap     = bli_obj_buffer( a );
			float*   betap  = bli_obj_buffer( beta );
			float*   cp     = bli_obj_buffer( c );

			ssyrk_( &f77_uploc,
			        &f77_transa,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_double( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldc    = bli_obj_col_stride( c );
			double*  alphap = bli_obj_buffer( alpha );
			double*  ap     = bli_obj_buffer( a );
			double*  betap  = bli_obj_buffer( beta );
			double*  cp     = bli_obj_buffer( c );

			dsyrk_( &f77_uploc,
			        &f77_transa,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_scomplex( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldc    = bli_obj_col_stride( c );
			float*     alphap = bli_obj_buffer( alpha );
			scomplex*  ap     = bli_obj_buffer( a );
			float*     betap  = bli_obj_buffer( beta );
			scomplex*  cp     = bli_obj_buffer( c );

			cherk_( &f77_uploc,
			        &f77_transa,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_dcomplex( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldc    = bli_obj_col_stride( c );
			double*    alphap = bli_obj_buffer( alpha );
			dcomplex*  ap     = bli_obj_buffer( a );
			double*    betap  = bli_obj_buffer( beta );
			dcomplex*  cp     = bli_obj_buffer( c );

			zherk_( &f77_uploc,
			        &f77_transa,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        betap,
			        cp, &ldc );
		}
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%4.1f", "" );
			exit(1);
#endif


			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		gflops = ( 1.0 * m * k * m ) / ( dtime_save * 1.0e9 );

		if ( bli_is_complex( dt ) ) gflops *= 4.0;

#ifdef BLIS
		printf( "data_herk_blis" );
#else
		printf( "data_herk_%s", BLAS );
#endif
		printf( "( %2lu, 1:4 ) = [ %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )k, dtime_save, gflops );


		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	bli_finalize();

	return 0;
}
Beispiel #10
0
/* Subroutine */ int spbtrf_(char *uplo, integer *n, integer *kd, real *ab, 
	integer *ldab, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       March 31, 1993   


    Purpose   
    =======   

    SPBTRF computes the Cholesky factorization of a real symmetric   
    positive definite band matrix A.   

    The factorization has the form   
       A = U**T * U,  if UPLO = 'U', or   
       A = L  * L**T,  if UPLO = 'L',   
    where U is an upper triangular matrix and L is lower triangular.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangle of A is stored;   
            = 'L':  Lower triangle of A is stored.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    KD      (input) INTEGER   
            The number of superdiagonals of the matrix A if UPLO = 'U',   
            or the number of subdiagonals if UPLO = 'L'.  KD >= 0.   

    AB      (input/output) REAL array, dimension (LDAB,N)   
            On entry, the upper or lower triangle of the symmetric band   
            matrix A, stored in the first KD+1 rows of the array.  The   
            j-th column of A is stored in the j-th column of the array AB   
            as follows:   
            if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;   
            if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).   

            On exit, if INFO = 0, the triangular factor U or L from the   
            Cholesky factorization A = U**T*U or A = L*L**T of the band   
            matrix A, in the same storage format as A.   

    LDAB    (input) INTEGER   
            The leading dimension of the array AB.  LDAB >= KD+1.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  if INFO = i, the leading minor of order i is not   
                  positive definite, and the factorization could not be   
                  completed.   

    Further Details   
    ===============   

    The band storage scheme is illustrated by the following example, when   
    N = 6, KD = 2, and UPLO = 'U':   

    On entry:                       On exit:   

        *    *   a13  a24  a35  a46      *    *   u13  u24  u35  u46   
        *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56   
       a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66   

    Similarly, if UPLO = 'L' the format of A is as follows:   

    On entry:                       On exit:   

       a11  a22  a33  a44  a55  a66     l11  l22  l33  l44  l55  l66   
       a21  a32  a43  a54  a65   *      l21  l32  l43  l54  l65   *   
       a31  a42  a53  a64   *    *      l31  l42  l53  l64   *    *   

    Array elements marked * are not used by the routine.   

    Contributed by   
    Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static real c_b18 = 1.f;
    static real c_b21 = -1.f;
    static integer c__33 = 33;
    
    /* System generated locals */
    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    static real work[1056]	/* was [33][32] */;
    static integer i__, j;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    static integer i2, i3;
    extern /* Subroutine */ int strsm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
	    ), ssyrk_(char *, char *, integer 
	    *, integer *, real *, real *, integer *, real *, real *, integer *
	    ), spbtf2_(char *, integer *, integer *, real *, 
	    integer *, integer *);
    static integer ib;
    extern /* Subroutine */ int spotf2_(char *, integer *, real *, integer *, 
	    integer *);
    static integer nb, ii, jj;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
#define work_ref(a_1,a_2) work[(a_2)*33 + a_1 - 34]
#define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1]


    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;

    /* Function Body */
    *info = 0;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*kd < 0) {
	*info = -3;
    } else if (*ldab < *kd + 1) {
	*info = -5;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SPBTRF", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment */

    nb = ilaenv_(&c__1, "SPBTRF", uplo, n, kd, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);

/*     The block size must not exceed the semi-bandwidth KD, and must not   
       exceed the limit set by the size of the local array WORK. */

    nb = min(nb,32);

    if (nb <= 1 || nb > *kd) {

/*        Use unblocked code */

	spbtf2_(uplo, n, kd, &ab[ab_offset], ldab, info);
    } else {

/*        Use blocked code */

	if (lsame_(uplo, "U")) {

/*           Compute the Cholesky factorization of a symmetric band   
             matrix, given the upper triangle of the matrix in band   
             storage.   

             Zero the upper triangle of the work array. */

	    i__1 = nb;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = j - 1;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    work_ref(i__, j) = 0.f;
/* L10: */
		}
/* L20: */
	    }

/*           Process the band matrix one diagonal block at a time. */

	    i__1 = *n;
	    i__2 = nb;
	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);

/*              Factorize the diagonal block */

		i__3 = *ldab - 1;
		spotf2_(uplo, &ib, &ab_ref(*kd + 1, i__), &i__3, &ii);
		if (ii != 0) {
		    *info = i__ + ii - 1;
		    goto L150;
		}
		if (i__ + ib <= *n) {

/*                 Update the relevant part of the trailing submatrix.   
                   If A11 denotes the diagonal block which has just been   
                   factorized, then we need to update the remaining   
                   blocks in the diagram:   

                      A11   A12   A13   
                            A22   A23   
                                  A33   

                   The numbers of rows and columns in the partitioning   
                   are IB, I2, I3 respectively. The blocks A12, A22 and   
                   A23 are empty if IB = KD. The upper triangle of A13   
                   lies outside the band.   

   Computing MIN */
		    i__3 = *kd - ib, i__4 = *n - i__ - ib + 1;
		    i2 = min(i__3,i__4);
/* Computing MIN */
		    i__3 = ib, i__4 = *n - i__ - *kd + 1;
		    i3 = min(i__3,i__4);

		    if (i2 > 0) {

/*                    Update A12 */

			i__3 = *ldab - 1;
			i__4 = *ldab - 1;
			strsm_("Left", "Upper", "Transpose", "Non-unit", &ib, 
				&i2, &c_b18, &ab_ref(*kd + 1, i__), &i__3, &
				ab_ref(*kd + 1 - ib, i__ + ib), &i__4);

/*                    Update A22 */

			i__3 = *ldab - 1;
			i__4 = *ldab - 1;
			ssyrk_("Upper", "Transpose", &i2, &ib, &c_b21, &
				ab_ref(*kd + 1 - ib, i__ + ib), &i__3, &c_b18,
				 &ab_ref(*kd + 1, i__ + ib), &i__4);
		    }

		    if (i3 > 0) {

/*                    Copy the lower triangle of A13 into the work array. */

			i__3 = i3;
			for (jj = 1; jj <= i__3; ++jj) {
			    i__4 = ib;
			    for (ii = jj; ii <= i__4; ++ii) {
				work_ref(ii, jj) = ab_ref(ii - jj + 1, jj + 
					i__ + *kd - 1);
/* L30: */
			    }
/* L40: */
			}

/*                    Update A13 (in the work array). */

			i__3 = *ldab - 1;
			strsm_("Left", "Upper", "Transpose", "Non-unit", &ib, 
				&i3, &c_b18, &ab_ref(*kd + 1, i__), &i__3, 
				work, &c__33);

/*                    Update A23 */

			if (i2 > 0) {
			    i__3 = *ldab - 1;
			    i__4 = *ldab - 1;
			    sgemm_("Transpose", "No Transpose", &i2, &i3, &ib,
				     &c_b21, &ab_ref(*kd + 1 - ib, i__ + ib), 
				    &i__3, work, &c__33, &c_b18, &ab_ref(ib + 
				    1, i__ + *kd), &i__4);
			}

/*                    Update A33 */

			i__3 = *ldab - 1;
			ssyrk_("Upper", "Transpose", &i3, &ib, &c_b21, work, &
				c__33, &c_b18, &ab_ref(*kd + 1, i__ + *kd), &
				i__3);

/*                    Copy the lower triangle of A13 back into place. */

			i__3 = i3;
			for (jj = 1; jj <= i__3; ++jj) {
			    i__4 = ib;
			    for (ii = jj; ii <= i__4; ++ii) {
				ab_ref(ii - jj + 1, jj + i__ + *kd - 1) = 
					work_ref(ii, jj);
/* L50: */
			    }
/* L60: */
			}
		    }
		}
/* L70: */
	    }
	} else {

/*           Compute the Cholesky factorization of a symmetric band   
             matrix, given the lower triangle of the matrix in band   
             storage.   

             Zero the lower triangle of the work array. */

	    i__2 = nb;
	    for (j = 1; j <= i__2; ++j) {
		i__1 = nb;
		for (i__ = j + 1; i__ <= i__1; ++i__) {
		    work_ref(i__, j) = 0.f;
/* L80: */
		}
/* L90: */
	    }

/*           Process the band matrix one diagonal block at a time. */

	    i__2 = *n;
	    i__1 = nb;
	    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);

/*              Factorize the diagonal block */

		i__3 = *ldab - 1;
		spotf2_(uplo, &ib, &ab_ref(1, i__), &i__3, &ii);
		if (ii != 0) {
		    *info = i__ + ii - 1;
		    goto L150;
		}
		if (i__ + ib <= *n) {

/*                 Update the relevant part of the trailing submatrix.   
                   If A11 denotes the diagonal block which has just been   
                   factorized, then we need to update the remaining   
                   blocks in the diagram:   

                      A11   
                      A21   A22   
                      A31   A32   A33   

                   The numbers of rows and columns in the partitioning   
                   are IB, I2, I3 respectively. The blocks A21, A22 and   
                   A32 are empty if IB = KD. The lower triangle of A31   
                   lies outside the band.   

   Computing MIN */
		    i__3 = *kd - ib, i__4 = *n - i__ - ib + 1;
		    i2 = min(i__3,i__4);
/* Computing MIN */
		    i__3 = ib, i__4 = *n - i__ - *kd + 1;
		    i3 = min(i__3,i__4);

		    if (i2 > 0) {

/*                    Update A21 */

			i__3 = *ldab - 1;
			i__4 = *ldab - 1;
			strsm_("Right", "Lower", "Transpose", "Non-unit", &i2,
				 &ib, &c_b18, &ab_ref(1, i__), &i__3, &ab_ref(
				ib + 1, i__), &i__4);

/*                    Update A22 */

			i__3 = *ldab - 1;
			i__4 = *ldab - 1;
			ssyrk_("Lower", "No Transpose", &i2, &ib, &c_b21, &
				ab_ref(ib + 1, i__), &i__3, &c_b18, &ab_ref(1,
				 i__ + ib), &i__4);
		    }

		    if (i3 > 0) {

/*                    Copy the upper triangle of A31 into the work array. */

			i__3 = ib;
			for (jj = 1; jj <= i__3; ++jj) {
			    i__4 = min(jj,i3);
			    for (ii = 1; ii <= i__4; ++ii) {
				work_ref(ii, jj) = ab_ref(*kd + 1 - jj + ii, 
					jj + i__ - 1);
/* L100: */
			    }
/* L110: */
			}

/*                    Update A31 (in the work array). */

			i__3 = *ldab - 1;
			strsm_("Right", "Lower", "Transpose", "Non-unit", &i3,
				 &ib, &c_b18, &ab_ref(1, i__), &i__3, work, &
				c__33);

/*                    Update A32 */

			if (i2 > 0) {
			    i__3 = *ldab - 1;
			    i__4 = *ldab - 1;
			    sgemm_("No transpose", "Transpose", &i3, &i2, &ib,
				     &c_b21, work, &c__33, &ab_ref(ib + 1, 
				    i__), &i__3, &c_b18, &ab_ref(*kd + 1 - ib,
				     i__ + ib), &i__4);
			}

/*                    Update A33 */

			i__3 = *ldab - 1;
			ssyrk_("Lower", "No Transpose", &i3, &ib, &c_b21, 
				work, &c__33, &c_b18, &ab_ref(1, i__ + *kd), &
				i__3);

/*                    Copy the upper triangle of A31 back into place. */

			i__3 = ib;
			for (jj = 1; jj <= i__3; ++jj) {
			    i__4 = min(jj,i3);
			    for (ii = 1; ii <= i__4; ++ii) {
				ab_ref(*kd + 1 - jj + ii, jj + i__ - 1) = 
					work_ref(ii, jj);
/* L120: */
			    }
/* L130: */
			}
		    }
		}
/* L140: */
	    }
	}
    }
    return 0;

L150:
    return 0;

/*     End of SPBTRF */

} /* spbtrf_ */
Beispiel #11
0
/* Subroutine */ int sgsvts_(integer *m, integer *p, integer *n, real *a, 
	real *af, integer *lda, real *b, real *bf, integer *ldb, real *u, 
	integer *ldu, real *v, integer *ldv, real *q, integer *ldq, real *
	alpha, real *beta, real *r__, integer *ldr, integer *iwork, real *
	work, integer *lwork, real *rwork, real *result)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, bf_dim1, 
	    bf_offset, q_dim1, q_offset, r_dim1, r_offset, u_dim1, u_offset, 
	    v_dim1, v_offset, i__1, i__2;
    real r__1;

    /* Local variables */
    integer i__, j, k, l;
    real ulp;
    integer info;
    real unfl, temp, resid;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    real anorm, bnorm;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *), ssyrk_(char *, char *, integer *, integer *, real *, 
	    real *, integer *, real *, real *, integer *);
    extern doublereal slamch_(char *), slange_(char *, integer *, 
	    integer *, real *, integer *, real *);
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *), slaset_(char *, integer *, 
	    integer *, real *, real *, real *, integer *), sggsvd_(
	    char *, char *, char *, integer *, integer *, integer *, integer *
, integer *, real *, integer *, real *, integer *, real *, real *, 
	     real *, integer *, real *, integer *, real *, integer *, real *, 
	    integer *, integer *);
    extern doublereal slansy_(char *, char *, integer *, real *, integer *, 
	    real *);
    real ulpinv;


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SGSVTS tests SGGSVD, which computes the GSVD of an M-by-N matrix A */
/*  and a P-by-N matrix B: */
/*               U'*A*Q = D1*R and V'*B*Q = D2*R. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A.  M >= 0. */

/*  P       (input) INTEGER */
/*          The number of rows of the matrix B.  P >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrices A and B.  N >= 0. */

/*  A       (input) REAL array, dimension (LDA,M) */
/*          The M-by-N matrix A. */

/*  AF      (output) REAL array, dimension (LDA,N) */
/*          Details of the GSVD of A and B, as returned by SGGSVD, */
/*          see SGGSVD for further details. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the arrays A and AF. */
/*          LDA >= max( 1,M ). */

/*  B       (input) REAL array, dimension (LDB,P) */
/*          On entry, the P-by-N matrix B. */

/*  BF      (output) REAL array, dimension (LDB,N) */
/*          Details of the GSVD of A and B, as returned by SGGSVD, */
/*          see SGGSVD for further details. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the arrays B and BF. */
/*          LDB >= max(1,P). */

/*  U       (output) REAL array, dimension(LDU,M) */
/*          The M by M orthogonal matrix U. */

/*  LDU     (input) INTEGER */
/*          The leading dimension of the array U. LDU >= max(1,M). */

/*  V       (output) REAL array, dimension(LDV,M) */
/*          The P by P orthogonal matrix V. */

/*  LDV     (input) INTEGER */
/*          The leading dimension of the array V. LDV >= max(1,P). */

/*  Q       (output) REAL array, dimension(LDQ,N) */
/*          The N by N orthogonal matrix Q. */

/*  LDQ     (input) INTEGER */
/*          The leading dimension of the array Q. LDQ >= max(1,N). */

/*  ALPHA   (output) REAL array, dimension (N) */
/*  BETA    (output) REAL array, dimension (N) */
/*          The generalized singular value pairs of A and B, the */
/*          ``diagonal'' matrices D1 and D2 are constructed from */
/*          ALPHA and BETA, see subroutine SGGSVD for details. */

/*  R       (output) REAL array, dimension(LDQ,N) */
/*          The upper triangular matrix R. */

/*  LDR     (input) INTEGER */
/*          The leading dimension of the array R. LDR >= max(1,N). */

/*  IWORK   (workspace) INTEGER array, dimension (N) */

/*  WORK    (workspace) REAL array, dimension (LWORK) */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK, */
/*          LWORK >= max(M,P,N)*max(M,P,N). */

/*  RWORK   (workspace) REAL array, dimension (max(M,P,N)) */

/*  RESULT  (output) REAL array, dimension (6) */
/*          The test ratios: */
/*          RESULT(1) = norm( U'*A*Q - D1*R ) / ( MAX(M,N)*norm(A)*ULP) */
/*          RESULT(2) = norm( V'*B*Q - D2*R ) / ( MAX(P,N)*norm(B)*ULP) */
/*          RESULT(3) = norm( I - U'*U ) / ( M*ULP ) */
/*          RESULT(4) = norm( I - V'*V ) / ( P*ULP ) */
/*          RESULT(5) = norm( I - Q'*Q ) / ( N*ULP ) */
/*          RESULT(6) = 0        if ALPHA is in decreasing order; */
/*                    = ULPINV   otherwise. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    af_dim1 = *lda;
    af_offset = 1 + af_dim1;
    af -= af_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    bf_dim1 = *ldb;
    bf_offset = 1 + bf_dim1;
    bf -= bf_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1;
    u -= u_offset;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --alpha;
    --beta;
    r_dim1 = *ldr;
    r_offset = 1 + r_dim1;
    r__ -= r_offset;
    --iwork;
    --work;
    --rwork;
    --result;

    /* Function Body */
    ulp = slamch_("Precision");
    ulpinv = 1.f / ulp;
    unfl = slamch_("Safe minimum");

/*     Copy the matrix A to the array AF. */

    slacpy_("Full", m, n, &a[a_offset], lda, &af[af_offset], lda);
    slacpy_("Full", p, n, &b[b_offset], ldb, &bf[bf_offset], ldb);

/* Computing MAX */
    r__1 = slange_("1", m, n, &a[a_offset], lda, &rwork[1]);
    anorm = dmax(r__1,unfl);
/* Computing MAX */
    r__1 = slange_("1", p, n, &b[b_offset], ldb, &rwork[1]);
    bnorm = dmax(r__1,unfl);

/*     Factorize the matrices A and B in the arrays AF and BF. */

    sggsvd_("U", "V", "Q", m, n, p, &k, &l, &af[af_offset], lda, &bf[
	    bf_offset], ldb, &alpha[1], &beta[1], &u[u_offset], ldu, &v[
	    v_offset], ldv, &q[q_offset], ldq, &work[1], &iwork[1], &info);

/*     Copy R */

/* Computing MIN */
    i__2 = k + l;
    i__1 = min(i__2,*m);
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = k + l;
	for (j = i__; j <= i__2; ++j) {
	    r__[i__ + j * r_dim1] = af[i__ + (*n - k - l + j) * af_dim1];
/* L10: */
	}
/* L20: */
    }

    if (*m - k - l < 0) {
	i__1 = k + l;
	for (i__ = *m + 1; i__ <= i__1; ++i__) {
	    i__2 = k + l;
	    for (j = i__; j <= i__2; ++j) {
		r__[i__ + j * r_dim1] = bf[i__ - k + (*n - k - l + j) * 
			bf_dim1];
/* L30: */
	    }
/* L40: */
	}
    }

/*     Compute A:= U'*A*Q - D1*R */

    sgemm_("No transpose", "No transpose", m, n, n, &c_b17, &a[a_offset], lda, 
	     &q[q_offset], ldq, &c_b18, &work[1], lda)
	    ;

    sgemm_("Transpose", "No transpose", m, n, m, &c_b17, &u[u_offset], ldu, &
	    work[1], lda, &c_b18, &a[a_offset], lda);

    i__1 = k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = k + l;
	for (j = i__; j <= i__2; ++j) {
	    a[i__ + (*n - k - l + j) * a_dim1] -= r__[i__ + j * r_dim1];
/* L50: */
	}
/* L60: */
    }

/* Computing MIN */
    i__2 = k + l;
    i__1 = min(i__2,*m);
    for (i__ = k + 1; i__ <= i__1; ++i__) {
	i__2 = k + l;
	for (j = i__; j <= i__2; ++j) {
	    a[i__ + (*n - k - l + j) * a_dim1] -= alpha[i__] * r__[i__ + j * 
		    r_dim1];
/* L70: */
	}
/* L80: */
    }

/*     Compute norm( U'*A*Q - D1*R ) / ( MAX(1,M,N)*norm(A)*ULP ) . */

    resid = slange_("1", m, n, &a[a_offset], lda, &rwork[1]);

    if (anorm > 0.f) {
/* Computing MAX */
	i__1 = max(1,*m);
	result[1] = resid / (real) max(i__1,*n) / anorm / ulp;
    } else {
	result[1] = 0.f;
    }

/*     Compute B := V'*B*Q - D2*R */

    sgemm_("No transpose", "No transpose", p, n, n, &c_b17, &b[b_offset], ldb, 
	     &q[q_offset], ldq, &c_b18, &work[1], ldb)
	    ;

    sgemm_("Transpose", "No transpose", p, n, p, &c_b17, &v[v_offset], ldv, &
	    work[1], ldb, &c_b18, &b[b_offset], ldb);

    i__1 = l;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = l;
	for (j = i__; j <= i__2; ++j) {
	    b[i__ + (*n - l + j) * b_dim1] -= beta[k + i__] * r__[k + i__ + (
		    k + j) * r_dim1];
/* L90: */
	}
/* L100: */
    }

/*     Compute norm( V'*B*Q - D2*R ) / ( MAX(P,N)*norm(B)*ULP ) . */

    resid = slange_("1", p, n, &b[b_offset], ldb, &rwork[1]);
    if (bnorm > 0.f) {
/* Computing MAX */
	i__1 = max(1,*p);
	result[2] = resid / (real) max(i__1,*n) / bnorm / ulp;
    } else {
	result[2] = 0.f;
    }

/*     Compute I - U'*U */

    slaset_("Full", m, m, &c_b18, &c_b17, &work[1], ldq);
    ssyrk_("Upper", "Transpose", m, m, &c_b44, &u[u_offset], ldu, &c_b17, &
	    work[1], ldu);

/*     Compute norm( I - U'*U ) / ( M * ULP ) . */

    resid = slansy_("1", "Upper", m, &work[1], ldu, &rwork[1]);
    result[3] = resid / (real) max(1,*m) / ulp;

/*     Compute I - V'*V */

    slaset_("Full", p, p, &c_b18, &c_b17, &work[1], ldv);
    ssyrk_("Upper", "Transpose", p, p, &c_b44, &v[v_offset], ldv, &c_b17, &
	    work[1], ldv);

/*     Compute norm( I - V'*V ) / ( P * ULP ) . */

    resid = slansy_("1", "Upper", p, &work[1], ldv, &rwork[1]);
    result[4] = resid / (real) max(1,*p) / ulp;

/*     Compute I - Q'*Q */

    slaset_("Full", n, n, &c_b18, &c_b17, &work[1], ldq);
    ssyrk_("Upper", "Transpose", n, n, &c_b44, &q[q_offset], ldq, &c_b17, &
	    work[1], ldq);

/*     Compute norm( I - Q'*Q ) / ( N * ULP ) . */

    resid = slansy_("1", "Upper", n, &work[1], ldq, &rwork[1]);
    result[5] = resid / (real) max(1,*n) / ulp;

/*     Check sorting */

    scopy_(n, &alpha[1], &c__1, &work[1], &c__1);
/* Computing MIN */
    i__2 = k + l;
    i__1 = min(i__2,*m);
    for (i__ = k + 1; i__ <= i__1; ++i__) {
	j = iwork[i__];
	if (i__ != j) {
	    temp = work[i__];
	    work[i__] = work[j];
	    work[j] = temp;
	}
/* L110: */
    }

    result[6] = 0.f;
/* Computing MIN */
    i__2 = k + l;
    i__1 = min(i__2,*m) - 1;
    for (i__ = k + 1; i__ <= i__1; ++i__) {
	if (work[i__] < work[i__ + 1]) {
	    result[6] = ulpinv;
	}
/* L120: */
    }

    return 0;

/*     End of SGSVTS */

} /* sgsvts_ */
Beispiel #12
0
/* Subroutine */ int spbtrf_(char *uplo, integer *n, integer *kd, real *ab, 
	integer *ldab, integer *info)
{
    /* System generated locals */
    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4;

    /* Local variables */
    integer i__, j, i2, i3, ib, nb, ii, jj;
    real work[1056]	/* was [33][32] */;

/*  -- LAPACK routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  SPBTRF computes the Cholesky factorization of a real symmetric */
/*  positive definite band matrix A. */

/*  The factorization has the form */
/*     A = U**T * U,  if UPLO = 'U', or */
/*     A = L  * L**T,  if UPLO = 'L', */
/*  where U is an upper triangular matrix and L is lower triangular. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  Upper triangle of A is stored; */
/*          = 'L':  Lower triangle of A is stored. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  KD      (input) INTEGER */
/*          The number of superdiagonals of the matrix A if UPLO = 'U', */
/*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0. */

/*  AB      (input/output) REAL array, dimension (LDAB,N) */
/*          On entry, the upper or lower triangle of the symmetric band */
/*          matrix A, stored in the first KD+1 rows of the array.  The */
/*          j-th column of A is stored in the j-th column of the array AB */
/*          as follows: */
/*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */
/*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd). */

/*          On exit, if INFO = 0, the triangular factor U or L from the */
/*          Cholesky factorization A = U**T*U or A = L*L**T of the band */
/*          matrix A, in the same storage format as A. */

/*  LDAB    (input) INTEGER */
/*          The leading dimension of the array AB.  LDAB >= KD+1. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
/*          > 0:  if INFO = i, the leading minor of order i is not */
/*                positive definite, and the factorization could not be */
/*                completed. */

/*  Further Details */
/*  =============== */

/*  The band storage scheme is illustrated by the following example, when */
/*  N = 6, KD = 2, and UPLO = 'U': */

/*  On entry:                       On exit: */

/*      *    *   a13  a24  a35  a46      *    *   u13  u24  u35  u46 */
/*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56 */
/*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66 */

/*  Similarly, if UPLO = 'L' the format of A is as follows: */

/*  On entry:                       On exit: */

/*     a11  a22  a33  a44  a55  a66     l11  l22  l33  l44  l55  l66 */
/*     a21  a32  a43  a54  a65   *      l21  l32  l43  l54  l65   * */
/*     a31  a42  a53  a64   *    *      l31  l42  l53  l64   *    * */

/*  Array elements marked * are not used by the routine. */

/*  Contributed by */
/*  Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 */

/*  ===================================================================== */

/*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1;
    ab -= ab_offset;

    /* Function Body */
    *info = 0;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*kd < 0) {
	*info = -3;
    } else if (*ldab < *kd + 1) {
	*info = -5;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SPBTRF", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment */

    nb = ilaenv_(&c__1, "SPBTRF", uplo, n, kd, &c_n1, &c_n1);

/*     The block size must not exceed the semi-bandwidth KD, and must not */
/*     exceed the limit set by the size of the local array WORK. */

    nb = min(nb,32);

    if (nb <= 1 || nb > *kd) {

/*        Use unblocked code */

	spbtf2_(uplo, n, kd, &ab[ab_offset], ldab, info);
    } else {

/*        Use blocked code */

	if (lsame_(uplo, "U")) {

/*           Compute the Cholesky factorization of a symmetric band */
/*           matrix, given the upper triangle of the matrix in band */
/*           storage. */

/*           Zero the upper triangle of the work array. */

	    i__1 = nb;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = j - 1;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    work[i__ + j * 33 - 34] = 0.f;
		}
	    }

/*           Process the band matrix one diagonal block at a time. */

	    i__1 = *n;
	    i__2 = nb;
	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);

/*              Factorize the diagonal block */

		i__3 = *ldab - 1;
		spotf2_(uplo, &ib, &ab[*kd + 1 + i__ * ab_dim1], &i__3, &ii);
		if (ii != 0) {
		    *info = i__ + ii - 1;
		    goto L150;
		}
		if (i__ + ib <= *n) {

/*                 Update the relevant part of the trailing submatrix. */
/*                 If A11 denotes the diagonal block which has just been */
/*                 factorized, then we need to update the remaining */
/*                 blocks in the diagram: */

/*                    A11   A12   A13 */
/*                          A22   A23 */
/*                                A33 */

/*                 The numbers of rows and columns in the partitioning */
/*                 are IB, I2, I3 respectively. The blocks A12, A22 and */
/*                 A23 are empty if IB = KD. The upper triangle of A13 */
/*                 lies outside the band. */

/* Computing MIN */
		    i__3 = *kd - ib, i__4 = *n - i__ - ib + 1;
		    i2 = min(i__3,i__4);
/* Computing MIN */
		    i__3 = ib, i__4 = *n - i__ - *kd + 1;
		    i3 = min(i__3,i__4);

		    if (i2 > 0) {

/*                    Update A12 */

			i__3 = *ldab - 1;
			i__4 = *ldab - 1;
			strsm_("Left", "Upper", "Transpose", "Non-unit", &ib, 
				&i2, &c_b18, &ab[*kd + 1 + i__ * ab_dim1], &
				i__3, &ab[*kd + 1 - ib + (i__ + ib) * ab_dim1]
, &i__4);

/*                    Update A22 */

			i__3 = *ldab - 1;
			i__4 = *ldab - 1;
			ssyrk_("Upper", "Transpose", &i2, &ib, &c_b21, &ab[*
				kd + 1 - ib + (i__ + ib) * ab_dim1], &i__3, &
				c_b18, &ab[*kd + 1 + (i__ + ib) * ab_dim1], &
				i__4);
		    }

		    if (i3 > 0) {

/*                    Copy the lower triangle of A13 into the work array. */

			i__3 = i3;
			for (jj = 1; jj <= i__3; ++jj) {
			    i__4 = ib;
			    for (ii = jj; ii <= i__4; ++ii) {
				work[ii + jj * 33 - 34] = ab[ii - jj + 1 + (
					jj + i__ + *kd - 1) * ab_dim1];
			    }
			}

/*                    Update A13 (in the work array). */

			i__3 = *ldab - 1;
			strsm_("Left", "Upper", "Transpose", "Non-unit", &ib, 
				&i3, &c_b18, &ab[*kd + 1 + i__ * ab_dim1], &
				i__3, work, &c__33);

/*                    Update A23 */

			if (i2 > 0) {
			    i__3 = *ldab - 1;
			    i__4 = *ldab - 1;
			    sgemm_("Transpose", "No Transpose", &i2, &i3, &ib, 
				     &c_b21, &ab[*kd + 1 - ib + (i__ + ib) * 
				    ab_dim1], &i__3, work, &c__33, &c_b18, &
				    ab[ib + 1 + (i__ + *kd) * ab_dim1], &i__4);
			}

/*                    Update A33 */

			i__3 = *ldab - 1;
			ssyrk_("Upper", "Transpose", &i3, &ib, &c_b21, work, &
				c__33, &c_b18, &ab[*kd + 1 + (i__ + *kd) * 
				ab_dim1], &i__3);

/*                    Copy the lower triangle of A13 back into place. */

			i__3 = i3;
			for (jj = 1; jj <= i__3; ++jj) {
			    i__4 = ib;
			    for (ii = jj; ii <= i__4; ++ii) {
				ab[ii - jj + 1 + (jj + i__ + *kd - 1) * 
					ab_dim1] = work[ii + jj * 33 - 34];
			    }
			}
		    }
		}
	    }
	} else {

/*           Compute the Cholesky factorization of a symmetric band */
/*           matrix, given the lower triangle of the matrix in band */
/*           storage. */

/*           Zero the lower triangle of the work array. */

	    i__2 = nb;
	    for (j = 1; j <= i__2; ++j) {
		i__1 = nb;
		for (i__ = j + 1; i__ <= i__1; ++i__) {
		    work[i__ + j * 33 - 34] = 0.f;
		}
	    }

/*           Process the band matrix one diagonal block at a time. */

	    i__2 = *n;
	    i__1 = nb;
	    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);

/*              Factorize the diagonal block */

		i__3 = *ldab - 1;
		spotf2_(uplo, &ib, &ab[i__ * ab_dim1 + 1], &i__3, &ii);
		if (ii != 0) {
		    *info = i__ + ii - 1;
		    goto L150;
		}
		if (i__ + ib <= *n) {

/*                 Update the relevant part of the trailing submatrix. */
/*                 If A11 denotes the diagonal block which has just been */
/*                 factorized, then we need to update the remaining */
/*                 blocks in the diagram: */

/*                    A11 */
/*                    A21   A22 */
/*                    A31   A32   A33 */

/*                 The numbers of rows and columns in the partitioning */
/*                 are IB, I2, I3 respectively. The blocks A21, A22 and */
/*                 A32 are empty if IB = KD. The lower triangle of A31 */
/*                 lies outside the band. */

/* Computing MIN */
		    i__3 = *kd - ib, i__4 = *n - i__ - ib + 1;
		    i2 = min(i__3,i__4);
/* Computing MIN */
		    i__3 = ib, i__4 = *n - i__ - *kd + 1;
		    i3 = min(i__3,i__4);

		    if (i2 > 0) {

/*                    Update A21 */

			i__3 = *ldab - 1;
			i__4 = *ldab - 1;
			strsm_("Right", "Lower", "Transpose", "Non-unit", &i2, 
				 &ib, &c_b18, &ab[i__ * ab_dim1 + 1], &i__3, &
				ab[ib + 1 + i__ * ab_dim1], &i__4);

/*                    Update A22 */

			i__3 = *ldab - 1;
			i__4 = *ldab - 1;
			ssyrk_("Lower", "No Transpose", &i2, &ib, &c_b21, &ab[
				ib + 1 + i__ * ab_dim1], &i__3, &c_b18, &ab[(
				i__ + ib) * ab_dim1 + 1], &i__4);
		    }

		    if (i3 > 0) {

/*                    Copy the upper triangle of A31 into the work array. */

			i__3 = ib;
			for (jj = 1; jj <= i__3; ++jj) {
			    i__4 = min(jj,i3);
			    for (ii = 1; ii <= i__4; ++ii) {
				work[ii + jj * 33 - 34] = ab[*kd + 1 - jj + 
					ii + (jj + i__ - 1) * ab_dim1];
			    }
			}

/*                    Update A31 (in the work array). */

			i__3 = *ldab - 1;
			strsm_("Right", "Lower", "Transpose", "Non-unit", &i3, 
				 &ib, &c_b18, &ab[i__ * ab_dim1 + 1], &i__3, 
				work, &c__33);

/*                    Update A32 */

			if (i2 > 0) {
			    i__3 = *ldab - 1;
			    i__4 = *ldab - 1;
			    sgemm_("No transpose", "Transpose", &i3, &i2, &ib, 
				     &c_b21, work, &c__33, &ab[ib + 1 + i__ * 
				    ab_dim1], &i__3, &c_b18, &ab[*kd + 1 - ib 
				    + (i__ + ib) * ab_dim1], &i__4);
			}

/*                    Update A33 */

			i__3 = *ldab - 1;
			ssyrk_("Lower", "No Transpose", &i3, &ib, &c_b21, 
				work, &c__33, &c_b18, &ab[(i__ + *kd) * 
				ab_dim1 + 1], &i__3);

/*                    Copy the upper triangle of A31 back into place. */

			i__3 = ib;
			for (jj = 1; jj <= i__3; ++jj) {
			    i__4 = min(jj,i3);
			    for (ii = 1; ii <= i__4; ++ii) {
				ab[*kd + 1 - jj + ii + (jj + i__ - 1) * 
					ab_dim1] = work[ii + jj * 33 - 34];
			    }
			}
		    }
		}
	    }
	}
    }
    return 0;

L150:
    return 0;

/*     End of SPBTRF */

} /* spbtrf_ */
/* Subroutine */ int slauum_(char *uplo, integer *n, real *a, integer *lda, 
	integer *info)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    SLAUUM computes the product U * U' or L' * L, where the triangular   
    factor U or L is stored in the upper or lower triangular part of   
    the array A.   

    If UPLO = 'U' or 'u' then the upper triangle of the result is stored,   
    overwriting the factor U in A.   
    If UPLO = 'L' or 'l' then the lower triangle of the result is stored,   
    overwriting the factor L in A.   

    This is the blocked form of the algorithm, calling Level 3 BLAS.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the triangular factor stored in the array A   
            is upper or lower triangular:   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    N       (input) INTEGER   
            The order of the triangular factor U or L.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the triangular factor U or L.   
            On exit, if UPLO = 'U', the upper triangle of A is   
            overwritten with the upper triangle of the product U * U';   
            if UPLO = 'L', the lower triangle of A is overwritten with   
            the lower triangle of the product L' * L.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -k, the k-th argument had an illegal value   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static real c_b15 = 1.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    static integer i__;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    static logical upper;
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
	    ), ssyrk_(char *, char *, integer 
	    *, integer *, real *, real *, integer *, real *, real *, integer *
	    );
    static integer ib;
    extern /* Subroutine */ int slauu2_(char *, integer *, real *, integer *, 
	    integer *);
    static integer nb;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAUUM", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "SLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);

    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code */

	slauu2_(uplo, n, &a[a_offset], lda, info);
    } else {

/*        Use blocked code */

	if (upper) {

/*           Compute the product U * U'. */

	    i__1 = *n;
	    i__2 = nb;
	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);
		i__3 = i__ - 1;
		strmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, 
			&c_b15, &a_ref(i__, i__), lda, &a_ref(1, i__), lda);
		slauu2_("Upper", &ib, &a_ref(i__, i__), lda, info);
		if (i__ + ib <= *n) {
		    i__3 = i__ - 1;
		    i__4 = *n - i__ - ib + 1;
		    sgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, &
			    c_b15, &a_ref(1, i__ + ib), lda, &a_ref(i__, i__ 
			    + ib), lda, &c_b15, &a_ref(1, i__), lda);
		    i__3 = *n - i__ - ib + 1;
		    ssyrk_("Upper", "No transpose", &ib, &i__3, &c_b15, &
			    a_ref(i__, i__ + ib), lda, &c_b15, &a_ref(i__, 
			    i__), lda);
		}
/* L10: */
	    }
	} else {

/*           Compute the product L' * L. */

	    i__2 = *n;
	    i__1 = nb;
	    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);
		i__3 = i__ - 1;
		strmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, &
			c_b15, &a_ref(i__, i__), lda, &a_ref(i__, 1), lda);
		slauu2_("Lower", &ib, &a_ref(i__, i__), lda, info);
		if (i__ + ib <= *n) {
		    i__3 = i__ - 1;
		    i__4 = *n - i__ - ib + 1;
		    sgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, &
			    c_b15, &a_ref(i__ + ib, i__), lda, &a_ref(i__ + 
			    ib, 1), lda, &c_b15, &a_ref(i__, 1), lda);
		    i__3 = *n - i__ - ib + 1;
		    ssyrk_("Lower", "Transpose", &ib, &i__3, &c_b15, &a_ref(
			    i__ + ib, i__), lda, &c_b15, &a_ref(i__, i__), 
			    lda);
		}
/* L20: */
	    }
	}
    }

    return 0;

/*     End of SLAUUM */

} /* slauum_ */
Beispiel #14
0
/* Subroutine */ int ssfrk_(char *transr, char *uplo, char *trans, integer *n, 
	 integer *k, real *alpha, real *a, integer *lda, real *beta, real *
	c__)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1;

    /* Local variables */
    integer j, n1, n2, nk, info;
    logical normaltransr;
    integer nrowa;
    logical lower;
    logical nisodd, notrans;

/*  -- LAPACK routine (version 3.2)                                    -- */

/*  -- Contributed by Julien Langou of the Univ. of Colorado Denver    -- */
/*  -- November 2008                                                   -- */

/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */

/*  Purpose */
/*  ======= */

/*  Level 3 BLAS like routine for C in RFP Format. */

/*  SSFRK performs one of the symmetric rank--k operations */

/*     C := alpha*A*A' + beta*C, */

/*  or */

/*     C := alpha*A'*A + beta*C, */

/*  where alpha and beta are real scalars, C is an n--by--n symmetric */
/*  matrix and A is an n--by--k matrix in the first case and a k--by--n */
/*  matrix in the second case. */

/*  Arguments */
/*  ========== */

/*  TRANSR    (input) CHARACTER */
/*          = 'N':  The Normal Form of RFP A is stored; */
/*          = 'T':  The Transpose Form of RFP A is stored. */

/*  UPLO   - (input) CHARACTER */
/*           On  entry, UPLO specifies whether the upper or lower */
/*           triangular part of the array C is to be referenced as */
/*           follows: */

/*              UPLO = 'U' or 'u'   Only the upper triangular part of C */
/*                                  is to be referenced. */

/*              UPLO = 'L' or 'l'   Only the lower triangular part of C */
/*                                  is to be referenced. */

/*           Unchanged on exit. */

/*  TRANS  - (input) CHARACTER */
/*           On entry, TRANS specifies the operation to be performed as */
/*           follows: */

/*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C. */

/*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C. */

/*           Unchanged on exit. */

/*  N      - (input) INTEGER. */
/*           On entry, N specifies the order of the matrix C. N must be */
/*           at least zero. */
/*           Unchanged on exit. */

/*  K      - (input) INTEGER. */
/*           On entry with TRANS = 'N' or 'n', K specifies the number */
/*           of  columns of the matrix A, and on entry with TRANS = 'T' */
/*           or 't', K specifies the number of rows of the matrix A. K */
/*           must be at least zero. */
/*           Unchanged on exit. */

/*  ALPHA  - (input) REAL. */
/*           On entry, ALPHA specifies the scalar alpha. */
/*           Unchanged on exit. */

/*  A      - (input) REAL array of DIMENSION ( LDA, ka ), where KA */
/*           is K  when TRANS = 'N' or 'n', and is N otherwise. Before */
/*           entry with TRANS = 'N' or 'n', the leading N--by--K part of */
/*           the array A must contain the matrix A, otherwise the leading */
/*           K--by--N part of the array A must contain the matrix A. */
/*           Unchanged on exit. */

/*  LDA    - (input) INTEGER. */
/*           On entry, LDA specifies the first dimension of A as declared */
/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
/*           be at least  max( 1, k ). */
/*           Unchanged on exit. */

/*  BETA   - (input) REAL. */
/*           On entry, BETA specifies the scalar beta. */
/*           Unchanged on exit. */

/*  C      - (input/output) REAL array, dimension ( NT ); */
/*           NT = N*(N+1)/2. On entry, the symmetric matrix C in RFP */
/*           Format. RFP Format is described by TRANSR, UPLO and N. */

/*  Arguments */
/*  ========== */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --c__;

    /* Function Body */
    info = 0;
    normaltransr = lsame_(transr, "N");
    lower = lsame_(uplo, "L");
    notrans = lsame_(trans, "N");

    if (notrans) {
	nrowa = *n;
    } else {
	nrowa = *k;
    }

    if (! normaltransr && ! lsame_(transr, "T")) {
	info = -1;
    } else if (! lower && ! lsame_(uplo, "U")) {
	info = -2;
    } else if (! notrans && ! lsame_(trans, "T")) {
	info = -3;
    } else if (*n < 0) {
	info = -4;
    } else if (*k < 0) {
	info = -5;
    } else if (*lda < max(1,nrowa)) {
	info = -8;
    }
    if (info != 0) {
	i__1 = -info;
	xerbla_("SSFRK ", &i__1);
	return 0;
    }

/*     Quick return if possible. */

/*     The quick return case: ((ALPHA.EQ.0).AND.(BETA.NE.ZERO)) is not */
/*     done (it is in SSYRK for example) and left in the general case. */

    if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) {
	return 0;
    }

    if (*alpha == 0.f && *beta == 0.f) {
	i__1 = *n * (*n + 1) / 2;
	for (j = 1; j <= i__1; ++j) {
	    c__[j] = 0.f;
	}
	return 0;
    }

/*     C is N-by-N. */
/*     If N is odd, set NISODD = .TRUE., and N1 and N2. */
/*     If N is even, NISODD = .FALSE., and NK. */

    if (*n % 2 == 0) {
	nisodd = FALSE_;
	nk = *n / 2;
    } else {
	nisodd = TRUE_;
	if (lower) {
	    n2 = *n / 2;
	    n1 = *n - n2;
	} else {
	    n1 = *n / 2;
	    n2 = *n - n1;
	}
    }

    if (nisodd) {

/*        N is odd */

	if (normaltransr) {

/*           N is odd and TRANSR = 'N' */

	    if (lower) {

/*              N is odd, TRANSR = 'N', and UPLO = 'L' */

		if (notrans) {

/*                 N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' */

		    ssyrk_("L", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[1], n);
		    ssyrk_("U", "N", &n2, k, alpha, &a[n1 + 1 + a_dim1], lda, 
			    beta, &c__[*n + 1], n);
		    sgemm_("N", "T", &n2, &n1, k, alpha, &a[n1 + 1 + a_dim1], 
			    lda, &a[a_dim1 + 1], lda, beta, &c__[n1 + 1], n);

		} else {

/*                 N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'T' */

		    ssyrk_("L", "T", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[1], n);
		    ssyrk_("U", "T", &n2, k, alpha, &a[(n1 + 1) * a_dim1 + 1], 
			     lda, beta, &c__[*n + 1], n)
			    ;
		    sgemm_("T", "N", &n2, &n1, k, alpha, &a[(n1 + 1) * a_dim1 
			    + 1], lda, &a[a_dim1 + 1], lda, beta, &c__[n1 + 1]
, n);

		}

	    } else {

/*              N is odd, TRANSR = 'N', and UPLO = 'U' */

		if (notrans) {

/*                 N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' */

		    ssyrk_("L", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[n2 + 1], n);
		    ssyrk_("U", "N", &n2, k, alpha, &a[n2 + a_dim1], lda, 
			    beta, &c__[n1 + 1], n);
		    sgemm_("N", "T", &n1, &n2, k, alpha, &a[a_dim1 + 1], lda, 
			    &a[n2 + a_dim1], lda, beta, &c__[1], n);

		} else {

/*                 N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'T' */

		    ssyrk_("L", "T", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[n2 + 1], n);
		    ssyrk_("U", "T", &n2, k, alpha, &a[n2 * a_dim1 + 1], lda, 
			    beta, &c__[n1 + 1], n);
		    sgemm_("T", "N", &n1, &n2, k, alpha, &a[a_dim1 + 1], lda, 
			    &a[n2 * a_dim1 + 1], lda, beta, &c__[1], n);

		}

	    }

	} else {

/*           N is odd, and TRANSR = 'T' */

	    if (lower) {

/*              N is odd, TRANSR = 'T', and UPLO = 'L' */

		if (notrans) {

/*                 N is odd, TRANSR = 'T', UPLO = 'L', and TRANS = 'N' */

		    ssyrk_("U", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[1], &n1);
		    ssyrk_("L", "N", &n2, k, alpha, &a[n1 + 1 + a_dim1], lda, 
			    beta, &c__[2], &n1);
		    sgemm_("N", "T", &n1, &n2, k, alpha, &a[a_dim1 + 1], lda, 
			    &a[n1 + 1 + a_dim1], lda, beta, &c__[n1 * n1 + 1], 
			     &n1);

		} else {

/*                 N is odd, TRANSR = 'T', UPLO = 'L', and TRANS = 'T' */

		    ssyrk_("U", "T", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[1], &n1);
		    ssyrk_("L", "T", &n2, k, alpha, &a[(n1 + 1) * a_dim1 + 1], 
			     lda, beta, &c__[2], &n1);
		    sgemm_("T", "N", &n1, &n2, k, alpha, &a[a_dim1 + 1], lda, 
			    &a[(n1 + 1) * a_dim1 + 1], lda, beta, &c__[n1 * 
			    n1 + 1], &n1);

		}

	    } else {

/*              N is odd, TRANSR = 'T', and UPLO = 'U' */

		if (notrans) {

/*                 N is odd, TRANSR = 'T', UPLO = 'U', and TRANS = 'N' */

		    ssyrk_("U", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[n2 * n2 + 1], &n2);
		    ssyrk_("L", "N", &n2, k, alpha, &a[n1 + 1 + a_dim1], lda, 
			    beta, &c__[n1 * n2 + 1], &n2);
		    sgemm_("N", "T", &n2, &n1, k, alpha, &a[n1 + 1 + a_dim1], 
			    lda, &a[a_dim1 + 1], lda, beta, &c__[1], &n2);

		} else {

/*                 N is odd, TRANSR = 'T', UPLO = 'U', and TRANS = 'T' */

		    ssyrk_("U", "T", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[n2 * n2 + 1], &n2);
		    ssyrk_("L", "T", &n2, k, alpha, &a[(n1 + 1) * a_dim1 + 1], 
			     lda, beta, &c__[n1 * n2 + 1], &n2);
		    sgemm_("T", "N", &n2, &n1, k, alpha, &a[(n1 + 1) * a_dim1 
			    + 1], lda, &a[a_dim1 + 1], lda, beta, &c__[1], &
			    n2);

		}

	    }

	}

    } else {

/*        N is even */

	if (normaltransr) {

/*           N is even and TRANSR = 'N' */

	    if (lower) {

/*              N is even, TRANSR = 'N', and UPLO = 'L' */

		if (notrans) {

/*                 N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' */

		    i__1 = *n + 1;
		    ssyrk_("L", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[2], &i__1);
		    i__1 = *n + 1;
		    ssyrk_("U", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, 
			    beta, &c__[1], &i__1);
		    i__1 = *n + 1;
		    sgemm_("N", "T", &nk, &nk, k, alpha, &a[nk + 1 + a_dim1], 
			    lda, &a[a_dim1 + 1], lda, beta, &c__[nk + 2], &
			    i__1);

		} else {

/*                 N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'T' */

		    i__1 = *n + 1;
		    ssyrk_("L", "T", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[2], &i__1);
		    i__1 = *n + 1;
		    ssyrk_("U", "T", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], 
			     lda, beta, &c__[1], &i__1);
		    i__1 = *n + 1;
		    sgemm_("T", "N", &nk, &nk, k, alpha, &a[(nk + 1) * a_dim1 
			    + 1], lda, &a[a_dim1 + 1], lda, beta, &c__[nk + 2]
, &i__1);

		}

	    } else {

/*              N is even, TRANSR = 'N', and UPLO = 'U' */

		if (notrans) {

/*                 N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' */

		    i__1 = *n + 1;
		    ssyrk_("L", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk + 2], &i__1);
		    i__1 = *n + 1;
		    ssyrk_("U", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, 
			    beta, &c__[nk + 1], &i__1);
		    i__1 = *n + 1;
		    sgemm_("N", "T", &nk, &nk, k, alpha, &a[a_dim1 + 1], lda, 
			    &a[nk + 1 + a_dim1], lda, beta, &c__[1], &i__1);

		} else {

/*                 N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'T' */

		    i__1 = *n + 1;
		    ssyrk_("L", "T", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk + 2], &i__1);
		    i__1 = *n + 1;
		    ssyrk_("U", "T", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], 
			     lda, beta, &c__[nk + 1], &i__1);
		    i__1 = *n + 1;
		    sgemm_("T", "N", &nk, &nk, k, alpha, &a[a_dim1 + 1], lda, 
			    &a[(nk + 1) * a_dim1 + 1], lda, beta, &c__[1], &
			    i__1);

		}

	    }

	} else {

/*           N is even, and TRANSR = 'T' */

	    if (lower) {

/*              N is even, TRANSR = 'T', and UPLO = 'L' */

		if (notrans) {

/*                 N is even, TRANSR = 'T', UPLO = 'L', and TRANS = 'N' */

		    ssyrk_("U", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk + 1], &nk);
		    ssyrk_("L", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, 
			    beta, &c__[1], &nk);
		    sgemm_("N", "T", &nk, &nk, k, alpha, &a[a_dim1 + 1], lda, 
			    &a[nk + 1 + a_dim1], lda, beta, &c__[(nk + 1) * 
			    nk + 1], &nk);

		} else {

/*                 N is even, TRANSR = 'T', UPLO = 'L', and TRANS = 'T' */

		    ssyrk_("U", "T", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk + 1], &nk);
		    ssyrk_("L", "T", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], 
			     lda, beta, &c__[1], &nk);
		    sgemm_("T", "N", &nk, &nk, k, alpha, &a[a_dim1 + 1], lda, 
			    &a[(nk + 1) * a_dim1 + 1], lda, beta, &c__[(nk + 
			    1) * nk + 1], &nk);

		}

	    } else {

/*              N is even, TRANSR = 'T', and UPLO = 'U' */

		if (notrans) {

/*                 N is even, TRANSR = 'T', UPLO = 'U', and TRANS = 'N' */

		    ssyrk_("U", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk * (nk + 1) + 1], &nk);
		    ssyrk_("L", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, 
			    beta, &c__[nk * nk + 1], &nk);
		    sgemm_("N", "T", &nk, &nk, k, alpha, &a[nk + 1 + a_dim1], 
			    lda, &a[a_dim1 + 1], lda, beta, &c__[1], &nk);

		} else {

/*                 N is even, TRANSR = 'T', UPLO = 'U', and TRANS = 'T' */

		    ssyrk_("U", "T", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk * (nk + 1) + 1], &nk);
		    ssyrk_("L", "T", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], 
			     lda, beta, &c__[nk * nk + 1], &nk);
		    sgemm_("T", "N", &nk, &nk, k, alpha, &a[(nk + 1) * a_dim1 
			    + 1], lda, &a[a_dim1 + 1], lda, beta, &c__[1], &
			    nk);

		}

	    }

	}

    }

    return 0;

/*     End of SSFRK */

} /* ssfrk_ */
Beispiel #15
0
void
ssyrk(char uplo, char transa, int n, int k, float alpha, float *a, int lda, float beta, float *c, int ldc)
{
    ssyrk_(&uplo, &transa, &n, &k, &alpha, a, &lda, &beta, c, &ldc);
}
Beispiel #16
0
/* Subroutine */ int srqt02_(integer *m, integer *n, integer *k, real *a, 
	real *af, real *q, real *r__, integer *lda, real *tau, real *work, 
	integer *lwork, real *rwork, real *result)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, q_dim1, q_offset, r_dim1, 
	    r_offset, i__1, i__2;

    /* Builtin functions */
    /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);

    /* Local variables */
    real eps;
    integer info;
    real resid;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    real anorm;
    extern /* Subroutine */ int ssyrk_(char *, char *, integer *, integer *, 
	    real *, real *, integer *, real *, real *, integer *);
    extern doublereal slamch_(char *), slange_(char *, integer *, 
	    integer *, real *, integer *, real *);
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *), slaset_(char *, integer *, 
	    integer *, real *, real *, real *, integer *);
    extern doublereal slansy_(char *, char *, integer *, real *, integer *, 
	    real *);
    extern /* Subroutine */ int sorgrq_(integer *, integer *, integer *, real 
	    *, integer *, real *, real *, integer *, integer *);


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SRQT02 tests SORGRQ, which generates an m-by-n matrix Q with */
/*  orthonornmal rows that is defined as the product of k elementary */
/*  reflectors. */

/*  Given the RQ factorization of an m-by-n matrix A, SRQT02 generates */
/*  the orthogonal matrix Q defined by the factorization of the last k */
/*  rows of A; it compares R(m-k+1:m,n-m+1:n) with */
/*  A(m-k+1:m,1:n)*Q(n-m+1:n,1:n)', and checks that the rows of Q are */
/*  orthonormal. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix Q to be generated.  M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix Q to be generated. */
/*          N >= M >= 0. */

/*  K       (input) INTEGER */
/*          The number of elementary reflectors whose product defines the */
/*          matrix Q. M >= K >= 0. */

/*  A       (input) REAL array, dimension (LDA,N) */
/*          The m-by-n matrix A which was factorized by SRQT01. */

/*  AF      (input) REAL array, dimension (LDA,N) */
/*          Details of the RQ factorization of A, as returned by SGERQF. */
/*          See SGERQF for further details. */

/*  Q       (workspace) REAL array, dimension (LDA,N) */

/*  R       (workspace) REAL array, dimension (LDA,M) */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the arrays A, AF, Q and L. LDA >= N. */

/*  TAU     (input) REAL array, dimension (M) */
/*          The scalar factors of the elementary reflectors corresponding */
/*          to the RQ factorization in AF. */

/*  WORK    (workspace) REAL array, dimension (LWORK) */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK. */

/*  RWORK   (workspace) REAL array, dimension (M) */

/*  RESULT  (output) REAL array, dimension (2) */
/*          The test ratios: */
/*          RESULT(1) = norm( R - A*Q' ) / ( N * norm(A) * EPS ) */
/*          RESULT(2) = norm( I - Q*Q' ) / ( N * EPS ) */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Scalars in Common .. */
/*     .. */
/*     .. Common blocks .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Quick return if possible */

    /* Parameter adjustments */
    r_dim1 = *lda;
    r_offset = 1 + r_dim1;
    r__ -= r_offset;
    q_dim1 = *lda;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    af_dim1 = *lda;
    af_offset = 1 + af_dim1;
    af -= af_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --tau;
    --work;
    --rwork;
    --result;

    /* Function Body */
    if (*m == 0 || *n == 0 || *k == 0) {
	result[1] = 0.f;
	result[2] = 0.f;
	return 0;
    }

    eps = slamch_("Epsilon");

/*     Copy the last k rows of the factorization to the array Q */

    slaset_("Full", m, n, &c_b4, &c_b4, &q[q_offset], lda);
    if (*k < *n) {
	i__1 = *n - *k;
	slacpy_("Full", k, &i__1, &af[*m - *k + 1 + af_dim1], lda, &q[*m - *k 
		+ 1 + q_dim1], lda);
    }
    if (*k > 1) {
	i__1 = *k - 1;
	i__2 = *k - 1;
	slacpy_("Lower", &i__1, &i__2, &af[*m - *k + 2 + (*n - *k + 1) * 
		af_dim1], lda, &q[*m - *k + 2 + (*n - *k + 1) * q_dim1], lda);
    }

/*     Generate the last n rows of the matrix Q */

    s_copy(srnamc_1.srnamt, "SORGRQ", (ftnlen)6, (ftnlen)6);
    sorgrq_(m, n, k, &q[q_offset], lda, &tau[*m - *k + 1], &work[1], lwork, &
	    info);

/*     Copy R(m-k+1:m,n-m+1:n) */

    slaset_("Full", k, m, &c_b10, &c_b10, &r__[*m - *k + 1 + (*n - *m + 1) * 
	    r_dim1], lda);
    slacpy_("Upper", k, k, &af[*m - *k + 1 + (*n - *k + 1) * af_dim1], lda, &
	    r__[*m - *k + 1 + (*n - *k + 1) * r_dim1], lda);

/*     Compute R(m-k+1:m,n-m+1:n) - A(m-k+1:m,1:n) * Q(n-m+1:n,1:n)' */

    sgemm_("No transpose", "Transpose", k, m, n, &c_b15, &a[*m - *k + 1 + 
	    a_dim1], lda, &q[q_offset], lda, &c_b16, &r__[*m - *k + 1 + (*n - 
	    *m + 1) * r_dim1], lda);

/*     Compute norm( R - A*Q' ) / ( N * norm(A) * EPS ) . */

    anorm = slange_("1", k, n, &a[*m - *k + 1 + a_dim1], lda, &rwork[1]);
    resid = slange_("1", k, m, &r__[*m - *k + 1 + (*n - *m + 1) * r_dim1], 
	    lda, &rwork[1]);
    if (anorm > 0.f) {
	result[1] = resid / (real) max(1,*n) / anorm / eps;
    } else {
	result[1] = 0.f;
    }

/*     Compute I - Q*Q' */

    slaset_("Full", m, m, &c_b10, &c_b16, &r__[r_offset], lda);
    ssyrk_("Upper", "No transpose", m, n, &c_b15, &q[q_offset], lda, &c_b16, &
	    r__[r_offset], lda);

/*     Compute norm( I - Q*Q' ) / ( N * EPS ) . */

    resid = slansy_("1", "Upper", m, &r__[r_offset], lda, &rwork[1]);

    result[2] = resid / (real) max(1,*n) / eps;

    return 0;

/*     End of SRQT02 */

} /* srqt02_ */
Beispiel #17
0
/* Subroutine */ int slauum_(char *uplo, integer *n, real *a, integer *lda, 
	integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;

    /* Local variables */
    integer i__, ib, nb;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    logical upper;
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
), ssyrk_(char *, char *, integer 
	    *, integer *, real *, real *, integer *, real *, real *, integer *
), slauu2_(char *, integer *, real *, integer *, 
	    integer *), xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *);


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLAUUM computes the product U * U' or L' * L, where the triangular */
/*  factor U or L is stored in the upper or lower triangular part of */
/*  the array A. */

/*  If UPLO = 'U' or 'u' then the upper triangle of the result is stored, */
/*  overwriting the factor U in A. */
/*  If UPLO = 'L' or 'l' then the lower triangle of the result is stored, */
/*  overwriting the factor L in A. */

/*  This is the blocked form of the algorithm, calling Level 3 BLAS. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the triangular factor stored in the array A */
/*          is upper or lower triangular: */
/*          = 'U':  Upper triangular */
/*          = 'L':  Lower triangular */

/*  N       (input) INTEGER */
/*          The order of the triangular factor U or L.  N >= 0. */

/*  A       (input/output) REAL array, dimension (LDA,N) */
/*          On entry, the triangular factor U or L. */
/*          On exit, if UPLO = 'U', the upper triangle of A is */
/*          overwritten with the upper triangle of the product U * U'; */
/*          if UPLO = 'L', the lower triangle of A is overwritten with */
/*          the lower triangle of the product L' * L. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  INFO    (output) INTEGER */
/*          = 0: successful exit */
/*          < 0: if INFO = -k, the k-th argument had an illegal value */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAUUM", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "SLAUUM", uplo, n, &c_n1, &c_n1, &c_n1);

    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code */

	slauu2_(uplo, n, &a[a_offset], lda, info);
    } else {

/*        Use blocked code */

	if (upper) {

/*           Compute the product U * U'. */

	    i__1 = *n;
	    i__2 = nb;
	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);
		i__3 = i__ - 1;
		strmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, 
			&c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1 
			+ 1], lda)
			;
		slauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info);
		if (i__ + ib <= *n) {
		    i__3 = i__ - 1;
		    i__4 = *n - i__ - ib + 1;
		    sgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, &
			    c_b15, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ + 
			    (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ * 
			    a_dim1 + 1], lda);
		    i__3 = *n - i__ - ib + 1;
		    ssyrk_("Upper", "No transpose", &ib, &i__3, &c_b15, &a[
			    i__ + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ + 
			    i__ * a_dim1], lda);
		}
/* L10: */
	    }
	} else {

/*           Compute the product L' * L. */

	    i__2 = *n;
	    i__1 = nb;
	    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);
		i__3 = i__ - 1;
		strmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, &
			c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1], 
			lda);
		slauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info);
		if (i__ + ib <= *n) {
		    i__3 = i__ - 1;
		    i__4 = *n - i__ - ib + 1;
		    sgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, &
			    c_b15, &a[i__ + ib + i__ * a_dim1], lda, &a[i__ + 
			    ib + a_dim1], lda, &c_b15, &a[i__ + a_dim1], lda);
		    i__3 = *n - i__ - ib + 1;
		    ssyrk_("Lower", "Transpose", &ib, &i__3, &c_b15, &a[i__ + 
			    ib + i__ * a_dim1], lda, &c_b15, &a[i__ + i__ * 
			    a_dim1], lda);
		}
/* L20: */
	    }
	}
    }

    return 0;

/*     End of SLAUUM */

} /* slauum_ */
Beispiel #18
0
/* Subroutine */ int sgqrts_(integer *n, integer *m, integer *p, real *a, 
	real *af, real *q, real *r__, integer *lda, real *taua, real *b, real 
	*bf, real *z__, real *t, real *bwk, integer *ldb, real *taub, real *
	work, integer *lwork, real *rwork, real *result)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, r_dim1, r_offset, q_dim1, 
	    q_offset, b_dim1, b_offset, bf_dim1, bf_offset, t_dim1, t_offset, 
	    z_dim1, z_offset, bwk_dim1, bwk_offset, i__1, i__2;
    real r__1;

    /* Local variables */
    static integer info;
    static real unfl, resid;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    static real anorm, bnorm;
    extern /* Subroutine */ int ssyrk_(char *, char *, integer *, integer *, 
	    real *, real *, integer *, real *, real *, integer *);
    extern doublereal slamch_(char *), slange_(char *, integer *, 
	    integer *, real *, integer *, real *);
    extern /* Subroutine */ int sggqrf_(integer *, integer *, integer *, real 
	    *, integer *, real *, real *, integer *, real *, real *, integer *
	    , integer *), slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *), slaset_(char *, integer *, 
	    integer *, real *, real *, real *, integer *);
    extern doublereal slansy_(char *, char *, integer *, real *, integer *, 
	    real *);
    extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real 
	    *, integer *, real *, real *, integer *, integer *), sorgrq_(
	    integer *, integer *, integer *, real *, integer *, real *, real *
	    , integer *, integer *);
    static real ulp;


#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
#define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1]
#define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
#define af_ref(a_1,a_2) af[(a_2)*af_dim1 + a_1]
#define bf_ref(a_1,a_2) bf[(a_2)*bf_dim1 + a_1]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SGQRTS tests SGGQRF, which computes the GQR factorization of an   
    N-by-M matrix A and a N-by-P matrix B: A = Q*R and B = Q*T*Z.   

    Arguments   
    =========   

    N       (input) INTEGER   
            The number of rows of the matrices A and B.  N >= 0.   

    M       (input) INTEGER   
            The number of columns of the matrix A.  M >= 0.   

    P       (input) INTEGER   
            The number of columns of the matrix B.  P >= 0.   

    A       (input) REAL array, dimension (LDA,M)   
            The N-by-M matrix A.   

    AF      (output) REAL array, dimension (LDA,N)   
            Details of the GQR factorization of A and B, as returned   
            by SGGQRF, see SGGQRF for further details.   

    Q       (output) REAL array, dimension (LDA,N)   
            The M-by-M orthogonal matrix Q.   

    R       (workspace) REAL array, dimension (LDA,MAX(M,N))   

    LDA     (input) INTEGER   
            The leading dimension of the arrays A, AF, R and Q.   
            LDA >= max(M,N).   

    TAUA    (output) REAL array, dimension (min(M,N))   
            The scalar factors of the elementary reflectors, as returned   
            by SGGQRF.   

    B       (input) REAL array, dimension (LDB,P)   
            On entry, the N-by-P matrix A.   

    BF      (output) REAL array, dimension (LDB,N)   
            Details of the GQR factorization of A and B, as returned   
            by SGGQRF, see SGGQRF for further details.   

    Z       (output) REAL array, dimension (LDB,P)   
            The P-by-P orthogonal matrix Z.   

    T       (workspace) REAL array, dimension (LDB,max(P,N))   

    BWK     (workspace) REAL array, dimension (LDB,N)   

    LDB     (input) INTEGER   
            The leading dimension of the arrays B, BF, Z and T.   
            LDB >= max(P,N).   

    TAUB    (output) REAL array, dimension (min(P,N))   
            The scalar factors of the elementary reflectors, as returned   
            by SGGRQF.   

    WORK    (workspace) REAL array, dimension (LWORK)   

    LWORK   (input) INTEGER   
            The dimension of the array WORK, LWORK >= max(N,M,P)**2.   

    RWORK   (workspace) REAL array, dimension (max(N,M,P))   

    RESULT  (output) REAL array, dimension (4)   
            The test ratios:   
              RESULT(1) = norm( R - Q'*A ) / ( MAX(M,N)*norm(A)*ULP)   
              RESULT(2) = norm( T*Z - Q'*B ) / (MAX(P,N)*norm(B)*ULP)   
              RESULT(3) = norm( I - Q'*Q ) / ( M*ULP )   
              RESULT(4) = norm( I - Z'*Z ) / ( P*ULP )   

    =====================================================================   


       Parameter adjustments */
    r_dim1 = *lda;
    r_offset = 1 + r_dim1 * 1;
    r__ -= r_offset;
    q_dim1 = *lda;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    af_dim1 = *lda;
    af_offset = 1 + af_dim1 * 1;
    af -= af_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --taua;
    bwk_dim1 = *ldb;
    bwk_offset = 1 + bwk_dim1 * 1;
    bwk -= bwk_offset;
    t_dim1 = *ldb;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    z_dim1 = *ldb;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    bf_dim1 = *ldb;
    bf_offset = 1 + bf_dim1 * 1;
    bf -= bf_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --taub;
    --work;
    --rwork;
    --result;

    /* Function Body */
    ulp = slamch_("Precision");
    unfl = slamch_("Safe minimum");

/*     Copy the matrix A to the array AF. */

    slacpy_("Full", n, m, &a[a_offset], lda, &af[af_offset], lda);
    slacpy_("Full", n, p, &b[b_offset], ldb, &bf[bf_offset], ldb);

/* Computing MAX */
    r__1 = slange_("1", n, m, &a[a_offset], lda, &rwork[1]);
    anorm = dmax(r__1,unfl);
/* Computing MAX */
    r__1 = slange_("1", n, p, &b[b_offset], ldb, &rwork[1]);
    bnorm = dmax(r__1,unfl);

/*     Factorize the matrices A and B in the arrays AF and BF. */

    sggqrf_(n, m, p, &af[af_offset], lda, &taua[1], &bf[bf_offset], ldb, &
	    taub[1], &work[1], lwork, &info);

/*     Generate the N-by-N matrix Q */

    slaset_("Full", n, n, &c_b9, &c_b9, &q[q_offset], lda);
    i__1 = *n - 1;
    slacpy_("Lower", &i__1, m, &af_ref(2, 1), lda, &q_ref(2, 1), lda);
    i__1 = min(*n,*m);
    sorgqr_(n, n, &i__1, &q[q_offset], lda, &taua[1], &work[1], lwork, &info);

/*     Generate the P-by-P matrix Z */

    slaset_("Full", p, p, &c_b9, &c_b9, &z__[z_offset], ldb);
    if (*n <= *p) {
	if (*n > 0 && *n < *p) {
	    i__1 = *p - *n;
	    slacpy_("Full", n, &i__1, &bf[bf_offset], ldb, &z___ref(*p - *n + 
		    1, 1), ldb);
	}
	if (*n > 1) {
	    i__1 = *n - 1;
	    i__2 = *n - 1;
	    slacpy_("Lower", &i__1, &i__2, &bf_ref(2, *p - *n + 1), ldb, &
		    z___ref(*p - *n + 2, *p - *n + 1), ldb);
	}
    } else {
	if (*p > 1) {
	    i__1 = *p - 1;
	    i__2 = *p - 1;
	    slacpy_("Lower", &i__1, &i__2, &bf_ref(*n - *p + 2, 1), ldb, &
		    z___ref(2, 1), ldb);
	}
    }
    i__1 = min(*n,*p);
    sorgrq_(p, p, &i__1, &z__[z_offset], ldb, &taub[1], &work[1], lwork, &
	    info);

/*     Copy R */

    slaset_("Full", n, m, &c_b19, &c_b19, &r__[r_offset], lda);
    slacpy_("Upper", n, m, &af[af_offset], lda, &r__[r_offset], lda);

/*     Copy T */

    slaset_("Full", n, p, &c_b19, &c_b19, &t[t_offset], ldb);
    if (*n <= *p) {
	slacpy_("Upper", n, n, &bf_ref(1, *p - *n + 1), ldb, &t_ref(1, *p - *
		n + 1), ldb);
    } else {
	i__1 = *n - *p;
	slacpy_("Full", &i__1, p, &bf[bf_offset], ldb, &t[t_offset], ldb);
	slacpy_("Upper", p, p, &bf_ref(*n - *p + 1, 1), ldb, &t_ref(*n - *p + 
		1, 1), ldb);
    }

/*     Compute R - Q'*A */

    sgemm_("Transpose", "No transpose", n, m, n, &c_b30, &q[q_offset], lda, &
	    a[a_offset], lda, &c_b31, &r__[r_offset], lda);

/*     Compute norm( R - Q'*A ) / ( MAX(M,N)*norm(A)*ULP ) . */

    resid = slange_("1", n, m, &r__[r_offset], lda, &rwork[1]);
    if (anorm > 0.f) {
/* Computing MAX */
	i__1 = max(1,*m);
	result[1] = resid / (real) max(i__1,*n) / anorm / ulp;
    } else {
	result[1] = 0.f;
    }

/*     Compute T*Z - Q'*B */

    sgemm_("No Transpose", "No transpose", n, p, p, &c_b31, &t[t_offset], ldb,
	     &z__[z_offset], ldb, &c_b19, &bwk[bwk_offset], ldb);
    sgemm_("Transpose", "No transpose", n, p, n, &c_b30, &q[q_offset], lda, &
	    b[b_offset], ldb, &c_b31, &bwk[bwk_offset], ldb);

/*     Compute norm( T*Z - Q'*B ) / ( MAX(P,N)*norm(A)*ULP ) . */

    resid = slange_("1", n, p, &bwk[bwk_offset], ldb, &rwork[1]);
    if (bnorm > 0.f) {
/* Computing MAX */
	i__1 = max(1,*p);
	result[2] = resid / (real) max(i__1,*n) / bnorm / ulp;
    } else {
	result[2] = 0.f;
    }

/*     Compute I - Q'*Q */

    slaset_("Full", n, n, &c_b19, &c_b31, &r__[r_offset], lda);
    ssyrk_("Upper", "Transpose", n, n, &c_b30, &q[q_offset], lda, &c_b31, &
	    r__[r_offset], lda);

/*     Compute norm( I - Q'*Q ) / ( N * ULP ) . */

    resid = slansy_("1", "Upper", n, &r__[r_offset], lda, &rwork[1]);
    result[3] = resid / (real) max(1,*n) / ulp;

/*     Compute I - Z'*Z */

    slaset_("Full", p, p, &c_b19, &c_b31, &t[t_offset], ldb);
    ssyrk_("Upper", "Transpose", p, p, &c_b30, &z__[z_offset], ldb, &c_b31, &
	    t[t_offset], ldb);

/*     Compute norm( I - Z'*Z ) / ( P*ULP ) . */

    resid = slansy_("1", "Upper", p, &t[t_offset], ldb, &rwork[1]);
    result[4] = resid / (real) max(1,*p) / ulp;

    return 0;

/*     End of SGQRTS */

} /* sgqrts_ */
Beispiel #19
0
/* Subroutine */ int spstrf_(char *uplo, integer *n, real *a, integer *lda, 
	integer *piv, integer *rank, real *tol, real *work, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    real r__1;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    integer i__, j, k, maxlocval, jb, nb;
    real ajj;
    integer pvt;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    integer itemp;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *);
    real stemp;
    logical upper;
    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, 
	    integer *);
    real sstop;
    extern /* Subroutine */ int ssyrk_(char *, char *, integer *, integer *, 
	    real *, real *, integer *, real *, real *, integer *), spstf2_(char *, integer *, real *, integer *, integer *, 
	    integer *, real *, real *, integer *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *);
    extern logical sisnan_(real *);
    extern integer smaxloc_(real *, integer *);


/*  -- LAPACK routine (version 3.2) -- */
/*     Craig Lucas, University of Manchester / NAG Ltd. */
/*     October, 2008 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SPSTRF computes the Cholesky factorization with complete */
/*  pivoting of a real symmetric positive semidefinite matrix A. */

/*  The factorization has the form */
/*     P' * A * P = U' * U ,  if UPLO = 'U', */
/*     P' * A * P = L  * L',  if UPLO = 'L', */
/*  where U is an upper triangular matrix and L is lower triangular, and */
/*  P is stored as vector PIV. */

/*  This algorithm does not attempt to check that A is positive */
/*  semidefinite. This version of the algorithm calls level 3 BLAS. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the upper or lower triangular part of the */
/*          symmetric matrix A is stored. */
/*          = 'U':  Upper triangular */
/*          = 'L':  Lower triangular */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  A       (input/output) REAL array, dimension (LDA,N) */
/*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading */
/*          n by n upper triangular part of A contains the upper */
/*          triangular part of the matrix A, and the strictly lower */
/*          triangular part of A is not referenced.  If UPLO = 'L', the */
/*          leading n by n lower triangular part of A contains the lower */
/*          triangular part of the matrix A, and the strictly upper */
/*          triangular part of A is not referenced. */

/*          On exit, if INFO = 0, the factor U or L from the Cholesky */
/*          factorization as above. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  PIV     (output) INTEGER array, dimension (N) */
/*          PIV is such that the nonzero entries are P( PIV(K), K ) = 1. */

/*  RANK    (output) INTEGER */
/*          The rank of A given by the number of steps the algorithm */
/*          completed. */

/*  TOL     (input) REAL */
/*          User defined tolerance. If TOL < 0, then N*U*MAX( A(K,K) ) */
/*          will be used. The algorithm terminates at the (K-1)st step */
/*          if the pivot <= TOL. */

/*  WORK    REAL array, dimension (2*N) */
/*          Work space. */

/*  INFO    (output) INTEGER */
/*          < 0: If INFO = -K, the K-th argument had an illegal value, */
/*          = 0: algorithm completed successfully, and */
/*          > 0: the matrix A is either rank deficient with computed rank */
/*               as returned in RANK, or is indefinite.  See Section 7 of */
/*               LAPACK Working Note #161 for further information. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    --work;
    --piv;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SPSTRF", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Get block size */

    nb = ilaenv_(&c__1, "SPOTRF", uplo, n, &c_n1, &c_n1, &c_n1);
    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code */

	spstf2_(uplo, n, &a[a_dim1 + 1], lda, &piv[1], rank, tol, &work[1], 
		info);
	goto L200;

    } else {

/*     Initialize PIV */

	i__1 = *n;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    piv[i__] = i__;
/* L100: */
	}

/*     Compute stopping value */

	pvt = 1;
	ajj = a[pvt + pvt * a_dim1];
	i__1 = *n;
	for (i__ = 2; i__ <= i__1; ++i__) {
	    if (a[i__ + i__ * a_dim1] > ajj) {
		pvt = i__;
		ajj = a[pvt + pvt * a_dim1];
	    }
	}
	if (ajj == 0.f || sisnan_(&ajj)) {
	    *rank = 0;
	    *info = 1;
	    goto L200;
	}

/*     Compute stopping value if not supplied */

	if (*tol < 0.f) {
	    sstop = *n * slamch_("Epsilon") * ajj;
	} else {
	    sstop = *tol;
	}


	if (upper) {

/*           Compute the Cholesky factorization P' * A * P = U' * U */

	    i__1 = *n;
	    i__2 = nb;
	    for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {

/*              Account for last block not being NB wide */

/* Computing MIN */
		i__3 = nb, i__4 = *n - k + 1;
		jb = min(i__3,i__4);

/*              Set relevant part of first half of WORK to zero, */
/*              holds dot products */

		i__3 = *n;
		for (i__ = k; i__ <= i__3; ++i__) {
		    work[i__] = 0.f;
/* L110: */
		}

		i__3 = k + jb - 1;
		for (j = k; j <= i__3; ++j) {

/*              Find pivot, test for exit, else swap rows and columns */
/*              Update dot products, compute possible pivots which are */
/*              stored in the second half of WORK */

		    i__4 = *n;
		    for (i__ = j; i__ <= i__4; ++i__) {

			if (j > k) {
/* Computing 2nd power */
			    r__1 = a[j - 1 + i__ * a_dim1];
			    work[i__] += r__1 * r__1;
			}
			work[*n + i__] = a[i__ + i__ * a_dim1] - work[i__];

/* L120: */
		    }

		    if (j > 1) {
			maxlocval = (*n << 1) - (*n + j) + 1;
			itemp = smaxloc_(&work[*n + j], &maxlocval);
			pvt = itemp + j - 1;
			ajj = work[*n + pvt];
			if (ajj <= sstop || sisnan_(&ajj)) {
			    a[j + j * a_dim1] = ajj;
			    goto L190;
			}
		    }

		    if (j != pvt) {

/*                    Pivot OK, so can now swap pivot rows and columns */

			a[pvt + pvt * a_dim1] = a[j + j * a_dim1];
			i__4 = j - 1;
			sswap_(&i__4, &a[j * a_dim1 + 1], &c__1, &a[pvt * 
				a_dim1 + 1], &c__1);
			if (pvt < *n) {
			    i__4 = *n - pvt;
			    sswap_(&i__4, &a[j + (pvt + 1) * a_dim1], lda, &a[
				    pvt + (pvt + 1) * a_dim1], lda);
			}
			i__4 = pvt - j - 1;
			sswap_(&i__4, &a[j + (j + 1) * a_dim1], lda, &a[j + 1 
				+ pvt * a_dim1], &c__1);

/*                    Swap dot products and PIV */

			stemp = work[j];
			work[j] = work[pvt];
			work[pvt] = stemp;
			itemp = piv[pvt];
			piv[pvt] = piv[j];
			piv[j] = itemp;
		    }

		    ajj = sqrt(ajj);
		    a[j + j * a_dim1] = ajj;

/*                 Compute elements J+1:N of row J. */

		    if (j < *n) {
			i__4 = j - k;
			i__5 = *n - j;
			sgemv_("Trans", &i__4, &i__5, &c_b22, &a[k + (j + 1) *
				 a_dim1], lda, &a[k + j * a_dim1], &c__1, &
				c_b24, &a[j + (j + 1) * a_dim1], lda);
			i__4 = *n - j;
			r__1 = 1.f / ajj;
			sscal_(&i__4, &r__1, &a[j + (j + 1) * a_dim1], lda);
		    }

/* L130: */
		}

/*              Update trailing matrix, J already incremented */

		if (k + jb <= *n) {
		    i__3 = *n - j + 1;
		    ssyrk_("Upper", "Trans", &i__3, &jb, &c_b22, &a[k + j * 
			    a_dim1], lda, &c_b24, &a[j + j * a_dim1], lda);
		}

/* L140: */
	    }

	} else {

/*        Compute the Cholesky factorization P' * A * P = L * L' */

	    i__2 = *n;
	    i__1 = nb;
	    for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {

/*              Account for last block not being NB wide */

/* Computing MIN */
		i__3 = nb, i__4 = *n - k + 1;
		jb = min(i__3,i__4);

/*              Set relevant part of first half of WORK to zero, */
/*              holds dot products */

		i__3 = *n;
		for (i__ = k; i__ <= i__3; ++i__) {
		    work[i__] = 0.f;
/* L150: */
		}

		i__3 = k + jb - 1;
		for (j = k; j <= i__3; ++j) {

/*              Find pivot, test for exit, else swap rows and columns */
/*              Update dot products, compute possible pivots which are */
/*              stored in the second half of WORK */

		    i__4 = *n;
		    for (i__ = j; i__ <= i__4; ++i__) {

			if (j > k) {
/* Computing 2nd power */
			    r__1 = a[i__ + (j - 1) * a_dim1];
			    work[i__] += r__1 * r__1;
			}
			work[*n + i__] = a[i__ + i__ * a_dim1] - work[i__];

/* L160: */
		    }

		    if (j > 1) {
			maxlocval = (*n << 1) - (*n + j) + 1;
			itemp = smaxloc_(&work[*n + j], &maxlocval);
			pvt = itemp + j - 1;
			ajj = work[*n + pvt];
			if (ajj <= sstop || sisnan_(&ajj)) {
			    a[j + j * a_dim1] = ajj;
			    goto L190;
			}
		    }

		    if (j != pvt) {

/*                    Pivot OK, so can now swap pivot rows and columns */

			a[pvt + pvt * a_dim1] = a[j + j * a_dim1];
			i__4 = j - 1;
			sswap_(&i__4, &a[j + a_dim1], lda, &a[pvt + a_dim1], 
				lda);
			if (pvt < *n) {
			    i__4 = *n - pvt;
			    sswap_(&i__4, &a[pvt + 1 + j * a_dim1], &c__1, &a[
				    pvt + 1 + pvt * a_dim1], &c__1);
			}
			i__4 = pvt - j - 1;
			sswap_(&i__4, &a[j + 1 + j * a_dim1], &c__1, &a[pvt + 
				(j + 1) * a_dim1], lda);

/*                    Swap dot products and PIV */

			stemp = work[j];
			work[j] = work[pvt];
			work[pvt] = stemp;
			itemp = piv[pvt];
			piv[pvt] = piv[j];
			piv[j] = itemp;
		    }

		    ajj = sqrt(ajj);
		    a[j + j * a_dim1] = ajj;

/*                 Compute elements J+1:N of column J. */

		    if (j < *n) {
			i__4 = *n - j;
			i__5 = j - k;
			sgemv_("No Trans", &i__4, &i__5, &c_b22, &a[j + 1 + k 
				* a_dim1], lda, &a[j + k * a_dim1], lda, &
				c_b24, &a[j + 1 + j * a_dim1], &c__1);
			i__4 = *n - j;
			r__1 = 1.f / ajj;
			sscal_(&i__4, &r__1, &a[j + 1 + j * a_dim1], &c__1);
		    }

/* L170: */
		}

/*              Update trailing matrix, J already incremented */

		if (k + jb <= *n) {
		    i__3 = *n - j + 1;
		    ssyrk_("Lower", "No Trans", &i__3, &jb, &c_b22, &a[j + k *
			     a_dim1], lda, &c_b24, &a[j + j * a_dim1], lda);
		}

/* L180: */
	    }

	}
    }

/*     Ran to completion, A has full rank */

    *rank = *n;

    goto L200;
L190:

/*     Rank is the number of steps completed.  Set INFO = 1 to signal */
/*     that the factorization cannot be used to solve a system. */

    *rank = j - 1;
    *info = 1;

L200:
    return 0;

/*     End of SPSTRF */

} /* spstrf_ */
Beispiel #20
0
/* Subroutine */ int spotrf_(char *uplo, integer *n, real *a, integer *lda, 
	integer *info, ftnlen uplo_len)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;

    /* Local variables */
    static integer j, jb, nb;
    extern logical lsame_(char *, char *, ftnlen, ftnlen);
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *, ftnlen, ftnlen);
    static logical upper;
    extern /* Subroutine */ int strsm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
	    , ftnlen, ftnlen, ftnlen, ftnlen), ssyrk_(char *, char *, integer 
	    *, integer *, real *, real *, integer *, real *, real *, integer *
	    , ftnlen, ftnlen), spotf2_(char *, integer *, real *, integer *, 
	    integer *, ftnlen), xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);


/*  -- LAPACK routine (version 3.0) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
/*     Courant Institute, Argonne National Lab, and Rice University */
/*     March 31, 1993 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SPOTRF computes the Cholesky factorization of a real symmetric */
/*  positive definite matrix A. */

/*  The factorization has the form */
/*     A = U**T * U,  if UPLO = 'U', or */
/*     A = L  * L**T,  if UPLO = 'L', */
/*  where U is an upper triangular matrix and L is lower triangular. */

/*  This is the block version of the algorithm, calling Level 3 BLAS. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  Upper triangle of A is stored; */
/*          = 'L':  Lower triangle of A is stored. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  A       (input/output) REAL array, dimension (LDA,N) */
/*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading */
/*          N-by-N upper triangular part of A contains the upper */
/*          triangular part of the matrix A, and the strictly lower */
/*          triangular part of A is not referenced.  If UPLO = 'L', the */
/*          leading N-by-N lower triangular part of A contains the lower */
/*          triangular part of the matrix A, and the strictly upper */
/*          triangular part of A is not referenced. */

/*          On exit, if INFO = 0, the factor U or L from the Cholesky */
/*          factorization A = U**T*U or A = L*L**T. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
/*          > 0:  if INFO = i, the leading minor of order i is not */
/*                positive definite, and the factorization could not be */
/*                completed. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SPOTRF", &i__1, (ftnlen)6);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "SPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);
    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code. */

	spotf2_(uplo, n, &a[a_offset], lda, info, (ftnlen)1);
    } else {

/*        Use blocked code. */

	if (upper) {

/*           Compute the Cholesky factorization A = U'*U. */

	    i__1 = *n;
	    i__2 = nb;
	    for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Update and factorize the current diagonal block and test */
/*              for non-positive-definiteness. */

/* Computing MIN */
		i__3 = nb, i__4 = *n - j + 1;
		jb = min(i__3,i__4);
		i__3 = j - 1;
		ssyrk_("Upper", "Transpose", &jb, &i__3, &c_b13, &a[j * 
			a_dim1 + 1], lda, &c_b14, &a[j + j * a_dim1], lda, (
			ftnlen)5, (ftnlen)9);
		spotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info, (ftnlen)
			5);
		if (*info != 0) {
		    goto L30;
		}
		if (j + jb <= *n) {

/*                 Compute the current block row. */

		    i__3 = *n - j - jb + 1;
		    i__4 = j - 1;
		    sgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, &
			    c_b13, &a[j * a_dim1 + 1], lda, &a[(j + jb) * 
			    a_dim1 + 1], lda, &c_b14, &a[j + (j + jb) * 
			    a_dim1], lda, (ftnlen)9, (ftnlen)12);
		    i__3 = *n - j - jb + 1;
		    strsm_("Left", "Upper", "Transpose", "Non-unit", &jb, &
			    i__3, &c_b14, &a[j + j * a_dim1], lda, &a[j + (j 
			    + jb) * a_dim1], lda, (ftnlen)4, (ftnlen)5, (
			    ftnlen)9, (ftnlen)8);
		}
/* L10: */
	    }

	} else {

/*           Compute the Cholesky factorization A = L*L'. */

	    i__2 = *n;
	    i__1 = nb;
	    for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

/*              Update and factorize the current diagonal block and test */
/*              for non-positive-definiteness. */

/* Computing MIN */
		i__3 = nb, i__4 = *n - j + 1;
		jb = min(i__3,i__4);
		i__3 = j - 1;
		ssyrk_("Lower", "No transpose", &jb, &i__3, &c_b13, &a[j + 
			a_dim1], lda, &c_b14, &a[j + j * a_dim1], lda, (
			ftnlen)5, (ftnlen)12);
		spotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info, (ftnlen)
			5);
		if (*info != 0) {
		    goto L30;
		}
		if (j + jb <= *n) {

/*                 Compute the current block column. */

		    i__3 = *n - j - jb + 1;
		    i__4 = j - 1;
		    sgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &
			    c_b13, &a[j + jb + a_dim1], lda, &a[j + a_dim1], 
			    lda, &c_b14, &a[j + jb + j * a_dim1], lda, (
			    ftnlen)12, (ftnlen)9);
		    i__3 = *n - j - jb + 1;
		    strsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, &
			    jb, &c_b14, &a[j + j * a_dim1], lda, &a[j + jb + 
			    j * a_dim1], lda, (ftnlen)5, (ftnlen)5, (ftnlen)9,
			     (ftnlen)8);
		}
/* L20: */
	    }
	}
    }
    goto L40;

L30:
    *info = *info + j - 1;

L40:
    return 0;

/*     End of SPOTRF */

} /* spotrf_ */
Beispiel #21
0
 int spftri_(char *transr, char *uplo, int *n, float *a, 
	int *info)
{
    /* System generated locals */
    int i__1, i__2;

    /* Local variables */
    int k, n1, n2;
    int normaltransr;
    extern int lsame_(char *, char *);
    int lower;
    extern  int strmm_(char *, char *, char *, char *, 
	    int *, int *, float *, float *, int *, float *, int *
), ssyrk_(char *, char *, int 
	    *, int *, float *, float *, int *, float *, float *, int *
), xerbla_(char *, int *);
    int nisodd;
    extern  int slauum_(char *, int *, float *, int *, 
	    int *), stftri_(char *, char *, char *, int *, 
	    float *, int *);


/*  -- LAPACK routine (version 3.2)                                    -- */

/*  -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */
/*  -- November 2008                                                   -- */

/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */

/*     .. Scalar Arguments .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SPFTRI computes the inverse of a float (symmetric) positive definite */
/*  matrix A using the Cholesky factorization A = U**T*U or A = L*L**T */
/*  computed by SPFTRF. */

/*  Arguments */
/*  ========= */

/*  TRANSR    (input) CHARACTER */
/*          = 'N':  The Normal TRANSR of RFP A is stored; */
/*          = 'T':  The Transpose TRANSR of RFP A is stored. */

/*  UPLO    (input) CHARACTER */
/*          = 'U':  Upper triangle of A is stored; */
/*          = 'L':  Lower triangle of A is stored. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  A       (input/output) REAL array, dimension ( N*(N+1)/2 ) */
/*          On entry, the symmetric matrix A in RFP format. RFP format is */
/*          described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' */
/*          then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is */
/*          (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'T' then RFP is */
/*          the transpose of RFP A as defined when */
/*          TRANSR = 'N'. The contents of RFP A are defined by UPLO as */
/*          follows: If UPLO = 'U' the RFP A contains the nt elements of */
/*          upper packed A. If UPLO = 'L' the RFP A contains the elements */
/*          of lower packed A. The LDA of RFP A is (N+1)/2 when TRANSR = */
/*          'T'. When TRANSR is 'N' the LDA is N+1 when N is even and N */
/*          is odd. See the Note below for more details. */

/*          On exit, the symmetric inverse of the original matrix, in the */
/*          same storage format. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
/*          > 0:  if INFO = i, the (i,i) element of the factor U or L is */
/*                zero, and the inverse could not be computed. */

/*  Notes */
/*  ===== */

/*  We first consider Rectangular Full Packed (RFP) Format when N is */
/*  even. We give an example where N = 6. */

/*      AP is Upper             AP is Lower */

/*   00 01 02 03 04 05       00 */
/*      11 12 13 14 15       10 11 */
/*         22 23 24 25       20 21 22 */
/*            33 34 35       30 31 32 33 */
/*               44 45       40 41 42 43 44 */
/*                  55       50 51 52 53 54 55 */


/*  Let TRANSR = 'N'. RFP holds AP as follows: */
/*  For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */
/*  three columns of AP upper. The lower triangle A(4:6,0:2) consists of */
/*  the transpose of the first three columns of AP upper. */
/*  For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */
/*  three columns of AP lower. The upper triangle A(0:2,0:2) consists of */
/*  the transpose of the last three columns of AP lower. */
/*  This covers the case N even and TRANSR = 'N'. */

/*         RFP A                   RFP A */

/*        03 04 05                33 43 53 */
/*        13 14 15                00 44 54 */
/*        23 24 25                10 11 55 */
/*        33 34 35                20 21 22 */
/*        00 44 45                30 31 32 */
/*        01 11 55                40 41 42 */
/*        02 12 22                50 51 52 */

/*  Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */
/*  transpose of RFP A above. One therefore gets: */


/*           RFP A                   RFP A */

/*     03 13 23 33 00 01 02    33 00 10 20 30 40 50 */
/*     04 14 24 34 44 11 12    43 44 11 21 31 41 51 */
/*     05 15 25 35 45 55 22    53 54 55 22 32 42 52 */


/*  We first consider Rectangular Full Packed (RFP) Format when N is */
/*  odd. We give an example where N = 5. */

/*     AP is Upper                 AP is Lower */

/*   00 01 02 03 04              00 */
/*      11 12 13 14              10 11 */
/*         22 23 24              20 21 22 */
/*            33 34              30 31 32 33 */
/*               44              40 41 42 43 44 */


/*  Let TRANSR = 'N'. RFP holds AP as follows: */
/*  For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */
/*  three columns of AP upper. The lower triangle A(3:4,0:1) consists of */
/*  the transpose of the first two columns of AP upper. */
/*  For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */
/*  three columns of AP lower. The upper triangle A(0:1,1:2) consists of */
/*  the transpose of the last two columns of AP lower. */
/*  This covers the case N odd and TRANSR = 'N'. */

/*         RFP A                   RFP A */

/*        02 03 04                00 33 43 */
/*        12 13 14                10 11 44 */
/*        22 23 24                20 21 22 */
/*        00 33 34                30 31 32 */
/*        01 11 44                40 41 42 */

/*  Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */
/*  transpose of RFP A above. One therefore gets: */

/*           RFP A                   RFP A */

/*     02 12 22 00 01             00 10 20 30 40 50 */
/*     03 13 23 33 11             33 11 21 31 41 51 */
/*     04 14 24 34 44             43 44 22 32 42 52 */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    *info = 0;
    normaltransr = lsame_(transr, "N");
    lower = lsame_(uplo, "L");
    if (! normaltransr && ! lsame_(transr, "T")) {
	*info = -1;
    } else if (! lower && ! lsame_(uplo, "U")) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SPFTRI", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Invert the triangular Cholesky factor U or L. */

    stftri_(transr, uplo, "N", n, a, info);
    if (*info > 0) {
	return 0;
    }

/*     If N is odd, set NISODD = .TRUE. */
/*     If N is even, set K = N/2 and NISODD = .FALSE. */

    if (*n % 2 == 0) {
	k = *n / 2;
	nisodd = FALSE;
    } else {
	nisodd = TRUE;
    }

/*     Set N1 and N2 depending on LOWER */

    if (lower) {
	n2 = *n / 2;
	n1 = *n - n2;
    } else {
	n1 = *n / 2;
	n2 = *n - n1;
    }

/*     Start execution of triangular matrix multiply: inv(U)*inv(U)^C or */
/*     inv(L)^C*inv(L). There are eight cases. */

    if (nisodd) {

/*        N is odd */

	if (normaltransr) {

/*           N is odd and TRANSR = 'N' */

	    if (lower) {

/*              SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:N1-1) ) */
/*              T1 -> a(0,0), T2 -> a(0,1), S -> a(N1,0) */
/*              T1 -> a(0), T2 -> a(n), S -> a(N1) */

		slauum_("L", &n1, a, n, info);
		ssyrk_("L", "T", &n1, &n2, &c_b11, &a[n1], n, &c_b11, a, n);
		strmm_("L", "U", "N", "N", &n2, &n1, &c_b11, &a[*n], n, &a[n1]
, n);
		slauum_("U", &n2, &a[*n], n, info);

	    } else {

/*              SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:N2-1) */
/*              T1 -> a(N1+1,0), T2 -> a(N1,0), S -> a(0,0) */
/*              T1 -> a(N2), T2 -> a(N1), S -> a(0) */

		slauum_("L", &n1, &a[n2], n, info);
		ssyrk_("L", "N", &n1, &n2, &c_b11, a, n, &c_b11, &a[n2], n);
		strmm_("R", "U", "T", "N", &n1, &n2, &c_b11, &a[n1], n, a, n);
		slauum_("U", &n2, &a[n1], n, info);

	    }

	} else {

/*           N is odd and TRANSR = 'T' */

	    if (lower) {

/*              SRPA for LOWER, TRANSPOSE, and N is odd */
/*              T1 -> a(0), T2 -> a(1), S -> a(0+N1*N1) */

		slauum_("U", &n1, a, &n1, info);
		ssyrk_("U", "N", &n1, &n2, &c_b11, &a[n1 * n1], &n1, &c_b11, 
			a, &n1);
		strmm_("R", "L", "N", "N", &n1, &n2, &c_b11, &a[1], &n1, &a[
			n1 * n1], &n1);
		slauum_("L", &n2, &a[1], &n1, info);

	    } else {

/*              SRPA for UPPER, TRANSPOSE, and N is odd */
/*              T1 -> a(0+N2*N2), T2 -> a(0+N1*N2), S -> a(0) */

		slauum_("U", &n1, &a[n2 * n2], &n2, info);
		ssyrk_("U", "T", &n1, &n2, &c_b11, a, &n2, &c_b11, &a[n2 * n2]
, &n2);
		strmm_("L", "L", "T", "N", &n2, &n1, &c_b11, &a[n1 * n2], &n2, 
			 a, &n2);
		slauum_("L", &n2, &a[n1 * n2], &n2, info);

	    }

	}

    } else {

/*        N is even */

	if (normaltransr) {

/*           N is even and TRANSR = 'N' */

	    if (lower) {

/*              SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) */
/*              T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) */
/*              T1 -> a(1), T2 -> a(0), S -> a(k+1) */

		i__1 = *n + 1;
		slauum_("L", &k, &a[1], &i__1, info);
		i__1 = *n + 1;
		i__2 = *n + 1;
		ssyrk_("L", "T", &k, &k, &c_b11, &a[k + 1], &i__1, &c_b11, &a[
			1], &i__2);
		i__1 = *n + 1;
		i__2 = *n + 1;
		strmm_("L", "U", "N", "N", &k, &k, &c_b11, a, &i__1, &a[k + 1]
, &i__2);
		i__1 = *n + 1;
		slauum_("U", &k, a, &i__1, info);

	    } else {

/*              SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) */
/*              T1 -> a(k+1,0) ,  T2 -> a(k,0),   S -> a(0,0) */
/*              T1 -> a(k+1), T2 -> a(k), S -> a(0) */

		i__1 = *n + 1;
		slauum_("L", &k, &a[k + 1], &i__1, info);
		i__1 = *n + 1;
		i__2 = *n + 1;
		ssyrk_("L", "N", &k, &k, &c_b11, a, &i__1, &c_b11, &a[k + 1], 
			&i__2);
		i__1 = *n + 1;
		i__2 = *n + 1;
		strmm_("R", "U", "T", "N", &k, &k, &c_b11, &a[k], &i__1, a, &
			i__2);
		i__1 = *n + 1;
		slauum_("U", &k, &a[k], &i__1, info);

	    }

	} else {

/*           N is even and TRANSR = 'T' */

	    if (lower) {

/*              SRPA for LOWER, TRANSPOSE, and N is even (see paper) */
/*              T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1), */
/*              T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k */

		slauum_("U", &k, &a[k], &k, info);
		ssyrk_("U", "N", &k, &k, &c_b11, &a[k * (k + 1)], &k, &c_b11, 
			&a[k], &k);
		strmm_("R", "L", "N", "N", &k, &k, &c_b11, a, &k, &a[k * (k + 
			1)], &k);
		slauum_("L", &k, a, &k, info);

	    } else {

/*              SRPA for UPPER, TRANSPOSE, and N is even (see paper) */
/*              T1 -> B(0,k+1),     T2 -> B(0,k),   S -> B(0,0), */
/*              T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k */

		slauum_("U", &k, &a[k * (k + 1)], &k, info);
		ssyrk_("U", "T", &k, &k, &c_b11, a, &k, &c_b11, &a[k * (k + 1)
			], &k);
		strmm_("L", "L", "T", "N", &k, &k, &c_b11, &a[k * k], &k, a, &
			k);
		slauum_("L", &k, &a[k * k], &k, info);

	    }

	}

    }

    return 0;

/*     End of SPFTRI */

} /* spftri_ */