C++ (Cpp) cgemm_ Examples

Programming Language: C++ (Cpp)

Method/Function: cgemm_

Examples at hotexamples.com: 30

C++ (Cpp) cgemm_ - 30 examples found. These are the top rated real world C++ (Cpp) examples of cgemm_ extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: fblaswr.c Project: CIBC-Internal/clapack

int
f2c_cgemm(char* transA, char* transB, integer* M, integer* N, integer* K,
          complex* alpha,
          complex* A, integer* lda,
          complex* B, integer* ldb,
          complex* beta,
          complex* C, integer* ldc)
{
    cgemm_(transA, transB, M, N, K,
           alpha, A, lda, B, ldb, beta, C, ldc);
    return 0;
}

Example #2

Show file

File: cqrt15.c Project: nya3jp/python-animeface

/* Subroutine */ int cqrt15_(integer *scale, integer *rksel, integer *m, 
	integer *n, integer *nrhs, complex *a, integer *lda, complex *b, 
	integer *ldb, real *s, integer *rank, real *norma, real *normb, 
	integer *iseed, complex *work, integer *lwork)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
    real r__1;

    /* Local variables */
    integer j, mn;
    real eps;
    integer info;
    real temp;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *), clarf_(char *, 
	    integer *, integer *, complex *, integer *, complex *, complex *, 
	    integer *, complex *);
    extern doublereal sasum_(integer *, real *, integer *);
    real dummy[1];
    extern doublereal scnrm2_(integer *, complex *, integer *);
    extern /* Subroutine */ int slabad_(real *, real *);
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *);
    extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, 
	    real *, integer *, integer *, complex *, integer *, integer *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer 
	    *), claset_(char *, integer *, integer *, complex *, complex *, 
	    complex *, integer *), xerbla_(char *, integer *);
    real bignum;
    extern /* Subroutine */ int claror_(char *, char *, integer *, integer *, 
	    complex *, integer *, integer *, complex *, integer *);
    extern doublereal slarnd_(integer *, integer *);
    extern /* Subroutine */ int slaord_(char *, integer *, real *, integer *), clarnv_(integer *, integer *, integer *, complex *), 
	    slascl_(char *, integer *, integer *, real *, real *, integer *, 
	    integer *, real *, integer *, integer *);
    real smlnum;


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CQRT15 generates a matrix with full or deficient rank and of various */
/*  norms. */

/*  Arguments */
/*  ========= */

/*  SCALE   (input) INTEGER */
/*          SCALE = 1: normally scaled matrix */
/*          SCALE = 2: matrix scaled up */
/*          SCALE = 3: matrix scaled down */

/*  RKSEL   (input) INTEGER */
/*          RKSEL = 1: full rank matrix */
/*          RKSEL = 2: rank-deficient matrix */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A. */

/*  N       (input) INTEGER */
/*          The number of columns of A. */

/*  NRHS    (input) INTEGER */
/*          The number of columns of B. */

/*  A       (output) COMPLEX array, dimension (LDA,N) */
/*          The M-by-N matrix A. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A. */

/*  B       (output) COMPLEX array, dimension (LDB, NRHS) */
/*          A matrix that is in the range space of matrix A. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B. */

/*  S       (output) REAL array, dimension MIN(M,N) */
/*          Singular values of A. */

/*  RANK    (output) INTEGER */
/*          number of nonzero singular values of A. */

/*  NORMA   (output) REAL */
/*          one-norm norm of A. */

/*  NORMB   (output) REAL */
/*          one-norm norm of B. */

/*  ISEED   (input/output) integer array, dimension (4) */
/*          seed for random number generator. */

/*  WORK    (workspace) COMPLEX array, dimension (LWORK) */

/*  LWORK   (input) INTEGER */
/*          length of work space required. */
/*          LWORK >= MAX(M+MIN(M,N),NRHS*MIN(M,N),2*N+M) */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --s;
    --iseed;
    --work;

    /* Function Body */
    mn = min(*m,*n);
/* Computing MAX */
    i__1 = *m + mn, i__2 = mn * *nrhs, i__1 = max(i__1,i__2), i__2 = (*n << 1)
	     + *m;
    if (*lwork < max(i__1,i__2)) {
	xerbla_("CQRT15", &c__16);
	return 0;
    }

    smlnum = slamch_("Safe minimum");
    bignum = 1.f / smlnum;
    slabad_(&smlnum, &bignum);
    eps = slamch_("Epsilon");
    smlnum = smlnum / eps / eps;
    bignum = 1.f / smlnum;

/*     Determine rank and (unscaled) singular values */

    if (*rksel == 1) {
	*rank = mn;
    } else if (*rksel == 2) {
	*rank = mn * 3 / 4;
	i__1 = mn;
	for (j = *rank + 1; j <= i__1; ++j) {
	    s[j] = 0.f;
/* L10: */
	}
    } else {
	xerbla_("CQRT15", &c__2);
    }

    if (*rank > 0) {

/*        Nontrivial case */

	s[1] = 1.f;
	i__1 = *rank;
	for (j = 2; j <= i__1; ++j) {
L20:
	    temp = slarnd_(&c__1, &iseed[1]);
	    if (temp > .1f) {
		s[j] = dabs(temp);
	    } else {
		goto L20;
	    }
/* L30: */
	}
	slaord_("Decreasing", rank, &s[1], &c__1);

/*        Generate 'rank' columns of a random orthogonal matrix in A */

	clarnv_(&c__2, &iseed[1], m, &work[1]);
	r__1 = 1.f / scnrm2_(m, &work[1], &c__1);
	csscal_(m, &r__1, &work[1], &c__1);
	claset_("Full", m, rank, &c_b1, &c_b2, &a[a_offset], lda);
	clarf_("Left", m, rank, &work[1], &c__1, &c_b22, &a[a_offset], lda, &
		work[*m + 1]);

/*        workspace used: m+mn */

/*        Generate consistent rhs in the range space of A */

	i__1 = *rank * *nrhs;
	clarnv_(&c__2, &iseed[1], &i__1, &work[1]);
	cgemm_("No transpose", "No transpose", m, nrhs, rank, &c_b2, &a[
		a_offset], lda, &work[1], rank, &c_b1, &b[b_offset], ldb);

/*        work space used: <= mn *nrhs */

/*        generate (unscaled) matrix A */

	i__1 = *rank;
	for (j = 1; j <= i__1; ++j) {
	    csscal_(m, &s[j], &a[j * a_dim1 + 1], &c__1);
/* L40: */
	}
	if (*rank < *n) {
	    i__1 = *n - *rank;
	    claset_("Full", m, &i__1, &c_b1, &c_b1, &a[(*rank + 1) * a_dim1 + 
		    1], lda);
	}
	claror_("Right", "No initialization", m, n, &a[a_offset], lda, &iseed[
		1], &work[1], &info);

    } else {

/*        work space used 2*n+m */

/*        Generate null matrix and rhs */

	i__1 = mn;
	for (j = 1; j <= i__1; ++j) {
	    s[j] = 0.f;
/* L50: */
	}
	claset_("Full", m, n, &c_b1, &c_b1, &a[a_offset], lda);
	claset_("Full", m, nrhs, &c_b1, &c_b1, &b[b_offset], ldb);

    }

/*     Scale the matrix */

    if (*scale != 1) {
	*norma = clange_("Max", m, n, &a[a_offset], lda, dummy);
	if (*norma != 0.f) {
	    if (*scale == 2) {

/*              matrix scaled up */

		clascl_("General", &c__0, &c__0, norma, &bignum, m, n, &a[
			a_offset], lda, &info);
		slascl_("General", &c__0, &c__0, norma, &bignum, &mn, &c__1, &
			s[1], &mn, &info);
		clascl_("General", &c__0, &c__0, norma, &bignum, m, nrhs, &b[
			b_offset], ldb, &info);
	    } else if (*scale == 3) {

/*              matrix scaled down */

		clascl_("General", &c__0, &c__0, norma, &smlnum, m, n, &a[
			a_offset], lda, &info);
		slascl_("General", &c__0, &c__0, norma, &smlnum, &mn, &c__1, &
			s[1], &mn, &info);
		clascl_("General", &c__0, &c__0, norma, &smlnum, m, nrhs, &b[
			b_offset], ldb, &info);
	    } else {
		xerbla_("CQRT15", &c__1);
		return 0;
	    }
	}
    }

    *norma = sasum_(&mn, &s[1], &c__1);
    *normb = clange_("One-norm", m, nrhs, &b[b_offset], ldb, dummy)
	    ;

    return 0;

/*     End of CQRT15 */

} /* cqrt15_ */

Example #3

Show file

File: cgetrf.c Project: 3deggi/levmar-ndk

/* Subroutine */ int cgetrf_(integer *m, integer *n, complex *a, integer *lda, 
	 integer *ipiv, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    complex q__1;

    /* Local variables */
    integer i__, j, jb, nb;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *);
    integer iinfo;
    extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, 
	    integer *, integer *, complex *, complex *, integer *, complex *, 
	    integer *), cgetf2_(integer *, 
	    integer *, complex *, integer *, integer *, integer *), xerbla_(
	    char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *);
    extern /* Subroutine */ int claswp_(integer *, complex *, integer *, 
	    integer *, integer *, integer *, integer *);


/*  -- LAPACK routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     March 2008 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CGETRF computes an LU factorization of a general M-by-N matrix A */
/*  using partial pivoting with row interchanges. */

/*  The factorization has the form */
/*     A = P * L * U */
/*  where P is a permutation matrix, L is lower triangular with unit */
/*  diagonal elements (lower trapezoidal if m > n), and U is upper */
/*  triangular (upper trapezoidal if m < n). */

/*  This is the Crout Level 3 BLAS version of the algorithm. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A.  M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A.  N >= 0. */

/*  A       (input/output) COMPLEX array, dimension (LDA,N) */
/*          On entry, the M-by-N matrix to be factored. */
/*          On exit, the factors L and U from the factorization */
/*          A = P*L*U; the unit diagonal elements of L are not stored. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,M). */

/*  IPIV    (output) INTEGER array, dimension (min(M,N)) */
/*          The pivot indices; for 1 <= i <= min(M,N), row i of the */
/*          matrix was interchanged with row IPIV(i). */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
/*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization */
/*                has been completed, but the factor U is exactly */
/*                singular, and division by zero will occur if it is used */
/*                to solve a system of equations. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --ipiv;

    /* Function Body */
    *info = 0;
    if (*m < 0) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*m)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CGETRF", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*m == 0 || *n == 0) {
	return 0;
    }

/*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "CGETRF", " ", m, n, &c_n1, &c_n1);
    if (nb <= 1 || nb >= min(*m,*n)) {

/*        Use unblocked code. */

	cgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info);
    } else {

/*        Use blocked code. */

	i__1 = min(*m,*n);
	i__2 = nb;
	for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
/* Computing MIN */
	    i__3 = min(*m,*n) - j + 1;
	    jb = min(i__3,nb);

/*           Update current block. */

	    i__3 = *m - j + 1;
	    i__4 = j - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemm_("No transpose", "No transpose", &i__3, &jb, &i__4, &q__1, &
		    a[j + a_dim1], lda, &a[j * a_dim1 + 1], lda, &c_b1, &a[j 
		    + j * a_dim1], lda);

/*           Factor diagonal and subdiagonal blocks and test for exact */
/*           singularity. */

	    i__3 = *m - j + 1;
	    cgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo);

/*           Adjust INFO and the pivot indices. */

	    if (*info == 0 && iinfo > 0) {
		*info = iinfo + j - 1;
	    }
/* Computing MIN */
	    i__4 = *m, i__5 = j + jb - 1;
	    i__3 = min(i__4,i__5);
	    for (i__ = j; i__ <= i__3; ++i__) {
		ipiv[i__] = j - 1 + ipiv[i__];
/* L10: */
	    }

/*           Apply interchanges to column 1:J-1 */

	    i__3 = j - 1;
	    i__4 = j + jb - 1;
	    claswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1);

	    if (j + jb <= *n) {

/*              Apply interchanges to column J+JB:N */

		i__3 = *n - j - jb + 1;
		i__4 = j + jb - 1;
		claswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, &
			ipiv[1], &c__1);

		i__3 = *n - j - jb + 1;
		i__4 = j - 1;
		q__1.r = -1.f, q__1.i = -0.f;
		cgemm_("No transpose", "No transpose", &jb, &i__3, &i__4, &
			q__1, &a[j + a_dim1], lda, &a[(j + jb) * a_dim1 + 1], 
			lda, &c_b1, &a[j + (j + jb) * a_dim1], lda);

/*              Compute block row of U. */

		i__3 = *n - j - jb + 1;
		ctrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, &
			c_b1, &a[j + j * a_dim1], lda, &a[j + (j + jb) * 
			a_dim1], lda);
	    }
/* L20: */
	}
    }
    return 0;

/*     End of CGETRF */

} /* cgetrf_ */

Example #4

Show file

File: cqrt17.c Project: kstraube/hysim

doublereal cqrt17_(char *trans, integer *iresid, integer *m, integer *n, 
	integer *nrhs, complex *a, integer *lda, complex *x, integer *ldx, 
	complex *b, integer *ldb, complex *c__, complex *work, integer *lwork)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, x_dim1, 
	    x_offset, i__1;
    real ret_val;

    /* Local variables */
    real err;
    integer iscl, info;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *);
    extern logical lsame_(char *, char *);
    real norma, normb;
    integer ncols;
    real normx, rwork[1];
    integer nrows;
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *);
    extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *, 
	    real *, integer *, integer *, complex *, integer *, integer *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex 
	    *, integer *, complex *, integer *), xerbla_(char *, 
	    integer *);
    real bignum, smlnum, normrs;


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CQRT17 computes the ratio */

/*     || R'*op(A) ||/(||A||*alpha*max(M,N,NRHS)*eps) */

/*  where R = op(A)*X - B, op(A) is A or A', and */

/*     alpha = ||B|| if IRESID = 1 (zero-residual problem) */
/*     alpha = ||R|| if IRESID = 2 (otherwise). */

/*  Arguments */
/*  ========= */

/*  TRANS   (input) CHARACTER*1 */
/*          Specifies whether or not the transpose of A is used. */
/*          = 'N':  No transpose, op(A) = A. */
/*          = 'C':  Conjugate transpose, op(A) = A'. */

/*  IRESID  (input) INTEGER */
/*          IRESID = 1 indicates zero-residual problem. */
/*          IRESID = 2 indicates non-zero residual. */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A. */
/*          If TRANS = 'N', the number of rows of the matrix B. */
/*          If TRANS = 'C', the number of rows of the matrix X. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix  A. */
/*          If TRANS = 'N', the number of rows of the matrix X. */
/*          If TRANS = 'C', the number of rows of the matrix B. */

/*  NRHS    (input) INTEGER */
/*          The number of columns of the matrices X and B. */

/*  A       (input) COMPLEX array, dimension (LDA,N) */
/*          The m-by-n matrix A. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A. LDA >= M. */

/*  X       (input) COMPLEX array, dimension (LDX,NRHS) */
/*          If TRANS = 'N', the n-by-nrhs matrix X. */
/*          If TRANS = 'C', the m-by-nrhs matrix X. */

/*  LDX     (input) INTEGER */
/*          The leading dimension of the array X. */
/*          If TRANS = 'N', LDX >= N. */
/*          If TRANS = 'C', LDX >= M. */

/*  B       (input) COMPLEX array, dimension (LDB,NRHS) */
/*          If TRANS = 'N', the m-by-nrhs matrix B. */
/*          If TRANS = 'C', the n-by-nrhs matrix B. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B. */
/*          If TRANS = 'N', LDB >= M. */
/*          If TRANS = 'C', LDB >= N. */

/*  C       (workspace) COMPLEX array, dimension (LDB,NRHS) */

/*  WORK    (workspace) COMPLEX array, dimension (LWORK) */

/*  LWORK   (input) INTEGER */
/*          The length of the array WORK.  LWORK >= NRHS*(M+N). */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    c_dim1 = *ldb;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --work;

    /* Function Body */
    ret_val = 0.f;

    if (lsame_(trans, "N")) {
	nrows = *m;
	ncols = *n;
    } else if (lsame_(trans, "C")) {
	nrows = *n;
	ncols = *m;
    } else {
	xerbla_("CQRT17", &c__1);
	return ret_val;
    }

    if (*lwork < ncols * *nrhs) {
	xerbla_("CQRT17", &c__13);
	return ret_val;
    }

    if (*m <= 0 || *n <= 0 || *nrhs <= 0) {
	return ret_val;
    }

    norma = clange_("One-norm", m, n, &a[a_offset], lda, rwork);
    smlnum = slamch_("Safe minimum") / slamch_("Precision");
    bignum = 1.f / smlnum;
    iscl = 0;

/*     compute residual and scale it */

    clacpy_("All", &nrows, nrhs, &b[b_offset], ldb, &c__[c_offset], ldb);
    cgemm_(trans, "No transpose", &nrows, nrhs, &ncols, &c_b13, &a[a_offset], 
	    lda, &x[x_offset], ldx, &c_b14, &c__[c_offset], ldb);
    normrs = clange_("Max", &nrows, nrhs, &c__[c_offset], ldb, rwork);
    if (normrs > smlnum) {
	iscl = 1;
	clascl_("General", &c__0, &c__0, &normrs, &c_b19, &nrows, nrhs, &c__[
		c_offset], ldb, &info);
    }

/*     compute R'*A */

    cgemm_("Conjugate transpose", trans, nrhs, &ncols, &nrows, &c_b14, &c__[
	    c_offset], ldb, &a[a_offset], lda, &c_b22, &work[1], nrhs);

/*     compute and properly scale error */

    err = clange_("One-norm", nrhs, &ncols, &work[1], nrhs, rwork);
    if (norma != 0.f) {
	err /= norma;
    }

    if (iscl == 1) {
	err *= normrs;
    }

    if (*iresid == 1) {
	normb = clange_("One-norm", &nrows, nrhs, &b[b_offset], ldb, rwork);
	if (normb != 0.f) {
	    err /= normb;
	}
    } else {
	normx = clange_("One-norm", &ncols, nrhs, &x[x_offset], ldx, rwork);
	if (normx != 0.f) {
	    err /= normx;
	}
    }

/* Computing MAX */
    i__1 = max(*m,*n);
    ret_val = err / (slamch_("Epsilon") * (real) max(i__1,*nrhs));
    return ret_val;

/*     End of CQRT17 */

} /* cqrt17_ */

Example #5

Show file

File: cchkbd.c Project: zangel/uquad

/* Subroutine */ int cchkbd_(integer *nsizes, integer *mval, integer *nval, 
	integer *ntypes, logical *dotype, integer *nrhs, integer *iseed, real 
	*thresh, complex *a, integer *lda, real *bd, real *be, real *s1, real 
	*s2, complex *x, integer *ldx, complex *y, complex *z__, complex *q, 
	integer *ldq, complex *pt, integer *ldpt, complex *u, complex *vt, 
	complex *work, integer *lwork, real *rwork, integer *nout, integer *
	info)
{
    /* Initialized data */

    static integer ktype[16] = { 1,2,4,4,4,4,4,6,6,6,6,6,9,9,9,10 };
    static integer kmagn[16] = { 1,1,1,1,1,2,3,1,1,1,2,3,1,2,3,0 };
    static integer kmode[16] = { 0,0,4,3,1,4,4,4,3,1,4,4,0,0,0,0 };

    /* Format strings */
    static char fmt_9998[] = "(\002 CCHKBD: \002,a,\002 returned INFO=\002,i"
	    "6,\002.\002,/9x,\002M=\002,i6,\002, N=\002,i6,\002, JTYPE=\002,i"
	    "6,\002, ISEED=(\002,3(i5,\002,\002),i5,\002)\002)";
    static char fmt_9999[] = "(\002 M=\002,i5,\002, N=\002,i5,\002, type "
	    "\002,i2,\002, seed=\002,4(i4,\002,\002),\002 test(\002,i2,\002)"
	    "=\002,g11.4)";

    /* System generated locals */
    integer a_dim1, a_offset, pt_dim1, pt_offset, q_dim1, q_offset, u_dim1, 
	    u_offset, vt_dim1, vt_offset, x_dim1, x_offset, y_dim1, y_offset, 
	    z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
    real r__1, r__2, r__3, r__4, r__5, r__6, r__7;

    /* Builtin functions   
       Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);
    double log(doublereal), sqrt(doublereal), exp(doublereal);
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);

    /* Local variables */
    static real cond;
    static integer jcol;
    static char path[3];
    static integer mmax, nmax;
    static real unfl, ovfl;
    static char uplo[1];
    static real temp1, temp2;
    static integer i__, j, m, n;
    extern /* Subroutine */ int cbdt01_(integer *, integer *, integer *, 
	    complex *, integer *, complex *, integer *, real *, real *, 
	    complex *, integer *, complex *, real *, real *);
    static logical badmm, badnn;
    extern /* Subroutine */ int cbdt02_(integer *, integer *, complex *, 
	    integer *, complex *, integer *, complex *, integer *, complex *, 
	    real *, real *), cbdt03_(char *, integer *, integer *, real *, 
	    real *, complex *, integer *, real *, complex *, integer *, 
	    complex *, real *);
    static integer nfail, imode;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *);
    static real dumma[1];
    static integer iinfo;
    extern /* Subroutine */ int cunt01_(char *, integer *, integer *, complex 
	    *, integer *, complex *, integer *, real *, real *);
    static real anorm;
    static integer mnmin, mnmax, jsize, itype, jtype, iwork[1], ntest;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *), slahd2_(integer *, char *);
    static integer log2ui;
    static logical bidiag;
    extern /* Subroutine */ int cgebrd_(integer *, integer *, complex *, 
	    integer *, real *, real *, complex *, complex *, complex *, 
	    integer *, integer *), slabad_(real *, real *);
    static integer mq;
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex 
	    *, integer *, complex *, integer *), claset_(char *, 
	    integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *);
    static integer ioldsd[4];
    extern /* Subroutine */ int cbdsqr_(char *, integer *, integer *, integer 
	    *, integer *, real *, real *, complex *, integer *, complex *, 
	    integer *, complex *, integer *, real *, integer *), 
	    cungbr_(char *, integer *, integer *, integer *, complex *, 
	    integer *, complex *, complex *, integer *, integer *), 
	    alasum_(char *, integer *, integer *, integer *, integer *);
    extern doublereal slarnd_(integer *, integer *);
    extern /* Subroutine */ int clatmr_(integer *, integer *, char *, integer 
	    *, char *, complex *, integer *, real *, complex *, char *, char *
	    , complex *, integer *, real *, complex *, integer *, real *, 
	    char *, integer *, integer *, integer *, real *, real *, char *, 
	    complex *, integer *, integer *, integer *), clatms_(integer *, integer *, 
	    char *, integer *, char *, real *, integer *, real *, real *, 
	    integer *, integer *, char *, complex *, integer *, complex *, 
	    integer *);
    static real amninv;
    extern /* Subroutine */ int ssvdch_(integer *, real *, real *, real *, 
	    real *, integer *);
    static integer minwrk;
    static real rtunfl, rtovfl, ulpinv, result[14];
    static integer mtypes;
    static real ulp;

    /* Fortran I/O blocks */
    static cilist io___40 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___41 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___43 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___44 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___45 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___46 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___50 = { 0, 0, 0, fmt_9999, 0 };



#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CCHKBD checks the singular value decomposition (SVD) routines.   

    CGEBRD reduces a complex general m by n matrix A to real upper or   
    lower bidiagonal form by an orthogonal transformation: Q' * A * P = B   
    (or A = Q * B * P').  The matrix B is upper bidiagonal if m >= n   
    and lower bidiagonal if m < n.   

    CUNGBR generates the orthogonal matrices Q and P' from CGEBRD.   
    Note that Q and P are not necessarily square.   

    CBDSQR computes the singular value decomposition of the bidiagonal   
    matrix B as B = U S V'.  It is called three times to compute   
       1)  B = U S1 V', where S1 is the diagonal matrix of singular   
           values and the columns of the matrices U and V are the left   
           and right singular vectors, respectively, of B.   
       2)  Same as 1), but the singular values are stored in S2 and the   
           singular vectors are not computed.   
       3)  A = (UQ) S (P'V'), the SVD of the original matrix A.   
    In addition, CBDSQR has an option to apply the left orthogonal matrix   
    U to a matrix X, useful in least squares applications.   

    For each pair of matrix dimensions (M,N) and each selected matrix   
    type, an M by N matrix A and an M by NRHS matrix X are generated.   
    The problem dimensions are as follows   
       A:          M x N   
       Q:          M x min(M,N) (but M x M if NRHS > 0)   
       P:          min(M,N) x N   
       B:          min(M,N) x min(M,N)   
       U, V:       min(M,N) x min(M,N)   
       S1, S2      diagonal, order min(M,N)   
       X:          M x NRHS   

    For each generated matrix, 14 tests are performed:   

    Test CGEBRD and CUNGBR   

    (1)   | A - Q B PT | / ( |A| max(M,N) ulp ), PT = P'   

    (2)   | I - Q' Q | / ( M ulp )   

    (3)   | I - PT PT' | / ( N ulp )   

    Test CBDSQR on bidiagonal matrix B   

    (4)   | B - U S1 VT | / ( |B| min(M,N) ulp ), VT = V'   

    (5)   | Y - U Z | / ( |Y| max(min(M,N),k) ulp ), where Y = Q' X   
                                                     and   Z = U' Y.   
    (6)   | I - U' U | / ( min(M,N) ulp )   

    (7)   | I - VT VT' | / ( min(M,N) ulp )   

    (8)   S1 contains min(M,N) nonnegative values in decreasing order.   
          (Return 0 if true, 1/ULP if false.)   

    (9)   0 if the true singular values of B are within THRESH of   
          those in S1.  2*THRESH if they are not.  (Tested using   
          SSVDCH)   

    (10)  | S1 - S2 | / ( |S1| ulp ), where S2 is computed without   
                                      computing U and V.   

    Test CBDSQR on matrix A   

    (11)  | A - (QU) S (VT PT) | / ( |A| max(M,N) ulp )   

    (12)  | X - (QU) Z | / ( |X| max(M,k) ulp )   

    (13)  | I - (QU)'(QU) | / ( M ulp )   

    (14)  | I - (VT PT) (PT'VT') | / ( N ulp )   

    The possible matrix types are   

    (1)  The zero matrix.   
    (2)  The identity matrix.   

    (3)  A diagonal matrix with evenly spaced entries   
         1, ..., ULP  and random signs.   
         (ULP = (first number larger than 1) - 1 )   
    (4)  A diagonal matrix with geometrically spaced entries   
         1, ..., ULP  and random signs.   
    (5)  A diagonal matrix with "clustered" entries 1, ULP, ..., ULP   
         and random signs.   

    (6)  Same as (3), but multiplied by SQRT( overflow threshold )   
    (7)  Same as (3), but multiplied by SQRT( underflow threshold )   

    (8)  A matrix of the form  U D V, where U and V are orthogonal and   
         D has evenly spaced entries 1, ..., ULP with random signs   
         on the diagonal.   

    (9)  A matrix of the form  U D V, where U and V are orthogonal and   
         D has geometrically spaced entries 1, ..., ULP with random   
         signs on the diagonal.   

    (10) A matrix of the form  U D V, where U and V are orthogonal and   
         D has "clustered" entries 1, ULP,..., ULP with random   
         signs on the diagonal.   

    (11) Same as (8), but multiplied by SQRT( overflow threshold )   
    (12) Same as (8), but multiplied by SQRT( underflow threshold )   

    (13) Rectangular matrix with random entries chosen from (-1,1).   
    (14) Same as (13), but multiplied by SQRT( overflow threshold )   
    (15) Same as (13), but multiplied by SQRT( underflow threshold )   

    Special case:   
    (16) A bidiagonal matrix with random entries chosen from a   
         logarithmic distribution on [ulp^2,ulp^(-2)]  (I.e., each   
         entry is  e^x, where x is chosen uniformly on   
         [ 2 log(ulp), -2 log(ulp) ] .)  For *this* type:   
         (a) CGEBRD is not called to reduce it to bidiagonal form.   
         (b) the bidiagonal is  min(M,N) x min(M,N); if M<N, the   
             matrix will be lower bidiagonal, otherwise upper.   
         (c) only tests 5--8 and 14 are performed.   

    A subset of the full set of matrix types may be selected through   
    the logical array DOTYPE.   

    Arguments   
    ==========   

    NSIZES  (input) INTEGER   
            The number of values of M and N contained in the vectors   
            MVAL and NVAL.  The matrix sizes are used in pairs (M,N).   

    MVAL    (input) INTEGER array, dimension (NM)   
            The values of the matrix row dimension M.   

    NVAL    (input) INTEGER array, dimension (NM)   
            The values of the matrix column dimension N.   

    NTYPES  (input) INTEGER   
            The number of elements in DOTYPE.   If it is zero, CCHKBD   
            does nothing.  It must be at least zero.  If it is MAXTYP+1   
            and NSIZES is 1, then an additional type, MAXTYP+1 is   
            defined, which is to use whatever matrices are in A and B.   
            This is only useful if DOTYPE(1:MAXTYP) is .FALSE. and   
            DOTYPE(MAXTYP+1) is .TRUE. .   

    DOTYPE  (input) LOGICAL array, dimension (NTYPES)   
            If DOTYPE(j) is .TRUE., then for each size (m,n), a matrix   
            of type j will be generated.  If NTYPES is smaller than the   
            maximum number of types defined (PARAMETER MAXTYP), then   
            types NTYPES+1 through MAXTYP will not be generated.  If   
            NTYPES is larger than MAXTYP, DOTYPE(MAXTYP+1) through   
            DOTYPE(NTYPES) will be ignored.   

    NRHS    (input) INTEGER   
            The number of columns in the "right-hand side" matrices X, Y,   
            and Z, used in testing CBDSQR.  If NRHS = 0, then the   
            operations on the right-hand side will not be tested.   
            NRHS must be at least 0.   

    ISEED   (input/output) INTEGER array, dimension (4)   
            On entry ISEED specifies the seed of the random number   
            generator. The array elements should be between 0 and 4095;   
            if not they will be reduced mod 4096.  Also, ISEED(4) must   
            be odd.  The values of ISEED are changed on exit, and can be   
            used in the next call to CCHKBD to continue the same random   
            number sequence.   

    THRESH  (input) REAL   
            The threshold value for the test ratios.  A result is   
            included in the output file if RESULT >= THRESH.  To have   
            every test ratio printed, use THRESH = 0.  Note that the   
            expected value of the test ratios is O(1), so THRESH should   
            be a reasonably small multiple of 1, e.g., 10 or 100.   

    A       (workspace) COMPLEX array, dimension (LDA,NMAX)   
            where NMAX is the maximum value of N in NVAL.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,MMAX),   
            where MMAX is the maximum value of M in MVAL.   

    BD      (workspace) REAL array, dimension   
                        (max(min(MVAL(j),NVAL(j))))   

    BE      (workspace) REAL array, dimension   
                        (max(min(MVAL(j),NVAL(j))))   

    S1      (workspace) REAL array, dimension   
                        (max(min(MVAL(j),NVAL(j))))   

    S2      (workspace) REAL array, dimension   
                        (max(min(MVAL(j),NVAL(j))))   

    X       (workspace) COMPLEX array, dimension (LDX,NRHS)   

    LDX     (input) INTEGER   
            The leading dimension of the arrays X, Y, and Z.   
            LDX >= max(1,MMAX).   

    Y       (workspace) COMPLEX array, dimension (LDX,NRHS)   

    Z       (workspace) COMPLEX array, dimension (LDX,NRHS)   

    Q       (workspace) COMPLEX array, dimension (LDQ,MMAX)   

    LDQ     (input) INTEGER   
            The leading dimension of the array Q.  LDQ >= max(1,MMAX).   

    PT      (workspace) COMPLEX array, dimension (LDPT,NMAX)   

    LDPT    (input) INTEGER   
            The leading dimension of the arrays PT, U, and V.   
            LDPT >= max(1, max(min(MVAL(j),NVAL(j)))).   

    U       (workspace) COMPLEX array, dimension   
                        (LDPT,max(min(MVAL(j),NVAL(j))))   

    V       (workspace) COMPLEX array, dimension   
                        (LDPT,max(min(MVAL(j),NVAL(j))))   

    WORK    (workspace) COMPLEX array, dimension (LWORK)   

    LWORK   (input) INTEGER   
            The number of entries in WORK.  This must be at least   
            3(M+N) and  M(M + max(M,N,k) + 1) + N*min(M,N)  for all   
            pairs  (M,N)=(MM(j),NN(j))   

    RWORK   (workspace) REAL array, dimension   
                        (5*max(min(M,N)))   

    NOUT    (input) INTEGER   
            The FORTRAN unit number for printing out error messages   
            (e.g., if a routine returns IINFO not equal to 0.)   

    INFO    (output) INTEGER   
            If 0, then everything ran OK.   
             -1: NSIZES < 0   
             -2: Some MM(j) < 0   
             -3: Some NN(j) < 0   
             -4: NTYPES < 0   
             -6: NRHS  < 0   
             -8: THRESH < 0   
            -11: LDA < 1 or LDA < MMAX, where MMAX is max( MM(j) ).   
            -17: LDB < 1 or LDB < MMAX.   
            -21: LDQ < 1 or LDQ < MMAX.   
            -23: LDP < 1 or LDP < MNMAX.   
            -27: LWORK too small.   
            If  CLATMR, CLATMS, CGEBRD, CUNGBR, or CBDSQR,   
                returns an error code, the   
                absolute value of it is returned.   

   -----------------------------------------------------------------------   

       Some Local Variables and Parameters:   
       ---- ----- --------- --- ----------   

       ZERO, ONE       Real 0 and 1.   
       MAXTYP          The number of types defined.   
       NTEST           The number of tests performed, or which can   
                       be performed so far, for the current matrix.   
       MMAX            Largest value in NN.   
       NMAX            Largest value in NN.   
       MNMIN           min(MM(j), NN(j)) (the dimension of the bidiagonal   
                       matrix.)   
       MNMAX           The maximum value of MNMIN for j=1,...,NSIZES.   
       NFAIL           The number of tests which have exceeded THRESH   
       COND, IMODE     Values to be passed to the matrix generators.   
       ANORM           Norm of A; passed to matrix generators.   

       OVFL, UNFL      Overflow and underflow thresholds.   
       RTOVFL, RTUNFL  Square roots of the previous 2 values.   
       ULP, ULPINV     Finest relative precision and its inverse.   

               The following four arrays decode JTYPE:   
       KTYPE(j)        The general type (1-10) for type "j".   
       KMODE(j)        The MODE value to be passed to the matrix   
                       generator for type "j".   
       KMAGN(j)        The order of magnitude ( O(1),   
                       O(overflow^(1/2) ), O(underflow^(1/2) )   

   ======================================================================   

       Parameter adjustments */
    --mval;
    --nval;
    --dotype;
    --iseed;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --bd;
    --be;
    --s1;
    --s2;
    z_dim1 = *ldx;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    y_dim1 = *ldx;
    y_offset = 1 + y_dim1 * 1;
    y -= y_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    vt_dim1 = *ldpt;
    vt_offset = 1 + vt_dim1 * 1;
    vt -= vt_offset;
    u_dim1 = *ldpt;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    pt_dim1 = *ldpt;
    pt_offset = 1 + pt_dim1 * 1;
    pt -= pt_offset;
    --work;
    --rwork;

    /* Function Body   

       Check for errors */

    *info = 0;

    badmm = FALSE_;
    badnn = FALSE_;
    mmax = 1;
    nmax = 1;
    mnmax = 1;
    minwrk = 1;
    i__1 = *nsizes;
    for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
	i__2 = mmax, i__3 = mval[j];
	mmax = max(i__2,i__3);
	if (mval[j] < 0) {
	    badmm = TRUE_;
	}
/* Computing MAX */
	i__2 = nmax, i__3 = nval[j];
	nmax = max(i__2,i__3);
	if (nval[j] < 0) {
	    badnn = TRUE_;
	}
/* Computing MAX   
   Computing MIN */
	i__4 = mval[j], i__5 = nval[j];
	i__2 = mnmax, i__3 = min(i__4,i__5);
	mnmax = max(i__2,i__3);
/* Computing MAX   
   Computing MAX */
	i__4 = mval[j], i__5 = nval[j], i__4 = max(i__4,i__5);
/* Computing MIN */
	i__6 = nval[j], i__7 = mval[j];
	i__2 = minwrk, i__3 = (mval[j] + nval[j]) * 3, i__2 = max(i__2,i__3), 
		i__3 = mval[j] * (mval[j] + max(i__4,*nrhs) + 1) + nval[j] * 
		min(i__6,i__7);
	minwrk = max(i__2,i__3);
/* L10: */
    }

/*     Check for errors */

    if (*nsizes < 0) {
	*info = -1;
    } else if (badmm) {
	*info = -2;
    } else if (badnn) {
	*info = -3;
    } else if (*ntypes < 0) {
	*info = -4;
    } else if (*nrhs < 0) {
	*info = -6;
    } else if (*lda < mmax) {
	*info = -11;
    } else if (*ldx < mmax) {
	*info = -17;
    } else if (*ldq < mmax) {
	*info = -21;
    } else if (*ldpt < mnmax) {
	*info = -23;
    } else if (minwrk > *lwork) {
	*info = -27;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CCHKBD", &i__1);
	return 0;
    }

/*     Initialize constants */

    s_copy(path, "Complex precision", (ftnlen)1, (ftnlen)17);
    s_copy(path + 1, "BD", (ftnlen)2, (ftnlen)2);
    nfail = 0;
    ntest = 0;
    unfl = slamch_("Safe minimum");
    ovfl = slamch_("Overflow");
    slabad_(&unfl, &ovfl);
    ulp = slamch_("Precision");
    ulpinv = 1.f / ulp;
    log2ui = (integer) (log(ulpinv) / log(2.f));
    rtunfl = sqrt(unfl);
    rtovfl = sqrt(ovfl);
    infoc_1.infot = 0;

/*     Loop over sizes, types */

    i__1 = *nsizes;
    for (jsize = 1; jsize <= i__1; ++jsize) {
	m = mval[jsize];
	n = nval[jsize];
	mnmin = min(m,n);
/* Computing MAX */
	i__2 = max(m,n);
	amninv = 1.f / max(i__2,1);

	if (*nsizes != 1) {
	    mtypes = min(16,*ntypes);
	} else {
	    mtypes = min(17,*ntypes);
	}

	i__2 = mtypes;
	for (jtype = 1; jtype <= i__2; ++jtype) {
	    if (! dotype[jtype]) {
		goto L170;
	    }

	    for (j = 1; j <= 4; ++j) {
		ioldsd[j - 1] = iseed[j];
/* L20: */
	    }

	    for (j = 1; j <= 14; ++j) {
		result[j - 1] = -1.f;
/* L30: */
	    }

	    *(unsigned char *)uplo = ' ';

/*           Compute "A"   

             Control parameters:   

             KMAGN  KMODE        KTYPE   
         =1  O(1)   clustered 1  zero   
         =2  large  clustered 2  identity   
         =3  small  exponential  (none)   
         =4         arithmetic   diagonal, (w/ eigenvalues)   
         =5         random       symmetric, w/ eigenvalues   
         =6                      nonsymmetric, w/ singular values   
         =7                      random diagonal   
         =8                      random symmetric   
         =9                      random nonsymmetric   
         =10                     random bidiagonal (log. distrib.) */

	    if (mtypes > 16) {
		goto L100;
	    }

	    itype = ktype[jtype - 1];
	    imode = kmode[jtype - 1];

/*           Compute norm */

	    switch (kmagn[jtype - 1]) {
		case 1:  goto L40;
		case 2:  goto L50;
		case 3:  goto L60;
	    }

L40:
	    anorm = 1.f;
	    goto L70;

L50:
	    anorm = rtovfl * ulp * amninv;
	    goto L70;

L60:
	    anorm = rtunfl * max(m,n) * ulpinv;
	    goto L70;

L70:

	    claset_("Full", lda, &n, &c_b1, &c_b1, &a[a_offset], lda);
	    iinfo = 0;
	    cond = ulpinv;

	    bidiag = FALSE_;
	    if (itype == 1) {

/*              Zero matrix */

		iinfo = 0;

	    } else if (itype == 2) {

/*              Identity */

		i__3 = mnmin;
		for (jcol = 1; jcol <= i__3; ++jcol) {
		    i__4 = a_subscr(jcol, jcol);
		    a[i__4].r = anorm, a[i__4].i = 0.f;
/* L80: */
		}

	    } else if (itype == 4) {

/*              Diagonal Matrix, [Eigen]values Specified */

		clatms_(&mnmin, &mnmin, "S", &iseed[1], "N", &rwork[1], &
			imode, &cond, &anorm, &c__0, &c__0, "N", &a[a_offset],
			 lda, &work[1], &iinfo);

	    } else if (itype == 5) {

/*              Symmetric, eigenvalues specified */

		clatms_(&mnmin, &mnmin, "S", &iseed[1], "S", &rwork[1], &
			imode, &cond, &anorm, &m, &n, "N", &a[a_offset], lda, 
			&work[1], &iinfo);

	    } else if (itype == 6) {

/*              Nonsymmetric, singular values specified */

		clatms_(&m, &n, "S", &iseed[1], "N", &rwork[1], &imode, &cond,
			 &anorm, &m, &n, "N", &a[a_offset], lda, &work[1], &
			iinfo);

	    } else if (itype == 7) {

/*              Diagonal, random entries */

		clatmr_(&mnmin, &mnmin, "S", &iseed[1], "N", &work[1], &c__6, 
			&c_b37, &c_b2, "T", "N", &work[mnmin + 1], &c__1, &
			c_b37, &work[(mnmin << 1) + 1], &c__1, &c_b37, "N", 
			iwork, &c__0, &c__0, &c_b47, &anorm, "NO", &a[
			a_offset], lda, iwork, &iinfo);

	    } else if (itype == 8) {

/*              Symmetric, random entries */

		clatmr_(&mnmin, &mnmin, "S", &iseed[1], "S", &work[1], &c__6, 
			&c_b37, &c_b2, "T", "N", &work[mnmin + 1], &c__1, &
			c_b37, &work[m + mnmin + 1], &c__1, &c_b37, "N", 
			iwork, &m, &n, &c_b47, &anorm, "NO", &a[a_offset], 
			lda, iwork, &iinfo);

	    } else if (itype == 9) {

/*              Nonsymmetric, random entries */

		clatmr_(&m, &n, "S", &iseed[1], "N", &work[1], &c__6, &c_b37, 
			&c_b2, "T", "N", &work[mnmin + 1], &c__1, &c_b37, &
			work[m + mnmin + 1], &c__1, &c_b37, "N", iwork, &m, &
			n, &c_b47, &anorm, "NO", &a[a_offset], lda, iwork, &
			iinfo);

	    } else if (itype == 10) {

/*              Bidiagonal, random entries */

		temp1 = log(ulp) * -2.f;
		i__3 = mnmin;
		for (j = 1; j <= i__3; ++j) {
		    bd[j] = exp(temp1 * slarnd_(&c__2, &iseed[1]));
		    if (j < mnmin) {
			be[j] = exp(temp1 * slarnd_(&c__2, &iseed[1]));
		    }
/* L90: */
		}

		iinfo = 0;
		bidiag = TRUE_;
		if (m >= n) {
		    *(unsigned char *)uplo = 'U';
		} else {
		    *(unsigned char *)uplo = 'L';
		}
	    } else {
		iinfo = 1;
	    }

	    if (iinfo == 0) {

/*              Generate Right-Hand Side */

		if (bidiag) {
		    clatmr_(&mnmin, nrhs, "S", &iseed[1], "N", &work[1], &
			    c__6, &c_b37, &c_b2, "T", "N", &work[mnmin + 1], &
			    c__1, &c_b37, &work[(mnmin << 1) + 1], &c__1, &
			    c_b37, "N", iwork, &mnmin, nrhs, &c_b47, &c_b37, 
			    "NO", &y[y_offset], ldx, iwork, &iinfo);
		} else {
		    clatmr_(&m, nrhs, "S", &iseed[1], "N", &work[1], &c__6, &
			    c_b37, &c_b2, "T", "N", &work[m + 1], &c__1, &
			    c_b37, &work[(m << 1) + 1], &c__1, &c_b37, "N", 
			    iwork, &m, nrhs, &c_b47, &c_b37, "NO", &x[
			    x_offset], ldx, iwork, &iinfo);
		}
	    }

/*           Error Exit */

	    if (iinfo != 0) {
		io___40.ciunit = *nout;
		s_wsfe(&io___40);
		do_fio(&c__1, "Generator", (ftnlen)9);
		do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer));
		do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer));
		e_wsfe();
		*info = abs(iinfo);
		return 0;
	    }

L100:

/*           Call CGEBRD and CUNGBR to compute B, Q, and P, do tests. */

	    if (! bidiag) {

/*              Compute transformations to reduce A to bidiagonal form:   
                B := Q' * A * P. */

		clacpy_(" ", &m, &n, &a[a_offset], lda, &q[q_offset], ldq);
		i__3 = *lwork - (mnmin << 1);
		cgebrd_(&m, &n, &q[q_offset], ldq, &bd[1], &be[1], &work[1], &
			work[mnmin + 1], &work[(mnmin << 1) + 1], &i__3, &
			iinfo);

/*              Check error code from CGEBRD. */

		if (iinfo != 0) {
		    io___41.ciunit = *nout;
		    s_wsfe(&io___41);
		    do_fio(&c__1, "CGEBRD", (ftnlen)6);
		    do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer));
		    do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
		    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
		    do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer));
		    do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer))
			    ;
		    e_wsfe();
		    *info = abs(iinfo);
		    return 0;
		}

		clacpy_(" ", &m, &n, &q[q_offset], ldq, &pt[pt_offset], ldpt);
		if (m >= n) {
		    *(unsigned char *)uplo = 'U';
		} else {
		    *(unsigned char *)uplo = 'L';
		}

/*              Generate Q */

		mq = m;
		if (*nrhs <= 0) {
		    mq = mnmin;
		}
		i__3 = *lwork - (mnmin << 1);
		cungbr_("Q", &m, &mq, &n, &q[q_offset], ldq, &work[1], &work[(
			mnmin << 1) + 1], &i__3, &iinfo);

/*              Check error code from CUNGBR. */

		if (iinfo != 0) {
		    io___43.ciunit = *nout;
		    s_wsfe(&io___43);
		    do_fio(&c__1, "CUNGBR(Q)", (ftnlen)9);
		    do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer));
		    do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
		    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
		    do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer));
		    do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer))
			    ;
		    e_wsfe();
		    *info = abs(iinfo);
		    return 0;
		}

/*              Generate P' */

		i__3 = *lwork - (mnmin << 1);
		cungbr_("P", &mnmin, &n, &m, &pt[pt_offset], ldpt, &work[
			mnmin + 1], &work[(mnmin << 1) + 1], &i__3, &iinfo);

/*              Check error code from CUNGBR. */

		if (iinfo != 0) {
		    io___44.ciunit = *nout;
		    s_wsfe(&io___44);
		    do_fio(&c__1, "CUNGBR(P)", (ftnlen)9);
		    do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer));
		    do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
		    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
		    do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer));
		    do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer))
			    ;
		    e_wsfe();
		    *info = abs(iinfo);
		    return 0;
		}

/*              Apply Q' to an M by NRHS matrix X:  Y := Q' * X. */

		cgemm_("Conjugate transpose", "No transpose", &m, nrhs, &m, &
			c_b2, &q[q_offset], ldq, &x[x_offset], ldx, &c_b1, &y[
			y_offset], ldx);

/*              Test 1:  Check the decomposition A := Q * B * PT   
                     2:  Check the orthogonality of Q   
                     3:  Check the orthogonality of PT */

		cbdt01_(&m, &n, &c__1, &a[a_offset], lda, &q[q_offset], ldq, &
			bd[1], &be[1], &pt[pt_offset], ldpt, &work[1], &rwork[
			1], result);
		cunt01_("Columns", &m, &mq, &q[q_offset], ldq, &work[1], 
			lwork, &rwork[1], &result[1]);
		cunt01_("Rows", &mnmin, &n, &pt[pt_offset], ldpt, &work[1], 
			lwork, &rwork[1], &result[2]);
	    }

/*           Use CBDSQR to form the SVD of the bidiagonal matrix B:   
             B := U * S1 * VT, and compute Z = U' * Y. */

	    scopy_(&mnmin, &bd[1], &c__1, &s1[1], &c__1);
	    if (mnmin > 0) {
		i__3 = mnmin - 1;
		scopy_(&i__3, &be[1], &c__1, &rwork[1], &c__1);
	    }
	    clacpy_(" ", &m, nrhs, &y[y_offset], ldx, &z__[z_offset], ldx);
	    claset_("Full", &mnmin, &mnmin, &c_b1, &c_b2, &u[u_offset], ldpt);
	    claset_("Full", &mnmin, &mnmin, &c_b1, &c_b2, &vt[vt_offset], 
		    ldpt);

	    cbdsqr_(uplo, &mnmin, &mnmin, &mnmin, nrhs, &s1[1], &rwork[1], &
		    vt[vt_offset], ldpt, &u[u_offset], ldpt, &z__[z_offset], 
		    ldx, &rwork[mnmin + 1], &iinfo);

/*           Check error code from CBDSQR. */

	    if (iinfo != 0) {
		io___45.ciunit = *nout;
		s_wsfe(&io___45);
		do_fio(&c__1, "CBDSQR(vects)", (ftnlen)13);
		do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer));
		do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer));
		e_wsfe();
		*info = abs(iinfo);
		if (iinfo < 0) {
		    return 0;
		} else {
		    result[3] = ulpinv;
		    goto L150;
		}
	    }

/*           Use CBDSQR to compute only the singular values of the   
             bidiagonal matrix B;  U, VT, and Z should not be modified. */

	    scopy_(&mnmin, &bd[1], &c__1, &s2[1], &c__1);
	    if (mnmin > 0) {
		i__3 = mnmin - 1;
		scopy_(&i__3, &be[1], &c__1, &rwork[1], &c__1);
	    }

	    cbdsqr_(uplo, &mnmin, &c__0, &c__0, &c__0, &s2[1], &rwork[1], &vt[
		    vt_offset], ldpt, &u[u_offset], ldpt, &z__[z_offset], ldx,
		     &rwork[mnmin + 1], &iinfo);

/*           Check error code from CBDSQR. */

	    if (iinfo != 0) {
		io___46.ciunit = *nout;
		s_wsfe(&io___46);
		do_fio(&c__1, "CBDSQR(values)", (ftnlen)14);
		do_fio(&c__1, (char *)&iinfo, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
		do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer));
		do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer));
		e_wsfe();
		*info = abs(iinfo);
		if (iinfo < 0) {
		    return 0;
		} else {
		    result[8] = ulpinv;
		    goto L150;
		}
	    }

/*           Test 4:  Check the decomposition B := U * S1 * VT   
                  5:  Check the computation Z := U' * Y   
                  6:  Check the orthogonality of U   
                  7:  Check the orthogonality of VT */

	    cbdt03_(uplo, &mnmin, &c__1, &bd[1], &be[1], &u[u_offset], ldpt, &
		    s1[1], &vt[vt_offset], ldpt, &work[1], &result[3]);
	    cbdt02_(&mnmin, nrhs, &y[y_offset], ldx, &z__[z_offset], ldx, &u[
		    u_offset], ldpt, &work[1], &rwork[1], &result[4]);
	    cunt01_("Columns", &mnmin, &mnmin, &u[u_offset], ldpt, &work[1], 
		    lwork, &rwork[1], &result[5]);
	    cunt01_("Rows", &mnmin, &mnmin, &vt[vt_offset], ldpt, &work[1], 
		    lwork, &rwork[1], &result[6]);

/*           Test 8:  Check that the singular values are sorted in   
                      non-increasing order and are non-negative */

	    result[7] = 0.f;
	    i__3 = mnmin - 1;
	    for (i__ = 1; i__ <= i__3; ++i__) {
		if (s1[i__] < s1[i__ + 1]) {
		    result[7] = ulpinv;
		}
		if (s1[i__] < 0.f) {
		    result[7] = ulpinv;
		}
/* L110: */
	    }
	    if (mnmin >= 1) {
		if (s1[mnmin] < 0.f) {
		    result[7] = ulpinv;
		}
	    }

/*           Test 9:  Compare CBDSQR with and without singular vectors */

	    temp2 = 0.f;

	    i__3 = mnmin;
	    for (j = 1; j <= i__3; ++j) {
/* Computing MAX   
   Computing MAX */
		r__6 = (r__1 = s1[j], dabs(r__1)), r__7 = (r__2 = s2[j], dabs(
			r__2));
		r__4 = sqrt(unfl) * dmax(s1[1],1.f), r__5 = ulp * dmax(r__6,
			r__7);
		temp1 = (r__3 = s1[j] - s2[j], dabs(r__3)) / dmax(r__4,r__5);
		temp2 = dmax(temp1,temp2);
/* L120: */
	    }

	    result[8] = temp2;

/*           Test 10:  Sturm sequence test of singular values   
                       Go up by factors of two until it succeeds */

	    temp1 = *thresh * (.5f - ulp);

	    i__3 = log2ui;
	    for (j = 0; j <= i__3; ++j) {
		ssvdch_(&mnmin, &bd[1], &be[1], &s1[1], &temp1, &iinfo);
		if (iinfo == 0) {
		    goto L140;
		}
		temp1 *= 2.f;
/* L130: */
	    }

L140:
	    result[9] = temp1;

/*           Use CBDSQR to form the decomposition A := (QU) S (VT PT)   
             from the bidiagonal form A := Q B PT. */

	    if (! bidiag) {
		scopy_(&mnmin, &bd[1], &c__1, &s2[1], &c__1);
		if (mnmin > 0) {
		    i__3 = mnmin - 1;
		    scopy_(&i__3, &be[1], &c__1, &rwork[1], &c__1);
		}

		cbdsqr_(uplo, &mnmin, &n, &m, nrhs, &s2[1], &rwork[1], &pt[
			pt_offset], ldpt, &q[q_offset], ldq, &y[y_offset], 
			ldx, &rwork[mnmin + 1], &iinfo);

/*              Test 11:  Check the decomposition A := Q*U * S2 * VT*PT   
                     12:  Check the computation Z := U' * Q' * X   
                     13:  Check the orthogonality of Q*U   
                     14:  Check the orthogonality of VT*PT */

		cbdt01_(&m, &n, &c__0, &a[a_offset], lda, &q[q_offset], ldq, &
			s2[1], dumma, &pt[pt_offset], ldpt, &work[1], &rwork[
			1], &result[10]);
		cbdt02_(&m, nrhs, &x[x_offset], ldx, &y[y_offset], ldx, &q[
			q_offset], ldq, &work[1], &rwork[1], &result[11]);
		cunt01_("Columns", &m, &mq, &q[q_offset], ldq, &work[1], 
			lwork, &rwork[1], &result[12]);
		cunt01_("Rows", &mnmin, &n, &pt[pt_offset], ldpt, &work[1], 
			lwork, &rwork[1], &result[13]);
	    }

/*           End of Loop -- Check for RESULT(j) > THRESH */

L150:
	    for (j = 1; j <= 14; ++j) {
		if (result[j - 1] >= *thresh) {
		    if (nfail == 0) {
			slahd2_(nout, path);
		    }
		    io___50.ciunit = *nout;
		    s_wsfe(&io___50);
		    do_fio(&c__1, (char *)&m, (ftnlen)sizeof(integer));
		    do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer));
		    do_fio(&c__1, (char *)&jtype, (ftnlen)sizeof(integer));
		    do_fio(&c__4, (char *)&ioldsd[0], (ftnlen)sizeof(integer))
			    ;
		    do_fio(&c__1, (char *)&j, (ftnlen)sizeof(integer));
		    do_fio(&c__1, (char *)&result[j - 1], (ftnlen)sizeof(real)
			    );
		    e_wsfe();
		    ++nfail;
		}
/* L160: */
	    }
	    if (! bidiag) {
		ntest += 14;
	    } else {
		ntest += 5;
	    }

L170:
	    ;
	}
/* L180: */
    }

/*     Summary */

    alasum_(path, nout, &nfail, &ntest, &c__0);

    return 0;

/*     End of CCHKBD */


} /* cchkbd_ */

Example #6

Show file

File: cgrqts.c Project: zangel/uquad

/* Subroutine */ int cgrqts_(integer *m, integer *p, integer *n, complex *a, 
	complex *af, complex *q, complex *r__, integer *lda, complex *taua, 
	complex *b, complex *bf, complex *z__, complex *t, complex *bwk, 
	integer *ldb, complex *taub, complex *work, integer *lwork, real *
	rwork, real *result)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, r_dim1, r_offset, q_dim1, 
	    q_offset, b_dim1, b_offset, bf_dim1, bf_offset, t_dim1, t_offset, 
	    z_dim1, z_offset, bwk_dim1, bwk_offset, i__1, i__2;
    real r__1;
    complex q__1;

    /* Local variables */
    static integer info;
    static real unfl;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *), cherk_(char *, 
	    char *, integer *, integer *, real *, complex *, integer *, real *
	    , complex *, integer *);
    static real resid, anorm, bnorm;
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *), clanhe_(char *, char *, integer *, 
	    complex *, integer *, real *), slamch_(char *);
    extern /* Subroutine */ int cggrqf_(integer *, integer *, integer *, 
	    complex *, integer *, complex *, complex *, integer *, complex *, 
	    complex *, integer *, integer *), clacpy_(char *, integer *, 
	    integer *, complex *, integer *, complex *, integer *), 
	    claset_(char *, integer *, integer *, complex *, complex *, 
	    complex *, integer *), cungqr_(integer *, integer *, 
	    integer *, complex *, integer *, complex *, complex *, integer *, 
	    integer *), cungrq_(integer *, integer *, integer *, complex *, 
	    integer *, complex *, complex *, integer *, integer *);
    static real ulp;


#define q_subscr(a_1,a_2) (a_2)*q_dim1 + a_1
#define q_ref(a_1,a_2) q[q_subscr(a_1,a_2)]
#define r___subscr(a_1,a_2) (a_2)*r_dim1 + a_1
#define r___ref(a_1,a_2) r__[r___subscr(a_1,a_2)]
#define z___subscr(a_1,a_2) (a_2)*z_dim1 + a_1
#define z___ref(a_1,a_2) z__[z___subscr(a_1,a_2)]
#define af_subscr(a_1,a_2) (a_2)*af_dim1 + a_1
#define af_ref(a_1,a_2) af[af_subscr(a_1,a_2)]
#define bf_subscr(a_1,a_2) (a_2)*bf_dim1 + a_1
#define bf_ref(a_1,a_2) bf[bf_subscr(a_1,a_2)]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CGRQTS tests CGGRQF, which computes the GRQ factorization of an   
    M-by-N matrix A and a P-by-N matrix B: A = R*Q and B = Z*T*Q.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    P       (input) INTEGER   
            The number of rows of the matrix B.  P >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrices A and B.  N >= 0.   

    A       (input) COMPLEX array, dimension (LDA,N)   
            The M-by-N matrix A.   

    AF      (output) COMPLEX array, dimension (LDA,N)   
            Details of the GRQ factorization of A and B, as returned   
            by CGGRQF, see CGGRQF for further details.   

    Q       (output) COMPLEX array, dimension (LDA,N)   
            The N-by-N unitary matrix Q.   

    R       (workspace) COMPLEX array, dimension (LDA,MAX(M,N))   

    LDA     (input) INTEGER   
            The leading dimension of the arrays A, AF, R and Q.   
            LDA >= max(M,N).   

    TAUA    (output) COMPLEX array, dimension (min(M,N))   
            The scalar factors of the elementary reflectors, as returned   
            by SGGQRC.   

    B       (input) COMPLEX array, dimension (LDB,N)   
            On entry, the P-by-N matrix A.   

    BF      (output) COMPLEX array, dimension (LDB,N)   
            Details of the GQR factorization of A and B, as returned   
            by CGGRQF, see CGGRQF for further details.   

    Z       (output) REAL array, dimension (LDB,P)   
            The P-by-P unitary matrix Z.   

    T       (workspace) COMPLEX array, dimension (LDB,max(P,N))   

    BWK     (workspace) COMPLEX array, dimension (LDB,N)   

    LDB     (input) INTEGER   
            The leading dimension of the arrays B, BF, Z and T.   
            LDB >= max(P,N).   

    TAUB    (output) COMPLEX array, dimension (min(P,N))   
            The scalar factors of the elementary reflectors, as returned   
            by SGGRQF.   

    WORK    (workspace) COMPLEX array, dimension (LWORK)   

    LWORK   (input) INTEGER   
            The dimension of the array WORK, LWORK >= max(M,P,N)**2.   

    RWORK   (workspace) REAL array, dimension (M)   

    RESULT  (output) REAL array, dimension (4)   
            The test ratios:   
              RESULT(1) = norm( R - A*Q' ) / ( MAX(M,N)*norm(A)*ULP)   
              RESULT(2) = norm( T*Q - Z'*B ) / (MAX(P,N)*norm(B)*ULP)   
              RESULT(3) = norm( I - Q'*Q ) / ( N*ULP )   
              RESULT(4) = norm( I - Z'*Z ) / ( P*ULP )   

    =====================================================================   


       Parameter adjustments */
    r_dim1 = *lda;
    r_offset = 1 + r_dim1 * 1;
    r__ -= r_offset;
    q_dim1 = *lda;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    af_dim1 = *lda;
    af_offset = 1 + af_dim1 * 1;
    af -= af_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --taua;
    bwk_dim1 = *ldb;
    bwk_offset = 1 + bwk_dim1 * 1;
    bwk -= bwk_offset;
    t_dim1 = *ldb;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    z_dim1 = *ldb;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    bf_dim1 = *ldb;
    bf_offset = 1 + bf_dim1 * 1;
    bf -= bf_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --taub;
    --work;
    --rwork;
    --result;

    /* Function Body */
    ulp = slamch_("Precision");
    unfl = slamch_("Safe minimum");

/*     Copy the matrix A to the array AF. */

    clacpy_("Full", m, n, &a[a_offset], lda, &af[af_offset], lda);
    clacpy_("Full", p, n, &b[b_offset], ldb, &bf[bf_offset], ldb);

/* Computing MAX */
    r__1 = clange_("1", m, n, &a[a_offset], lda, &rwork[1]);
    anorm = dmax(r__1,unfl);
/* Computing MAX */
    r__1 = clange_("1", p, n, &b[b_offset], ldb, &rwork[1]);
    bnorm = dmax(r__1,unfl);

/*     Factorize the matrices A and B in the arrays AF and BF. */

    cggrqf_(m, p, n, &af[af_offset], lda, &taua[1], &bf[bf_offset], ldb, &
	    taub[1], &work[1], lwork, &info);

/*     Generate the N-by-N matrix Q */

    claset_("Full", n, n, &c_b3, &c_b3, &q[q_offset], lda);
    if (*m <= *n) {
	if (*m > 0 && *m < *n) {
	    i__1 = *n - *m;
	    clacpy_("Full", m, &i__1, &af[af_offset], lda, &q_ref(*n - *m + 1,
		     1), lda);
	}
	if (*m > 1) {
	    i__1 = *m - 1;
	    i__2 = *m - 1;
	    clacpy_("Lower", &i__1, &i__2, &af_ref(2, *n - *m + 1), lda, &
		    q_ref(*n - *m + 2, *n - *m + 1), lda);
	}
    } else {
	if (*n > 1) {
	    i__1 = *n - 1;
	    i__2 = *n - 1;
	    clacpy_("Lower", &i__1, &i__2, &af_ref(*m - *n + 2, 1), lda, &
		    q_ref(2, 1), lda);
	}
    }
    i__1 = min(*m,*n);
    cungrq_(n, n, &i__1, &q[q_offset], lda, &taua[1], &work[1], lwork, &info);

/*     Generate the P-by-P matrix Z */

    claset_("Full", p, p, &c_b3, &c_b3, &z__[z_offset], ldb);
    if (*p > 1) {
	i__1 = *p - 1;
	clacpy_("Lower", &i__1, n, &bf_ref(2, 1), ldb, &z___ref(2, 1), ldb);
    }
    i__1 = min(*p,*n);
    cungqr_(p, p, &i__1, &z__[z_offset], ldb, &taub[1], &work[1], lwork, &
	    info);

/*     Copy R */

    claset_("Full", m, n, &c_b1, &c_b1, &r__[r_offset], lda);
    if (*m <= *n) {
	clacpy_("Upper", m, m, &af_ref(1, *n - *m + 1), lda, &r___ref(1, *n - 
		*m + 1), lda);
    } else {
	i__1 = *m - *n;
	clacpy_("Full", &i__1, n, &af[af_offset], lda, &r__[r_offset], lda);
	clacpy_("Upper", n, n, &af_ref(*m - *n + 1, 1), lda, &r___ref(*m - *n 
		+ 1, 1), lda);
    }

/*     Copy T */

    claset_("Full", p, n, &c_b1, &c_b1, &t[t_offset], ldb);
    clacpy_("Upper", p, n, &bf[bf_offset], ldb, &t[t_offset], ldb);

/*     Compute R - A*Q' */

    q__1.r = -1.f, q__1.i = 0.f;
    cgemm_("No transpose", "Conjugate transpose", m, n, n, &q__1, &a[a_offset]
	    , lda, &q[q_offset], lda, &c_b2, &r__[r_offset], lda);

/*     Compute norm( R - A*Q' ) / ( MAX(M,N)*norm(A)*ULP ) . */

    resid = clange_("1", m, n, &r__[r_offset], lda, &rwork[1]);
    if (anorm > 0.f) {
/* Computing MAX */
	i__1 = max(1,*m);
	result[1] = resid / (real) max(i__1,*n) / anorm / ulp;
    } else {
	result[1] = 0.f;
    }

/*     Compute T*Q - Z'*B */

    cgemm_("Conjugate transpose", "No transpose", p, n, p, &c_b2, &z__[
	    z_offset], ldb, &b[b_offset], ldb, &c_b1, &bwk[bwk_offset], ldb);
    q__1.r = -1.f, q__1.i = 0.f;
    cgemm_("No transpose", "No transpose", p, n, n, &c_b2, &t[t_offset], ldb, 
	    &q[q_offset], lda, &q__1, &bwk[bwk_offset], ldb);

/*     Compute norm( T*Q - Z'*B ) / ( MAX(P,N)*norm(A)*ULP ) . */

    resid = clange_("1", p, n, &bwk[bwk_offset], ldb, &rwork[1]);
    if (bnorm > 0.f) {
/* Computing MAX */
	i__1 = max(1,*p);
	result[2] = resid / (real) max(i__1,*m) / bnorm / ulp;
    } else {
	result[2] = 0.f;
    }

/*     Compute I - Q*Q' */

    claset_("Full", n, n, &c_b1, &c_b2, &r__[r_offset], lda);
    cherk_("Upper", "No Transpose", n, n, &c_b34, &q[q_offset], lda, &c_b35, &
	    r__[r_offset], lda);

/*     Compute norm( I - Q'*Q ) / ( N * ULP ) . */

    resid = clanhe_("1", "Upper", n, &r__[r_offset], lda, &rwork[1]);
    result[3] = resid / (real) max(1,*n) / ulp;

/*     Compute I - Z'*Z */

    claset_("Full", p, p, &c_b1, &c_b2, &t[t_offset], ldb);
    cherk_("Upper", "Conjugate transpose", p, p, &c_b34, &z__[z_offset], ldb, 
	    &c_b35, &t[t_offset], ldb);

/*     Compute norm( I - Z'*Z ) / ( P*ULP ) . */

    resid = clanhe_("1", "Upper", p, &t[t_offset], ldb, &rwork[1]);
    result[4] = resid / (real) max(1,*p) / ulp;

    return 0;

/*     End of CGRQTS */

} /* cgrqts_ */

Example #7

Show file

File: clqt03.c Project: zangel/uquad

/* Subroutine */ int clqt03_(integer *m, integer *n, integer *k, complex *af, 
	complex *c__, complex *cc, complex *q, integer *lda, complex *tau, 
	complex *work, integer *lwork, real *rwork, real *result)
{
    /* Initialized data */

    static integer iseed[4] = { 1988,1989,1990,1991 };

    /* System generated locals */
    integer af_dim1, af_offset, c_dim1, c_offset, cc_dim1, cc_offset, q_dim1, 
	    q_offset, i__1;

    /* Builtin functions   
       Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);

    /* Local variables */
    static char side[1];
    static integer info, j;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *);
    static integer iside;
    extern logical lsame_(char *, char *);
    static real resid, cnorm;
    static char trans[1];
    static integer mc, nc;
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *), slamch_(char *);
    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex 
	    *, integer *, complex *, integer *), claset_(char *, 
	    integer *, integer *, complex *, complex *, complex *, integer *), clarnv_(integer *, integer *, integer *, complex *), 
	    cunglq_(integer *, integer *, integer *, complex *, integer *, 
	    complex *, complex *, integer *, integer *), cunmlq_(char *, char 
	    *, integer *, integer *, integer *, complex *, integer *, complex 
	    *, complex *, integer *, complex *, integer *, integer *);
    static integer itrans;
    static real eps;


#define c___subscr(a_1,a_2) (a_2)*c_dim1 + a_1
#define c___ref(a_1,a_2) c__[c___subscr(a_1,a_2)]
#define q_subscr(a_1,a_2) (a_2)*q_dim1 + a_1
#define q_ref(a_1,a_2) q[q_subscr(a_1,a_2)]
#define af_subscr(a_1,a_2) (a_2)*af_dim1 + a_1
#define af_ref(a_1,a_2) af[af_subscr(a_1,a_2)]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CLQT03 tests CUNMLQ, which computes Q*C, Q'*C, C*Q or C*Q'.   

    CLQT03 compares the results of a call to CUNMLQ with the results of   
    forming Q explicitly by a call to CUNGLQ and then performing matrix   
    multiplication by a call to CGEMM.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows or columns of the matrix C; C is n-by-m if   
            Q is applied from the left, or m-by-n if Q is applied from   
            the right.  M >= 0.   

    N       (input) INTEGER   
            The order of the orthogonal matrix Q.  N >= 0.   

    K       (input) INTEGER   
            The number of elementary reflectors whose product defines the   
            orthogonal matrix Q.  N >= K >= 0.   

    AF      (input) COMPLEX array, dimension (LDA,N)   
            Details of the LQ factorization of an m-by-n matrix, as   
            returned by CGELQF. See CGELQF for further details.   

    C       (workspace) COMPLEX array, dimension (LDA,N)   

    CC      (workspace) COMPLEX array, dimension (LDA,N)   

    Q       (workspace) COMPLEX array, dimension (LDA,N)   

    LDA     (input) INTEGER   
            The leading dimension of the arrays AF, C, CC, and Q.   

    TAU     (input) COMPLEX array, dimension (min(M,N))   
            The scalar factors of the elementary reflectors corresponding   
            to the LQ factorization in AF.   

    WORK    (workspace) COMPLEX array, dimension (LWORK)   

    LWORK   (input) INTEGER   
            The length of WORK.  LWORK must be at least M, and should be   
            M*NB, where NB is the blocksize for this environment.   

    RWORK   (workspace) REAL array, dimension (M)   

    RESULT  (output) REAL array, dimension (4)   
            The test ratios compare two techniques for multiplying a   
            random matrix C by an n-by-n orthogonal matrix Q.   
            RESULT(1) = norm( Q*C - Q*C )  / ( N * norm(C) * EPS )   
            RESULT(2) = norm( C*Q - C*Q )  / ( N * norm(C) * EPS )   
            RESULT(3) = norm( Q'*C - Q'*C )/ ( N * norm(C) * EPS )   
            RESULT(4) = norm( C*Q' - C*Q' )/ ( N * norm(C) * EPS )   

    =====================================================================   

       Parameter adjustments */
    q_dim1 = *lda;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    cc_dim1 = *lda;
    cc_offset = 1 + cc_dim1 * 1;
    cc -= cc_offset;
    c_dim1 = *lda;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    af_dim1 = *lda;
    af_offset = 1 + af_dim1 * 1;
    af -= af_offset;
    --tau;
    --work;
    --rwork;
    --result;

    /* Function Body */

    eps = slamch_("Epsilon");

/*     Copy the first k rows of the factorization to the array Q */

    claset_("Full", n, n, &c_b1, &c_b1, &q[q_offset], lda);
    i__1 = *n - 1;
    clacpy_("Upper", k, &i__1, &af_ref(1, 2), lda, &q_ref(1, 2), lda);

/*     Generate the n-by-n matrix Q */

    s_copy(srnamc_1.srnamt, "CUNGLQ", (ftnlen)6, (ftnlen)6);
    cunglq_(n, n, k, &q[q_offset], lda, &tau[1], &work[1], lwork, &info);

    for (iside = 1; iside <= 2; ++iside) {
	if (iside == 1) {
	    *(unsigned char *)side = 'L';
	    mc = *n;
	    nc = *m;
	} else {
	    *(unsigned char *)side = 'R';
	    mc = *m;
	    nc = *n;
	}

/*        Generate MC by NC matrix C */

	i__1 = nc;
	for (j = 1; j <= i__1; ++j) {
	    clarnv_(&c__2, iseed, &mc, &c___ref(1, j));
/* L10: */
	}
	cnorm = clange_("1", &mc, &nc, &c__[c_offset], lda, &rwork[1]);
	if (cnorm == 0.f) {
	    cnorm = 1.f;
	}

	for (itrans = 1; itrans <= 2; ++itrans) {
	    if (itrans == 1) {
		*(unsigned char *)trans = 'N';
	    } else {
		*(unsigned char *)trans = 'C';
	    }

/*           Copy C */

	    clacpy_("Full", &mc, &nc, &c__[c_offset], lda, &cc[cc_offset], 
		    lda);

/*           Apply Q or Q' to C */

	    s_copy(srnamc_1.srnamt, "CUNMLQ", (ftnlen)6, (ftnlen)6);
	    cunmlq_(side, trans, &mc, &nc, k, &af[af_offset], lda, &tau[1], &
		    cc[cc_offset], lda, &work[1], lwork, &info);

/*           Form explicit product and subtract */

	    if (lsame_(side, "L")) {
		cgemm_(trans, "No transpose", &mc, &nc, &mc, &c_b20, &q[
			q_offset], lda, &c__[c_offset], lda, &c_b21, &cc[
			cc_offset], lda);
	    } else {
		cgemm_("No transpose", trans, &mc, &nc, &nc, &c_b20, &c__[
			c_offset], lda, &q[q_offset], lda, &c_b21, &cc[
			cc_offset], lda);
	    }

/*           Compute error in the difference */

	    resid = clange_("1", &mc, &nc, &cc[cc_offset], lda, &rwork[1]);
	    result[(iside - 1 << 1) + itrans] = resid / ((real) max(1,*n) * 
		    cnorm * eps);

/* L20: */
	}
/* L30: */
    }

    return 0;

/*     End of CLQT03 */

} /* clqt03_ */

Example #8

Show file

File: cpbtrf.c Project: juanjosegarciaripoll/cblapack

/* Subroutine */ int cpbtrf_(char *uplo, integer *n, integer *kd, complex *ab, 
	 integer *ldab, integer *info)
{
    /* System generated locals */
    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4, i__5, i__6;
    complex q__1;

    /* Local variables */
    integer i__, j, i2, i3, ib, nb, ii, jj;
    complex work[1056]	/* was [33][32] */;

/*  -- LAPACK routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  CPBTRF computes the Cholesky factorization of a complex Hermitian */
/*  positive definite band matrix A. */

/*  The factorization has the form */
/*     A = U**H * U,  if UPLO = 'U', or */
/*     A = L  * L**H,  if UPLO = 'L', */
/*  where U is an upper triangular matrix and L is lower triangular. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  Upper triangle of A is stored; */
/*          = 'L':  Lower triangle of A is stored. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  KD      (input) INTEGER */
/*          The number of superdiagonals of the matrix A if UPLO = 'U', */
/*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0. */

/*  AB      (input/output) COMPLEX array, dimension (LDAB,N) */
/*          On entry, the upper or lower triangle of the Hermitian band */
/*          matrix A, stored in the first KD+1 rows of the array.  The */
/*          j-th column of A is stored in the j-th column of the array AB */
/*          as follows: */
/*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j; */
/*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd). */

/*          On exit, if INFO = 0, the triangular factor U or L from the */
/*          Cholesky factorization A = U**H*U or A = L*L**H of the band */
/*          matrix A, in the same storage format as A. */

/*  LDAB    (input) INTEGER */
/*          The leading dimension of the array AB.  LDAB >= KD+1. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
/*          > 0:  if INFO = i, the leading minor of order i is not */
/*                positive definite, and the factorization could not be */
/*                completed. */

/*  Further Details */
/*  =============== */

/*  The band storage scheme is illustrated by the following example, when */
/*  N = 6, KD = 2, and UPLO = 'U': */

/*  On entry:                       On exit: */

/*      *    *   a13  a24  a35  a46      *    *   u13  u24  u35  u46 */
/*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56 */
/*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66 */

/*  Similarly, if UPLO = 'L' the format of A is as follows: */

/*  On entry:                       On exit: */

/*     a11  a22  a33  a44  a55  a66     l11  l22  l33  l44  l55  l66 */
/*     a21  a32  a43  a54  a65   *      l21  l32  l43  l54  l65   * */
/*     a31  a42  a53  a64   *    *      l31  l42  l53  l64   *    * */

/*  Array elements marked * are not used by the routine. */

/*  Contributed by */
/*  Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989 */

/*  ===================================================================== */

/*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1;
    ab -= ab_offset;

    /* Function Body */
    *info = 0;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*kd < 0) {
	*info = -3;
    } else if (*ldab < *kd + 1) {
	*info = -5;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CPBTRF", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment */

    nb = ilaenv_(&c__1, "CPBTRF", uplo, n, kd, &c_n1, &c_n1);

/*     The block size must not exceed the semi-bandwidth KD, and must not */
/*     exceed the limit set by the size of the local array WORK. */

    nb = min(nb,32);

    if (nb <= 1 || nb > *kd) {

/*        Use unblocked code */

	cpbtf2_(uplo, n, kd, &ab[ab_offset], ldab, info);
    } else {

/*        Use blocked code */

	if (lsame_(uplo, "U")) {

/*           Compute the Cholesky factorization of a Hermitian band */
/*           matrix, given the upper triangle of the matrix in band */
/*           storage. */

/*           Zero the upper triangle of the work array. */

	    i__1 = nb;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = j - 1;
		for (i__ = 1; i__ <= i__2; ++i__) {
		    i__3 = i__ + j * 33 - 34;
		    work[i__3].r = 0.f, work[i__3].i = 0.f;
		}
	    }

/*           Process the band matrix one diagonal block at a time. */

	    i__1 = *n;
	    i__2 = nb;
	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);

/*              Factorize the diagonal block */

		i__3 = *ldab - 1;
		cpotf2_(uplo, &ib, &ab[*kd + 1 + i__ * ab_dim1], &i__3, &ii);
		if (ii != 0) {
		    *info = i__ + ii - 1;
		    goto L150;
		}
		if (i__ + ib <= *n) {

/*                 Update the relevant part of the trailing submatrix. */
/*                 If A11 denotes the diagonal block which has just been */
/*                 factorized, then we need to update the remaining */
/*                 blocks in the diagram: */

/*                    A11   A12   A13 */
/*                          A22   A23 */
/*                                A33 */

/*                 The numbers of rows and columns in the partitioning */
/*                 are IB, I2, I3 respectively. The blocks A12, A22 and */
/*                 A23 are empty if IB = KD. The upper triangle of A13 */
/*                 lies outside the band. */

/* Computing MIN */
		    i__3 = *kd - ib, i__4 = *n - i__ - ib + 1;
		    i2 = min(i__3,i__4);
/* Computing MIN */
		    i__3 = ib, i__4 = *n - i__ - *kd + 1;
		    i3 = min(i__3,i__4);

		    if (i2 > 0) {

/*                    Update A12 */

			i__3 = *ldab - 1;
			i__4 = *ldab - 1;
			ctrsm_("Left", "Upper", "Conjugate transpose", "Non-"
				"unit", &ib, &i2, &c_b1, &ab[*kd + 1 + i__ * 
				ab_dim1], &i__3, &ab[*kd + 1 - ib + (i__ + ib)
				 * ab_dim1], &i__4);

/*                    Update A22 */

			i__3 = *ldab - 1;
			i__4 = *ldab - 1;
			cherk_("Upper", "Conjugate transpose", &i2, &ib, &
				c_b21, &ab[*kd + 1 - ib + (i__ + ib) * 
				ab_dim1], &i__3, &c_b22, &ab[*kd + 1 + (i__ + 
				ib) * ab_dim1], &i__4);
		    }

		    if (i3 > 0) {

/*                    Copy the lower triangle of A13 into the work array. */

			i__3 = i3;
			for (jj = 1; jj <= i__3; ++jj) {
			    i__4 = ib;
			    for (ii = jj; ii <= i__4; ++ii) {
				i__5 = ii + jj * 33 - 34;
				i__6 = ii - jj + 1 + (jj + i__ + *kd - 1) * 
					ab_dim1;
				work[i__5].r = ab[i__6].r, work[i__5].i = ab[
					i__6].i;
			    }
			}

/*                    Update A13 (in the work array). */

			i__3 = *ldab - 1;
			ctrsm_("Left", "Upper", "Conjugate transpose", "Non-"
				"unit", &ib, &i3, &c_b1, &ab[*kd + 1 + i__ * 
				ab_dim1], &i__3, work, &c__33);

/*                    Update A23 */

			if (i2 > 0) {
			    q__1.r = -1.f, q__1.i = -0.f;
			    i__3 = *ldab - 1;
			    i__4 = *ldab - 1;
			    cgemm_("Conjugate transpose", "No transpose", &i2, 
				     &i3, &ib, &q__1, &ab[*kd + 1 - ib + (i__ 
				    + ib) * ab_dim1], &i__3, work, &c__33, &
				    c_b1, &ab[ib + 1 + (i__ + *kd) * ab_dim1], 
				     &i__4);
			}

/*                    Update A33 */

			i__3 = *ldab - 1;
			cherk_("Upper", "Conjugate transpose", &i3, &ib, &
				c_b21, work, &c__33, &c_b22, &ab[*kd + 1 + (
				i__ + *kd) * ab_dim1], &i__3);

/*                    Copy the lower triangle of A13 back into place. */

			i__3 = i3;
			for (jj = 1; jj <= i__3; ++jj) {
			    i__4 = ib;
			    for (ii = jj; ii <= i__4; ++ii) {
				i__5 = ii - jj + 1 + (jj + i__ + *kd - 1) * 
					ab_dim1;
				i__6 = ii + jj * 33 - 34;
				ab[i__5].r = work[i__6].r, ab[i__5].i = work[
					i__6].i;
			    }
			}
		    }
		}
	    }
	} else {

/*           Compute the Cholesky factorization of a Hermitian band */
/*           matrix, given the lower triangle of the matrix in band */
/*           storage. */

/*           Zero the lower triangle of the work array. */

	    i__2 = nb;
	    for (j = 1; j <= i__2; ++j) {
		i__1 = nb;
		for (i__ = j + 1; i__ <= i__1; ++i__) {
		    i__3 = i__ + j * 33 - 34;
		    work[i__3].r = 0.f, work[i__3].i = 0.f;
		}
	    }

/*           Process the band matrix one diagonal block at a time. */

	    i__2 = *n;
	    i__1 = nb;
	    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);

/*              Factorize the diagonal block */

		i__3 = *ldab - 1;
		cpotf2_(uplo, &ib, &ab[i__ * ab_dim1 + 1], &i__3, &ii);
		if (ii != 0) {
		    *info = i__ + ii - 1;
		    goto L150;
		}
		if (i__ + ib <= *n) {

/*                 Update the relevant part of the trailing submatrix. */
/*                 If A11 denotes the diagonal block which has just been */
/*                 factorized, then we need to update the remaining */
/*                 blocks in the diagram: */

/*                    A11 */
/*                    A21   A22 */
/*                    A31   A32   A33 */

/*                 The numbers of rows and columns in the partitioning */
/*                 are IB, I2, I3 respectively. The blocks A21, A22 and */
/*                 A32 are empty if IB = KD. The lower triangle of A31 */
/*                 lies outside the band. */

/* Computing MIN */
		    i__3 = *kd - ib, i__4 = *n - i__ - ib + 1;
		    i2 = min(i__3,i__4);
/* Computing MIN */
		    i__3 = ib, i__4 = *n - i__ - *kd + 1;
		    i3 = min(i__3,i__4);

		    if (i2 > 0) {

/*                    Update A21 */

			i__3 = *ldab - 1;
			i__4 = *ldab - 1;
			ctrsm_("Right", "Lower", "Conjugate transpose", "Non"
				"-unit", &i2, &ib, &c_b1, &ab[i__ * ab_dim1 + 
				1], &i__3, &ab[ib + 1 + i__ * ab_dim1], &i__4);

/*                    Update A22 */

			i__3 = *ldab - 1;
			i__4 = *ldab - 1;
			cherk_("Lower", "No transpose", &i2, &ib, &c_b21, &ab[
				ib + 1 + i__ * ab_dim1], &i__3, &c_b22, &ab[(
				i__ + ib) * ab_dim1 + 1], &i__4);
		    }

		    if (i3 > 0) {

/*                    Copy the upper triangle of A31 into the work array. */

			i__3 = ib;
			for (jj = 1; jj <= i__3; ++jj) {
			    i__4 = min(jj,i3);
			    for (ii = 1; ii <= i__4; ++ii) {
				i__5 = ii + jj * 33 - 34;
				i__6 = *kd + 1 - jj + ii + (jj + i__ - 1) * 
					ab_dim1;
				work[i__5].r = ab[i__6].r, work[i__5].i = ab[
					i__6].i;
			    }
			}

/*                    Update A31 (in the work array). */

			i__3 = *ldab - 1;
			ctrsm_("Right", "Lower", "Conjugate transpose", "Non"
				"-unit", &i3, &ib, &c_b1, &ab[i__ * ab_dim1 + 
				1], &i__3, work, &c__33);

/*                    Update A32 */

			if (i2 > 0) {
			    q__1.r = -1.f, q__1.i = -0.f;
			    i__3 = *ldab - 1;
			    i__4 = *ldab - 1;
			    cgemm_("No transpose", "Conjugate transpose", &i3, 
				     &i2, &ib, &q__1, work, &c__33, &ab[ib + 
				    1 + i__ * ab_dim1], &i__3, &c_b1, &ab[*kd 
				    + 1 - ib + (i__ + ib) * ab_dim1], &i__4);
			}

/*                    Update A33 */

			i__3 = *ldab - 1;
			cherk_("Lower", "No transpose", &i3, &ib, &c_b21, 
				work, &c__33, &c_b22, &ab[(i__ + *kd) * 
				ab_dim1 + 1], &i__3);

/*                    Copy the upper triangle of A31 back into place. */

			i__3 = ib;
			for (jj = 1; jj <= i__3; ++jj) {
			    i__4 = min(jj,i3);
			    for (ii = 1; ii <= i__4; ++ii) {
				i__5 = *kd + 1 - jj + ii + (jj + i__ - 1) * 
					ab_dim1;
				i__6 = ii + jj * 33 - 34;
				ab[i__5].r = work[i__6].r, ab[i__5].i = work[
					i__6].i;
			    }
			}
		    }
		}
	    }
	}
    }
    return 0;

L150:
    return 0;

/*     End of CPBTRF */

} /* cpbtrf_ */

Example #9

Show file

File: cqrt16.c Project: zangel/uquad

/* Subroutine */ int cqrt16_(char *trans, integer *m, integer *n, integer *
	nrhs, complex *a, integer *lda, complex *x, integer *ldx, complex *b, 
	integer *ldb, real *rwork, real *resid)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, i__1;
    real r__1, r__2;
    complex q__1;

    /* Local variables */
    static integer j;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *);
    extern logical lsame_(char *, char *);
    static real anorm, bnorm;
    static integer n1, n2;
    static real xnorm;
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *), slamch_(char *), scasum_(
	    integer *, complex *, integer *);
    static real eps;


#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]
#define x_subscr(a_1,a_2) (a_2)*x_dim1 + a_1
#define x_ref(a_1,a_2) x[x_subscr(a_1,a_2)]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CQRT16 computes the residual for a solution of a system of linear   
    equations  A*x = b  or  A'*x = b:   
       RESID = norm(B - A*X) / ( max(m,n) * norm(A) * norm(X) * EPS ),   
    where EPS is the machine epsilon.   

    Arguments   
    =========   

    TRANS   (input) CHARACTER*1   
            Specifies the form of the system of equations:   
            = 'N':  A *x = b   
            = 'T':  A^T*x = b, where A^T is the transpose of A   
            = 'C':  A^H*x = b, where A^H is the conjugate transpose of A   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A.  N >= 0.   

    NRHS    (input) INTEGER   
            The number of columns of B, the matrix of right hand sides.   
            NRHS >= 0.   

    A       (input) COMPLEX array, dimension (LDA,N)   
            The original M x N matrix A.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    X       (input) COMPLEX array, dimension (LDX,NRHS)   
            The computed solution vectors for the system of linear   
            equations.   

    LDX     (input) INTEGER   
            The leading dimension of the array X.  If TRANS = 'N',   
            LDX >= max(1,N); if TRANS = 'T' or 'C', LDX >= max(1,M).   

    B       (input/output) COMPLEX array, dimension (LDB,NRHS)   
            On entry, the right hand side vectors for the system of   
            linear equations.   
            On exit, B is overwritten with the difference B - A*X.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  IF TRANS = 'N',   
            LDB >= max(1,M); if TRANS = 'T' or 'C', LDB >= max(1,N).   

    RWORK   (workspace) REAL array, dimension (M)   

    RESID   (output) REAL   
            The maximum over the number of right hand sides of   
            norm(B - A*X) / ( max(m,n) * norm(A) * norm(X) * EPS ).   

    =====================================================================   


       Quick exit if M = 0 or N = 0 or NRHS = 0   

       Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --rwork;

    /* Function Body */
    if (*m <= 0 || *n <= 0 || *nrhs == 0) {
	*resid = 0.f;
	return 0;
    }

    if (lsame_(trans, "T") || lsame_(trans, "C")) {
	anorm = clange_("I", m, n, &a[a_offset], lda, &rwork[1]);
	n1 = *n;
	n2 = *m;
    } else {
	anorm = clange_("1", m, n, &a[a_offset], lda, &rwork[1]);
	n1 = *m;
	n2 = *n;
    }

    eps = slamch_("Epsilon");

/*     Compute  B - A*X  (or  B - A'*X ) and store in B. */

    q__1.r = -1.f, q__1.i = 0.f;
    cgemm_(trans, "No transpose", &n1, nrhs, &n2, &q__1, &a[a_offset], lda, &
	    x[x_offset], ldx, &c_b1, &b[b_offset], ldb)
	    ;

/*     Compute the maximum over the number of right hand sides of   
          norm(B - A*X) / ( max(m,n) * norm(A) * norm(X) * EPS ) . */

    *resid = 0.f;
    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
	bnorm = scasum_(&n1, &b_ref(1, j), &c__1);
	xnorm = scasum_(&n2, &x_ref(1, j), &c__1);
	if (anorm == 0.f && bnorm == 0.f) {
	    *resid = 0.f;
	} else if (anorm <= 0.f || xnorm <= 0.f) {
	    *resid = 1.f / eps;
	} else {
/* Computing MAX */
	    r__1 = *resid, r__2 = bnorm / anorm / xnorm / (max(*m,*n) * eps);
	    *resid = dmax(r__1,r__2);
	}
/* L10: */
    }

    return 0;

/*     End of CQRT16 */

} /* cqrt16_ */

Example #10

Show file

File: claqps.c Project: juanjosegarciaripoll/cblapack

/* Subroutine */ int claqps_(integer *m, integer *n, integer *offset, integer 
	*nb, integer *kb, complex *a, integer *lda, integer *jpvt, complex *
	tau, real *vn1, real *vn2, complex *auxv, complex *f, integer *ldf)
{
    /* System generated locals */
    integer a_dim1, a_offset, f_dim1, f_offset, i__1, i__2, i__3;
    real r__1, r__2;
    complex q__1;

    /* Local variables */
    integer j, k, rk;
    complex akk;
    integer pvt;
    real temp, temp2, tol3z;
    integer itemp;
    integer lsticc;
    integer lastrk;

/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  CLAQPS computes a step of QR factorization with column pivoting */
/*  of a complex M-by-N matrix A by using Blas-3.  It tries to factorize */
/*  NB columns from A starting from the row OFFSET+1, and updates all */
/*  of the matrix with Blas-3 xGEMM. */

/*  In some cases, due to catastrophic cancellations, it cannot */
/*  factorize NB columns.  Hence, the actual number of factorized */
/*  columns is returned in KB. */

/*  Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A. M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A. N >= 0 */

/*  OFFSET  (input) INTEGER */
/*          The number of rows of A that have been factorized in */
/*          previous steps. */

/*  NB      (input) INTEGER */
/*          The number of columns to factorize. */

/*  KB      (output) INTEGER */
/*          The number of columns actually factorized. */

/*  A       (input/output) COMPLEX array, dimension (LDA,N) */
/*          On entry, the M-by-N matrix A. */
/*          On exit, block A(OFFSET+1:M,1:KB) is the triangular */
/*          factor obtained and block A(1:OFFSET,1:N) has been */
/*          accordingly pivoted, but no factorized. */
/*          The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has */
/*          been updated. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A. LDA >= max(1,M). */

/*  JPVT    (input/output) INTEGER array, dimension (N) */
/*          JPVT(I) = K <==> Column K of the full matrix A has been */
/*          permuted into position I in AP. */

/*  TAU     (output) COMPLEX array, dimension (KB) */
/*          The scalar factors of the elementary reflectors. */

/*  VN1     (input/output) REAL array, dimension (N) */
/*          The vector with the partial column norms. */

/*  VN2     (input/output) REAL array, dimension (N) */
/*          The vector with the exact column norms. */

/*  AUXV    (input/output) COMPLEX array, dimension (NB) */
/*          Auxiliar vector. */

/*  F       (input/output) COMPLEX array, dimension (LDF,NB) */
/*          Matrix F' = L*Y'*A. */

/*  LDF     (input) INTEGER */
/*          The leading dimension of the array F. LDF >= max(1,N). */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain */
/*    X. Sun, Computer Science Dept., Duke University, USA */

/*  Partial column norm updating strategy modified by */
/*    Z. Drmac and Z. Bujanovic, Dept. of Mathematics, */
/*    University of Zagreb, Croatia. */
/*    June 2006. */
/*  For more details see LAPACK Working Note 176. */
/*  ===================================================================== */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --jpvt;
    --tau;
    --vn1;
    --vn2;
    --auxv;
    f_dim1 = *ldf;
    f_offset = 1 + f_dim1;
    f -= f_offset;

    /* Function Body */
/* Computing MIN */
    i__1 = *m, i__2 = *n + *offset;
    lastrk = min(i__1,i__2);
    lsticc = 0;
    k = 0;
    tol3z = sqrt(slamch_("Epsilon"));

/*     Beginning of while loop. */

L10:
    if (k < *nb && lsticc == 0) {
	++k;
	rk = *offset + k;

/*        Determine ith pivot column and swap if necessary */

	i__1 = *n - k + 1;
	pvt = k - 1 + isamax_(&i__1, &vn1[k], &c__1);
	if (pvt != k) {
	    cswap_(m, &a[pvt * a_dim1 + 1], &c__1, &a[k * a_dim1 + 1], &c__1);
	    i__1 = k - 1;
	    cswap_(&i__1, &f[pvt + f_dim1], ldf, &f[k + f_dim1], ldf);
	    itemp = jpvt[pvt];
	    jpvt[pvt] = jpvt[k];
	    jpvt[k] = itemp;
	    vn1[pvt] = vn1[k];
	    vn2[pvt] = vn2[k];
	}

/*        Apply previous Householder reflectors to column K: */
/*        A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. */

	if (k > 1) {
	    i__1 = k - 1;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = k + j * f_dim1;
		r_cnjg(&q__1, &f[k + j * f_dim1]);
		f[i__2].r = q__1.r, f[i__2].i = q__1.i;
	    }
	    i__1 = *m - rk + 1;
	    i__2 = k - 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemv_("No transpose", &i__1, &i__2, &q__1, &a[rk + a_dim1], lda, 
		    &f[k + f_dim1], ldf, &c_b2, &a[rk + k * a_dim1], &c__1);
	    i__1 = k - 1;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = k + j * f_dim1;
		r_cnjg(&q__1, &f[k + j * f_dim1]);
		f[i__2].r = q__1.r, f[i__2].i = q__1.i;
	    }
	}

/*        Generate elementary reflector H(k). */

	if (rk < *m) {
	    i__1 = *m - rk + 1;
	    clarfp_(&i__1, &a[rk + k * a_dim1], &a[rk + 1 + k * a_dim1], &
		    c__1, &tau[k]);
	} else {
	    clarfp_(&c__1, &a[rk + k * a_dim1], &a[rk + k * a_dim1], &c__1, &
		    tau[k]);
	}

	i__1 = rk + k * a_dim1;
	akk.r = a[i__1].r, akk.i = a[i__1].i;
	i__1 = rk + k * a_dim1;
	a[i__1].r = 1.f, a[i__1].i = 0.f;

/*        Compute Kth column of F: */

/*        Compute  F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). */

	if (k < *n) {
	    i__1 = *m - rk + 1;
	    i__2 = *n - k;
	    cgemv_("Conjugate transpose", &i__1, &i__2, &tau[k], &a[rk + (k + 
		    1) * a_dim1], lda, &a[rk + k * a_dim1], &c__1, &c_b1, &f[
		    k + 1 + k * f_dim1], &c__1);
	}

/*        Padding F(1:K,K) with zeros. */

	i__1 = k;
	for (j = 1; j <= i__1; ++j) {
	    i__2 = j + k * f_dim1;
	    f[i__2].r = 0.f, f[i__2].i = 0.f;
	}

/*        Incremental updating of F: */
/*        F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)' */
/*                    *A(RK:M,K). */

	if (k > 1) {
	    i__1 = *m - rk + 1;
	    i__2 = k - 1;
	    i__3 = k;
	    q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
	    cgemv_("Conjugate transpose", &i__1, &i__2, &q__1, &a[rk + a_dim1]
, lda, &a[rk + k * a_dim1], &c__1, &c_b1, &auxv[1], &c__1);

	    i__1 = k - 1;
	    cgemv_("No transpose", n, &i__1, &c_b2, &f[f_dim1 + 1], ldf, &
		    auxv[1], &c__1, &c_b2, &f[k * f_dim1 + 1], &c__1);
	}

/*        Update the current row of A: */
/*        A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. */

	if (k < *n) {
	    i__1 = *n - k;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemm_("No transpose", "Conjugate transpose", &c__1, &i__1, &k, &
		    q__1, &a[rk + a_dim1], lda, &f[k + 1 + f_dim1], ldf, &
		    c_b2, &a[rk + (k + 1) * a_dim1], lda);
	}

/*        Update partial column norms. */

	if (rk < lastrk) {
	    i__1 = *n;
	    for (j = k + 1; j <= i__1; ++j) {
		if (vn1[j] != 0.f) {

/*                 NOTE: The following 4 lines follow from the analysis in */
/*                 Lapack Working Note 176. */

		    temp = c_abs(&a[rk + j * a_dim1]) / vn1[j];
/* Computing MAX */
		    r__1 = 0.f, r__2 = (temp + 1.f) * (1.f - temp);
		    temp = dmax(r__1,r__2);
/* Computing 2nd power */
		    r__1 = vn1[j] / vn2[j];
		    temp2 = temp * (r__1 * r__1);
		    if (temp2 <= tol3z) {
			vn2[j] = (real) lsticc;
			lsticc = j;
		    } else {
			vn1[j] *= sqrt(temp);
		    }
		}
	    }
	}

	i__1 = rk + k * a_dim1;
	a[i__1].r = akk.r, a[i__1].i = akk.i;

/*        End of while loop. */

	goto L10;
    }
    *kb = k;
    rk = *offset + *kb;

/*     Apply the block reflector to the rest of the matrix: */
/*     A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - */
/*                         A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'. */

/* Computing MIN */
    i__1 = *n, i__2 = *m - *offset;
    if (*kb < min(i__1,i__2)) {
	i__1 = *m - rk;
	i__2 = *n - *kb;
	q__1.r = -1.f, q__1.i = -0.f;
	cgemm_("No transpose", "Conjugate transpose", &i__1, &i__2, kb, &q__1, 
		 &a[rk + 1 + a_dim1], lda, &f[*kb + 1 + f_dim1], ldf, &c_b2, &
		a[rk + 1 + (*kb + 1) * a_dim1], lda);
    }

/*     Recomputation of difficult columns. */

L60:
    if (lsticc > 0) {
	itemp = i_nint(&vn2[lsticc]);
	i__1 = *m - rk;
	vn1[lsticc] = scnrm2_(&i__1, &a[rk + 1 + lsticc * a_dim1], &c__1);

/*        NOTE: The computation of VN1( LSTICC ) relies on the fact that */
/*        SNRM2 does not fail on vectors with norm below the value of */
/*        SQRT(DLAMCH('S')) */

	vn2[lsticc] = vn1[lsticc];
	lsticc = itemp;
	goto L60;
    }

    return 0;

/*     End of CLAQPS */

} /* claqps_ */

Example #11

Show file

File: crqt02.c Project: 3deggi/levmar-ndk

/* Subroutine */ int crqt02_(integer *m, integer *n, integer *k, complex *a, 
	complex *af, complex *q, complex *r__, integer *lda, complex *tau, 
	complex *work, integer *lwork, real *rwork, real *result)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, q_dim1, q_offset, r_dim1, 
	    r_offset, i__1, i__2;

    /* Builtin functions */
    /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);

    /* Local variables */
    real eps;
    integer info;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *), cherk_(char *, 
	    char *, integer *, integer *, real *, complex *, integer *, real *
, complex *, integer *);
    real resid, anorm;
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *), slamch_(char *);
    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex 
	    *, integer *, complex *, integer *), claset_(char *, 
	    integer *, integer *, complex *, complex *, complex *, integer *);
    extern doublereal clansy_(char *, char *, integer *, complex *, integer *, 
	     real *);
    extern /* Subroutine */ int cungrq_(integer *, integer *, integer *, 
	    complex *, integer *, complex *, complex *, integer *, integer *);


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CRQT02 tests CUNGRQ, which generates an m-by-n matrix Q with */
/*  orthonornmal rows that is defined as the product of k elementary */
/*  reflectors. */

/*  Given the RQ factorization of an m-by-n matrix A, CRQT02 generates */
/*  the orthogonal matrix Q defined by the factorization of the last k */
/*  rows of A; it compares R(m-k+1:m,n-m+1:n) with */
/*  A(m-k+1:m,1:n)*Q(n-m+1:n,1:n)', and checks that the rows of Q are */
/*  orthonormal. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix Q to be generated.  M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix Q to be generated. */
/*          N >= M >= 0. */

/*  K       (input) INTEGER */
/*          The number of elementary reflectors whose product defines the */
/*          matrix Q. M >= K >= 0. */

/*  A       (input) COMPLEX array, dimension (LDA,N) */
/*          The m-by-n matrix A which was factorized by CRQT01. */

/*  AF      (input) COMPLEX array, dimension (LDA,N) */
/*          Details of the RQ factorization of A, as returned by CGERQF. */
/*          See CGERQF for further details. */

/*  Q       (workspace) COMPLEX array, dimension (LDA,N) */

/*  R       (workspace) COMPLEX array, dimension (LDA,M) */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the arrays A, AF, Q and L. LDA >= N. */

/*  TAU     (input) COMPLEX array, dimension (M) */
/*          The scalar factors of the elementary reflectors corresponding */
/*          to the RQ factorization in AF. */

/*  WORK    (workspace) COMPLEX array, dimension (LWORK) */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK. */

/*  RWORK   (workspace) REAL array, dimension (M) */

/*  RESULT  (output) REAL array, dimension (2) */
/*          The test ratios: */
/*          RESULT(1) = norm( R - A*Q' ) / ( N * norm(A) * EPS ) */
/*          RESULT(2) = norm( I - Q*Q' ) / ( N * EPS ) */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Scalars in Common .. */
/*     .. */
/*     .. Common blocks .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Quick return if possible */

    /* Parameter adjustments */
    r_dim1 = *lda;
    r_offset = 1 + r_dim1;
    r__ -= r_offset;
    q_dim1 = *lda;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    af_dim1 = *lda;
    af_offset = 1 + af_dim1;
    af -= af_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --tau;
    --work;
    --rwork;
    --result;

    /* Function Body */
    if (*m == 0 || *n == 0 || *k == 0) {
	result[1] = 0.f;
	result[2] = 0.f;
	return 0;
    }

    eps = slamch_("Epsilon");

/*     Copy the last k rows of the factorization to the array Q */

    claset_("Full", m, n, &c_b1, &c_b1, &q[q_offset], lda);
    if (*k < *n) {
	i__1 = *n - *k;
	clacpy_("Full", k, &i__1, &af[*m - *k + 1 + af_dim1], lda, &q[*m - *k 
		+ 1 + q_dim1], lda);
    }
    if (*k > 1) {
	i__1 = *k - 1;
	i__2 = *k - 1;
	clacpy_("Lower", &i__1, &i__2, &af[*m - *k + 2 + (*n - *k + 1) * 
		af_dim1], lda, &q[*m - *k + 2 + (*n - *k + 1) * q_dim1], lda);
    }

/*     Generate the last n rows of the matrix Q */

    s_copy(srnamc_1.srnamt, "CUNGRQ", (ftnlen)32, (ftnlen)6);
    cungrq_(m, n, k, &q[q_offset], lda, &tau[*m - *k + 1], &work[1], lwork, &
	    info);

/*     Copy R(m-k+1:m,n-m+1:n) */

    claset_("Full", k, m, &c_b9, &c_b9, &r__[*m - *k + 1 + (*n - *m + 1) * 
	    r_dim1], lda);
    clacpy_("Upper", k, k, &af[*m - *k + 1 + (*n - *k + 1) * af_dim1], lda, &
	    r__[*m - *k + 1 + (*n - *k + 1) * r_dim1], lda);

/*     Compute R(m-k+1:m,n-m+1:n) - A(m-k+1:m,1:n) * Q(n-m+1:n,1:n)' */

    cgemm_("No transpose", "Conjugate transpose", k, m, n, &c_b14, &a[*m - *k 
	    + 1 + a_dim1], lda, &q[q_offset], lda, &c_b15, &r__[*m - *k + 1 + 
	    (*n - *m + 1) * r_dim1], lda);

/*     Compute norm( R - A*Q' ) / ( N * norm(A) * EPS ) . */

    anorm = clange_("1", k, n, &a[*m - *k + 1 + a_dim1], lda, &rwork[1]);
    resid = clange_("1", k, m, &r__[*m - *k + 1 + (*n - *m + 1) * r_dim1], 
	    lda, &rwork[1]);
    if (anorm > 0.f) {
	result[1] = resid / (real) max(1,*n) / anorm / eps;
    } else {
	result[1] = 0.f;
    }

/*     Compute I - Q*Q' */

    claset_("Full", m, m, &c_b9, &c_b15, &r__[r_offset], lda);
    cherk_("Upper", "No transpose", m, n, &c_b23, &q[q_offset], lda, &c_b24, &
	    r__[r_offset], lda);

/*     Compute norm( I - Q*Q' ) / ( N * EPS ) . */

    resid = clansy_("1", "Upper", m, &r__[r_offset], lda, &rwork[1]);

    result[2] = resid / (real) max(1,*n) / eps;

    return 0;

/*     End of CRQT02 */

} /* crqt02_ */

Example #12

Show file

File: clauum.c Project: deepakantony/vispack

/* Subroutine */ int clauum_(char *uplo, integer *n, complex *a, integer *lda,
	 integer *info)
{
/*  -- LAPACK auxiliary routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CLAUUM computes the product U * U' or L' * L, where the triangular   
    factor U or L is stored in the upper or lower triangular part of   
    the array A.   

    If UPLO = 'U' or 'u' then the upper triangle of the result is stored, 
  
    overwriting the factor U in A.   
    If UPLO = 'L' or 'l' then the lower triangle of the result is stored, 
  
    overwriting the factor L in A.   

    This is the blocked form of the algorithm, calling Level 3 BLAS.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the triangular factor stored in the array A 
  
            is upper or lower triangular:   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    N       (input) INTEGER   
            The order of the triangular factor U or L.  N >= 0.   

    A       (input/output) COMPLEX array, dimension (LDA,N)   
            On entry, the triangular factor U or L.   
            On exit, if UPLO = 'U', the upper triangle of A is   
            overwritten with the upper triangle of the product U * U';   
            if UPLO = 'L', the lower triangle of A is overwritten with   
            the lower triangle of the product L' * L.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -k, the k-th argument had an illegal value   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static complex c_b1 = {1.f,0.f};
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static real c_b21 = 1.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    static integer i;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *), cherk_(char *, 
	    char *, integer *, integer *, real *, complex *, integer *, real *
	    , complex *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, 
	    integer *, integer *, complex *, complex *, integer *, complex *, 
	    integer *);
    static logical upper;
    extern /* Subroutine */ int clauu2_(char *, integer *, complex *, integer 
	    *, integer *);
    static integer ib, nb;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);




#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CLAUUM", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "CLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, 6L, 1L);

    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code */

	clauu2_(uplo, n, &A(1,1), lda, info);
    } else {

/*        Use blocked code */

	if (upper) {

/*           Compute the product U * U'. */

	    i__1 = *n;
	    i__2 = nb;
	    for (i = 1; nb < 0 ? i >= *n : i <= *n; i += nb) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i + 1;
		ib = min(i__3,i__4);
		i__3 = i - 1;
		ctrmm_("Right", "Upper", "Conjugate transpose", "Non-unit", &
			i__3, &ib, &c_b1, &A(i,i), lda, &A(1,i), lda);
		clauu2_("Upper", &ib, &A(i,i), lda, info);
		if (i + ib <= *n) {
		    i__3 = i - 1;
		    i__4 = *n - i - ib + 1;
		    cgemm_("No transpose", "Conjugate transpose", &i__3, &ib, 
			    &i__4, &c_b1, &A(1,i+ib), lda, &A(i,i+ib), lda, &c_b1, &A(1,i), lda);
		    i__3 = *n - i - ib + 1;
		    cherk_("Upper", "No transpose", &ib, &i__3, &c_b21, &A(i,i+ib), lda, &c_b21, &A(i,i), lda);
		}
/* L10: */
	    }
	} else {

/*           Compute the product L' * L. */

	    i__2 = *n;
	    i__1 = nb;
	    for (i = 1; nb < 0 ? i >= *n : i <= *n; i += nb) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i + 1;
		ib = min(i__3,i__4);
		i__3 = i - 1;
		ctrmm_("Left", "Lower", "Conjugate transpose", "Non-unit", &
			ib, &i__3, &c_b1, &A(i,i), lda, &A(i,1), lda);
		clauu2_("Lower", &ib, &A(i,i), lda, info);
		if (i + ib <= *n) {
		    i__3 = i - 1;
		    i__4 = *n - i - ib + 1;
		    cgemm_("Conjugate transpose", "No transpose", &ib, &i__3, 
			    &i__4, &c_b1, &A(i+ib,i), lda, &A(i+ib,1), lda, &c_b1, &A(i,1), lda);
		    i__3 = *n - i - ib + 1;
		    cherk_("Lower", "Conjugate transpose", &ib, &i__3, &c_b21,
			     &A(i+ib,i), lda, &c_b21, &A(i,i), lda);
		}
/* L20: */
	    }
	}
    }

    return 0;

/*     End of CLAUUM */

} /* clauum_ */

Example #13

Show file

File: test_gemm.c Project: mindis/blis

int main( int argc, char** argv )
{
	obj_t    a, b, c;
	obj_t    c_save;
	obj_t    alpha, beta;
	dim_t    m, n, k;
	dim_t    p;
	dim_t    p_begin, p_end, p_inc;
	int      m_input, n_input, k_input;
	num_t    dt, dt_real;
	char     dt_ch;
	int      r, n_repeats;
	trans_t  transa;
	trans_t  transb;
	f77_char f77_transa;
	f77_char f77_transb;

	double   dtime;
	double   dtime_save;
	double   gflops;

	extern blksz_t* gemm_kc;

	bli_init();

	//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

	n_repeats = 3;

	dt      = DT;
	dt_real = bli_datatype_proj_to_real( DT );

	p_begin = P_BEGIN;
	p_end   = P_END;
	p_inc   = P_INC;

	m_input = -1;
	n_input = -1;
	k_input = -1;

	// Extract the kc blocksize for the requested datatype and its
	// real analogue.
	dim_t kc      = bli_blksz_get_def( dt,      gemm_kc );
	dim_t kc_real = bli_blksz_get_def( dt_real, gemm_kc );

	// Assign the k dimension depending on which implementation is
	// being tested. Note that the BLIS_NAT case handles the real
	// domain cases as well as native complex.
	if      ( IND == BLIS_NAT  ) k_input = kc;
	else if ( IND == BLIS_3M1  ) k_input = kc_real / 3;
	else if ( IND == BLIS_4M1A ) k_input = kc_real / 2;
	else                         k_input = kc_real;

	// Adjust the relative dimensions, if requested.
#if   (defined ADJ_MK)
	m_input = -2; k_input = -2; n_input = -1;
#elif (defined ADJ_KN)
	k_input = -2; n_input = -2; m_input = -1;
#elif (defined ADJ_MN)
	m_input = -2; n_input = -2; k_input = -1;
#endif

	// Choose the char corresponding to the requested datatype.
	if      ( bli_is_float( dt ) )    dt_ch = 's';
	else if ( bli_is_double( dt ) )   dt_ch = 'd';
	else if ( bli_is_scomplex( dt ) ) dt_ch = 'c';
	else                              dt_ch = 'z';

	transa = BLIS_NO_TRANSPOSE;
	transb = BLIS_NO_TRANSPOSE;

	bli_param_map_blis_to_netlib_trans( transa, &f77_transa );
	bli_param_map_blis_to_netlib_trans( transb, &f77_transb );

	// Begin with initializing the last entry to zero so that
	// matlab allocates space for the entire array once up-front.
	for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
#ifdef BLIS
	printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR );
#else
	printf( "data_%s_%cgemm_%s",      THR_STR, dt_ch, STR );
#endif
	printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu  %10.3e  %6.3f ];\n",
	        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
	        ( unsigned long )0,
	        ( unsigned long )0,
	        ( unsigned long )0, 0.0, 0.0 );


	for ( p = p_begin; p <= p_end; p += p_inc )
	{
		if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( n_input < 0 ) n = p / ( dim_t )abs(n_input);
		else               n =     ( dim_t )    n_input;
		if ( k_input < 0 ) k = p / ( dim_t )abs(k_input);
		else               k =     ( dim_t )    k_input;

		bli_obj_create( dt, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt, 1, 1, 0, 0, &beta );

		bli_obj_create( dt, m, k, 0, 0, &a );
		bli_obj_create( dt, k, n, 0, 0, &b );
		bli_obj_create( dt, m, n, 0, 0, &c );
		//bli_obj_create( dt, m, k, 2, 2*m, &a );
		//bli_obj_create( dt, k, n, 2, 2*k, &b );
		//bli_obj_create( dt, m, n, 2, 2*m, &c );
		bli_obj_create( dt, m, n, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &b );
		bli_randm( &c );

		bli_obj_set_conjtrans( transa, a );
		bli_obj_set_conjtrans( transb, b );

		bli_setsc(  (2.0/1.0), 0.0, &alpha );
		bli_setsc( -(1.0/1.0), 0.0, &beta );


		bli_copym( &c, &c_save );
	
#ifdef BLIS
		bli_ind_disable_all_dt( dt );
		bli_ind_enable_dt( IND, dt );
#endif

		dtime_save = 1.0e9;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );


			dtime = bli_clock();


#ifdef PRINT
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "b", &b, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS

			bli_gemm( &alpha,
			          &a,
			          &b,
			          &beta,
			          &c );

#else

		if ( bli_is_float( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			float*   alphap = bli_obj_buffer( alpha );
			float*   ap     = bli_obj_buffer( a );
			float*   bp     = bli_obj_buffer( b );
			float*   betap  = bli_obj_buffer( beta );
			float*   cp     = bli_obj_buffer( c );

			sgemm_( &f77_transa,
			        &f77_transb,
			        &mm,
			        &nn,
			        &kk,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_double( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			double*  alphap = bli_obj_buffer( alpha );
			double*  ap     = bli_obj_buffer( a );
			double*  bp     = bli_obj_buffer( b );
			double*  betap  = bli_obj_buffer( beta );
			double*  cp     = bli_obj_buffer( c );

			dgemm_( &f77_transa,
			        &f77_transb,
			        &mm,
			        &nn,
			        &kk,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_scomplex( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			scomplex*  alphap = bli_obj_buffer( alpha );
			scomplex*  ap     = bli_obj_buffer( a );
			scomplex*  bp     = bli_obj_buffer( b );
			scomplex*  betap  = bli_obj_buffer( beta );
			scomplex*  cp     = bli_obj_buffer( c );

			cgemm_( &f77_transa,
			        &f77_transb,
			        &mm,
			        &nn,
			        &kk,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_dcomplex( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			dcomplex*  alphap = bli_obj_buffer( alpha );
			dcomplex*  ap     = bli_obj_buffer( a );
			dcomplex*  bp     = bli_obj_buffer( b );
			dcomplex*  betap  = bli_obj_buffer( beta );
			dcomplex*  cp     = bli_obj_buffer( c );

			zgemm_( &f77_transa,
			//zgemm3m_( &f77_transa,
			        &f77_transb,
			        &mm,
			        &nn,
			        &kk,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%4.1f", "" );
			exit(1);
#endif


			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 );

		if ( bli_is_complex( dt ) ) gflops *= 4.0;

#ifdef BLIS
		printf( "data_%s_%cgemm_%s_blis", THR_STR, dt_ch, STR );
#else
		printf( "data_%s_%cgemm_%s",      THR_STR, dt_ch, STR );
#endif
		printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )k,
		        ( unsigned long )n, dtime_save, gflops );

		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &b );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	bli_finalize();

	return 0;
}

Example #14

Show file

File: chet22.c Project: kstraube/hysim

/* Subroutine */ int chet22_(integer *itype, char *uplo, integer *n, integer *
	m, integer *kband, complex *a, integer *lda, real *d__, real *e, 
	complex *u, integer *ldu, complex *v, integer *ldv, complex *tau, 
	complex *work, real *rwork, real *result)
{
    /* System generated locals */
    integer a_dim1, a_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2, 
	    i__3, i__4;
    real r__1, r__2;
    complex q__1;

    /* Local variables */
    integer j, jj, nn, jj1, jj2;
    real ulp;
    integer nnp1;
    real unfl;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *), chemm_(char *, 
	    char *, integer *, integer *, complex *, complex *, integer *, 
	    complex *, integer *, complex *, complex *, integer *), cunt01_(char *, integer *, integer *, complex *, integer 
	    *, complex *, integer *, real *, real *);
    real anorm, wnorm;
    extern doublereal clanhe_(char *, char *, integer *, complex *, integer *, 
	     real *), slamch_(char *);


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*       CHET22  generally checks a decomposition of the form */

/*               A U = U S */

/*       where A is complex Hermitian, the columns of U are orthonormal, */
/*       and S is diagonal (if KBAND=0) or symmetric tridiagonal (if */
/*       KBAND=1).  If ITYPE=1, then U is represented as a dense matrix, */
/*       otherwise the U is expressed as a product of Householder */
/*       transformations, whose vectors are stored in the array "V" and */
/*       whose scaling constants are in "TAU"; we shall use the letter */
/*       "V" to refer to the product of Householder transformations */
/*       (which should be equal to U). */

/*       Specifically, if ITYPE=1, then: */

/*               RESULT(1) = | U' A U - S | / ( |A| m ulp ) *and* */
/*               RESULT(2) = | I - U'U | / ( m ulp ) */

/*  Arguments */
/*  ========= */

/*  ITYPE   INTEGER */
/*          Specifies the type of tests to be performed. */
/*          1: U expressed as a dense orthogonal matrix: */
/*             RESULT(1) = | A - U S U' | / ( |A| n ulp )   *and* */
/*             RESULT(2) = | I - UU' | / ( n ulp ) */

/*  UPLO    CHARACTER */
/*          If UPLO='U', the upper triangle of A will be used and the */
/*          (strictly) lower triangle will not be referenced.  If */
/*          UPLO='L', the lower triangle of A will be used and the */
/*          (strictly) upper triangle will not be referenced. */
/*          Not modified. */

/*  N       INTEGER */
/*          The size of the matrix.  If it is zero, CHET22 does nothing. */
/*          It must be at least zero. */
/*          Not modified. */

/*  M       INTEGER */
/*          The number of columns of U.  If it is zero, CHET22 does */
/*          nothing.  It must be at least zero. */
/*          Not modified. */

/*  KBAND   INTEGER */
/*          The bandwidth of the matrix.  It may only be zero or one. */
/*          If zero, then S is diagonal, and E is not referenced.  If */
/*          one, then S is symmetric tri-diagonal. */
/*          Not modified. */

/*  A       COMPLEX array, dimension (LDA , N) */
/*          The original (unfactored) matrix.  It is assumed to be */
/*          symmetric, and only the upper (UPLO='U') or only the lower */
/*          (UPLO='L') will be referenced. */
/*          Not modified. */

/*  LDA     INTEGER */
/*          The leading dimension of A.  It must be at least 1 */
/*          and at least N. */
/*          Not modified. */

/*  D       REAL array, dimension (N) */
/*          The diagonal of the (symmetric tri-) diagonal matrix. */
/*          Not modified. */

/*  E       REAL array, dimension (N) */
/*          The off-diagonal of the (symmetric tri-) diagonal matrix. */
/*          E(1) is ignored, E(2) is the (1,2) and (2,1) element, etc. */
/*          Not referenced if KBAND=0. */
/*          Not modified. */

/*  U       COMPLEX array, dimension (LDU, N) */
/*          If ITYPE=1, this contains the orthogonal matrix in */
/*          the decomposition, expressed as a dense matrix. */
/*          Not modified. */

/*  LDU     INTEGER */
/*          The leading dimension of U.  LDU must be at least N and */
/*          at least 1. */
/*          Not modified. */

/*  V       COMPLEX array, dimension (LDV, N) */
/*          If ITYPE=2 or 3, the lower triangle of this array contains */
/*          the Householder vectors used to describe the orthogonal */
/*          matrix in the decomposition.  If ITYPE=1, then it is not */
/*          referenced. */
/*          Not modified. */

/*  LDV     INTEGER */
/*          The leading dimension of V.  LDV must be at least N and */
/*          at least 1. */
/*          Not modified. */

/*  TAU     COMPLEX array, dimension (N) */
/*          If ITYPE >= 2, then TAU(j) is the scalar factor of */
/*          v(j) v(j)' in the Householder transformation H(j) of */
/*          the product  U = H(1)...H(n-2) */
/*          If ITYPE < 2, then TAU is not referenced. */
/*          Not modified. */

/*  WORK    COMPLEX array, dimension (2*N**2) */
/*          Workspace. */
/*          Modified. */

/*  RWORK   REAL array, dimension (N) */
/*          Workspace. */
/*          Modified. */

/*  RESULT  REAL array, dimension (2) */
/*          The values computed by the two tests described above.  The */
/*          values are currently limited to 1/ulp, to avoid overflow. */
/*          RESULT(1) is always modified.  RESULT(2) is modified only */
/*          if LDU is at least N. */
/*          Modified. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --d__;
    --e;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1;
    u -= u_offset;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    --tau;
    --work;
    --rwork;
    --result;

    /* Function Body */
    result[1] = 0.f;
    result[2] = 0.f;
    if (*n <= 0 || *m <= 0) {
	return 0;
    }

    unfl = slamch_("Safe minimum");
    ulp = slamch_("Precision");

/*     Do Test 1 */

/*     Norm of A: */

/* Computing MAX */
    r__1 = clanhe_("1", uplo, n, &a[a_offset], lda, &rwork[1]);
    anorm = dmax(r__1,unfl);

/*     Compute error matrix: */

/*     ITYPE=1: error = U' A U - S */

    chemm_("L", uplo, n, m, &c_b2, &a[a_offset], lda, &u[u_offset], ldu, &
	    c_b1, &work[1], n);
    nn = *n * *n;
    nnp1 = nn + 1;
    cgemm_("C", "N", m, m, n, &c_b2, &u[u_offset], ldu, &work[1], n, &c_b1, &
	    work[nnp1], n);
    i__1 = *m;
    for (j = 1; j <= i__1; ++j) {
	jj = nn + (j - 1) * *n + j;
	i__2 = jj;
	i__3 = jj;
	i__4 = j;
	q__1.r = work[i__3].r - d__[i__4], q__1.i = work[i__3].i;
	work[i__2].r = q__1.r, work[i__2].i = q__1.i;
/* L10: */
    }
    if (*kband == 1 && *n > 1) {
	i__1 = *m;
	for (j = 2; j <= i__1; ++j) {
	    jj1 = nn + (j - 1) * *n + j - 1;
	    jj2 = nn + (j - 2) * *n + j;
	    i__2 = jj1;
	    i__3 = jj1;
	    i__4 = j - 1;
	    q__1.r = work[i__3].r - e[i__4], q__1.i = work[i__3].i;
	    work[i__2].r = q__1.r, work[i__2].i = q__1.i;
	    i__2 = jj2;
	    i__3 = jj2;
	    i__4 = j - 1;
	    q__1.r = work[i__3].r - e[i__4], q__1.i = work[i__3].i;
	    work[i__2].r = q__1.r, work[i__2].i = q__1.i;
/* L20: */
	}
    }
    wnorm = clanhe_("1", uplo, m, &work[nnp1], n, &rwork[1]);

    if (anorm > wnorm) {
	result[1] = wnorm / anorm / (*m * ulp);
    } else {
	if (anorm < 1.f) {
/* Computing MIN */
	    r__1 = wnorm, r__2 = *m * anorm;
	    result[1] = dmin(r__1,r__2) / anorm / (*m * ulp);
	} else {
/* Computing MIN */
	    r__1 = wnorm / anorm, r__2 = (real) (*m);
	    result[1] = dmin(r__1,r__2) / (*m * ulp);
	}
    }

/*     Do Test 2 */

/*     Compute  U'U - I */

    if (*itype == 1) {
	i__1 = (*n << 1) * *n;
	cunt01_("Columns", n, m, &u[u_offset], ldu, &work[1], &i__1, &rwork[1]
, &result[2]);
    }

    return 0;

/*     End of CHET22 */

} /* chet22_ */

Example #15

Show file

File: chfrk.c Project: 0u812/roadrunner-backup

/* Subroutine */ int chfrk_(char *transr, char *uplo, char *trans, integer *n, 
	 integer *k, real *alpha, complex *a, integer *lda, real *beta, 
	complex *c__)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2;
    complex q__1;

    /* Local variables */
    integer j, n1, n2, nk, info;
    complex cbeta;
    logical normaltransr;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *), cherk_(char *, 
	    char *, integer *, integer *, real *, complex *, integer *, real *
, complex *, integer *);
    extern logical lsame_(char *, char *);
    integer nrowa;
    logical lower;
    complex calpha;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    logical nisodd, notrans;


/*  -- LAPACK routine (version 3.2)                                    -- */

/*  -- Contributed by Julien Langou of the Univ. of Colorado Denver    -- */
/*  -- November 2008                                                   -- */

/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */

/*     .. */
/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  Level 3 BLAS like routine for C in RFP Format. */

/*  CHFRK performs one of the Hermitian rank--k operations */

/*     C := alpha*A*conjg( A' ) + beta*C, */

/*  or */

/*     C := alpha*conjg( A' )*A + beta*C, */

/*  where alpha and beta are real scalars, C is an n--by--n Hermitian */
/*  matrix and A is an n--by--k matrix in the first case and a k--by--n */
/*  matrix in the second case. */

/*  Arguments */
/*  ========== */

/*  TRANSR - (input) CHARACTER. */
/*          = 'N':  The Normal Form of RFP A is stored; */
/*          = 'C':  The Conjugate-transpose Form of RFP A is stored. */

/*  UPLO   - (input) CHARACTER. */
/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
/*           triangular  part  of the  array  C  is to be  referenced  as */
/*           follows: */

/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
/*                                  is to be referenced. */

/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
/*                                  is to be referenced. */

/*           Unchanged on exit. */

/*  TRANS  - (input) CHARACTER. */
/*           On entry,  TRANS  specifies the operation to be performed as */
/*           follows: */

/*              TRANS = 'N' or 'n'   C := alpha*A*conjg( A' ) + beta*C. */

/*              TRANS = 'C' or 'c'   C := alpha*conjg( A' )*A + beta*C. */

/*           Unchanged on exit. */

/*  N      - (input) INTEGER. */
/*           On entry,  N specifies the order of the matrix C.  N must be */
/*           at least zero. */
/*           Unchanged on exit. */

/*  K      - (input) INTEGER. */
/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
/*           of  columns   of  the   matrix   A,   and  on   entry   with */
/*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the */
/*           matrix A.  K must be at least zero. */
/*           Unchanged on exit. */

/*  ALPHA  - (input) REAL. */
/*           On entry, ALPHA specifies the scalar alpha. */
/*           Unchanged on exit. */

/*  A      - (input) COMPLEX array of DIMENSION ( LDA, ka ), where KA */
/*           is K  when TRANS = 'N' or 'n', and is N otherwise. Before */
/*           entry with TRANS = 'N' or 'n', the leading N--by--K part of */
/*           the array A must contain the matrix A, otherwise the leading */
/*           K--by--N part of the array A must contain the matrix A. */
/*           Unchanged on exit. */

/*  LDA    - (input) INTEGER. */
/*           On entry, LDA specifies the first dimension of A as declared */
/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
/*           be at least  max( 1, k ). */
/*           Unchanged on exit. */

/*  BETA   - (input) REAL. */
/*           On entry, BETA specifies the scalar beta. */
/*           Unchanged on exit. */

/*  C      - (input/output) COMPLEX array, dimension ( N*(N+1)/2 ). */
/*           On entry, the matrix A in RFP Format. RFP Format is */
/*           described by TRANSR, UPLO and N. Note that the imaginary */
/*           parts of the diagonal elements need not be set, they are */
/*           assumed to be zero, and on exit they are set to zero. */

/*  Arguments */
/*  ========== */

/*     .. */
/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */


/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --c__;

    /* Function Body */
    info = 0;
    normaltransr = lsame_(transr, "N");
    lower = lsame_(uplo, "L");
    notrans = lsame_(trans, "N");

    if (notrans) {
	nrowa = *n;
    } else {
	nrowa = *k;
    }

    if (! normaltransr && ! lsame_(transr, "C")) {
	info = -1;
    } else if (! lower && ! lsame_(uplo, "U")) {
	info = -2;
    } else if (! notrans && ! lsame_(trans, "C")) {
	info = -3;
    } else if (*n < 0) {
	info = -4;
    } else if (*k < 0) {
	info = -5;
    } else if (*lda < max(1,nrowa)) {
	info = -8;
    }
    if (info != 0) {
	i__1 = -info;
	xerbla_("CHFRK ", &i__1);
	return 0;
    }

/*     Quick return if possible. */

/*     The quick return case: ((ALPHA.EQ.0).AND.(BETA.NE.ZERO)) is not */
/*     done (it is in CHERK for example) and left in the general case. */

    if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) {
	return 0;
    }

    if (*alpha == 0.f && *beta == 0.f) {
	i__1 = *n * (*n + 1) / 2;
	for (j = 1; j <= i__1; ++j) {
	    i__2 = j;
	    c__[i__2].r = 0.f, c__[i__2].i = 0.f;
	}
	return 0;
    }

    q__1.r = *alpha, q__1.i = 0.f;
    calpha.r = q__1.r, calpha.i = q__1.i;
    q__1.r = *beta, q__1.i = 0.f;
    cbeta.r = q__1.r, cbeta.i = q__1.i;

/*     C is N-by-N. */
/*     If N is odd, set NISODD = .TRUE., and N1 and N2. */
/*     If N is even, NISODD = .FALSE., and NK. */

    if (*n % 2 == 0) {
	nisodd = FALSE_;
	nk = *n / 2;
    } else {
	nisodd = TRUE_;
	if (lower) {
	    n2 = *n / 2;
	    n1 = *n - n2;
	} else {
	    n1 = *n / 2;
	    n2 = *n - n1;
	}
    }

    if (nisodd) {

/*        N is odd */

	if (normaltransr) {

/*           N is odd and TRANSR = 'N' */

	    if (lower) {

/*              N is odd, TRANSR = 'N', and UPLO = 'L' */

		if (notrans) {

/*                 N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' */

		    cherk_("L", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[1], n);
		    cherk_("U", "N", &n2, k, alpha, &a[n1 + 1 + a_dim1], lda, 
			    beta, &c__[*n + 1], n);
		    cgemm_("N", "C", &n2, &n1, k, &calpha, &a[n1 + 1 + a_dim1]
, lda, &a[a_dim1 + 1], lda, &cbeta, &c__[n1 + 1], 
			    n);

		} else {

/*                 N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'C' */

		    cherk_("L", "C", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[1], n);
		    cherk_("U", "C", &n2, k, alpha, &a[(n1 + 1) * a_dim1 + 1], 
			     lda, beta, &c__[*n + 1], n)
			    ;
		    cgemm_("C", "N", &n2, &n1, k, &calpha, &a[(n1 + 1) * 
			    a_dim1 + 1], lda, &a[a_dim1 + 1], lda, &cbeta, &
			    c__[n1 + 1], n);

		}

	    } else {

/*              N is odd, TRANSR = 'N', and UPLO = 'U' */

		if (notrans) {

/*                 N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' */

		    cherk_("L", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[n2 + 1], n);
		    cherk_("U", "N", &n2, k, alpha, &a[n2 + a_dim1], lda, 
			    beta, &c__[n1 + 1], n);
		    cgemm_("N", "C", &n1, &n2, k, &calpha, &a[a_dim1 + 1], 
			    lda, &a[n2 + a_dim1], lda, &cbeta, &c__[1], n);

		} else {

/*                 N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'C' */

		    cherk_("L", "C", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[n2 + 1], n);
		    cherk_("U", "C", &n2, k, alpha, &a[n2 * a_dim1 + 1], lda, 
			    beta, &c__[n1 + 1], n);
		    cgemm_("C", "N", &n1, &n2, k, &calpha, &a[a_dim1 + 1], 
			    lda, &a[n2 * a_dim1 + 1], lda, &cbeta, &c__[1], n);

		}

	    }

	} else {

/*           N is odd, and TRANSR = 'C' */

	    if (lower) {

/*              N is odd, TRANSR = 'C', and UPLO = 'L' */

		if (notrans) {

/*                 N is odd, TRANSR = 'C', UPLO = 'L', and TRANS = 'N' */

		    cherk_("U", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[1], &n1);
		    cherk_("L", "N", &n2, k, alpha, &a[n1 + 1 + a_dim1], lda, 
			    beta, &c__[2], &n1);
		    cgemm_("N", "C", &n1, &n2, k, &calpha, &a[a_dim1 + 1], 
			    lda, &a[n1 + 1 + a_dim1], lda, &cbeta, &c__[n1 * 
			    n1 + 1], &n1);

		} else {

/*                 N is odd, TRANSR = 'C', UPLO = 'L', and TRANS = 'C' */

		    cherk_("U", "C", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[1], &n1);
		    cherk_("L", "C", &n2, k, alpha, &a[(n1 + 1) * a_dim1 + 1], 
			     lda, beta, &c__[2], &n1);
		    cgemm_("C", "N", &n1, &n2, k, &calpha, &a[a_dim1 + 1], 
			    lda, &a[(n1 + 1) * a_dim1 + 1], lda, &cbeta, &c__[
			    n1 * n1 + 1], &n1);

		}

	    } else {

/*              N is odd, TRANSR = 'C', and UPLO = 'U' */

		if (notrans) {

/*                 N is odd, TRANSR = 'C', UPLO = 'U', and TRANS = 'N' */

		    cherk_("U", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[n2 * n2 + 1], &n2);
		    cherk_("L", "N", &n2, k, alpha, &a[n1 + 1 + a_dim1], lda, 
			    beta, &c__[n1 * n2 + 1], &n2);
		    cgemm_("N", "C", &n2, &n1, k, &calpha, &a[n1 + 1 + a_dim1]
, lda, &a[a_dim1 + 1], lda, &cbeta, &c__[1], &n2);

		} else {

/*                 N is odd, TRANSR = 'C', UPLO = 'U', and TRANS = 'C' */

		    cherk_("U", "C", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[n2 * n2 + 1], &n2);
		    cherk_("L", "C", &n2, k, alpha, &a[(n1 + 1) * a_dim1 + 1], 
			     lda, beta, &c__[n1 * n2 + 1], &n2);
		    cgemm_("C", "N", &n2, &n1, k, &calpha, &a[(n1 + 1) * 
			    a_dim1 + 1], lda, &a[a_dim1 + 1], lda, &cbeta, &
			    c__[1], &n2);

		}

	    }

	}

    } else {

/*        N is even */

	if (normaltransr) {

/*           N is even and TRANSR = 'N' */

	    if (lower) {

/*              N is even, TRANSR = 'N', and UPLO = 'L' */

		if (notrans) {

/*                 N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' */

		    i__1 = *n + 1;
		    cherk_("L", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[2], &i__1);
		    i__1 = *n + 1;
		    cherk_("U", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, 
			    beta, &c__[1], &i__1);
		    i__1 = *n + 1;
		    cgemm_("N", "C", &nk, &nk, k, &calpha, &a[nk + 1 + a_dim1]
, lda, &a[a_dim1 + 1], lda, &cbeta, &c__[nk + 2], 
			    &i__1);

		} else {

/*                 N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'C' */

		    i__1 = *n + 1;
		    cherk_("L", "C", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[2], &i__1);
		    i__1 = *n + 1;
		    cherk_("U", "C", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], 
			     lda, beta, &c__[1], &i__1);
		    i__1 = *n + 1;
		    cgemm_("C", "N", &nk, &nk, k, &calpha, &a[(nk + 1) * 
			    a_dim1 + 1], lda, &a[a_dim1 + 1], lda, &cbeta, &
			    c__[nk + 2], &i__1);

		}

	    } else {

/*              N is even, TRANSR = 'N', and UPLO = 'U' */

		if (notrans) {

/*                 N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' */

		    i__1 = *n + 1;
		    cherk_("L", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk + 2], &i__1);
		    i__1 = *n + 1;
		    cherk_("U", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, 
			    beta, &c__[nk + 1], &i__1);
		    i__1 = *n + 1;
		    cgemm_("N", "C", &nk, &nk, k, &calpha, &a[a_dim1 + 1], 
			    lda, &a[nk + 1 + a_dim1], lda, &cbeta, &c__[1], &
			    i__1);

		} else {

/*                 N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'C' */

		    i__1 = *n + 1;
		    cherk_("L", "C", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk + 2], &i__1);
		    i__1 = *n + 1;
		    cherk_("U", "C", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], 
			     lda, beta, &c__[nk + 1], &i__1);
		    i__1 = *n + 1;
		    cgemm_("C", "N", &nk, &nk, k, &calpha, &a[a_dim1 + 1], 
			    lda, &a[(nk + 1) * a_dim1 + 1], lda, &cbeta, &c__[
			    1], &i__1);

		}

	    }

	} else {

/*           N is even, and TRANSR = 'C' */

	    if (lower) {

/*              N is even, TRANSR = 'C', and UPLO = 'L' */

		if (notrans) {

/*                 N is even, TRANSR = 'C', UPLO = 'L', and TRANS = 'N' */

		    cherk_("U", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk + 1], &nk);
		    cherk_("L", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, 
			    beta, &c__[1], &nk);
		    cgemm_("N", "C", &nk, &nk, k, &calpha, &a[a_dim1 + 1], 
			    lda, &a[nk + 1 + a_dim1], lda, &cbeta, &c__[(nk + 
			    1) * nk + 1], &nk);

		} else {

/*                 N is even, TRANSR = 'C', UPLO = 'L', and TRANS = 'C' */

		    cherk_("U", "C", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk + 1], &nk);
		    cherk_("L", "C", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], 
			     lda, beta, &c__[1], &nk);
		    cgemm_("C", "N", &nk, &nk, k, &calpha, &a[a_dim1 + 1], 
			    lda, &a[(nk + 1) * a_dim1 + 1], lda, &cbeta, &c__[
			    (nk + 1) * nk + 1], &nk);

		}

	    } else {

/*              N is even, TRANSR = 'C', and UPLO = 'U' */

		if (notrans) {

/*                 N is even, TRANSR = 'C', UPLO = 'U', and TRANS = 'N' */

		    cherk_("U", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk * (nk + 1) + 1], &nk);
		    cherk_("L", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, 
			    beta, &c__[nk * nk + 1], &nk);
		    cgemm_("N", "C", &nk, &nk, k, &calpha, &a[nk + 1 + a_dim1]
, lda, &a[a_dim1 + 1], lda, &cbeta, &c__[1], &nk);

		} else {

/*                 N is even, TRANSR = 'C', UPLO = 'U', and TRANS = 'C' */

		    cherk_("U", "C", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk * (nk + 1) + 1], &nk);
		    cherk_("L", "C", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], 
			     lda, beta, &c__[nk * nk + 1], &nk);
		    cgemm_("C", "N", &nk, &nk, k, &calpha, &a[(nk + 1) * 
			    a_dim1 + 1], lda, &a[a_dim1 + 1], lda, &cbeta, &
			    c__[1], &nk);

		}

	    }

	}

    }

    return 0;

/*     End of CHFRK */

} /* chfrk_ */

Example #16

Show file

File: claqr2.c Project: 0u812/roadrunner-backup

/* Subroutine */ int claqr2_(logical *wantt, logical *wantz, integer *n, 
	integer *ktop, integer *kbot, integer *nw, complex *h__, integer *ldh, 
	 integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer *
	ns, integer *nd, complex *sh, complex *v, integer *ldv, integer *nh, 
	complex *t, integer *ldt, integer *nv, complex *wv, integer *ldwv, 
	complex *work, integer *lwork)
{
    /* System generated locals */
    integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, 
	    wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4;
    real r__1, r__2, r__3, r__4, r__5, r__6;
    complex q__1, q__2;

    /* Builtin functions */
    double r_imag(complex *);
    void r_cnjg(complex *, complex *);

    /* Local variables */
    integer i__, j;
    complex s;
    integer jw;
    real foo;
    integer kln;
    complex tau;
    integer knt;
    real ulp;
    integer lwk1, lwk2;
    complex beta;
    integer kcol, info, ifst, ilst, ltop, krow;
    extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex *
, integer *, complex *, complex *, integer *, complex *), 
	    cgemm_(char *, char *, integer *, integer *, integer *, complex *, 
	     complex *, integer *, complex *, integer *, complex *, complex *, 
	     integer *), ccopy_(integer *, complex *, integer 
	    *, complex *, integer *);
    integer infqr, kwtop;
    extern /* Subroutine */ int slabad_(real *, real *), cgehrd_(integer *, 
	    integer *, integer *, complex *, integer *, complex *, complex *, 
	    integer *, integer *), clarfg_(integer *, complex *, complex *, 
	    integer *, complex *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int clahqr_(logical *, logical *, integer *, 
	    integer *, integer *, complex *, integer *, complex *, integer *, 
	    integer *, complex *, integer *, integer *), clacpy_(char *, 
	    integer *, integer *, complex *, integer *, complex *, integer *), claset_(char *, integer *, integer *, complex *, complex 
	    *, complex *, integer *);
    real safmin, safmax;
    extern /* Subroutine */ int ctrexc_(char *, integer *, complex *, integer 
	    *, complex *, integer *, integer *, integer *, integer *),
	     cunmhr_(char *, char *, integer *, integer *, integer *, integer 
	    *, complex *, integer *, complex *, complex *, integer *, complex 
	    *, integer *, integer *);
    real smlnum;
    integer lwkopt;


/*  -- LAPACK auxiliary routine (version 3.2.1)                        -- */
/*     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. */
/*  -- April 2009                                                      -- */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*     This subroutine is identical to CLAQR3 except that it avoids */
/*     recursion by calling CLAHQR instead of CLAQR4. */


/*     ****************************************************************** */
/*     Aggressive early deflation: */

/*     This subroutine accepts as input an upper Hessenberg matrix */
/*     H and performs an unitary similarity transformation */
/*     designed to detect and deflate fully converged eigenvalues from */
/*     a trailing principal submatrix.  On output H has been over- */
/*     written by a new Hessenberg matrix that is a perturbation of */
/*     an unitary similarity transformation of H.  It is to be */
/*     hoped that the final version of H has many zero subdiagonal */
/*     entries. */

/*     ****************************************************************** */
/*     WANTT   (input) LOGICAL */
/*          If .TRUE., then the Hessenberg matrix H is fully updated */
/*          so that the triangular Schur factor may be */
/*          computed (in cooperation with the calling subroutine). */
/*          If .FALSE., then only enough of H is updated to preserve */
/*          the eigenvalues. */

/*     WANTZ   (input) LOGICAL */
/*          If .TRUE., then the unitary matrix Z is updated so */
/*          so that the unitary Schur factor may be computed */
/*          (in cooperation with the calling subroutine). */
/*          If .FALSE., then Z is not referenced. */

/*     N       (input) INTEGER */
/*          The order of the matrix H and (if WANTZ is .TRUE.) the */
/*          order of the unitary matrix Z. */

/*     KTOP    (input) INTEGER */
/*          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. */
/*          KBOT and KTOP together determine an isolated block */
/*          along the diagonal of the Hessenberg matrix. */

/*     KBOT    (input) INTEGER */
/*          It is assumed without a check that either */
/*          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together */
/*          determine an isolated block along the diagonal of the */
/*          Hessenberg matrix. */

/*     NW      (input) INTEGER */
/*          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1). */

/*     H       (input/output) COMPLEX array, dimension (LDH,N) */
/*          On input the initial N-by-N section of H stores the */
/*          Hessenberg matrix undergoing aggressive early deflation. */
/*          On output H has been transformed by a unitary */
/*          similarity transformation, perturbed, and the returned */
/*          to Hessenberg form that (it is to be hoped) has some */
/*          zero subdiagonal entries. */

/*     LDH     (input) integer */
/*          Leading dimension of H just as declared in the calling */
/*          subroutine.  N .LE. LDH */

/*     ILOZ    (input) INTEGER */
/*     IHIZ    (input) INTEGER */
/*          Specify the rows of Z to which transformations must be */
/*          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. */

/*     Z       (input/output) COMPLEX array, dimension (LDZ,N) */
/*          IF WANTZ is .TRUE., then on output, the unitary */
/*          similarity transformation mentioned above has been */
/*          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. */
/*          If WANTZ is .FALSE., then Z is unreferenced. */

/*     LDZ     (input) integer */
/*          The leading dimension of Z just as declared in the */
/*          calling subroutine.  1 .LE. LDZ. */

/*     NS      (output) integer */
/*          The number of unconverged (ie approximate) eigenvalues */
/*          returned in SR and SI that may be used as shifts by the */
/*          calling subroutine. */

/*     ND      (output) integer */
/*          The number of converged eigenvalues uncovered by this */
/*          subroutine. */

/*     SH      (output) COMPLEX array, dimension KBOT */
/*          On output, approximate eigenvalues that may */
/*          be used for shifts are stored in SH(KBOT-ND-NS+1) */
/*          through SR(KBOT-ND).  Converged eigenvalues are */
/*          stored in SH(KBOT-ND+1) through SH(KBOT). */

/*     V       (workspace) COMPLEX array, dimension (LDV,NW) */
/*          An NW-by-NW work array. */

/*     LDV     (input) integer scalar */
/*          The leading dimension of V just as declared in the */
/*          calling subroutine.  NW .LE. LDV */

/*     NH      (input) integer scalar */
/*          The number of columns of T.  NH.GE.NW. */

/*     T       (workspace) COMPLEX array, dimension (LDT,NW) */

/*     LDT     (input) integer */
/*          The leading dimension of T just as declared in the */
/*          calling subroutine.  NW .LE. LDT */

/*     NV      (input) integer */
/*          The number of rows of work array WV available for */
/*          workspace.  NV.GE.NW. */

/*     WV      (workspace) COMPLEX array, dimension (LDWV,NW) */

/*     LDWV    (input) integer */
/*          The leading dimension of W just as declared in the */
/*          calling subroutine.  NW .LE. LDV */

/*     WORK    (workspace) COMPLEX array, dimension LWORK. */
/*          On exit, WORK(1) is set to an estimate of the optimal value */
/*          of LWORK for the given values of N, NW, KTOP and KBOT. */

/*     LWORK   (input) integer */
/*          The dimension of the work array WORK.  LWORK = 2*NW */
/*          suffices, but greater efficiency may result from larger */
/*          values of LWORK. */

/*          If LWORK = -1, then a workspace query is assumed; CLAQR2 */
/*          only estimates the optimal workspace size for the given */
/*          values of N, NW, KTOP and KBOT.  The estimate is returned */
/*          in WORK(1).  No error message related to LWORK is issued */
/*          by XERBLA.  Neither H nor Z are accessed. */

/*     ================================================================ */
/*     Based on contributions by */
/*        Karen Braman and Ralph Byers, Department of Mathematics, */
/*        University of Kansas, USA */

/*     ================================================================ */
/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Statement Functions .. */
/*     .. */
/*     .. Statement Function definitions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     ==== Estimate optimal workspace. ==== */

    /* Parameter adjustments */
    h_dim1 = *ldh;
    h_offset = 1 + h_dim1;
    h__ -= h_offset;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1;
    z__ -= z_offset;
    --sh;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1;
    t -= t_offset;
    wv_dim1 = *ldwv;
    wv_offset = 1 + wv_dim1;
    wv -= wv_offset;
    --work;

    /* Function Body */
/* Computing MIN */
    i__1 = *nw, i__2 = *kbot - *ktop + 1;
    jw = min(i__1,i__2);
    if (jw <= 2) {
	lwkopt = 1;
    } else {

/*        ==== Workspace query call to CGEHRD ==== */

	i__1 = jw - 1;
	cgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], &
		c_n1, &info);
	lwk1 = (integer) work[1].r;

/*        ==== Workspace query call to CUNMHR ==== */

	i__1 = jw - 1;
	cunmhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], 
		 &v[v_offset], ldv, &work[1], &c_n1, &info);
	lwk2 = (integer) work[1].r;

/*        ==== Optimal workspace ==== */

	lwkopt = jw + max(lwk1,lwk2);
    }

/*     ==== Quick return in case of workspace query. ==== */

    if (*lwork == -1) {
	r__1 = (real) lwkopt;
	q__1.r = r__1, q__1.i = 0.f;
	work[1].r = q__1.r, work[1].i = q__1.i;
	return 0;
    }

/*     ==== Nothing to do ... */
/*     ... for an empty active block ... ==== */
    *ns = 0;
    *nd = 0;
    work[1].r = 1.f, work[1].i = 0.f;
    if (*ktop > *kbot) {
	return 0;
    }
/*     ... nor for an empty deflation window. ==== */
    if (*nw < 1) {
	return 0;
    }

/*     ==== Machine constants ==== */

    safmin = slamch_("SAFE MINIMUM");
    safmax = 1.f / safmin;
    slabad_(&safmin, &safmax);
    ulp = slamch_("PRECISION");
    smlnum = safmin * ((real) (*n) / ulp);

/*     ==== Setup deflation window ==== */

/* Computing MIN */
    i__1 = *nw, i__2 = *kbot - *ktop + 1;
    jw = min(i__1,i__2);
    kwtop = *kbot - jw + 1;
    if (kwtop == *ktop) {
	s.r = 0.f, s.i = 0.f;
    } else {
	i__1 = kwtop + (kwtop - 1) * h_dim1;
	s.r = h__[i__1].r, s.i = h__[i__1].i;
    }

    if (*kbot == kwtop) {

/*        ==== 1-by-1 deflation window: not much to do ==== */

	i__1 = kwtop;
	i__2 = kwtop + kwtop * h_dim1;
	sh[i__1].r = h__[i__2].r, sh[i__1].i = h__[i__2].i;
	*ns = 1;
	*nd = 0;
/* Computing MAX */
	i__1 = kwtop + kwtop * h_dim1;
	r__5 = smlnum, r__6 = ulp * ((r__1 = h__[i__1].r, dabs(r__1)) + (r__2 
		= r_imag(&h__[kwtop + kwtop * h_dim1]), dabs(r__2)));
	if ((r__3 = s.r, dabs(r__3)) + (r__4 = r_imag(&s), dabs(r__4)) <= 
		dmax(r__5,r__6)) {
	    *ns = 0;
	    *nd = 1;
	    if (kwtop > *ktop) {
		i__1 = kwtop + (kwtop - 1) * h_dim1;
		h__[i__1].r = 0.f, h__[i__1].i = 0.f;
	    }
	}
	work[1].r = 1.f, work[1].i = 0.f;
	return 0;
    }

/*     ==== Convert to spike-triangular form.  (In case of a */
/*     .    rare QR failure, this routine continues to do */
/*     .    aggressive early deflation using that part of */
/*     .    the deflation window that converged using INFQR */
/*     .    here and there to keep track.) ==== */

    clacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], 
	    ldt);
    i__1 = jw - 1;
    i__2 = *ldh + 1;
    i__3 = *ldt + 1;
    ccopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], &
	    i__3);

    claset_("A", &jw, &jw, &c_b1, &c_b2, &v[v_offset], ldv);
    clahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[kwtop], 
	    &c__1, &jw, &v[v_offset], ldv, &infqr);

/*     ==== Deflation detection loop ==== */

    *ns = jw;
    ilst = infqr + 1;
    i__1 = jw;
    for (knt = infqr + 1; knt <= i__1; ++knt) {

/*        ==== Small spike tip deflation test ==== */

	i__2 = *ns + *ns * t_dim1;
	foo = (r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[*ns + *ns * 
		t_dim1]), dabs(r__2));
	if (foo == 0.f) {
	    foo = (r__1 = s.r, dabs(r__1)) + (r__2 = r_imag(&s), dabs(r__2));
	}
	i__2 = *ns * v_dim1 + 1;
/* Computing MAX */
	r__5 = smlnum, r__6 = ulp * foo;
	if (((r__1 = s.r, dabs(r__1)) + (r__2 = r_imag(&s), dabs(r__2))) * ((
		r__3 = v[i__2].r, dabs(r__3)) + (r__4 = r_imag(&v[*ns * 
		v_dim1 + 1]), dabs(r__4))) <= dmax(r__5,r__6)) {

/*           ==== One more converged eigenvalue ==== */

	    --(*ns);
	} else {

/*           ==== One undeflatable eigenvalue.  Move it up out of the */
/*           .    way.   (CTREXC can not fail in this case.) ==== */

	    ifst = *ns;
	    ctrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, &
		    ilst, &info);
	    ++ilst;
	}
/* L10: */
    }

/*        ==== Return to Hessenberg form ==== */

    if (*ns == 0) {
	s.r = 0.f, s.i = 0.f;
    }

    if (*ns < jw) {

/*        ==== sorting the diagonal of T improves accuracy for */
/*        .    graded matrices.  ==== */

	i__1 = *ns;
	for (i__ = infqr + 1; i__ <= i__1; ++i__) {
	    ifst = i__;
	    i__2 = *ns;
	    for (j = i__ + 1; j <= i__2; ++j) {
		i__3 = j + j * t_dim1;
		i__4 = ifst + ifst * t_dim1;
		if ((r__1 = t[i__3].r, dabs(r__1)) + (r__2 = r_imag(&t[j + j *
			 t_dim1]), dabs(r__2)) > (r__3 = t[i__4].r, dabs(r__3)
			) + (r__4 = r_imag(&t[ifst + ifst * t_dim1]), dabs(
			r__4))) {
		    ifst = j;
		}
/* L20: */
	    }
	    ilst = i__;
	    if (ifst != ilst) {
		ctrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, 
			 &ilst, &info);
	    }
/* L30: */
	}
    }

/*     ==== Restore shift/eigenvalue array from T ==== */

    i__1 = jw;
    for (i__ = infqr + 1; i__ <= i__1; ++i__) {
	i__2 = kwtop + i__ - 1;
	i__3 = i__ + i__ * t_dim1;
	sh[i__2].r = t[i__3].r, sh[i__2].i = t[i__3].i;
/* L40: */
    }


    if (*ns < jw || s.r == 0.f && s.i == 0.f) {
	if (*ns > 1 && (s.r != 0.f || s.i != 0.f)) {

/*           ==== Reflect spike back into lower triangle ==== */

	    ccopy_(ns, &v[v_offset], ldv, &work[1], &c__1);
	    i__1 = *ns;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		i__2 = i__;
		r_cnjg(&q__1, &work[i__]);
		work[i__2].r = q__1.r, work[i__2].i = q__1.i;
/* L50: */
	    }
	    beta.r = work[1].r, beta.i = work[1].i;
	    clarfg_(ns, &beta, &work[2], &c__1, &tau);
	    work[1].r = 1.f, work[1].i = 0.f;

	    i__1 = jw - 2;
	    i__2 = jw - 2;
	    claset_("L", &i__1, &i__2, &c_b1, &c_b1, &t[t_dim1 + 3], ldt);

	    r_cnjg(&q__1, &tau);
	    clarf_("L", ns, &jw, &work[1], &c__1, &q__1, &t[t_offset], ldt, &
		    work[jw + 1]);
	    clarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, &
		    work[jw + 1]);
	    clarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, &
		    work[jw + 1]);

	    i__1 = *lwork - jw;
	    cgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1]
, &i__1, &info);
	}

/*        ==== Copy updated reduced window into place ==== */

	if (kwtop > 1) {
	    i__1 = kwtop + (kwtop - 1) * h_dim1;
	    r_cnjg(&q__2, &v[v_dim1 + 1]);
	    q__1.r = s.r * q__2.r - s.i * q__2.i, q__1.i = s.r * q__2.i + s.i 
		    * q__2.r;
	    h__[i__1].r = q__1.r, h__[i__1].i = q__1.i;
	}
	clacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1]
, ldh);
	i__1 = jw - 1;
	i__2 = *ldt + 1;
	i__3 = *ldh + 1;
	ccopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], 
		 &i__3);

/*        ==== Accumulate orthogonal matrix in order update */
/*        .    H and Z, if requested.  ==== */

	if (*ns > 1 && (s.r != 0.f || s.i != 0.f)) {
	    i__1 = *lwork - jw;
	    cunmhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], 
		     &v[v_offset], ldv, &work[jw + 1], &i__1, &info);
	}

/*        ==== Update vertical slab in H ==== */

	if (*wantt) {
	    ltop = 1;
	} else {
	    ltop = *ktop;
	}
	i__1 = kwtop - 1;
	i__2 = *nv;
	for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += 
		i__2) {
/* Computing MIN */
	    i__3 = *nv, i__4 = kwtop - krow;
	    kln = min(i__3,i__4);
	    cgemm_("N", "N", &kln, &jw, &jw, &c_b2, &h__[krow + kwtop * 
		    h_dim1], ldh, &v[v_offset], ldv, &c_b1, &wv[wv_offset], 
		    ldwv);
	    clacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * 
		    h_dim1], ldh);
/* L60: */
	}

/*        ==== Update horizontal slab in H ==== */

	if (*wantt) {
	    i__2 = *n;
	    i__1 = *nh;
	    for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; 
		    kcol += i__1) {
/* Computing MIN */
		i__3 = *nh, i__4 = *n - kcol + 1;
		kln = min(i__3,i__4);
		cgemm_("C", "N", &jw, &kln, &jw, &c_b2, &v[v_offset], ldv, &
			h__[kwtop + kcol * h_dim1], ldh, &c_b1, &t[t_offset], 
			ldt);
		clacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol *
			 h_dim1], ldh);
/* L70: */
	    }
	}

/*        ==== Update vertical slab in Z ==== */

	if (*wantz) {
	    i__1 = *ihiz;
	    i__2 = *nv;
	    for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow +=
		     i__2) {
/* Computing MIN */
		i__3 = *nv, i__4 = *ihiz - krow + 1;
		kln = min(i__3,i__4);
		cgemm_("N", "N", &kln, &jw, &jw, &c_b2, &z__[krow + kwtop * 
			z_dim1], ldz, &v[v_offset], ldv, &c_b1, &wv[wv_offset]
, ldwv);
		clacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + 
			kwtop * z_dim1], ldz);
/* L80: */
	    }
	}
    }

/*     ==== Return the number of deflations ... ==== */

    *nd = jw - *ns;

/*     ==== ... and the number of shifts. (Subtracting */
/*     .    INFQR from the spike length takes care */
/*     .    of the case of a rare QR failure while */
/*     .    calculating eigenvalues of the deflation */
/*     .    window.)  ==== */

    *ns -= infqr;

/*      ==== Return optimal workspace. ==== */

    r__1 = (real) lwkopt;
    q__1.r = r__1, q__1.i = 0.f;
    work[1].r = q__1.r, work[1].i = q__1.i;

/*     ==== End of CLAQR2 ==== */

    return 0;
} /* claqr2_ */

Example #17

Show file

File: cgebrd.c Project: deepakantony/vispack

/* Subroutine */ int cgebrd_(integer *m, integer *n, complex *a, integer *lda,
	 real *d, real *e, complex *tauq, complex *taup, complex *work, 
	integer *lwork, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CGEBRD reduces a general complex M-by-N matrix A to upper or lower   
    bidiagonal form B by a unitary transformation: Q**H * A * P = B.   

    If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows in the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns in the matrix A.  N >= 0.   

    A       (input/output) COMPLEX array, dimension (LDA,N)   
            On entry, the M-by-N general matrix to be reduced.   
            On exit,   
            if m >= n, the diagonal and the first superdiagonal are   
              overwritten with the upper bidiagonal matrix B; the   
              elements below the diagonal, with the array TAUQ, represent 
  
              the unitary matrix Q as a product of elementary   
              reflectors, and the elements above the first superdiagonal, 
  
              with the array TAUP, represent the unitary matrix P as   
              a product of elementary reflectors;   
            if m < n, the diagonal and the first subdiagonal are   
              overwritten with the lower bidiagonal matrix B; the   
              elements below the first subdiagonal, with the array TAUQ, 
  
              represent the unitary matrix Q as a product of   
              elementary reflectors, and the elements above the diagonal, 
  
              with the array TAUP, represent the unitary matrix P as   
              a product of elementary reflectors.   
            See Further Details.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,M).   

    D       (output) REAL array, dimension (min(M,N))   
            The diagonal elements of the bidiagonal matrix B:   
            D(i) = A(i,i).   

    E       (output) REAL array, dimension (min(M,N)-1)   
            The off-diagonal elements of the bidiagonal matrix B:   
            if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;   
            if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.   

    TAUQ    (output) COMPLEX array dimension (min(M,N))   
            The scalar factors of the elementary reflectors which   
            represent the unitary matrix Q. See Further Details.   

    TAUP    (output) COMPLEX array, dimension (min(M,N))   
            The scalar factors of the elementary reflectors which   
            represent the unitary matrix P. See Further Details.   

    WORK    (workspace/output) COMPLEX array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The length of the array WORK.  LWORK >= max(1,M,N).   
            For optimum performance LWORK >= (M+N)*NB, where NB   
            is the optimal blocksize.   

    INFO    (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    Further Details   
    ===============   

    The matrices Q and P are represented as products of elementary   
    reflectors:   

    If m >= n,   

       Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)   

    Each H(i) and G(i) has the form:   

       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'   

    where tauq and taup are complex scalars, and v and u are complex   
    vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in   
    A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in 
  
    A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i).   

    If m < n,   

       Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)   

    Each H(i) and G(i) has the form:   

       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'   

    where tauq and taup are complex scalars, and v and u are complex   
    vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in   
    A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in 
  
    A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).   

    The contents of A on exit are illustrated by the following examples: 
  

    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):   

      (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )   
      (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )   
      (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )   
      (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )   
      (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )   
      (  v1  v2  v3  v4  v5 )   

    where d and e denote diagonal and off-diagonal elements of B, vi   
    denotes an element of the vector defining H(i), and ui an element of 
  
    the vector defining G(i).   

    ===================================================================== 
  


       Test the input parameters   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static complex c_b1 = {1.f,0.f};
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static integer c__3 = 3;
    static integer c__2 = 2;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    complex q__1;
    /* Local variables */
    static integer i, j;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *);
    static integer nbmin, iinfo, minmn;
    extern /* Subroutine */ int cgebd2_(integer *, integer *, complex *, 
	    integer *, real *, real *, complex *, complex *, complex *, 
	    integer *);
    static integer nb;
    extern /* Subroutine */ int clabrd_(integer *, integer *, integer *, 
	    complex *, integer *, real *, real *, complex *, complex *, 
	    complex *, integer *, complex *, integer *);
    static integer nx;
    static real ws;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    static integer ldwrkx, ldwrky;



#define D(I) d[(I)-1]
#define E(I) e[(I)-1]
#define TAUQ(I) tauq[(I)-1]
#define TAUP(I) taup[(I)-1]
#define WORK(I) work[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    *info = 0;
    if (*m < 0) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*m)) {
	*info = -4;
    } else /* if(complicated condition) */ {
/* Computing MAX */
	i__1 = max(1,*m);
	if (*lwork < max(i__1,*n)) {
	    *info = -10;
	}
    }
    if (*info < 0) {
	i__1 = -(*info);
	xerbla_("CGEBRD", &i__1);
	return 0;
    }

/*     Quick return if possible */

    minmn = min(*m,*n);
    if (minmn == 0) {
	WORK(1).r = 1.f, WORK(1).i = 0.f;
	return 0;
    }

    ws = (real) max(*m,*n);
    ldwrkx = *m;
    ldwrky = *n;

/*     Set the block size NB and the crossover point NX.   

   Computing MAX */
    i__1 = 1, i__2 = ilaenv_(&c__1, "CGEBRD", " ", m, n, &c_n1, &c_n1, 6L, 1L)
	    ;
    nb = max(i__1,i__2);

    if (nb > 1 && nb < minmn) {

/*        Determine when to switch from blocked to unblocked code.   

   Computing MAX */
	i__1 = nb, i__2 = ilaenv_(&c__3, "CGEBRD", " ", m, n, &c_n1, &c_n1, 
		6L, 1L);
	nx = max(i__1,i__2);
	if (nx < minmn) {
	    ws = (real) ((*m + *n) * nb);
	    if ((real) (*lwork) < ws) {

/*              Not enough work space for the optimal NB, cons
ider using   
                a smaller block size. */

		nbmin = ilaenv_(&c__2, "CGEBRD", " ", m, n, &c_n1, &c_n1, 6L, 
			1L);
		if (*lwork >= (*m + *n) * nbmin) {
		    nb = *lwork / (*m + *n);
		} else {
		    nb = 1;
		    nx = minmn;
		}
	    }
	}
    } else {
	nx = minmn;
    }

    i__1 = minmn - nx;
    i__2 = nb;
    for (i = 1; nb < 0 ? i >= minmn-nx : i <= minmn-nx; i += nb) {

/*        Reduce rows and columns i:i+ib-1 to bidiagonal form and retu
rn   
          the matrices X and Y which are needed to update the unreduce
d   
          part of the matrix */

	i__3 = *m - i + 1;
	i__4 = *n - i + 1;
	clabrd_(&i__3, &i__4, &nb, &A(i,i), lda, &D(i), &E(i), &
		TAUQ(i), &TAUP(i), &WORK(1), &ldwrkx, &WORK(ldwrkx * nb + 1), 
		&ldwrky);

/*        Update the trailing submatrix A(i+ib:m,i+ib:n), using   
          an update of the form  A := A - V*Y' - X*U' */

	i__3 = *m - i - nb + 1;
	i__4 = *n - i - nb + 1;
	q__1.r = -1.f, q__1.i = 0.f;
	cgemm_("No transpose", "Conjugate transpose", &i__3, &i__4, &nb, &
		q__1, &A(i+nb,i), lda, &WORK(ldwrkx * nb + nb + 
		1), &ldwrky, &c_b1, &A(i+nb,i+nb), lda);
	i__3 = *m - i - nb + 1;
	i__4 = *n - i - nb + 1;
	q__1.r = -1.f, q__1.i = 0.f;
	cgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &q__1, &
		WORK(nb + 1), &ldwrkx, &A(i,i+nb), lda, &c_b1, 
		&A(i+nb,i+nb), lda);

/*        Copy diagonal and off-diagonal elements of B back into A */

	if (*m >= *n) {
	    i__3 = i + nb - 1;
	    for (j = i; j <= i+nb-1; ++j) {
		i__4 = j + j * a_dim1;
		i__5 = j;
		A(j,j).r = D(j), A(j,j).i = 0.f;
		i__4 = j + (j + 1) * a_dim1;
		i__5 = j;
		A(j,j+1).r = E(j), A(j,j+1).i = 0.f;
/* L10: */
	    }
	} else {
	    i__3 = i + nb - 1;
	    for (j = i; j <= i+nb-1; ++j) {
		i__4 = j + j * a_dim1;
		i__5 = j;
		A(j,j).r = D(j), A(j,j).i = 0.f;
		i__4 = j + 1 + j * a_dim1;
		i__5 = j;
		A(j+1,j).r = E(j), A(j+1,j).i = 0.f;
/* L20: */
	    }
	}
/* L30: */
    }

/*     Use unblocked code to reduce the remainder of the matrix */

    i__2 = *m - i + 1;
    i__1 = *n - i + 1;
    cgebd2_(&i__2, &i__1, &A(i,i), lda, &D(i), &E(i), &TAUQ(i), &
	    TAUP(i), &WORK(1), &iinfo);
    WORK(1).r = ws, WORK(1).i = 0.f;
    return 0;

/*     End of CGEBRD */

} /* cgebrd_ */

Example #18

Show file

File: ctgsyl.c Project: MichaelH13/sdkpub

/* Subroutine */ int ctgsyl_(char *trans, integer *ijob, integer *m, integer *
	n, complex *a, integer *lda, complex *b, integer *ldb, complex *c__, 
	integer *ldc, complex *d__, integer *ldd, complex *e, integer *lde, 
	complex *f, integer *ldf, real *scale, real *dif, complex *work, 
	integer *lwork, integer *iwork, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    CTGSYL solves the generalized Sylvester equation:   

                A * R - L * B = scale * C            (1)   
                D * R - L * E = scale * F   

    where R and L are unknown m-by-n matrices, (A, D), (B, E) and   
    (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n,   
    respectively, with complex entries. A, B, D and E are upper   
    triangular (i.e., (A,D) and (B,E) in generalized Schur form).   

    The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1   
    is an output scaling factor chosen to avoid overflow.   

    In matrix notation (1) is equivalent to solve Zx = scale*b, where Z   
    is defined as   

           Z = [ kron(In, A)  -kron(B', Im) ]        (2)   
               [ kron(In, D)  -kron(E', Im) ],   

    Here Ix is the identity matrix of size x and X' is the conjugate   
    transpose of X. Kron(X, Y) is the Kronecker product between the   
    matrices X and Y.   

    If TRANS = 'C', y in the conjugate transposed system Z'*y = scale*b   
    is solved for, which is equivalent to solve for R and L in   

                A' * R + D' * L = scale * C           (3)   
                R * B' + L * E' = scale * -F   

    This case (TRANS = 'C') is used to compute an one-norm-based estimate   
    of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D)   
    and (B,E), using CLACON.   

    If IJOB >= 1, CTGSYL computes a Frobenius norm-based estimate of   
    Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the   
    reciprocal of the smallest singular value of Z.   

    This is a level-3 BLAS algorithm.   

    Arguments   
    =========   

    TRANS   (input) CHARACTER*1   
            = 'N': solve the generalized sylvester equation (1).   
            = 'C': solve the "conjugate transposed" system (3).   

    IJOB    (input) INTEGER   
            Specifies what kind of functionality to be performed.   
            =0: solve (1) only.   
            =1: The functionality of 0 and 3.   
            =2: The functionality of 0 and 4.   
            =3: Only an estimate of Dif[(A,D), (B,E)] is computed.   
                (look ahead strategy is used).   
            =4: Only an estimate of Dif[(A,D), (B,E)] is computed.   
                (CGECON on sub-systems is used).   
            Not referenced if TRANS = 'C'.   

    M       (input) INTEGER   
            The order of the matrices A and D, and the row dimension of   
            the matrices C, F, R and L.   

    N       (input) INTEGER   
            The order of the matrices B and E, and the column dimension   
            of the matrices C, F, R and L.   

    A       (input) COMPLEX array, dimension (LDA, M)   
            The upper triangular matrix A.   

    LDA     (input) INTEGER   
            The leading dimension of the array A. LDA >= max(1, M).   

    B       (input) COMPLEX array, dimension (LDB, N)   
            The upper triangular matrix B.   

    LDB     (input) INTEGER   
            The leading dimension of the array B. LDB >= max(1, N).   

    C       (input/output) COMPLEX array, dimension (LDC, N)   
            On entry, C contains the right-hand-side of the first matrix   
            equation in (1) or (3).   
            On exit, if IJOB = 0, 1 or 2, C has been overwritten by   
            the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R,   
            the solution achieved during the computation of the   
            Dif-estimate.   

    LDC     (input) INTEGER   
            The leading dimension of the array C. LDC >= max(1, M).   

    D       (input) COMPLEX array, dimension (LDD, M)   
            The upper triangular matrix D.   

    LDD     (input) INTEGER   
            The leading dimension of the array D. LDD >= max(1, M).   

    E       (input) COMPLEX array, dimension (LDE, N)   
            The upper triangular matrix E.   

    LDE     (input) INTEGER   
            The leading dimension of the array E. LDE >= max(1, N).   

    F       (input/output) COMPLEX array, dimension (LDF, N)   
            On entry, F contains the right-hand-side of the second matrix   
            equation in (1) or (3).   
            On exit, if IJOB = 0, 1 or 2, F has been overwritten by   
            the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L,   
            the solution achieved during the computation of the   
            Dif-estimate.   

    LDF     (input) INTEGER   
            The leading dimension of the array F. LDF >= max(1, M).   

    DIF     (output) REAL   
            On exit DIF is the reciprocal of a lower bound of the   
            reciprocal of the Dif-function, i.e. DIF is an upper bound of   
            Dif[(A,D), (B,E)] = sigma-min(Z), where Z as in (2).   
            IF IJOB = 0 or TRANS = 'C', DIF is not referenced.   

    SCALE   (output) REAL   
            On exit SCALE is the scaling factor in (1) or (3).   
            If 0 < SCALE < 1, C and F hold the solutions R and L, resp.,   
            to a slightly perturbed system but the input matrices A, B,   
            D and E have not been changed. If SCALE = 0, R and L will   
            hold the solutions to the homogenious system with C = F = 0.   

    WORK    (workspace/output) COMPLEX array, dimension (LWORK)   
            IF IJOB = 0, WORK is not referenced.  Otherwise,   

    LWORK   (input) INTEGER   
            The dimension of the array WORK. LWORK > = 1.   
            If IJOB = 1 or 2 and TRANS = 'N', LWORK >= 2*M*N.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    IWORK   (workspace) INTEGER array, dimension (M+N+2)   
            If IJOB = 0, IWORK is not referenced.   

    INFO    (output) INTEGER   
              =0: successful exit   
              <0: If INFO = -i, the i-th argument had an illegal value.   
              >0: (A, D) and (B, E) have common or very close   
                  eigenvalues.   

    Further Details   
    ===============   

    Based on contributions by   
       Bo Kagstrom and Peter Poromaa, Department of Computing Science,   
       Umea University, S-901 87 Umea, Sweden.   

    [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software   
        for Solving the Generalized Sylvester Equation and Estimating the   
        Separation between Regular Matrix Pairs, Report UMINF - 93.23,   
        Department of Computing Science, Umea University, S-901 87 Umea,   
        Sweden, December 1993, Revised April 1994, Also as LAPACK Working   
        Note 75.  To appear in ACM Trans. on Math. Software, Vol 22,   
        No 1, 1996.   

    [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester   
        Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal.   
        Appl., 15(4):1045-1060, 1994.   

    [3] B. Kagstrom and L. Westin, Generalized Schur Methods with   
        Condition Estimators for Solving the Generalized Sylvester   
        Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7,   
        July 1989, pp 745-751.   

    =====================================================================   


       Decode and test input parameters   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__2 = 2;
    static integer c_n1 = -1;
    static integer c__5 = 5;
    static integer c__0 = 0;
    static integer c__1 = 1;
    static complex c_b16 = {0.f,0.f};
    static complex c_b53 = {-1.f,0.f};
    static complex c_b54 = {1.f,0.f};
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, d_dim1, 
	    d_offset, e_dim1, e_offset, f_dim1, f_offset, i__1, i__2, i__3, 
	    i__4;
    complex q__1;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static real dsum;
    static integer i__, j, k, p, q;
    extern /* Subroutine */ int cscal_(integer *, complex *, complex *, 
	    integer *), cgemm_(char *, char *, integer *, integer *, integer *
	    , complex *, complex *, integer *, complex *, integer *, complex *
	    , complex *, integer *);
    extern logical lsame_(char *, char *);
    static integer ifunc, linfo;
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
	    complex *, integer *);
    static integer lwmin;
    static real scale2;
    extern /* Subroutine */ int ctgsy2_(char *, integer *, integer *, integer 
	    *, complex *, integer *, complex *, integer *, complex *, integer 
	    *, complex *, integer *, complex *, integer *, complex *, integer 
	    *, real *, real *, real *, integer *);
    static integer ie, je, mb, nb;
    static real dscale;
    static integer is, js, pq;
    static real scaloc;
    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex 
	    *, integer *, complex *, integer *), xerbla_(char *, 
	    integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    static integer iround;
    static logical notran;
    static integer isolve;
    static logical lquery;
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]
#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]
#define c___subscr(a_1,a_2) (a_2)*c_dim1 + a_1
#define c___ref(a_1,a_2) c__[c___subscr(a_1,a_2)]
#define d___subscr(a_1,a_2) (a_2)*d_dim1 + a_1
#define d___ref(a_1,a_2) d__[d___subscr(a_1,a_2)]
#define e_subscr(a_1,a_2) (a_2)*e_dim1 + a_1
#define e_ref(a_1,a_2) e[e_subscr(a_1,a_2)]
#define f_subscr(a_1,a_2) (a_2)*f_dim1 + a_1
#define f_ref(a_1,a_2) f[f_subscr(a_1,a_2)]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    d_dim1 = *ldd;
    d_offset = 1 + d_dim1 * 1;
    d__ -= d_offset;
    e_dim1 = *lde;
    e_offset = 1 + e_dim1 * 1;
    e -= e_offset;
    f_dim1 = *ldf;
    f_offset = 1 + f_dim1 * 1;
    f -= f_offset;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;
    notran = lsame_(trans, "N");
    lquery = *lwork == -1;

    if ((*ijob == 1 || *ijob == 2) && notran) {
/* Computing MAX */
	i__1 = 1, i__2 = (*m << 1) * *n;
	lwmin = max(i__1,i__2);
    } else {
	lwmin = 1;
    }

    if (! notran && ! lsame_(trans, "C")) {
	*info = -1;
    } else if (*ijob < 0 || *ijob > 4) {
	*info = -2;
    } else if (*m <= 0) {
	*info = -3;
    } else if (*n <= 0) {
	*info = -4;
    } else if (*lda < max(1,*m)) {
	*info = -6;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    } else if (*ldc < max(1,*m)) {
	*info = -10;
    } else if (*ldd < max(1,*m)) {
	*info = -12;
    } else if (*lde < max(1,*n)) {
	*info = -14;
    } else if (*ldf < max(1,*m)) {
	*info = -16;
    } else if (*lwork < lwmin && ! lquery) {
	*info = -20;
    }

    if (*info == 0) {
	work[1].r = (real) lwmin, work[1].i = 0.f;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CTGSYL", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Determine  optimal block sizes MB and NB */

    mb = ilaenv_(&c__2, "CTGSYL", trans, m, n, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);
    nb = ilaenv_(&c__5, "CTGSYL", trans, m, n, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);

    isolve = 1;
    ifunc = 0;
    if (*ijob >= 3 && notran) {
	ifunc = *ijob - 2;
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    ccopy_(m, &c_b16, &c__0, &c___ref(1, j), &c__1);
	    ccopy_(m, &c_b16, &c__0, &f_ref(1, j), &c__1);
/* L10: */
	}
    } else if (*ijob >= 1 && notran) {
	isolve = 2;
    }

    if (mb <= 1 && nb <= 1 || mb >= *m && nb >= *n) {

/*        Use unblocked Level 2 solver */

	i__1 = isolve;
	for (iround = 1; iround <= i__1; ++iround) {

	    *scale = 1.f;
	    dscale = 0.f;
	    dsum = 1.f;
	    pq = *m * *n;
	    ctgsy2_(trans, &ifunc, m, n, &a[a_offset], lda, &b[b_offset], ldb,
		     &c__[c_offset], ldc, &d__[d_offset], ldd, &e[e_offset], 
		    lde, &f[f_offset], ldf, scale, &dsum, &dscale, info);
	    if (dscale != 0.f) {
		if (*ijob == 1 || *ijob == 3) {
		    *dif = sqrt((real) ((*m << 1) * *n)) / (dscale * sqrt(
			    dsum));
		} else {
		    *dif = sqrt((real) pq) / (dscale * sqrt(dsum));
		}
	    }
	    if (isolve == 2 && iround == 1) {
		ifunc = *ijob;
		scale2 = *scale;
		clacpy_("F", m, n, &c__[c_offset], ldc, &work[1], m);
		clacpy_("F", m, n, &f[f_offset], ldf, &work[*m * *n + 1], m);
		i__2 = *n;
		for (j = 1; j <= i__2; ++j) {
		    ccopy_(m, &c_b16, &c__0, &c___ref(1, j), &c__1);
		    ccopy_(m, &c_b16, &c__0, &f_ref(1, j), &c__1);
/* L20: */
		}
	    } else if (isolve == 2 && iround == 2) {
		clacpy_("F", m, n, &work[1], m, &c__[c_offset], ldc);
		clacpy_("F", m, n, &work[*m * *n + 1], m, &f[f_offset], ldf);
		*scale = scale2;
	    }
/* L30: */
	}

	return 0;

    }

/*     Determine block structure of A */

    p = 0;
    i__ = 1;
L40:
    if (i__ > *m) {
	goto L50;
    }
    ++p;
    iwork[p] = i__;
    i__ += mb;
    if (i__ >= *m) {
	goto L50;
    }
    goto L40;
L50:
    iwork[p + 1] = *m + 1;
    if (iwork[p] == iwork[p + 1]) {
	--p;
    }

/*     Determine block structure of B */

    q = p + 1;
    j = 1;
L60:
    if (j > *n) {
	goto L70;
    }

    ++q;
    iwork[q] = j;
    j += nb;
    if (j >= *n) {
	goto L70;
    }
    goto L60;

L70:
    iwork[q + 1] = *n + 1;
    if (iwork[q] == iwork[q + 1]) {
	--q;
    }

    if (notran) {
	i__1 = isolve;
	for (iround = 1; iround <= i__1; ++iround) {

/*           Solve (I, J) - subsystem   
                 A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J)   
                 D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J)   
             for I = P, P - 1, ..., 1; J = 1, 2, ..., Q */

	    pq = 0;
	    *scale = 1.f;
	    dscale = 0.f;
	    dsum = 1.f;
	    i__2 = q;
	    for (j = p + 2; j <= i__2; ++j) {
		js = iwork[j];
		je = iwork[j + 1] - 1;
		nb = je - js + 1;
		for (i__ = p; i__ >= 1; --i__) {
		    is = iwork[i__];
		    ie = iwork[i__ + 1] - 1;
		    mb = ie - is + 1;
		    ctgsy2_(trans, &ifunc, &mb, &nb, &a_ref(is, is), lda, &
			    b_ref(js, js), ldb, &c___ref(is, js), ldc, &
			    d___ref(is, is), ldd, &e_ref(js, js), lde, &f_ref(
			    is, js), ldf, &scaloc, &dsum, &dscale, &linfo);
		    if (linfo > 0) {
			*info = linfo;
		    }
		    pq += mb * nb;
		    if (scaloc != 1.f) {
			i__3 = js - 1;
			for (k = 1; k <= i__3; ++k) {
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(m, &q__1, &c___ref(1, k), &c__1);
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(m, &q__1, &f_ref(1, k), &c__1);
/* L80: */
			}
			i__3 = je;
			for (k = js; k <= i__3; ++k) {
			    i__4 = is - 1;
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(&i__4, &q__1, &c___ref(1, k), &c__1);
			    i__4 = is - 1;
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(&i__4, &q__1, &f_ref(1, k), &c__1);
/* L90: */
			}
			i__3 = je;
			for (k = js; k <= i__3; ++k) {
			    i__4 = *m - ie;
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(&i__4, &q__1, &c___ref(ie + 1, k), &c__1);
			    i__4 = *m - ie;
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(&i__4, &q__1, &f_ref(ie + 1, k), &c__1);
/* L100: */
			}
			i__3 = *n;
			for (k = je + 1; k <= i__3; ++k) {
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(m, &q__1, &c___ref(1, k), &c__1);
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(m, &q__1, &f_ref(1, k), &c__1);
/* L110: */
			}
			*scale *= scaloc;
		    }

/*                 Substitute R(I,J) and L(I,J) into remaining equation. */

		    if (i__ > 1) {
			i__3 = is - 1;
			cgemm_("N", "N", &i__3, &nb, &mb, &c_b53, &a_ref(1, 
				is), lda, &c___ref(is, js), ldc, &c_b54, &
				c___ref(1, js), ldc);
			i__3 = is - 1;
			cgemm_("N", "N", &i__3, &nb, &mb, &c_b53, &d___ref(1, 
				is), ldd, &c___ref(is, js), ldc, &c_b54, &
				f_ref(1, js), ldf);
		    }
		    if (j < q) {
			i__3 = *n - je;
			cgemm_("N", "N", &mb, &i__3, &nb, &c_b54, &f_ref(is, 
				js), ldf, &b_ref(js, je + 1), ldb, &c_b54, &
				c___ref(is, je + 1), ldc);
			i__3 = *n - je;
			cgemm_("N", "N", &mb, &i__3, &nb, &c_b54, &f_ref(is, 
				js), ldf, &e_ref(js, je + 1), lde, &c_b54, &
				f_ref(is, je + 1), ldf);
		    }
/* L120: */
		}
/* L130: */
	    }
	    if (dscale != 0.f) {
		if (*ijob == 1 || *ijob == 3) {
		    *dif = sqrt((real) ((*m << 1) * *n)) / (dscale * sqrt(
			    dsum));
		} else {
		    *dif = sqrt((real) pq) / (dscale * sqrt(dsum));
		}
	    }
	    if (isolve == 2 && iround == 1) {
		ifunc = *ijob;
		scale2 = *scale;
		clacpy_("F", m, n, &c__[c_offset], ldc, &work[1], m);
		clacpy_("F", m, n, &f[f_offset], ldf, &work[*m * *n + 1], m);
		i__2 = *n;
		for (j = 1; j <= i__2; ++j) {
		    ccopy_(m, &c_b16, &c__0, &c___ref(1, j), &c__1);
		    ccopy_(m, &c_b16, &c__0, &f_ref(1, j), &c__1);
/* L140: */
		}
	    } else if (isolve == 2 && iround == 2) {
		clacpy_("F", m, n, &work[1], m, &c__[c_offset], ldc);
		clacpy_("F", m, n, &work[*m * *n + 1], m, &f[f_offset], ldf);
		*scale = scale2;
	    }
/* L150: */
	}
    } else {

/*        Solve transposed (I, J)-subsystem   
              A(I, I)' * R(I, J) + D(I, I)' * L(I, J) = C(I, J)   
              R(I, J) * B(J, J)  + L(I, J) * E(J, J) = -F(I, J)   
          for I = 1,2,..., P; J = Q, Q-1,..., 1 */

	*scale = 1.f;
	i__1 = p;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    is = iwork[i__];
	    ie = iwork[i__ + 1] - 1;
	    mb = ie - is + 1;
	    i__2 = p + 2;
	    for (j = q; j >= i__2; --j) {
		js = iwork[j];
		je = iwork[j + 1] - 1;
		nb = je - js + 1;
		ctgsy2_(trans, &ifunc, &mb, &nb, &a_ref(is, is), lda, &b_ref(
			js, js), ldb, &c___ref(is, js), ldc, &d___ref(is, is),
			 ldd, &e_ref(js, js), lde, &f_ref(is, js), ldf, &
			scaloc, &dsum, &dscale, &linfo);
		if (linfo > 0) {
		    *info = linfo;
		}
		if (scaloc != 1.f) {
		    i__3 = js - 1;
		    for (k = 1; k <= i__3; ++k) {
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(m, &q__1, &c___ref(1, k), &c__1);
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(m, &q__1, &f_ref(1, k), &c__1);
/* L160: */
		    }
		    i__3 = je;
		    for (k = js; k <= i__3; ++k) {
			i__4 = is - 1;
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(&i__4, &q__1, &c___ref(1, k), &c__1);
			i__4 = is - 1;
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(&i__4, &q__1, &f_ref(1, k), &c__1);
/* L170: */
		    }
		    i__3 = je;
		    for (k = js; k <= i__3; ++k) {
			i__4 = *m - ie;
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(&i__4, &q__1, &c___ref(ie + 1, k), &c__1);
			i__4 = *m - ie;
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(&i__4, &q__1, &f_ref(ie + 1, k), &c__1);
/* L180: */
		    }
		    i__3 = *n;
		    for (k = je + 1; k <= i__3; ++k) {
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(m, &q__1, &c___ref(1, k), &c__1);
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(m, &q__1, &f_ref(1, k), &c__1);
/* L190: */
		    }
		    *scale *= scaloc;
		}

/*              Substitute R(I,J) and L(I,J) into remaining equation. */

		if (j > p + 2) {
		    i__3 = js - 1;
		    cgemm_("N", "C", &mb, &i__3, &nb, &c_b54, &c___ref(is, js)
			    , ldc, &b_ref(1, js), ldb, &c_b54, &f_ref(is, 1), 
			    ldf);
		    i__3 = js - 1;
		    cgemm_("N", "C", &mb, &i__3, &nb, &c_b54, &f_ref(is, js), 
			    ldf, &e_ref(1, js), lde, &c_b54, &f_ref(is, 1), 
			    ldf);
		}
		if (i__ < p) {
		    i__3 = *m - ie;
		    cgemm_("C", "N", &i__3, &nb, &mb, &c_b53, &a_ref(is, ie + 
			    1), lda, &c___ref(is, js), ldc, &c_b54, &c___ref(
			    ie + 1, js), ldc);
		    i__3 = *m - ie;
		    cgemm_("C", "N", &i__3, &nb, &mb, &c_b53, &d___ref(is, ie 
			    + 1), ldd, &f_ref(is, js), ldf, &c_b54, &c___ref(
			    ie + 1, js), ldc);
		}
/* L200: */
	    }
/* L210: */
	}
    }

    work[1].r = (real) lwmin, work[1].i = 0.f;

    return 0;

/*     End of CTGSYL */

} /* ctgsyl_ */

Example #19

Show file

File: cgehrd.c Project: GuillaumeFuchs/Ensimag

 int cgehrd_(int *n, int *ilo, int *ihi, complex *
	a, int *lda, complex *tau, complex *work, int *lwork, int 
	*info)
{
    /* System generated locals */
    int a_dim1, a_offset, i__1, i__2, i__3, i__4;
    complex q__1;

    /* Local variables */
    int i__, j;
    complex t[4160]	/* was [65][64] */;
    int ib;
    complex ei;
    int nb, nh, nx, iws;
    extern  int cgemm_(char *, char *, int *, int *, 
	    int *, complex *, complex *, int *, complex *, int *, 
	    complex *, complex *, int *);
    int nbmin, iinfo;
    extern  int ctrmm_(char *, char *, char *, char *, 
	    int *, int *, complex *, complex *, int *, complex *, 
	    int *), caxpy_(int *, 
	    complex *, complex *, int *, complex *, int *), cgehd2_(
	    int *, int *, int *, complex *, int *, complex *, 
	    complex *, int *), clahr2_(int *, int *, int *, 
	    complex *, int *, complex *, complex *, int *, complex *, 
	    int *), clarfb_(char *, char *, char *, char *, int *, 
	    int *, int *, complex *, int *, complex *, int *, 
	    complex *, int *, complex *, int *), xerbla_(char *, int *);
    extern int ilaenv_(int *, char *, char *, int *, int *, 
	    int *, int *);
    int ldwork, lwkopt;
    int lquery;


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CGEHRD reduces a complex general matrix A to upper Hessenberg form H by */
/*  an unitary similarity transformation:  Q' * A * Q = H . */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  ILO     (input) INTEGER */
/*  IHI     (input) INTEGER */
/*          It is assumed that A is already upper triangular in rows */
/*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */
/*          set by a previous call to CGEBAL; otherwise they should be */
/*          set to 1 and N respectively. See Further Details. */
/*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */

/*  A       (input/output) COMPLEX array, dimension (LDA,N) */
/*          On entry, the N-by-N general matrix to be reduced. */
/*          On exit, the upper triangle and the first subdiagonal of A */
/*          are overwritten with the upper Hessenberg matrix H, and the */
/*          elements below the first subdiagonal, with the array TAU, */
/*          represent the unitary matrix Q as a product of elementary */
/*          reflectors. See Further Details. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= MAX(1,N). */

/*  TAU     (output) COMPLEX array, dimension (N-1) */
/*          The scalar factors of the elementary reflectors (see Further */
/*          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to */
/*          zero. */

/*  WORK    (workspace/output) COMPLEX array, dimension (LWORK) */
/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */

/*  LWORK   (input) INTEGER */
/*          The length of the array WORK.  LWORK >= MAX(1,N). */
/*          For optimum performance LWORK >= N*NB, where NB is the */
/*          optimal blocksize. */

/*          If LWORK = -1, then a workspace query is assumed; the routine */
/*          only calculates the optimal size of the WORK array, returns */
/*          this value as the first entry of the WORK array, and no error */
/*          message related to LWORK is issued by XERBLA. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  Further Details */
/*  =============== */

/*  The matrix Q is represented as a product of (ihi-ilo) elementary */
/*  reflectors */

/*     Q = H(ilo) H(ilo+1) . . . H(ihi-1). */

/*  Each H(i) has the form */

/*     H(i) = I - tau * v * v' */

/*  where tau is a complex scalar, and v is a complex vector with */
/*  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */
/*  exit in A(i+2:ihi,i), and tau in TAU(i). */

/*  The contents of A are illustrated by the following example, with */
/*  n = 7, ilo = 2 and ihi = 6: */

/*  on entry,                        on exit, */

/*  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a ) */
/*  (     a   a   a   a   a   a )    (      a   h   h   h   h   a ) */
/*  (     a   a   a   a   a   a )    (      h   h   h   h   h   h ) */
/*  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h ) */
/*  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h ) */
/*  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h ) */
/*  (                         a )    (                          a ) */

/*  where a denotes an element of the original matrix A, h denotes a */
/*  modified element of the upper Hessenberg matrix H, and vi denotes an */
/*  element of the vector defining H(i). */

/*  This file is a slight modification of LAPACK-3.0's CGEHRD */
/*  subroutine incorporating improvements proposed by Quintana-Orti and */
/*  Van de Geijn (2005). */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
/* Computing MIN */
    i__1 = 64, i__2 = ilaenv_(&c__1, "CGEHRD", " ", n, ilo, ihi, &c_n1);
    nb = MIN(i__1,i__2);
    lwkopt = *n * nb;
    work[1].r = (float) lwkopt, work[1].i = 0.f;
    lquery = *lwork == -1;
    if (*n < 0) {
	*info = -1;
    } else if (*ilo < 1 || *ilo > MAX(1,*n)) {
	*info = -2;
    } else if (*ihi < MIN(*ilo,*n) || *ihi > *n) {
	*info = -3;
    } else if (*lda < MAX(1,*n)) {
	*info = -5;
    } else if (*lwork < MAX(1,*n) && ! lquery) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CGEHRD", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */

    i__1 = *ilo - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = i__;
	tau[i__2].r = 0.f, tau[i__2].i = 0.f;
/* L10: */
    }
    i__1 = *n - 1;
    for (i__ = MAX(1,*ihi); i__ <= i__1; ++i__) {
	i__2 = i__;
	tau[i__2].r = 0.f, tau[i__2].i = 0.f;
/* L20: */
    }

/*     Quick return if possible */

    nh = *ihi - *ilo + 1;
    if (nh <= 1) {
	work[1].r = 1.f, work[1].i = 0.f;
	return 0;
    }

/*     Determine the block size */

/* Computing MIN */
    i__1 = 64, i__2 = ilaenv_(&c__1, "CGEHRD", " ", n, ilo, ihi, &c_n1);
    nb = MIN(i__1,i__2);
    nbmin = 2;
    iws = 1;
    if (nb > 1 && nb < nh) {

/*        Determine when to cross over from blocked to unblocked code */
/*        (last block is always handled by unblocked code) */

/* Computing MAX */
	i__1 = nb, i__2 = ilaenv_(&c__3, "CGEHRD", " ", n, ilo, ihi, &c_n1);
	nx = MAX(i__1,i__2);
	if (nx < nh) {

/*           Determine if workspace is large enough for blocked code */

	    iws = *n * nb;
	    if (*lwork < iws) {

/*              Not enough workspace to use optimal NB:  determine the */
/*              minimum value of NB, and reduce NB or force use of */
/*              unblocked code */

/* Computing MAX */
		i__1 = 2, i__2 = ilaenv_(&c__2, "CGEHRD", " ", n, ilo, ihi, &
			c_n1);
		nbmin = MAX(i__1,i__2);
		if (*lwork >= *n * nbmin) {
		    nb = *lwork / *n;
		} else {
		    nb = 1;
		}
	    }
	}
    }
    ldwork = *n;

    if (nb < nbmin || nb >= nh) {

/*        Use unblocked code below */

	i__ = *ilo;

    } else {

/*        Use blocked code */

	i__1 = *ihi - 1 - nx;
	i__2 = nb;
	for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
	    i__3 = nb, i__4 = *ihi - i__;
	    ib = MIN(i__3,i__4);

/*           Reduce columns i:i+ib-1 to Hessenberg form, returning the */
/*           matrices V and T of the block reflector H = I - V*T*V' */
/*           which performs the reduction, and also the matrix Y = A*V*T */

	    clahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, &
		    c__65, &work[1], &ldwork);

/*           Apply the block reflector H to A(1:ihi,i+ib:ihi) from the */
/*           right, computing  A := A - Y * V'. V(i+ib,ib-1) must be set */
/*           to 1 */

	    i__3 = i__ + ib + (i__ + ib - 1) * a_dim1;
	    ei.r = a[i__3].r, ei.i = a[i__3].i;
	    i__3 = i__ + ib + (i__ + ib - 1) * a_dim1;
	    a[i__3].r = 1.f, a[i__3].i = 0.f;
	    i__3 = *ihi - i__ - ib + 1;
	    q__1.r = -1.f, q__1.i = -0.f;
	    cgemm_("No transpose", "Conjugate transpose", ihi, &i__3, &ib, &
		    q__1, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, 
		     &c_b2, &a[(i__ + ib) * a_dim1 + 1], lda);
	    i__3 = i__ + ib + (i__ + ib - 1) * a_dim1;
	    a[i__3].r = ei.r, a[i__3].i = ei.i;

/*           Apply the block reflector H to A(1:i,i+1:i+ib-1) from the */
/*           right */

	    i__3 = ib - 1;
	    ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", &i__, &
		    i__3, &c_b2, &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &
		    ldwork);
	    i__3 = ib - 2;
	    for (j = 0; j <= i__3; ++j) {
		q__1.r = -1.f, q__1.i = -0.f;
		caxpy_(&i__, &q__1, &work[ldwork * j + 1], &c__1, &a[(i__ + j 
			+ 1) * a_dim1 + 1], &c__1);
/* L30: */
	    }

/*           Apply the block reflector H to A(i+1:ihi,i+ib:n) from the */
/*           left */

	    i__3 = *ihi - i__;
	    i__4 = *n - i__ - ib + 1;
	    clarfb_("Left", "Conjugate transpose", "Forward", "Columnwise", &
		    i__3, &i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &
		    c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &
		    ldwork);
/* L40: */
	}
    }

/*     Use unblocked code to reduce the rest of the matrix */

    cgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
    work[1].r = (float) iws, work[1].i = 0.f;

    return 0;

/*     End of CGEHRD */

} /* cgehrd_ */

Example #20

Show file

File: cgemm.c Project: xianyi/clOpenBLAS

void cblas_cgemm(int order, int TransA, int TransB,
           blasint m, blasint n, blasint k,
           float alpha,
           float *a, blasint lda,
           float *b, blasint ldb,
           float beta,
           float *c, blasint ldc) 
{

	char transa[2] = "P";
	char transb[2] = "P"; 

	blasint M = m;
	blasint N = n;
	blasint K = k;
	blasint LDA = lda;
	blasint LDB = ldb;
	blasint LDC = ldc;
	float ALPHA = alpha;
	float BETA  = beta;

	#ifdef DEBUG
		FILE *fp;
		fp = fopen("/tmp/wrap.log", "a+");
		fprintf(fp, "Running cblas_cgemm M=%d N=%d K=%d\n", m, n, k);
		printf("Running cblas_cgemm M=%d N=%d K=%d\n", m, n, k);
	#endif

	#ifdef DEBUG
		if(fp)
			fclose(fp);
	#endif

	int info = 0;
	if ( (TransB != 111) && (TransB != 112 ) && (TransB != 113) ) info=3;
	if ( (TransA != 111) && (TransA != 112 ) && (TransA != 113) ) info=2;
	if ( (order != 101)  && (order != 102 ) ) info=1;

	if (info > 0)
	{
                printf("** On entry to cgemm_ parameter number %d had a illegal value\n",info);
                return;
	}

	if ( order == 102 ) // ColMajor
	{

		if ( TransA == 111 ) // Notrans
			transa[0] = 'N';
		else
		   if ( TransA == 112 )
			transa[0] = 'T';
		   else
			transa[0] = 'C';

		if ( TransB == 111 )
			transb[0] = 'N';
		else
	             if ( TransB == 112 )
			transb[0] = 'T';
                     else
			transb[0] = 'C';

	}
	else			// RowMajor
	{

		if ( TransA == 111 ) // Notrans
			transa[0] = 'T';
		else
                    if ( TransA == 112 )
			transa[0] = 'C';
                    else
			transa[0] = 'N';

		if ( TransB == 111 )
			transb[0] = 'T';
		else
                    if ( TransB == 112 )
			transb[0] = 'C';
                    else
			transb[0] = 'N';

	}

	cgemm_(transa, transb, &M, &N, &K, &ALPHA, a, &LDA, b, &LDB, &BETA, c, &LDC);

}

Example #21

Show file

File: blas-lapack.c Project: BenjaminCoquelle/clBLAS

void
cgemm(char transa, char transb, int m, int n, int k, complex *alpha, complex *a, int lda, complex *b, int ldb, complex *beta, complex *c, int ldc)
{
    cgemm_(&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc);
}

Example #22

Show file

File: clasyf.c Project: flame/libflame

/* Subroutine */
int clasyf_(char *uplo, integer *n, integer *nb, integer *kb, complex *a, integer *lda, integer *ipiv, complex *w, integer *ldw, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3, i__4, i__5;
    real r__1, r__2, r__3, r__4;
    complex q__1, q__2, q__3;
    /* Builtin functions */
    double sqrt(doublereal), r_imag(complex *);
    void c_div(complex *, complex *, complex *);
    /* Local variables */
    integer j, k;
    complex t, r1, d11, d21, d22;
    integer jb, jj, kk, jp, kp, kw, kkw, imax, jmax;
    real alpha;
    extern /* Subroutine */
    int cscal_(integer *, complex *, complex *, integer *), cgemm_(char *, char *, integer *, integer *, integer * , complex *, complex *, integer *, complex *, integer *, complex * , complex *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */
    int cgemv_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *), ccopy_(integer *, complex *, integer *, complex *, integer *), cswap_(integer *, complex *, integer *, complex *, integer *);
    integer kstep;
    real absakk;
    extern integer icamax_(integer *, complex *, integer *);
    real colmax, rowmax;
    /* -- LAPACK computational routine (version 3.5.0) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* November 2013 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Intrinsic Functions .. */
    /* .. */
    /* .. Statement Functions .. */
    /* .. */
    /* .. Statement Function definitions .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --ipiv;
    w_dim1 = *ldw;
    w_offset = 1 + w_dim1;
    w -= w_offset;
    /* Function Body */
    *info = 0;
    /* Initialize ALPHA for use in choosing pivot block size. */
    alpha = (sqrt(17.f) + 1.f) / 8.f;
    if (lsame_(uplo, "U"))
    {
        /* Factorize the trailing columns of A using the upper triangle */
        /* of A and working backwards, and compute the matrix W = U12*D */
        /* for use in updating A11 */
        /* K is the main loop index, decreasing from N in steps of 1 or 2 */
        /* KW is the column of W which corresponds to column K of A */
        k = *n;
L10:
        kw = *nb + k - *n;
        /* Exit from loop */
        if (k <= *n - *nb + 1 && *nb < *n || k < 1)
        {
            goto L30;
        }
        /* Copy column K of A to column KW of W and update it */
        ccopy_(&k, &a[k * a_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1);
        if (k < *n)
        {
            i__1 = *n - k;
            q__1.r = -1.f;
            q__1.i = -0.f; // , expr subst
            cgemv_("No transpose", &k, &i__1, &q__1, &a[(k + 1) * a_dim1 + 1], lda, &w[k + (kw + 1) * w_dim1], ldw, &c_b1, &w[kw * w_dim1 + 1], &c__1);
        }
        kstep = 1;
        /* Determine rows and columns to be interchanged and whether */
        /* a 1-by-1 or 2-by-2 pivot block will be used */
        i__1 = k + kw * w_dim1;
        absakk = (r__1 = w[i__1].r, f2c_abs(r__1)) + (r__2 = r_imag(&w[k + kw * w_dim1]), f2c_abs(r__2));
        /* IMAX is the row-index of the largest off-diagonal element in */
        /* column K, and COLMAX is its absolute value. */
        /* Determine both COLMAX and IMAX. */
        if (k > 1)
        {
            i__1 = k - 1;
            imax = icamax_(&i__1, &w[kw * w_dim1 + 1], &c__1);
            i__1 = imax + kw * w_dim1;
            colmax = (r__1 = w[i__1].r, f2c_abs(r__1)) + (r__2 = r_imag(&w[imax + kw * w_dim1]), f2c_abs(r__2));
        }
        else
        {
            colmax = 0.f;
        }
        if (max(absakk,colmax) == 0.f)
        {
            /* Column K is zero or underflow: set INFO and continue */
            if (*info == 0)
            {
                *info = k;
            }
            kp = k;
        }
        else
        {
            if (absakk >= alpha * colmax)
            {
                /* no interchange, use 1-by-1 pivot block */
                kp = k;
            }
            else
            {
                /* Copy column IMAX to column KW-1 of W and update it */
                ccopy_(&imax, &a[imax * a_dim1 + 1], &c__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
                i__1 = k - imax;
                ccopy_(&i__1, &a[imax + (imax + 1) * a_dim1], lda, &w[imax + 1 + (kw - 1) * w_dim1], &c__1);
                if (k < *n)
                {
                    i__1 = *n - k;
                    q__1.r = -1.f;
                    q__1.i = -0.f; // , expr subst
                    cgemv_("No transpose", &k, &i__1, &q__1, &a[(k + 1) * a_dim1 + 1], lda, &w[imax + (kw + 1) * w_dim1], ldw, &c_b1, &w[(kw - 1) * w_dim1 + 1], &c__1);
                }
                /* JMAX is the column-index of the largest off-diagonal */
                /* element in row IMAX, and ROWMAX is its absolute value */
                i__1 = k - imax;
                jmax = imax + icamax_(&i__1, &w[imax + 1 + (kw - 1) * w_dim1], &c__1);
                i__1 = jmax + (kw - 1) * w_dim1;
                rowmax = (r__1 = w[i__1].r, f2c_abs(r__1)) + (r__2 = r_imag(&w[ jmax + (kw - 1) * w_dim1]), f2c_abs(r__2));
                if (imax > 1)
                {
                    i__1 = imax - 1;
                    jmax = icamax_(&i__1, &w[(kw - 1) * w_dim1 + 1], &c__1);
                    /* Computing MAX */
                    i__1 = jmax + (kw - 1) * w_dim1;
                    r__3 = rowmax;
                    r__4 = (r__1 = w[i__1].r, f2c_abs(r__1)) + ( r__2 = r_imag(&w[jmax + (kw - 1) * w_dim1]), f2c_abs( r__2)); // , expr subst
                    rowmax = max(r__3,r__4);
                }
                if (absakk >= alpha * colmax * (colmax / rowmax))
                {
                    /* no interchange, use 1-by-1 pivot block */
                    kp = k;
                }
                else /* if(complicated condition) */
                {
                    i__1 = imax + (kw - 1) * w_dim1;
                    if ((r__1 = w[i__1].r, f2c_abs(r__1)) + (r__2 = r_imag(&w[ imax + (kw - 1) * w_dim1]), f2c_abs(r__2)) >= alpha * rowmax)
                    {
                        /* interchange rows and columns K and IMAX, use 1-by-1 */
                        /* pivot block */
                        kp = imax;
                        /* copy column KW-1 of W to column KW of W */
                        ccopy_(&k, &w[(kw - 1) * w_dim1 + 1], &c__1, &w[kw * w_dim1 + 1], &c__1);
                    }
                    else
                    {
                        /* interchange rows and columns K-1 and IMAX, use 2-by-2 */
                        /* pivot block */
                        kp = imax;
                        kstep = 2;
                    }
                }
            }
            /* ============================================================ */
            /* KK is the column of A where pivoting step stopped */
            kk = k - kstep + 1;
            /* KKW is the column of W which corresponds to column KK of A */
            kkw = *nb + kk - *n;
            /* Interchange rows and columns KP and KK. */
            /* Updated column KP is already stored in column KKW of W. */
            if (kp != kk)
            {
                /* Copy non-updated column KK to column KP of submatrix A */
                /* at step K. No need to copy element into column K */
                /* (or K and K-1 for 2-by-2 pivot) of A, since these columns */
                /* will be later overwritten. */
                i__1 = kp + kp * a_dim1;
                i__2 = kk + kk * a_dim1;
                a[i__1].r = a[i__2].r;
                a[i__1].i = a[i__2].i; // , expr subst
                i__1 = kk - 1 - kp;
                ccopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + (kp + 1) * a_dim1], lda);
                if (kp > 1)
                {
                    i__1 = kp - 1;
                    ccopy_(&i__1, &a[kk * a_dim1 + 1], &c__1, &a[kp * a_dim1 + 1], &c__1);
                }
                /* Interchange rows KK and KP in last K+1 to N columns of A */
                /* (columns K (or K and K-1 for 2-by-2 pivot) of A will be */
                /* later overwritten). Interchange rows KK and KP */
                /* in last KKW to NB columns of W. */
                if (k < *n)
                {
                    i__1 = *n - k;
                    cswap_(&i__1, &a[kk + (k + 1) * a_dim1], lda, &a[kp + (k + 1) * a_dim1], lda);
                }
                i__1 = *n - kk + 1;
                cswap_(&i__1, &w[kk + kkw * w_dim1], ldw, &w[kp + kkw * w_dim1], ldw);
            }
            if (kstep == 1)
            {
                /* 1-by-1 pivot block D(k): column kw of W now holds */
                /* W(kw) = U(k)*D(k), */
                /* where U(k) is the k-th column of U */
                /* Store subdiag. elements of column U(k) */
                /* and 1-by-1 block D(k) in column k of A. */
                /* NOTE: Diagonal element U(k,k) is a UNIT element */
                /* and not stored. */
                /* A(k,k) := D(k,k) = W(k,kw) */
                /* A(1:k-1,k) := U(1:k-1,k) = W(1:k-1,kw)/D(k,k) */
                ccopy_(&k, &w[kw * w_dim1 + 1], &c__1, &a[k * a_dim1 + 1], & c__1);
                c_div(&q__1, &c_b1, &a[k + k * a_dim1]);
                r1.r = q__1.r;
                r1.i = q__1.i; // , expr subst
                i__1 = k - 1;
                cscal_(&i__1, &r1, &a[k * a_dim1 + 1], &c__1);
            }
            else
            {
                /* 2-by-2 pivot block D(k): columns kw and kw-1 of W now hold */
                /* ( W(kw-1) W(kw) ) = ( U(k-1) U(k) )*D(k) */
                /* where U(k) and U(k-1) are the k-th and (k-1)-th columns */
                /* of U */
                /* Store U(1:k-2,k-1) and U(1:k-2,k) and 2-by-2 */
                /* block D(k-1:k,k-1:k) in columns k-1 and k of A. */
                /* NOTE: 2-by-2 diagonal block U(k-1:k,k-1:k) is a UNIT */
                /* block and not stored. */
                /* A(k-1:k,k-1:k) := D(k-1:k,k-1:k) = W(k-1:k,kw-1:kw) */
                /* A(1:k-2,k-1:k) := U(1:k-2,k:k-1:k) = */
                /* = W(1:k-2,kw-1:kw) * ( D(k-1:k,k-1:k)**(-1) ) */
                if (k > 2)
                {
                    /* Compose the columns of the inverse of 2-by-2 pivot */
                    /* block D in the following way to reduce the number */
                    /* of FLOPS when we myltiply panel ( W(kw-1) W(kw) ) by */
                    /* this inverse */
                    /* D**(-1) = ( d11 d21 )**(-1) = */
                    /* ( d21 d22 ) */
                    /* = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = */
                    /* ( (-d21 ) ( d11 ) ) */
                    /* = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * */
                    /* * ( ( d22/d21 ) ( -1 ) ) = */
                    /* ( ( -1 ) ( d11/d21 ) ) */
                    /* = 1/d21 * 1/(D22*D11-1) * ( ( D11 ) ( -1 ) ) = */
                    /* ( ( -1 ) ( D22 ) ) */
                    /* = 1/d21 * T * ( ( D11 ) ( -1 ) ) */
                    /* ( ( -1 ) ( D22 ) ) */
                    /* = D21 * ( ( D11 ) ( -1 ) ) */
                    /* ( ( -1 ) ( D22 ) ) */
                    i__1 = k - 1 + kw * w_dim1;
                    d21.r = w[i__1].r;
                    d21.i = w[i__1].i; // , expr subst
                    c_div(&q__1, &w[k + kw * w_dim1], &d21);
                    d11.r = q__1.r;
                    d11.i = q__1.i; // , expr subst
                    c_div(&q__1, &w[k - 1 + (kw - 1) * w_dim1], &d21);
                    d22.r = q__1.r;
                    d22.i = q__1.i; // , expr subst
                    q__3.r = d11.r * d22.r - d11.i * d22.i;
                    q__3.i = d11.r * d22.i + d11.i * d22.r; // , expr subst
                    q__2.r = q__3.r - 1.f;
                    q__2.i = q__3.i - 0.f; // , expr subst
                    c_div(&q__1, &c_b1, &q__2);
                    t.r = q__1.r;
                    t.i = q__1.i; // , expr subst
                    /* Update elements in columns A(k-1) and A(k) as */
                    /* dot products of rows of ( W(kw-1) W(kw) ) and columns */
                    /* of D**(-1) */
                    c_div(&q__1, &t, &d21);
                    d21.r = q__1.r;
                    d21.i = q__1.i; // , expr subst
                    i__1 = k - 2;
                    for (j = 1;
                            j <= i__1;
                            ++j)
                    {
                        i__2 = j + (k - 1) * a_dim1;
                        i__3 = j + (kw - 1) * w_dim1;
                        q__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i;
                        q__3.i = d11.r * w[i__3].i + d11.i * w[i__3] .r; // , expr subst
                        i__4 = j + kw * w_dim1;
                        q__2.r = q__3.r - w[i__4].r;
                        q__2.i = q__3.i - w[i__4] .i; // , expr subst
                        q__1.r = d21.r * q__2.r - d21.i * q__2.i;
                        q__1.i = d21.r * q__2.i + d21.i * q__2.r; // , expr subst
                        a[i__2].r = q__1.r;
                        a[i__2].i = q__1.i; // , expr subst
                        i__2 = j + k * a_dim1;
                        i__3 = j + kw * w_dim1;
                        q__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i;
                        q__3.i = d22.r * w[i__3].i + d22.i * w[i__3] .r; // , expr subst
                        i__4 = j + (kw - 1) * w_dim1;
                        q__2.r = q__3.r - w[i__4].r;
                        q__2.i = q__3.i - w[i__4] .i; // , expr subst
                        q__1.r = d21.r * q__2.r - d21.i * q__2.i;
                        q__1.i = d21.r * q__2.i + d21.i * q__2.r; // , expr subst
                        a[i__2].r = q__1.r;
                        a[i__2].i = q__1.i; // , expr subst
                        /* L20: */
                    }
                }
                /* Copy D(k) to A */
                i__1 = k - 1 + (k - 1) * a_dim1;
                i__2 = k - 1 + (kw - 1) * w_dim1;
                a[i__1].r = w[i__2].r;
                a[i__1].i = w[i__2].i; // , expr subst
                i__1 = k - 1 + k * a_dim1;
                i__2 = k - 1 + kw * w_dim1;
                a[i__1].r = w[i__2].r;
                a[i__1].i = w[i__2].i; // , expr subst
                i__1 = k + k * a_dim1;
                i__2 = k + kw * w_dim1;
                a[i__1].r = w[i__2].r;
                a[i__1].i = w[i__2].i; // , expr subst
            }
        }
        /* Store details of the interchanges in IPIV */
        if (kstep == 1)
        {
            ipiv[k] = kp;
        }
        else
        {
            ipiv[k] = -kp;
            ipiv[k - 1] = -kp;
        }
        /* Decrease K and return to the start of the main loop */
        k -= kstep;
        goto L10;
L30: /* Update the upper triangle of A11 (= A(1:k,1:k)) as */
        /* A11 := A11 - U12*D*U12**T = A11 - U12*W**T */
        /* computing blocks of NB columns at a time */
        i__1 = -(*nb);
        for (j = (k - 1) / *nb * *nb + 1;
                i__1 < 0 ? j >= 1 : j <= 1;
                j += i__1)
        {
            /* Computing MIN */
            i__2 = *nb;
            i__3 = k - j + 1; // , expr subst
            jb = min(i__2,i__3);
            /* Update the upper triangle of the diagonal block */
            i__2 = j + jb - 1;
            for (jj = j;
                    jj <= i__2;
                    ++jj)
            {
                i__3 = jj - j + 1;
                i__4 = *n - k;
                q__1.r = -1.f;
                q__1.i = -0.f; // , expr subst
                cgemv_("No transpose", &i__3, &i__4, &q__1, &a[j + (k + 1) * a_dim1], lda, &w[jj + (kw + 1) * w_dim1], ldw, &c_b1, &a[j + jj * a_dim1], &c__1);
                /* L40: */
            }
            /* Update the rectangular superdiagonal block */
            i__2 = j - 1;
            i__3 = *n - k;
            q__1.r = -1.f;
            q__1.i = -0.f; // , expr subst
            cgemm_("No transpose", "Transpose", &i__2, &jb, &i__3, &q__1, &a[( k + 1) * a_dim1 + 1], lda, &w[j + (kw + 1) * w_dim1], ldw, &c_b1, &a[j * a_dim1 + 1], lda);
            /* L50: */
        }
        /* Put U12 in standard form by partially undoing the interchanges */
        /* in columns k+1:n looping backwards from k+1 to n */
        j = k + 1;
L60: /* Undo the interchanges (if any) of rows JJ and JP at each */
        /* step J */
        /* (Here, J is a diagonal index) */
        jj = j;
        jp = ipiv[j];
        if (jp < 0)
        {
            jp = -jp;
            /* (Here, J is a diagonal index) */
            ++j;
        }
        /* (NOTE: Here, J is used to determine row length. Length N-J+1 */
        /* of the rows to swap back doesn't include diagonal element) */
        ++j;
        if (jp != jj && j <= *n)
        {
            i__1 = *n - j + 1;
            cswap_(&i__1, &a[jp + j * a_dim1], lda, &a[jj + j * a_dim1], lda);
        }
        if (j < *n)
        {
            goto L60;
        }
        /* Set KB to the number of columns factorized */
        *kb = *n - k;
    }
    else
    {
        /* Factorize the leading columns of A using the lower triangle */
        /* of A and working forwards, and compute the matrix W = L21*D */
        /* for use in updating A22 */
        /* K is the main loop index, increasing from 1 in steps of 1 or 2 */
        k = 1;
L70: /* Exit from loop */
        if (k >= *nb && *nb < *n || k > *n)
        {
            goto L90;
        }
        /* Copy column K of A to column K of W and update it */
        i__1 = *n - k + 1;
        ccopy_(&i__1, &a[k + k * a_dim1], &c__1, &w[k + k * w_dim1], &c__1);
        i__1 = *n - k + 1;
        i__2 = k - 1;
        q__1.r = -1.f;
        q__1.i = -0.f; // , expr subst
        cgemv_("No transpose", &i__1, &i__2, &q__1, &a[k + a_dim1], lda, &w[k + w_dim1], ldw, &c_b1, &w[k + k * w_dim1], &c__1);
        kstep = 1;
        /* Determine rows and columns to be interchanged and whether */
        /* a 1-by-1 or 2-by-2 pivot block will be used */
        i__1 = k + k * w_dim1;
        absakk = (r__1 = w[i__1].r, f2c_abs(r__1)) + (r__2 = r_imag(&w[k + k * w_dim1]), f2c_abs(r__2));
        /* IMAX is the row-index of the largest off-diagonal element in */
        /* column K, and COLMAX is its absolute value. */
        /* Determine both COLMAX and IMAX. */
        if (k < *n)
        {
            i__1 = *n - k;
            imax = k + icamax_(&i__1, &w[k + 1 + k * w_dim1], &c__1);
            i__1 = imax + k * w_dim1;
            colmax = (r__1 = w[i__1].r, f2c_abs(r__1)) + (r__2 = r_imag(&w[imax + k * w_dim1]), f2c_abs(r__2));
        }
        else
        {
            colmax = 0.f;
        }
        if (max(absakk,colmax) == 0.f)
        {
            /* Column K is zero or underflow: set INFO and continue */
            if (*info == 0)
            {
                *info = k;
            }
            kp = k;
        }
        else
        {
            if (absakk >= alpha * colmax)
            {
                /* no interchange, use 1-by-1 pivot block */
                kp = k;
            }
            else
            {
                /* Copy column IMAX to column K+1 of W and update it */
                i__1 = imax - k;
                ccopy_(&i__1, &a[imax + k * a_dim1], lda, &w[k + (k + 1) * w_dim1], &c__1);
                i__1 = *n - imax + 1;
                ccopy_(&i__1, &a[imax + imax * a_dim1], &c__1, &w[imax + (k + 1) * w_dim1], &c__1);
                i__1 = *n - k + 1;
                i__2 = k - 1;
                q__1.r = -1.f;
                q__1.i = -0.f; // , expr subst
                cgemv_("No transpose", &i__1, &i__2, &q__1, &a[k + a_dim1], lda, &w[imax + w_dim1], ldw, &c_b1, &w[k + (k + 1) * w_dim1], &c__1);
                /* JMAX is the column-index of the largest off-diagonal */
                /* element in row IMAX, and ROWMAX is its absolute value */
                i__1 = imax - k;
                jmax = k - 1 + icamax_(&i__1, &w[k + (k + 1) * w_dim1], &c__1) ;
                i__1 = jmax + (k + 1) * w_dim1;
                rowmax = (r__1 = w[i__1].r, f2c_abs(r__1)) + (r__2 = r_imag(&w[ jmax + (k + 1) * w_dim1]), f2c_abs(r__2));
                if (imax < *n)
                {
                    i__1 = *n - imax;
                    jmax = imax + icamax_(&i__1, &w[imax + 1 + (k + 1) * w_dim1], &c__1);
                    /* Computing MAX */
                    i__1 = jmax + (k + 1) * w_dim1;
                    r__3 = rowmax;
                    r__4 = (r__1 = w[i__1].r, f2c_abs(r__1)) + ( r__2 = r_imag(&w[jmax + (k + 1) * w_dim1]), f2c_abs( r__2)); // , expr subst
                    rowmax = max(r__3,r__4);
                }
                if (absakk >= alpha * colmax * (colmax / rowmax))
                {
                    /* no interchange, use 1-by-1 pivot block */
                    kp = k;
                }
                else /* if(complicated condition) */
                {
                    i__1 = imax + (k + 1) * w_dim1;
                    if ((r__1 = w[i__1].r, f2c_abs(r__1)) + (r__2 = r_imag(&w[ imax + (k + 1) * w_dim1]), f2c_abs(r__2)) >= alpha * rowmax)
                    {
                        /* interchange rows and columns K and IMAX, use 1-by-1 */
                        /* pivot block */
                        kp = imax;
                        /* copy column K+1 of W to column K of W */
                        i__1 = *n - k + 1;
                        ccopy_(&i__1, &w[k + (k + 1) * w_dim1], &c__1, &w[k + k * w_dim1], &c__1);
                    }
                    else
                    {
                        /* interchange rows and columns K+1 and IMAX, use 2-by-2 */
                        /* pivot block */
                        kp = imax;
                        kstep = 2;
                    }
                }
            }
            /* ============================================================ */
            /* KK is the column of A where pivoting step stopped */
            kk = k + kstep - 1;
            /* Interchange rows and columns KP and KK. */
            /* Updated column KP is already stored in column KK of W. */
            if (kp != kk)
            {
                /* Copy non-updated column KK to column KP of submatrix A */
                /* at step K. No need to copy element into column K */
                /* (or K and K+1 for 2-by-2 pivot) of A, since these columns */
                /* will be later overwritten. */
                i__1 = kp + kp * a_dim1;
                i__2 = kk + kk * a_dim1;
                a[i__1].r = a[i__2].r;
                a[i__1].i = a[i__2].i; // , expr subst
                i__1 = kp - kk - 1;
                ccopy_(&i__1, &a[kk + 1 + kk * a_dim1], &c__1, &a[kp + (kk + 1) * a_dim1], lda);
                if (kp < *n)
                {
                    i__1 = *n - kp;
                    ccopy_(&i__1, &a[kp + 1 + kk * a_dim1], &c__1, &a[kp + 1 + kp * a_dim1], &c__1);
                }
                /* Interchange rows KK and KP in first K-1 columns of A */
                /* (columns K (or K and K+1 for 2-by-2 pivot) of A will be */
                /* later overwritten). Interchange rows KK and KP */
                /* in first KK columns of W. */
                if (k > 1)
                {
                    i__1 = k - 1;
                    cswap_(&i__1, &a[kk + a_dim1], lda, &a[kp + a_dim1], lda);
                }
                cswap_(&kk, &w[kk + w_dim1], ldw, &w[kp + w_dim1], ldw);
            }
            if (kstep == 1)
            {
                /* 1-by-1 pivot block D(k): column k of W now holds */
                /* W(k) = L(k)*D(k), */
                /* where L(k) is the k-th column of L */
                /* Store subdiag. elements of column L(k) */
                /* and 1-by-1 block D(k) in column k of A. */
                /* (NOTE: Diagonal element L(k,k) is a UNIT element */
                /* and not stored) */
                /* A(k,k) := D(k,k) = W(k,k) */
                /* A(k+1:N,k) := L(k+1:N,k) = W(k+1:N,k)/D(k,k) */
                i__1 = *n - k + 1;
                ccopy_(&i__1, &w[k + k * w_dim1], &c__1, &a[k + k * a_dim1], & c__1);
                if (k < *n)
                {
                    c_div(&q__1, &c_b1, &a[k + k * a_dim1]);
                    r1.r = q__1.r;
                    r1.i = q__1.i; // , expr subst
                    i__1 = *n - k;
                    cscal_(&i__1, &r1, &a[k + 1 + k * a_dim1], &c__1);
                }
            }
            else
            {
                /* 2-by-2 pivot block D(k): columns k and k+1 of W now hold */
                /* ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k) */
                /* where L(k) and L(k+1) are the k-th and (k+1)-th columns */
                /* of L */
                /* Store L(k+2:N,k) and L(k+2:N,k+1) and 2-by-2 */
                /* block D(k:k+1,k:k+1) in columns k and k+1 of A. */
                /* (NOTE: 2-by-2 diagonal block L(k:k+1,k:k+1) is a UNIT */
                /* block and not stored) */
                /* A(k:k+1,k:k+1) := D(k:k+1,k:k+1) = W(k:k+1,k:k+1) */
                /* A(k+2:N,k:k+1) := L(k+2:N,k:k+1) = */
                /* = W(k+2:N,k:k+1) * ( D(k:k+1,k:k+1)**(-1) ) */
                if (k < *n - 1)
                {
                    /* Compose the columns of the inverse of 2-by-2 pivot */
                    /* block D in the following way to reduce the number */
                    /* of FLOPS when we myltiply panel ( W(k) W(k+1) ) by */
                    /* this inverse */
                    /* D**(-1) = ( d11 d21 )**(-1) = */
                    /* ( d21 d22 ) */
                    /* = 1/(d11*d22-d21**2) * ( ( d22 ) (-d21 ) ) = */
                    /* ( (-d21 ) ( d11 ) ) */
                    /* = 1/d21 * 1/((d11/d21)*(d22/d21)-1) * */
                    /* * ( ( d22/d21 ) ( -1 ) ) = */
                    /* ( ( -1 ) ( d11/d21 ) ) */
                    /* = 1/d21 * 1/(D22*D11-1) * ( ( D11 ) ( -1 ) ) = */
                    /* ( ( -1 ) ( D22 ) ) */
                    /* = 1/d21 * T * ( ( D11 ) ( -1 ) ) */
                    /* ( ( -1 ) ( D22 ) ) */
                    /* = D21 * ( ( D11 ) ( -1 ) ) */
                    /* ( ( -1 ) ( D22 ) ) */
                    i__1 = k + 1 + k * w_dim1;
                    d21.r = w[i__1].r;
                    d21.i = w[i__1].i; // , expr subst
                    c_div(&q__1, &w[k + 1 + (k + 1) * w_dim1], &d21);
                    d11.r = q__1.r;
                    d11.i = q__1.i; // , expr subst
                    c_div(&q__1, &w[k + k * w_dim1], &d21);
                    d22.r = q__1.r;
                    d22.i = q__1.i; // , expr subst
                    q__3.r = d11.r * d22.r - d11.i * d22.i;
                    q__3.i = d11.r * d22.i + d11.i * d22.r; // , expr subst
                    q__2.r = q__3.r - 1.f;
                    q__2.i = q__3.i - 0.f; // , expr subst
                    c_div(&q__1, &c_b1, &q__2);
                    t.r = q__1.r;
                    t.i = q__1.i; // , expr subst
                    c_div(&q__1, &t, &d21);
                    d21.r = q__1.r;
                    d21.i = q__1.i; // , expr subst
                    /* Update elements in columns A(k) and A(k+1) as */
                    /* dot products of rows of ( W(k) W(k+1) ) and columns */
                    /* of D**(-1) */
                    i__1 = *n;
                    for (j = k + 2;
                            j <= i__1;
                            ++j)
                    {
                        i__2 = j + k * a_dim1;
                        i__3 = j + k * w_dim1;
                        q__3.r = d11.r * w[i__3].r - d11.i * w[i__3].i;
                        q__3.i = d11.r * w[i__3].i + d11.i * w[i__3] .r; // , expr subst
                        i__4 = j + (k + 1) * w_dim1;
                        q__2.r = q__3.r - w[i__4].r;
                        q__2.i = q__3.i - w[i__4] .i; // , expr subst
                        q__1.r = d21.r * q__2.r - d21.i * q__2.i;
                        q__1.i = d21.r * q__2.i + d21.i * q__2.r; // , expr subst
                        a[i__2].r = q__1.r;
                        a[i__2].i = q__1.i; // , expr subst
                        i__2 = j + (k + 1) * a_dim1;
                        i__3 = j + (k + 1) * w_dim1;
                        q__3.r = d22.r * w[i__3].r - d22.i * w[i__3].i;
                        q__3.i = d22.r * w[i__3].i + d22.i * w[i__3] .r; // , expr subst
                        i__4 = j + k * w_dim1;
                        q__2.r = q__3.r - w[i__4].r;
                        q__2.i = q__3.i - w[i__4] .i; // , expr subst
                        q__1.r = d21.r * q__2.r - d21.i * q__2.i;
                        q__1.i = d21.r * q__2.i + d21.i * q__2.r; // , expr subst
                        a[i__2].r = q__1.r;
                        a[i__2].i = q__1.i; // , expr subst
                        /* L80: */
                    }
                }
                /* Copy D(k) to A */
                i__1 = k + k * a_dim1;
                i__2 = k + k * w_dim1;
                a[i__1].r = w[i__2].r;
                a[i__1].i = w[i__2].i; // , expr subst
                i__1 = k + 1 + k * a_dim1;
                i__2 = k + 1 + k * w_dim1;
                a[i__1].r = w[i__2].r;
                a[i__1].i = w[i__2].i; // , expr subst
                i__1 = k + 1 + (k + 1) * a_dim1;
                i__2 = k + 1 + (k + 1) * w_dim1;
                a[i__1].r = w[i__2].r;
                a[i__1].i = w[i__2].i; // , expr subst
            }
        }
        /* Store details of the interchanges in IPIV */
        if (kstep == 1)
        {
            ipiv[k] = kp;
        }
        else
        {
            ipiv[k] = -kp;
            ipiv[k + 1] = -kp;
        }
        /* Increase K and return to the start of the main loop */
        k += kstep;
        goto L70;
L90: /* Update the lower triangle of A22 (= A(k:n,k:n)) as */
        /* A22 := A22 - L21*D*L21**T = A22 - L21*W**T */
        /* computing blocks of NB columns at a time */
        i__1 = *n;
        i__2 = *nb;
        for (j = k;
                i__2 < 0 ? j >= i__1 : j <= i__1;
                j += i__2)
        {
            /* Computing MIN */
            i__3 = *nb;
            i__4 = *n - j + 1; // , expr subst
            jb = min(i__3,i__4);
            /* Update the lower triangle of the diagonal block */
            i__3 = j + jb - 1;
            for (jj = j;
                    jj <= i__3;
                    ++jj)
            {
                i__4 = j + jb - jj;
                i__5 = k - 1;
                q__1.r = -1.f;
                q__1.i = -0.f; // , expr subst
                cgemv_("No transpose", &i__4, &i__5, &q__1, &a[jj + a_dim1], lda, &w[jj + w_dim1], ldw, &c_b1, &a[jj + jj * a_dim1] , &c__1);
                /* L100: */
            }
            /* Update the rectangular subdiagonal block */
            if (j + jb <= *n)
            {
                i__3 = *n - j - jb + 1;
                i__4 = k - 1;
                q__1.r = -1.f;
                q__1.i = -0.f; // , expr subst
                cgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &q__1, &a[j + jb + a_dim1], lda, &w[j + w_dim1], ldw, &c_b1, &a[j + jb + j * a_dim1], lda);
            }
            /* L110: */
        }
        /* Put L21 in standard form by partially undoing the interchanges */
        /* of rows in columns 1:k-1 looping backwards from k-1 to 1 */
        j = k - 1;
L120: /* Undo the interchanges (if any) of rows JJ and JP at each */
        /* step J */
        /* (Here, J is a diagonal index) */
        jj = j;
        jp = ipiv[j];
        if (jp < 0)
        {
            jp = -jp;
            /* (Here, J is a diagonal index) */
            --j;
        }
        /* (NOTE: Here, J is used to determine row length. Length J */
        /* of the rows to swap back doesn't include diagonal element) */
        --j;
        if (jp != jj && j >= 1)
        {
            cswap_(&j, &a[jp + a_dim1], lda, &a[jj + a_dim1], lda);
        }
        if (j > 1)
        {
            goto L120;
        }
        /* Set KB to the number of columns factorized */
        *kb = k - 1;
    }
    return 0;
    /* End of CLASYF */
}

Example #23

Show file

File: chbgvd.c Project: csapng/libflame

/* Subroutine */
int chbgvd_(char *jobz, char *uplo, integer *n, integer *ka, integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, real *w, complex *z__, integer *ldz, complex *work, integer *lwork, real *rwork, integer *lrwork, integer *iwork, integer *liwork, integer *info)
{
    /* System generated locals */
    integer ab_dim1, ab_offset, bb_dim1, bb_offset, z_dim1, z_offset, i__1;
    /* Local variables */
    integer inde;
    char vect[1];
    integer llwk2;
    extern /* Subroutine */
    int cgemm_(char *, char *, integer *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *);
    extern logical lsame_(char *, char *);
    integer iinfo, lwmin;
    logical upper;
    integer llrwk;
    logical wantz;
    integer indwk2;
    extern /* Subroutine */
    int cstedc_(char *, integer *, real *, real *, complex *, integer *, complex *, integer *, real *, integer *, integer *, integer *, integer *), chbtrd_(char *, char *, integer *, integer *, complex *, integer *, real *, real *, complex *, integer *, complex *, integer *), chbgst_(char *, char *, integer *, integer *, integer *, complex * , integer *, complex *, integer *, complex *, integer *, complex * , real *, integer *), clacpy_(char *, integer *, integer *, complex *, integer *, complex *, integer *), xerbla_(char *, integer *), cpbstf_(char *, integer *, integer *, complex *, integer *, integer *);
    integer indwrk, liwmin;
    extern /* Subroutine */
    int ssterf_(integer *, real *, real *, integer *);
    integer lrwmin;
    logical lquery;
    /* -- LAPACK driver routine (version 3.4.0) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* November 2011 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Test the input parameters. */
    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1;
    ab -= ab_offset;
    bb_dim1 = *ldbb;
    bb_offset = 1 + bb_dim1;
    bb -= bb_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1;
    z__ -= z_offset;
    --work;
    --rwork;
    --iwork;
    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1;
    *info = 0;
    if (*n <= 1)
    {
        lwmin = *n + 1;
        lrwmin = *n + 1;
        liwmin = 1;
    }
    else if (wantz)
    {
        /* Computing 2nd power */
        i__1 = *n;
        lwmin = i__1 * i__1 << 1;
        /* Computing 2nd power */
        i__1 = *n;
        lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1);
        liwmin = *n * 5 + 3;
    }
    else
    {
        lwmin = *n;
        lrwmin = *n;
        liwmin = 1;
    }
    if (! (wantz || lsame_(jobz, "N")))
    {
        *info = -1;
    }
    else if (! (upper || lsame_(uplo, "L")))
    {
        *info = -2;
    }
    else if (*n < 0)
    {
        *info = -3;
    }
    else if (*ka < 0)
    {
        *info = -4;
    }
    else if (*kb < 0 || *kb > *ka)
    {
        *info = -5;
    }
    else if (*ldab < *ka + 1)
    {
        *info = -7;
    }
    else if (*ldbb < *kb + 1)
    {
        *info = -9;
    }
    else if (*ldz < 1 || wantz && *ldz < *n)
    {
        *info = -12;
    }
    if (*info == 0)
    {
        work[1].r = (real) lwmin;
        work[1].i = 0.f; // , expr subst
        rwork[1] = (real) lrwmin;
        iwork[1] = liwmin;
        if (*lwork < lwmin && ! lquery)
        {
            *info = -14;
        }
        else if (*lrwork < lrwmin && ! lquery)
        {
            *info = -16;
        }
        else if (*liwork < liwmin && ! lquery)
        {
            *info = -18;
        }
    }
    if (*info != 0)
    {
        i__1 = -(*info);
        xerbla_("CHBGVD", &i__1);
        return 0;
    }
    else if (lquery)
    {
        return 0;
    }
    /* Quick return if possible */
    if (*n == 0)
    {
        return 0;
    }
    /* Form a split Cholesky factorization of B. */
    cpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info);
    if (*info != 0)
    {
        *info = *n + *info;
        return 0;
    }
    /* Transform problem to standard eigenvalue problem. */
    inde = 1;
    indwrk = inde + *n;
    indwk2 = *n * *n + 1;
    llwk2 = *lwork - indwk2 + 2;
    llrwk = *lrwork - indwrk + 2;
    chbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb, &z__[z_offset], ldz, &work[1], &rwork[indwrk], &iinfo);
    /* Reduce Hermitian band matrix to tridiagonal form. */
    if (wantz)
    {
        *(unsigned char *)vect = 'U';
    }
    else
    {
        *(unsigned char *)vect = 'N';
    }
    chbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &w[1], &rwork[inde], & z__[z_offset], ldz, &work[1], &iinfo);
    /* For eigenvalues only, call SSTERF. For eigenvectors, call CSTEDC. */
    if (! wantz)
    {
        ssterf_(n, &w[1], &rwork[inde], info);
    }
    else
    {
        cstedc_("I", n, &w[1], &rwork[inde], &work[1], n, &work[indwk2], & llwk2, &rwork[indwrk], &llrwk, &iwork[1], liwork, info);
        cgemm_("N", "N", n, n, n, &c_b1, &z__[z_offset], ldz, &work[1], n, & c_b2, &work[indwk2], n);
        clacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz);
    }
    work[1].r = (real) lwmin;
    work[1].i = 0.f; // , expr subst
    rwork[1] = (real) lrwmin;
    iwork[1] = liwmin;
    return 0;
    /* End of CHBGVD */
}

Example #24

Show file

File: cqlt01.c Project: kstraube/hysim

/* Subroutine */ int cqlt01_(integer *m, integer *n, complex *a, complex *af, 
	complex *q, complex *l, integer *lda, complex *tau, complex *work, 
	integer *lwork, real *rwork, real *result)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, l_dim1, l_offset, q_dim1, 
	    q_offset, i__1, i__2;

    /* Builtin functions */
    /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);

    /* Local variables */
    real eps;
    integer info;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *), cherk_(char *, 
	    char *, integer *, integer *, real *, complex *, integer *, real *
, complex *, integer *);
    real resid, anorm;
    integer minmn;
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *);
    extern /* Subroutine */ int cgeqlf_(integer *, integer *, complex *, 
	    integer *, complex *, complex *, integer *, integer *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex 
	    *, integer *, complex *, integer *), claset_(char *, 
	    integer *, integer *, complex *, complex *, complex *, integer *);
    extern doublereal clansy_(char *, char *, integer *, complex *, integer *, 
	     real *);
    extern /* Subroutine */ int cungql_(integer *, integer *, integer *, 
	    complex *, integer *, complex *, complex *, integer *, integer *);


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CQLT01 tests CGEQLF, which computes the QL factorization of an m-by-n */
/*  matrix A, and partially tests CUNGQL which forms the m-by-m */
/*  orthogonal matrix Q. */

/*  CQLT01 compares L with Q'*A, and checks that Q is orthogonal. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A.  M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A.  N >= 0. */

/*  A       (input) COMPLEX array, dimension (LDA,N) */
/*          The m-by-n matrix A. */

/*  AF      (output) COMPLEX array, dimension (LDA,N) */
/*          Details of the QL factorization of A, as returned by CGEQLF. */
/*          See CGEQLF for further details. */

/*  Q       (output) COMPLEX array, dimension (LDA,M) */
/*          The m-by-m orthogonal matrix Q. */

/*  L       (workspace) COMPLEX array, dimension (LDA,max(M,N)) */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the arrays A, AF, Q and R. */
/*          LDA >= max(M,N). */

/*  TAU     (output) COMPLEX array, dimension (min(M,N)) */
/*          The scalar factors of the elementary reflectors, as returned */
/*          by CGEQLF. */

/*  WORK    (workspace) COMPLEX array, dimension (LWORK) */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK. */

/*  RWORK   (workspace) REAL array, dimension (M) */

/*  RESULT  (output) REAL array, dimension (2) */
/*          The test ratios: */
/*          RESULT(1) = norm( L - Q'*A ) / ( M * norm(A) * EPS ) */
/*          RESULT(2) = norm( I - Q'*Q ) / ( M * EPS ) */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Scalars in Common .. */
/*     .. */
/*     .. Common blocks .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    l_dim1 = *lda;
    l_offset = 1 + l_dim1;
    l -= l_offset;
    q_dim1 = *lda;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    af_dim1 = *lda;
    af_offset = 1 + af_dim1;
    af -= af_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --tau;
    --work;
    --rwork;
    --result;

    /* Function Body */
    minmn = min(*m,*n);
    eps = slamch_("Epsilon");

/*     Copy the matrix A to the array AF. */

    clacpy_("Full", m, n, &a[a_offset], lda, &af[af_offset], lda);

/*     Factorize the matrix A in the array AF. */

    s_copy(srnamc_1.srnamt, "CGEQLF", (ftnlen)6, (ftnlen)6);
    cgeqlf_(m, n, &af[af_offset], lda, &tau[1], &work[1], lwork, &info);

/*     Copy details of Q */

    claset_("Full", m, m, &c_b1, &c_b1, &q[q_offset], lda);
    if (*m >= *n) {
	if (*n < *m && *n > 0) {
	    i__1 = *m - *n;
	    clacpy_("Full", &i__1, n, &af[af_offset], lda, &q[(*m - *n + 1) * 
		    q_dim1 + 1], lda);
	}
	if (*n > 1) {
	    i__1 = *n - 1;
	    i__2 = *n - 1;
	    clacpy_("Upper", &i__1, &i__2, &af[*m - *n + 1 + (af_dim1 << 1)], 
		    lda, &q[*m - *n + 1 + (*m - *n + 2) * q_dim1], lda);
	}
    } else {
	if (*m > 1) {
	    i__1 = *m - 1;
	    i__2 = *m - 1;
	    clacpy_("Upper", &i__1, &i__2, &af[(*n - *m + 2) * af_dim1 + 1], 
		    lda, &q[(q_dim1 << 1) + 1], lda);
	}
    }

/*     Generate the m-by-m matrix Q */

    s_copy(srnamc_1.srnamt, "CUNGQL", (ftnlen)6, (ftnlen)6);
    cungql_(m, m, &minmn, &q[q_offset], lda, &tau[1], &work[1], lwork, &info);

/*     Copy L */

    claset_("Full", m, n, &c_b12, &c_b12, &l[l_offset], lda);
    if (*m >= *n) {
	if (*n > 0) {
	    clacpy_("Lower", n, n, &af[*m - *n + 1 + af_dim1], lda, &l[*m - *
		    n + 1 + l_dim1], lda);
	}
    } else {
	if (*n > *m && *m > 0) {
	    i__1 = *n - *m;
	    clacpy_("Full", m, &i__1, &af[af_offset], lda, &l[l_offset], lda);
	}
	if (*m > 0) {
	    clacpy_("Lower", m, m, &af[(*n - *m + 1) * af_dim1 + 1], lda, &l[(
		    *n - *m + 1) * l_dim1 + 1], lda);
	}
    }

/*     Compute L - Q'*A */

    cgemm_("Conjugate transpose", "No transpose", m, n, m, &c_b19, &q[
	    q_offset], lda, &a[a_offset], lda, &c_b20, &l[l_offset], lda);

/*     Compute norm( L - Q'*A ) / ( M * norm(A) * EPS ) . */

    anorm = clange_("1", m, n, &a[a_offset], lda, &rwork[1]);
    resid = clange_("1", m, n, &l[l_offset], lda, &rwork[1]);
    if (anorm > 0.f) {
	result[1] = resid / (real) max(1,*m) / anorm / eps;
    } else {
	result[1] = 0.f;
    }

/*     Compute I - Q'*Q */

    claset_("Full", m, m, &c_b12, &c_b20, &l[l_offset], lda);
    cherk_("Upper", "Conjugate transpose", m, m, &c_b28, &q[q_offset], lda, &
	    c_b29, &l[l_offset], lda);

/*     Compute norm( I - Q'*Q ) / ( M * EPS ) . */

    resid = clansy_("1", "Upper", m, &l[l_offset], lda, &rwork[1]);

    result[2] = resid / (real) max(1,*m) / eps;

    return 0;

/*     End of CQLT01 */

} /* cqlt01_ */

Example #25

Show file

File: cstt22.c Project: zangel/uquad

/* Subroutine */ int cstt22_(integer *n, integer *m, integer *kband, real *ad,
	 real *ae, real *sd, real *se, complex *u, integer *ldu, complex *
	work, integer *ldwork, real *rwork, real *result)
{
    /* System generated locals */
    integer u_dim1, u_offset, work_dim1, work_offset, i__1, i__2, i__3, i__4, 
	    i__5, i__6;
    real r__1, r__2, r__3, r__4, r__5;
    complex q__1, q__2;

    /* Local variables */
    static complex aukj;
    static real unfl;
    static integer i__, j, k;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *);
    static real anorm, wnorm;
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *), slamch_(char *), clansy_(char 
	    *, char *, integer *, complex *, integer *, real *);
    static real ulp;


#define work_subscr(a_1,a_2) (a_2)*work_dim1 + a_1
#define work_ref(a_1,a_2) work[work_subscr(a_1,a_2)]
#define u_subscr(a_1,a_2) (a_2)*u_dim1 + a_1
#define u_ref(a_1,a_2) u[u_subscr(a_1,a_2)]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    CSTT22  checks a set of M eigenvalues and eigenvectors,   

        A U = U S   

    where A is Hermitian tridiagonal, the columns of U are unitary,   
    and S is diagonal (if KBAND=0) or Hermitian tridiagonal (if KBAND=1).   
    Two tests are performed:   

       RESULT(1) = | U* A U - S | / ( |A| m ulp )   

       RESULT(2) = | I - U*U | / ( m ulp )   

    Arguments   
    =========   

    N       (input) INTEGER   
            The size of the matrix.  If it is zero, CSTT22 does nothing.   
            It must be at least zero.   

    M       (input) INTEGER   
            The number of eigenpairs to check.  If it is zero, CSTT22   
            does nothing.  It must be at least zero.   

    KBAND   (input) INTEGER   
            The bandwidth of the matrix S.  It may only be zero or one.   
            If zero, then S is diagonal, and SE is not referenced.  If   
            one, then S is Hermitian tri-diagonal.   

    AD      (input) REAL array, dimension (N)   
            The diagonal of the original (unfactored) matrix A.  A is   
            assumed to be Hermitian tridiagonal.   

    AE      (input) REAL array, dimension (N)   
            The off-diagonal of the original (unfactored) matrix A.  A   
            is assumed to be Hermitian tridiagonal.  AE(1) is ignored,   
            AE(2) is the (1,2) and (2,1) element, etc.   

    SD      (input) REAL array, dimension (N)   
            The diagonal of the (Hermitian tri-) diagonal matrix S.   

    SE      (input) REAL array, dimension (N)   
            The off-diagonal of the (Hermitian tri-) diagonal matrix S.   
            Not referenced if KBSND=0.  If KBAND=1, then AE(1) is   
            ignored, SE(2) is the (1,2) and (2,1) element, etc.   

    U       (input) REAL array, dimension (LDU, N)   
            The unitary matrix in the decomposition.   

    LDU     (input) INTEGER   
            The leading dimension of U.  LDU must be at least N.   

    WORK    (workspace) COMPLEX array, dimension (LDWORK, M+1)   

    LDWORK  (input) INTEGER   
            The leading dimension of WORK.  LDWORK must be at least   
            max(1,M).   

    RWORK   (workspace) REAL array, dimension (N)   

    RESULT  (output) REAL array, dimension (2)   
            The values computed by the two tests described above.  The   
            values are currently limited to 1/ulp, to avoid overflow.   

    =====================================================================   


       Parameter adjustments */
    --ad;
    --ae;
    --sd;
    --se;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1 * 1;
    u -= u_offset;
    work_dim1 = *ldwork;
    work_offset = 1 + work_dim1 * 1;
    work -= work_offset;
    --rwork;
    --result;

    /* Function Body */
    result[1] = 0.f;
    result[2] = 0.f;
    if (*n <= 0 || *m <= 0) {
	return 0;
    }

    unfl = slamch_("Safe minimum");
    ulp = slamch_("Epsilon");

/*     Do Test 1   

       Compute the 1-norm of A. */

    if (*n > 1) {
	anorm = dabs(ad[1]) + dabs(ae[1]);
	i__1 = *n - 1;
	for (j = 2; j <= i__1; ++j) {
/* Computing MAX */
	    r__4 = anorm, r__5 = (r__1 = ad[j], dabs(r__1)) + (r__2 = ae[j], 
		    dabs(r__2)) + (r__3 = ae[j - 1], dabs(r__3));
	    anorm = dmax(r__4,r__5);
/* L10: */
	}
/* Computing MAX */
	r__3 = anorm, r__4 = (r__1 = ad[*n], dabs(r__1)) + (r__2 = ae[*n - 1],
		 dabs(r__2));
	anorm = dmax(r__3,r__4);
    } else {
	anorm = dabs(ad[1]);
    }
    anorm = dmax(anorm,unfl);

/*     Norm of U*AU - S */

    i__1 = *m;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = *m;
	for (j = 1; j <= i__2; ++j) {
	    i__3 = work_subscr(i__, j);
	    work[i__3].r = 0.f, work[i__3].i = 0.f;
	    i__3 = *n;
	    for (k = 1; k <= i__3; ++k) {
		i__4 = k;
		i__5 = u_subscr(k, j);
		q__1.r = ad[i__4] * u[i__5].r, q__1.i = ad[i__4] * u[i__5].i;
		aukj.r = q__1.r, aukj.i = q__1.i;
		if (k != *n) {
		    i__4 = k;
		    i__5 = u_subscr(k + 1, j);
		    q__2.r = ae[i__4] * u[i__5].r, q__2.i = ae[i__4] * u[i__5]
			    .i;
		    q__1.r = aukj.r + q__2.r, q__1.i = aukj.i + q__2.i;
		    aukj.r = q__1.r, aukj.i = q__1.i;
		}
		if (k != 1) {
		    i__4 = k - 1;
		    i__5 = u_subscr(k - 1, j);
		    q__2.r = ae[i__4] * u[i__5].r, q__2.i = ae[i__4] * u[i__5]
			    .i;
		    q__1.r = aukj.r + q__2.r, q__1.i = aukj.i + q__2.i;
		    aukj.r = q__1.r, aukj.i = q__1.i;
		}
		i__4 = work_subscr(i__, j);
		i__5 = work_subscr(i__, j);
		i__6 = u_subscr(k, i__);
		q__2.r = u[i__6].r * aukj.r - u[i__6].i * aukj.i, q__2.i = u[
			i__6].r * aukj.i + u[i__6].i * aukj.r;
		q__1.r = work[i__5].r + q__2.r, q__1.i = work[i__5].i + 
			q__2.i;
		work[i__4].r = q__1.r, work[i__4].i = q__1.i;
/* L20: */
	    }
/* L30: */
	}
	i__2 = work_subscr(i__, i__);
	i__3 = work_subscr(i__, i__);
	i__4 = i__;
	q__1.r = work[i__3].r - sd[i__4], q__1.i = work[i__3].i;
	work[i__2].r = q__1.r, work[i__2].i = q__1.i;
	if (*kband == 1) {
	    if (i__ != 1) {
		i__2 = work_subscr(i__, i__ - 1);
		i__3 = work_subscr(i__, i__ - 1);
		i__4 = i__ - 1;
		q__1.r = work[i__3].r - se[i__4], q__1.i = work[i__3].i;
		work[i__2].r = q__1.r, work[i__2].i = q__1.i;
	    }
	    if (i__ != *n) {
		i__2 = work_subscr(i__, i__ + 1);
		i__3 = work_subscr(i__, i__ + 1);
		i__4 = i__;
		q__1.r = work[i__3].r - se[i__4], q__1.i = work[i__3].i;
		work[i__2].r = q__1.r, work[i__2].i = q__1.i;
	    }
	}
/* L40: */
    }

    wnorm = clansy_("1", "L", m, &work[work_offset], m, &rwork[1]);

    if (anorm > wnorm) {
	result[1] = wnorm / anorm / (*m * ulp);
    } else {
	if (anorm < 1.f) {
/* Computing MIN */
	    r__1 = wnorm, r__2 = *m * anorm;
	    result[1] = dmin(r__1,r__2) / anorm / (*m * ulp);
	} else {
/* Computing MIN */
	    r__1 = wnorm / anorm, r__2 = (real) (*m);
	    result[1] = dmin(r__1,r__2) / (*m * ulp);
	}
    }

/*     Do Test 2   

       Compute  U*U - I */

    cgemm_("T", "N", m, m, n, &c_b2, &u[u_offset], ldu, &u[u_offset], ldu, &
	    c_b1, &work[work_offset], m);

    i__1 = *m;
    for (j = 1; j <= i__1; ++j) {
	i__2 = work_subscr(j, j);
	i__3 = work_subscr(j, j);
	q__1.r = work[i__3].r - 1.f, q__1.i = work[i__3].i;
	work[i__2].r = q__1.r, work[i__2].i = q__1.i;
/* L50: */
    }

/* Computing MIN */
    r__1 = (real) (*m), r__2 = clange_("1", m, m, &work[work_offset], m, &
	    rwork[1]);
    result[2] = dmin(r__1,r__2) / (*m * ulp);

    return 0;

/*     End of CSTT22 */

} /* cstt22_ */

Example #26

Show file

File: cqlt01.c Project: zangel/uquad

/* Subroutine */ int cqlt01_(integer *m, integer *n, complex *a, complex *af, 
	complex *q, complex *l, integer *lda, complex *tau, complex *work, 
	integer *lwork, real *rwork, real *result)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, l_dim1, l_offset, q_dim1, 
	    q_offset, i__1, i__2;

    /* Builtin functions   
       Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);

    /* Local variables */
    static integer info;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *), cherk_(char *, 
	    char *, integer *, integer *, real *, complex *, integer *, real *
	    , complex *, integer *);
    static real resid, anorm;
    static integer minmn;
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *);
    extern /* Subroutine */ int cgeqlf_(integer *, integer *, complex *, 
	    integer *, complex *, complex *, integer *, integer *);
    extern doublereal slamch_(char *);
    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex 
	    *, integer *, complex *, integer *), claset_(char *, 
	    integer *, integer *, complex *, complex *, complex *, integer *);
    extern doublereal clansy_(char *, char *, integer *, complex *, integer *,
	     real *);
    extern /* Subroutine */ int cungql_(integer *, integer *, integer *, 
	    complex *, integer *, complex *, complex *, integer *, integer *);
    static real eps;


#define l_subscr(a_1,a_2) (a_2)*l_dim1 + a_1
#define l_ref(a_1,a_2) l[l_subscr(a_1,a_2)]
#define q_subscr(a_1,a_2) (a_2)*q_dim1 + a_1
#define q_ref(a_1,a_2) q[q_subscr(a_1,a_2)]
#define af_subscr(a_1,a_2) (a_2)*af_dim1 + a_1
#define af_ref(a_1,a_2) af[af_subscr(a_1,a_2)]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    CQLT01 tests CGEQLF, which computes the QL factorization of an m-by-n   
    matrix A, and partially tests CUNGQL which forms the m-by-m   
    orthogonal matrix Q.   

    CQLT01 compares L with Q'*A, and checks that Q is orthogonal.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A.  N >= 0.   

    A       (input) COMPLEX array, dimension (LDA,N)   
            The m-by-n matrix A.   

    AF      (output) COMPLEX array, dimension (LDA,N)   
            Details of the QL factorization of A, as returned by CGEQLF.   
            See CGEQLF for further details.   

    Q       (output) COMPLEX array, dimension (LDA,M)   
            The m-by-m orthogonal matrix Q.   

    L       (workspace) COMPLEX array, dimension (LDA,max(M,N))   

    LDA     (input) INTEGER   
            The leading dimension of the arrays A, AF, Q and R.   
            LDA >= max(M,N).   

    TAU     (output) COMPLEX array, dimension (min(M,N))   
            The scalar factors of the elementary reflectors, as returned   
            by CGEQLF.   

    WORK    (workspace) COMPLEX array, dimension (LWORK)   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.   

    RWORK   (workspace) REAL array, dimension (M)   

    RESULT  (output) REAL array, dimension (2)   
            The test ratios:   
            RESULT(1) = norm( L - Q'*A ) / ( M * norm(A) * EPS )   
            RESULT(2) = norm( I - Q'*Q ) / ( M * EPS )   

    =====================================================================   


       Parameter adjustments */
    l_dim1 = *lda;
    l_offset = 1 + l_dim1 * 1;
    l -= l_offset;
    q_dim1 = *lda;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    af_dim1 = *lda;
    af_offset = 1 + af_dim1 * 1;
    af -= af_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --tau;
    --work;
    --rwork;
    --result;

    /* Function Body */
    minmn = min(*m,*n);
    eps = slamch_("Epsilon");

/*     Copy the matrix A to the array AF. */

    clacpy_("Full", m, n, &a[a_offset], lda, &af[af_offset], lda);

/*     Factorize the matrix A in the array AF. */

    s_copy(srnamc_1.srnamt, "CGEQLF", (ftnlen)6, (ftnlen)6);
    cgeqlf_(m, n, &af[af_offset], lda, &tau[1], &work[1], lwork, &info);

/*     Copy details of Q */

    claset_("Full", m, m, &c_b1, &c_b1, &q[q_offset], lda);
    if (*m >= *n) {
	if (*n < *m && *n > 0) {
	    i__1 = *m - *n;
	    clacpy_("Full", &i__1, n, &af[af_offset], lda, &q_ref(1, *m - *n 
		    + 1), lda);
	}
	if (*n > 1) {
	    i__1 = *n - 1;
	    i__2 = *n - 1;
	    clacpy_("Upper", &i__1, &i__2, &af_ref(*m - *n + 1, 2), lda, &
		    q_ref(*m - *n + 1, *m - *n + 2), lda);
	}
    } else {
	if (*m > 1) {
	    i__1 = *m - 1;
	    i__2 = *m - 1;
	    clacpy_("Upper", &i__1, &i__2, &af_ref(1, *n - *m + 2), lda, &
		    q_ref(1, 2), lda);
	}
    }

/*     Generate the m-by-m matrix Q */

    s_copy(srnamc_1.srnamt, "CUNGQL", (ftnlen)6, (ftnlen)6);
    cungql_(m, m, &minmn, &q[q_offset], lda, &tau[1], &work[1], lwork, &info);

/*     Copy L */

    claset_("Full", m, n, &c_b12, &c_b12, &l[l_offset], lda);
    if (*m >= *n) {
	if (*n > 0) {
	    clacpy_("Lower", n, n, &af_ref(*m - *n + 1, 1), lda, &l_ref(*m - *
		    n + 1, 1), lda);
	}
    } else {
	if (*n > *m && *m > 0) {
	    i__1 = *n - *m;
	    clacpy_("Full", m, &i__1, &af[af_offset], lda, &l[l_offset], lda);
	}
	if (*m > 0) {
	    clacpy_("Lower", m, m, &af_ref(1, *n - *m + 1), lda, &l_ref(1, *n 
		    - *m + 1), lda);
	}
    }

/*     Compute L - Q'*A */

    cgemm_("Conjugate transpose", "No transpose", m, n, m, &c_b19, &q[
	    q_offset], lda, &a[a_offset], lda, &c_b20, &l[l_offset], lda);

/*     Compute norm( L - Q'*A ) / ( M * norm(A) * EPS ) . */

    anorm = clange_("1", m, n, &a[a_offset], lda, &rwork[1]);
    resid = clange_("1", m, n, &l[l_offset], lda, &rwork[1]);
    if (anorm > 0.f) {
	result[1] = resid / (real) max(1,*m) / anorm / eps;
    } else {
	result[1] = 0.f;
    }

/*     Compute I - Q'*Q */

    claset_("Full", m, m, &c_b12, &c_b20, &l[l_offset], lda);
    cherk_("Upper", "Conjugate transpose", m, m, &c_b28, &q[q_offset], lda, &
	    c_b29, &l[l_offset], lda);

/*     Compute norm( I - Q'*Q ) / ( M * EPS ) . */

    resid = clansy_("1", "Upper", m, &l[l_offset], lda, &rwork[1]);

    result[2] = resid / (real) max(1,*m) / eps;

    return 0;

/*     End of CQLT01 */

} /* cqlt01_ */

Example #27

Show file

File: clarzb.c Project: Electrostatics/FETK

/* Subroutine */ int clarzb_(char *side, char *trans, char *direct, char *
                             storev, integer *m, integer *n, integer *k, integer *l, complex *v,
                             integer *ldv, complex *t, integer *ldt, complex *c__, integer *ldc,
                             complex *work, integer *ldwork, ftnlen side_len, ftnlen trans_len,
                             ftnlen direct_len, ftnlen storev_len)
{
    /* System generated locals */
    integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1,
            work_offset, i__1, i__2, i__3, i__4, i__5;
    complex q__1;

    /* Local variables */
    static integer i__, j, info;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *,
                                       integer *, complex *, complex *, integer *, complex *, integer *,
                                       complex *, complex *, integer *, ftnlen, ftnlen);
    extern logical lsame_(char *, char *, ftnlen, ftnlen);
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *,
                                       complex *, integer *), ctrmm_(char *, char *, char *, char *,
                                               integer *, integer *, complex *, complex *, integer *, complex *,
                                               integer *, ftnlen, ftnlen, ftnlen, ftnlen), clacgv_(integer *,
                                                       complex *, integer *), xerbla_(char *, integer *, ftnlen);
    static char transt[1];


    /*  -- LAPACK routine (version 3.0) -- */
    /*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
    /*     Courant Institute, Argonne National Lab, and Rice University */
    /*     December 1, 1999 */

    /*     .. Scalar Arguments .. */
    /*     .. */
    /*     .. Array Arguments .. */
    /*     .. */

    /*  Purpose */
    /*  ======= */

    /*  CLARZB applies a complex block reflector H or its transpose H**H */
    /*  to a complex distributed M-by-N  C from the left or the right. */

    /*  Currently, only STOREV = 'R' and DIRECT = 'B' are supported. */

    /*  Arguments */
    /*  ========= */

    /*  SIDE    (input) CHARACTER*1 */
    /*          = 'L': apply H or H' from the Left */
    /*          = 'R': apply H or H' from the Right */

    /*  TRANS   (input) CHARACTER*1 */
    /*          = 'N': apply H (No transpose) */
    /*          = 'C': apply H' (Conjugate transpose) */

    /*  DIRECT  (input) CHARACTER*1 */
    /*          Indicates how H is formed from a product of elementary */
    /*          reflectors */
    /*          = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) */
    /*          = 'B': H = H(k) . . . H(2) H(1) (Backward) */

    /*  STOREV  (input) CHARACTER*1 */
    /*          Indicates how the vectors which define the elementary */
    /*          reflectors are stored: */
    /*          = 'C': Columnwise                        (not supported yet) */
    /*          = 'R': Rowwise */

    /*  M       (input) INTEGER */
    /*          The number of rows of the matrix C. */

    /*  N       (input) INTEGER */
    /*          The number of columns of the matrix C. */

    /*  K       (input) INTEGER */
    /*          The order of the matrix T (= the number of elementary */
    /*          reflectors whose product defines the block reflector). */

    /*  L       (input) INTEGER */
    /*          The number of columns of the matrix V containing the */
    /*          meaningful part of the Householder reflectors. */
    /*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */

    /*  V       (input) COMPLEX array, dimension (LDV,NV). */
    /*          If STOREV = 'C', NV = K; if STOREV = 'R', NV = L. */

    /*  LDV     (input) INTEGER */
    /*          The leading dimension of the array V. */
    /*          If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K. */

    /*  T       (input) COMPLEX array, dimension (LDT,K) */
    /*          The triangular K-by-K matrix T in the representation of the */
    /*          block reflector. */

    /*  LDT     (input) INTEGER */
    /*          The leading dimension of the array T. LDT >= K. */

    /*  C       (input/output) COMPLEX array, dimension (LDC,N) */
    /*          On entry, the M-by-N matrix C. */
    /*          On exit, C is overwritten by H*C or H'*C or C*H or C*H'. */

    /*  LDC     (input) INTEGER */
    /*          The leading dimension of the array C. LDC >= max(1,M). */

    /*  WORK    (workspace) COMPLEX array, dimension (LDWORK,K) */

    /*  LDWORK  (input) INTEGER */
    /*          The leading dimension of the array WORK. */
    /*          If SIDE = 'L', LDWORK >= max(1,N); */
    /*          if SIDE = 'R', LDWORK >= max(1,M). */

    /*  Further Details */
    /*  =============== */

    /*  Based on contributions by */
    /*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */

    /*  ===================================================================== */

    /*     .. Parameters .. */
    /*     .. */
    /*     .. Local Scalars .. */
    /*     .. */
    /*     .. External Functions .. */
    /*     .. */
    /*     .. External Subroutines .. */
    /*     .. */
    /*     .. Executable Statements .. */

    /*     Quick return if possible */

    /* Parameter adjustments */
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1;
    t -= t_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    work_dim1 = *ldwork;
    work_offset = 1 + work_dim1;
    work -= work_offset;

    /* Function Body */
    if (*m <= 0 || *n <= 0) {
        return 0;
    }

    /*     Check for currently supported options */

    info = 0;
    if (! lsame_(direct, "B", (ftnlen)1, (ftnlen)1)) {
        info = -3;
    } else if (! lsame_(storev, "R", (ftnlen)1, (ftnlen)1)) {
        info = -4;
    }
    if (info != 0) {
        i__1 = -info;
        xerbla_("CLARZB", &i__1, (ftnlen)6);
        return 0;
    }

    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
        *(unsigned char *)transt = 'C';
    } else {
        *(unsigned char *)transt = 'N';
    }

    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {

        /*        Form  H * C  or  H' * C */

        /*        W( 1:n, 1:k ) = conjg( C( 1:k, 1:n )' ) */

        i__1 = *k;
        for (j = 1; j <= i__1; ++j) {
            ccopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], &c__1);
            /* L10: */
        }

        /*        W( 1:n, 1:k ) = W( 1:n, 1:k ) + ... */
        /*                        conjg( C( m-l+1:m, 1:n )' ) * V( 1:k, 1:l )' */

        if (*l > 0) {
            cgemm_("Transpose", "Conjugate transpose", n, k, l, &c_b1, &c__[*
                    m - *l + 1 + c_dim1], ldc, &v[v_offset], ldv, &c_b1, &
                   work[work_offset], ldwork, (ftnlen)9, (ftnlen)19);
        }

        /*        W( 1:n, 1:k ) = W( 1:n, 1:k ) * T'  or  W( 1:m, 1:k ) * T */

        ctrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b1, &t[t_offset]
               , ldt, &work[work_offset], ldwork, (ftnlen)5, (ftnlen)5, (
                   ftnlen)1, (ftnlen)8);

        /*        C( 1:k, 1:n ) = C( 1:k, 1:n ) - conjg( W( 1:n, 1:k )' ) */

        i__1 = *n;
        for (j = 1; j <= i__1; ++j) {
            i__2 = *k;
            for (i__ = 1; i__ <= i__2; ++i__) {
                i__3 = i__ + j * c_dim1;
                i__4 = i__ + j * c_dim1;
                i__5 = j + i__ * work_dim1;
                q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[i__4].i -
                         work[i__5].i;
                c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
                /* L20: */
            }
            /* L30: */
        }

        /*        C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... */
        /*                    conjg( V( 1:k, 1:l )' ) * conjg( W( 1:n, 1:k )' ) */

        if (*l > 0) {
            q__1.r = -1.f, q__1.i = -0.f;
            cgemm_("Transpose", "Transpose", l, n, k, &q__1, &v[v_offset],
                   ldv, &work[work_offset], ldwork, &c_b1, &c__[*m - *l + 1
                           + c_dim1], ldc, (ftnlen)9, (ftnlen)9);
        }

    } else if (lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {

        /*        Form  C * H  or  C * H' */

        /*        W( 1:m, 1:k ) = C( 1:m, 1:k ) */

        i__1 = *k;
        for (j = 1; j <= i__1; ++j) {
            ccopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &
                   c__1);
            /* L40: */
        }

        /*        W( 1:m, 1:k ) = W( 1:m, 1:k ) + ... */
        /*                        C( 1:m, n-l+1:n ) * conjg( V( 1:k, 1:l )' ) */

        if (*l > 0) {
            cgemm_("No transpose", "Transpose", m, k, l, &c_b1, &c__[(*n - *l
                    + 1) * c_dim1 + 1], ldc, &v[v_offset], ldv, &c_b1, &work[
                       work_offset], ldwork, (ftnlen)12, (ftnlen)9);
        }

        /*        W( 1:m, 1:k ) = W( 1:m, 1:k ) * conjg( T )  or */
        /*                        W( 1:m, 1:k ) * conjg( T' ) */

        i__1 = *k;
        for (j = 1; j <= i__1; ++j) {
            i__2 = *k - j + 1;
            clacgv_(&i__2, &t[j + j * t_dim1], &c__1);
            /* L50: */
        }
        ctrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b1, &t[t_offset],
               ldt, &work[work_offset], ldwork, (ftnlen)5, (ftnlen)5, (
                   ftnlen)1, (ftnlen)8);
        i__1 = *k;
        for (j = 1; j <= i__1; ++j) {
            i__2 = *k - j + 1;
            clacgv_(&i__2, &t[j + j * t_dim1], &c__1);
            /* L60: */
        }

        /*        C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k ) */

        i__1 = *k;
        for (j = 1; j <= i__1; ++j) {
            i__2 = *m;
            for (i__ = 1; i__ <= i__2; ++i__) {
                i__3 = i__ + j * c_dim1;
                i__4 = i__ + j * c_dim1;
                i__5 = i__ + j * work_dim1;
                q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[i__4].i -
                         work[i__5].i;
                c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
                /* L70: */
            }
            /* L80: */
        }

        /*        C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... */
        /*                            W( 1:m, 1:k ) * conjg( V( 1:k, 1:l ) ) */

        i__1 = *l;
        for (j = 1; j <= i__1; ++j) {
            clacgv_(k, &v[j * v_dim1 + 1], &c__1);
            /* L90: */
        }
        if (*l > 0) {
            q__1.r = -1.f, q__1.i = -0.f;
            cgemm_("No transpose", "No transpose", m, l, k, &q__1, &work[
                       work_offset], ldwork, &v[v_offset], ldv, &c_b1, &c__[(*n
                               - *l + 1) * c_dim1 + 1], ldc, (ftnlen)12, (ftnlen)12);
        }
        i__1 = *l;
        for (j = 1; j <= i__1; ++j) {
            clacgv_(k, &v[j * v_dim1 + 1], &c__1);
            /* L100: */
        }

    }

    return 0;

    /*     End of CLARZB */

} /* clarzb_ */

Example #28

Show file

File: chbgvd.c Project: juanjosegarciaripoll/cblapack

/* Subroutine */ int chbgvd_(char *jobz, char *uplo, integer *n, integer *ka, 
	integer *kb, complex *ab, integer *ldab, complex *bb, integer *ldbb, 
	real *w, complex *z__, integer *ldz, complex *work, integer *lwork, 
	real *rwork, integer *lrwork, integer *iwork, integer *liwork, 
	integer *info)
{
    /* System generated locals */
    integer ab_dim1, ab_offset, bb_dim1, bb_offset, z_dim1, z_offset, i__1;

    /* Local variables */
    integer inde;
    char vect[1];
    integer llwk2;
    integer iinfo, lwmin;
    logical upper;
    integer llrwk;
    logical wantz;
    integer indwk2;
    integer indwrk, liwmin;
    integer lrwmin;
    logical lquery;

/*  -- LAPACK driver routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  CHBGVD computes all the eigenvalues, and optionally, the eigenvectors */
/*  of a complex generalized Hermitian-definite banded eigenproblem, of */
/*  the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian */
/*  and banded, and B is also positive definite.  If eigenvectors are */
/*  desired, it uses a divide and conquer algorithm. */

/*  The divide and conquer algorithm makes very mild assumptions about */
/*  floating point arithmetic. It will work on machines with a guard */
/*  digit in add/subtract, or on those binary machines without guard */
/*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
/*  Cray-2. It could conceivably fail on hexadecimal or decimal machines */
/*  without guard digits, but we know of none. */

/*  Arguments */
/*  ========= */

/*  JOBZ    (input) CHARACTER*1 */
/*          = 'N':  Compute eigenvalues only; */
/*          = 'V':  Compute eigenvalues and eigenvectors. */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  Upper triangles of A and B are stored; */
/*          = 'L':  Lower triangles of A and B are stored. */

/*  N       (input) INTEGER */
/*          The order of the matrices A and B.  N >= 0. */

/*  KA      (input) INTEGER */
/*          The number of superdiagonals of the matrix A if UPLO = 'U', */
/*          or the number of subdiagonals if UPLO = 'L'. KA >= 0. */

/*  KB      (input) INTEGER */
/*          The number of superdiagonals of the matrix B if UPLO = 'U', */
/*          or the number of subdiagonals if UPLO = 'L'. KB >= 0. */

/*  AB      (input/output) COMPLEX array, dimension (LDAB, N) */
/*          On entry, the upper or lower triangle of the Hermitian band */
/*          matrix A, stored in the first ka+1 rows of the array.  The */
/*          j-th column of A is stored in the j-th column of the array AB */
/*          as follows: */
/*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j; */
/*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka). */

/*          On exit, the contents of AB are destroyed. */

/*  LDAB    (input) INTEGER */
/*          The leading dimension of the array AB.  LDAB >= KA+1. */

/*  BB      (input/output) COMPLEX array, dimension (LDBB, N) */
/*          On entry, the upper or lower triangle of the Hermitian band */
/*          matrix B, stored in the first kb+1 rows of the array.  The */
/*          j-th column of B is stored in the j-th column of the array BB */
/*          as follows: */
/*          if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j; */
/*          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=min(n,j+kb). */

/*          On exit, the factor S from the split Cholesky factorization */
/*          B = S**H*S, as returned by CPBSTF. */

/*  LDBB    (input) INTEGER */
/*          The leading dimension of the array BB.  LDBB >= KB+1. */

/*  W       (output) REAL array, dimension (N) */
/*          If INFO = 0, the eigenvalues in ascending order. */

/*  Z       (output) COMPLEX array, dimension (LDZ, N) */
/*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of */
/*          eigenvectors, with the i-th column of Z holding the */
/*          eigenvector associated with W(i). The eigenvectors are */
/*          normalized so that Z**H*B*Z = I. */
/*          If JOBZ = 'N', then Z is not referenced. */

/*  LDZ     (input) INTEGER */
/*          The leading dimension of the array Z.  LDZ >= 1, and if */
/*          JOBZ = 'V', LDZ >= N. */

/*  WORK    (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) */
/*          On exit, if INFO=0, WORK(1) returns the optimal LWORK. */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK. */
/*          If N <= 1,               LWORK >= 1. */
/*          If JOBZ = 'N' and N > 1, LWORK >= N. */
/*          If JOBZ = 'V' and N > 1, LWORK >= 2*N**2. */

/*          If LWORK = -1, then a workspace query is assumed; the routine */
/*          only calculates the optimal sizes of the WORK, RWORK and */
/*          IWORK arrays, returns these values as the first entries of */
/*          the WORK, RWORK and IWORK arrays, and no error message */
/*          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */

/*  RWORK   (workspace/output) REAL array, dimension (MAX(1,LRWORK)) */
/*          On exit, if INFO=0, RWORK(1) returns the optimal LRWORK. */

/*  LRWORK  (input) INTEGER */
/*          The dimension of array RWORK. */
/*          If N <= 1,               LRWORK >= 1. */
/*          If JOBZ = 'N' and N > 1, LRWORK >= N. */
/*          If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2. */

/*          If LRWORK = -1, then a workspace query is assumed; the */
/*          routine only calculates the optimal sizes of the WORK, RWORK */
/*          and IWORK arrays, returns these values as the first entries */
/*          of the WORK, RWORK and IWORK arrays, and no error message */
/*          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */

/*  IWORK   (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */
/*          On exit, if INFO=0, IWORK(1) returns the optimal LIWORK. */

/*  LIWORK  (input) INTEGER */
/*          The dimension of array IWORK. */
/*          If JOBZ = 'N' or N <= 1, LIWORK >= 1. */
/*          If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. */

/*          If LIWORK = -1, then a workspace query is assumed; the */
/*          routine only calculates the optimal sizes of the WORK, RWORK */
/*          and IWORK arrays, returns these values as the first entries */
/*          of the WORK, RWORK and IWORK arrays, and no error message */
/*          related to LWORK or LRWORK or LIWORK is issued by XERBLA. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
/*          > 0:  if INFO = i, and i is: */
/*             <= N:  the algorithm failed to converge: */
/*                    i off-diagonal elements of an intermediate */
/*                    tridiagonal form did not converge to zero; */
/*             > N:   if INFO = N + i, for 1 <= i <= N, then CPBSTF */
/*                    returned INFO = i: B is not positive definite. */
/*                    The factorization of B could not be completed and */
/*                    no eigenvalues or eigenvectors were computed. */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */

/*  ===================================================================== */

/*     Test the input parameters. */

    /* Parameter adjustments */
    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1;
    ab -= ab_offset;
    bb_dim1 = *ldbb;
    bb_offset = 1 + bb_dim1;
    bb -= bb_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1;
    z__ -= z_offset;
    --work;
    --rwork;
    --iwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1;

    *info = 0;
    if (*n <= 1) {
	lwmin = 1;
	lrwmin = 1;
	liwmin = 1;
    } else if (wantz) {
/* Computing 2nd power */
	i__1 = *n;
	lwmin = i__1 * i__1 << 1;
/* Computing 2nd power */
	i__1 = *n;
	lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1);
	liwmin = *n * 5 + 3;
    } else {
	lwmin = *n;
	lrwmin = *n;
	liwmin = 1;
    }
    if (! (wantz || lsame_(jobz, "N"))) {
	*info = -1;
    } else if (! (upper || lsame_(uplo, "L"))) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*ka < 0) {
	*info = -4;
    } else if (*kb < 0 || *kb > *ka) {
	*info = -5;
    } else if (*ldab < *ka + 1) {
	*info = -7;
    } else if (*ldbb < *kb + 1) {
	*info = -9;
    } else if (*ldz < 1 || wantz && *ldz < *n) {
	*info = -12;
    }

    if (*info == 0) {
	work[1].r = (real) lwmin, work[1].i = 0.f;
	rwork[1] = (real) lrwmin;
	iwork[1] = liwmin;

	if (*lwork < lwmin && ! lquery) {
	    *info = -14;
	} else if (*lrwork < lrwmin && ! lquery) {
	    *info = -16;
	} else if (*liwork < liwmin && ! lquery) {
	    *info = -18;
	}
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CHBGVD", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Form a split Cholesky factorization of B. */

    cpbstf_(uplo, n, kb, &bb[bb_offset], ldbb, info);
    if (*info != 0) {
	*info = *n + *info;
	return 0;
    }

/*     Transform problem to standard eigenvalue problem. */

    inde = 1;
    indwrk = inde + *n;
    indwk2 = *n * *n + 1;
    llwk2 = *lwork - indwk2 + 2;
    llrwk = *lrwork - indwrk + 2;
    chbgst_(jobz, uplo, n, ka, kb, &ab[ab_offset], ldab, &bb[bb_offset], ldbb, 
	     &z__[z_offset], ldz, &work[1], &rwork[indwrk], &iinfo);

/*     Reduce Hermitian band matrix to tridiagonal form. */

    if (wantz) {
	*(unsigned char *)vect = 'U';
    } else {
	*(unsigned char *)vect = 'N';
    }
    chbtrd_(vect, uplo, n, ka, &ab[ab_offset], ldab, &w[1], &rwork[inde], &
	    z__[z_offset], ldz, &work[1], &iinfo);

/*     For eigenvalues only, call SSTERF.  For eigenvectors, call CSTEDC. */

    if (! wantz) {
	ssterf_(n, &w[1], &rwork[inde], info);
    } else {
	cstedc_("I", n, &w[1], &rwork[inde], &work[1], n, &work[indwk2], &
		llwk2, &rwork[indwrk], &llrwk, &iwork[1], liwork, info);
	cgemm_("N", "N", n, n, n, &c_b1, &z__[z_offset], ldz, &work[1], n, &
		c_b2, &work[indwk2], n);
	clacpy_("A", n, n, &work[indwk2], n, &z__[z_offset], ldz);
    }

    work[1].r = (real) lwmin, work[1].i = 0.f;
    rwork[1] = (real) lrwmin;
    iwork[1] = liwmin;
    return 0;

/*     End of CHBGVD */

} /* chbgvd_ */

Example #29

Show file

File: cpotrf.c Project: dacap/loseface

/* Subroutine */ int cpotrf_(char *uplo, integer *n, complex *a, integer *lda, 
	 integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    complex q__1;

    /* Local variables */
    integer j, jb, nb;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *), cherk_(char *, 
	    char *, integer *, integer *, real *, complex *, integer *, real *
, complex *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *, 
	    integer *, integer *, complex *, complex *, integer *, complex *, 
	    integer *);
    logical upper;
    extern /* Subroutine */ int cpotf2_(char *, integer *, complex *, integer 
	    *, integer *), xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *);


/*  -- LAPACK routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CPOTRF computes the Cholesky factorization of a complex Hermitian */
/*  positive definite matrix A. */

/*  The factorization has the form */
/*     A = U**H * U,  if UPLO = 'U', or */
/*     A = L  * L**H,  if UPLO = 'L', */
/*  where U is an upper triangular matrix and L is lower triangular. */

/*  This is the block version of the algorithm, calling Level 3 BLAS. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  Upper triangle of A is stored; */
/*          = 'L':  Lower triangle of A is stored. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  A       (input/output) COMPLEX array, dimension (LDA,N) */
/*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading */
/*          N-by-N upper triangular part of A contains the upper */
/*          triangular part of the matrix A, and the strictly lower */
/*          triangular part of A is not referenced.  If UPLO = 'L', the */
/*          leading N-by-N lower triangular part of A contains the lower */
/*          triangular part of the matrix A, and the strictly upper */
/*          triangular part of A is not referenced. */

/*          On exit, if INFO = 0, the factor U or L from the Cholesky */
/*          factorization A = U**H*U or A = L*L**H. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
/*          > 0:  if INFO = i, the leading minor of order i is not */
/*                positive definite, and the factorization could not be */
/*                completed. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CPOTRF", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "CPOTRF", uplo, n, &c_n1, &c_n1, &c_n1);
    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code. */

	cpotf2_(uplo, n, &a[a_offset], lda, info);
    } else {

/*        Use blocked code. */

	if (upper) {

/*           Compute the Cholesky factorization A = U'*U. */

	    i__1 = *n;
	    i__2 = nb;
	    for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Update and factorize the current diagonal block and test */
/*              for non-positive-definiteness. */

/* Computing MIN */
		i__3 = nb, i__4 = *n - j + 1;
		jb = min(i__3,i__4);
		i__3 = j - 1;
		cherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b14, &a[
			j * a_dim1 + 1], lda, &c_b15, &a[j + j * a_dim1], lda);
		cpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info);
		if (*info != 0) {
		    goto L30;
		}
		if (j + jb <= *n) {

/*                 Compute the current block row. */

		    i__3 = *n - j - jb + 1;
		    i__4 = j - 1;
		    q__1.r = -1.f, q__1.i = -0.f;
		    cgemm_("Conjugate transpose", "No transpose", &jb, &i__3, 
			    &i__4, &q__1, &a[j * a_dim1 + 1], lda, &a[(j + jb)
			     * a_dim1 + 1], lda, &c_b1, &a[j + (j + jb) * 
			    a_dim1], lda);
		    i__3 = *n - j - jb + 1;
		    ctrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", 
			     &jb, &i__3, &c_b1, &a[j + j * a_dim1], lda, &a[j 
			    + (j + jb) * a_dim1], lda);
		}
/* L10: */
	    }

	} else {

/*           Compute the Cholesky factorization A = L*L'. */

	    i__2 = *n;
	    i__1 = nb;
	    for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

/*              Update and factorize the current diagonal block and test */
/*              for non-positive-definiteness. */

/* Computing MIN */
		i__3 = nb, i__4 = *n - j + 1;
		jb = min(i__3,i__4);
		i__3 = j - 1;
		cherk_("Lower", "No transpose", &jb, &i__3, &c_b14, &a[j + 
			a_dim1], lda, &c_b15, &a[j + j * a_dim1], lda);
		cpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info);
		if (*info != 0) {
		    goto L30;
		}
		if (j + jb <= *n) {

/*                 Compute the current block column. */

		    i__3 = *n - j - jb + 1;
		    i__4 = j - 1;
		    q__1.r = -1.f, q__1.i = -0.f;
		    cgemm_("No transpose", "Conjugate transpose", &i__3, &jb, 
			    &i__4, &q__1, &a[j + jb + a_dim1], lda, &a[j + 
			    a_dim1], lda, &c_b1, &a[j + jb + j * a_dim1], lda);
		    i__3 = *n - j - jb + 1;
		    ctrsm_("Right", "Lower", "Conjugate transpose", "Non-unit"
, &i__3, &jb, &c_b1, &a[j + j * a_dim1], lda, &a[
			    j + jb + j * a_dim1], lda);
		}
/* L20: */
	    }
	}
    }
    goto L40;

L30:
    *info = *info + j - 1;

L40:
    return 0;

/*     End of CPOTRF */

} /* cpotrf_ */

Example #30

Show file

File: cstt22.c Project: kstraube/hysim

/* Subroutine */ int cstt22_(integer *n, integer *m, integer *kband, real *ad, 
	 real *ae, real *sd, real *se, complex *u, integer *ldu, complex *
	work, integer *ldwork, real *rwork, real *result)
{
    /* System generated locals */
    integer u_dim1, u_offset, work_dim1, work_offset, i__1, i__2, i__3, i__4, 
	    i__5, i__6;
    real r__1, r__2, r__3, r__4, r__5;
    complex q__1, q__2;

    /* Local variables */
    integer i__, j, k;
    real ulp;
    complex aukj;
    real unfl;
    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, 
	    integer *, complex *, complex *, integer *, complex *, integer *, 
	    complex *, complex *, integer *);
    real anorm, wnorm;
    extern doublereal clange_(char *, integer *, integer *, complex *, 
	    integer *, real *), slamch_(char *), clansy_(char 
	    *, char *, integer *, complex *, integer *, real *);


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  CSTT22  checks a set of M eigenvalues and eigenvectors, */

/*      A U = U S */

/*  where A is Hermitian tridiagonal, the columns of U are unitary, */
/*  and S is diagonal (if KBAND=0) or Hermitian tridiagonal (if KBAND=1). */
/*  Two tests are performed: */

/*     RESULT(1) = | U* A U - S | / ( |A| m ulp ) */

/*     RESULT(2) = | I - U*U | / ( m ulp ) */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The size of the matrix.  If it is zero, CSTT22 does nothing. */
/*          It must be at least zero. */

/*  M       (input) INTEGER */
/*          The number of eigenpairs to check.  If it is zero, CSTT22 */
/*          does nothing.  It must be at least zero. */

/*  KBAND   (input) INTEGER */
/*          The bandwidth of the matrix S.  It may only be zero or one. */
/*          If zero, then S is diagonal, and SE is not referenced.  If */
/*          one, then S is Hermitian tri-diagonal. */

/*  AD      (input) REAL array, dimension (N) */
/*          The diagonal of the original (unfactored) matrix A.  A is */
/*          assumed to be Hermitian tridiagonal. */

/*  AE      (input) REAL array, dimension (N) */
/*          The off-diagonal of the original (unfactored) matrix A.  A */
/*          is assumed to be Hermitian tridiagonal.  AE(1) is ignored, */
/*          AE(2) is the (1,2) and (2,1) element, etc. */

/*  SD      (input) REAL array, dimension (N) */
/*          The diagonal of the (Hermitian tri-) diagonal matrix S. */

/*  SE      (input) REAL array, dimension (N) */
/*          The off-diagonal of the (Hermitian tri-) diagonal matrix S. */
/*          Not referenced if KBSND=0.  If KBAND=1, then AE(1) is */
/*          ignored, SE(2) is the (1,2) and (2,1) element, etc. */

/*  U       (input) REAL array, dimension (LDU, N) */
/*          The unitary matrix in the decomposition. */

/*  LDU     (input) INTEGER */
/*          The leading dimension of U.  LDU must be at least N. */

/*  WORK    (workspace) COMPLEX array, dimension (LDWORK, M+1) */

/*  LDWORK  (input) INTEGER */
/*          The leading dimension of WORK.  LDWORK must be at least */
/*          max(1,M). */

/*  RWORK   (workspace) REAL array, dimension (N) */

/*  RESULT  (output) REAL array, dimension (2) */
/*          The values computed by the two tests described above.  The */
/*          values are currently limited to 1/ulp, to avoid overflow. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    --ad;
    --ae;
    --sd;
    --se;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1;
    u -= u_offset;
    work_dim1 = *ldwork;
    work_offset = 1 + work_dim1;
    work -= work_offset;
    --rwork;
    --result;

    /* Function Body */
    result[1] = 0.f;
    result[2] = 0.f;
    if (*n <= 0 || *m <= 0) {
	return 0;
    }

    unfl = slamch_("Safe minimum");
    ulp = slamch_("Epsilon");

/*     Do Test 1 */

/*     Compute the 1-norm of A. */

    if (*n > 1) {
	anorm = dabs(ad[1]) + dabs(ae[1]);
	i__1 = *n - 1;
	for (j = 2; j <= i__1; ++j) {
/* Computing MAX */
	    r__4 = anorm, r__5 = (r__1 = ad[j], dabs(r__1)) + (r__2 = ae[j], 
		    dabs(r__2)) + (r__3 = ae[j - 1], dabs(r__3));
	    anorm = dmax(r__4,r__5);
/* L10: */
	}
/* Computing MAX */
	r__3 = anorm, r__4 = (r__1 = ad[*n], dabs(r__1)) + (r__2 = ae[*n - 1],
		 dabs(r__2));
	anorm = dmax(r__3,r__4);
    } else {
	anorm = dabs(ad[1]);
    }
    anorm = dmax(anorm,unfl);

/*     Norm of U*AU - S */

    i__1 = *m;
    for (i__ = 1; i__ <= i__1; ++i__) {
	i__2 = *m;
	for (j = 1; j <= i__2; ++j) {
	    i__3 = i__ + j * work_dim1;
	    work[i__3].r = 0.f, work[i__3].i = 0.f;
	    i__3 = *n;
	    for (k = 1; k <= i__3; ++k) {
		i__4 = k;
		i__5 = k + j * u_dim1;
		q__1.r = ad[i__4] * u[i__5].r, q__1.i = ad[i__4] * u[i__5].i;
		aukj.r = q__1.r, aukj.i = q__1.i;
		if (k != *n) {
		    i__4 = k;
		    i__5 = k + 1 + j * u_dim1;
		    q__2.r = ae[i__4] * u[i__5].r, q__2.i = ae[i__4] * u[i__5]
			    .i;
		    q__1.r = aukj.r + q__2.r, q__1.i = aukj.i + q__2.i;
		    aukj.r = q__1.r, aukj.i = q__1.i;
		}
		if (k != 1) {
		    i__4 = k - 1;
		    i__5 = k - 1 + j * u_dim1;
		    q__2.r = ae[i__4] * u[i__5].r, q__2.i = ae[i__4] * u[i__5]
			    .i;
		    q__1.r = aukj.r + q__2.r, q__1.i = aukj.i + q__2.i;
		    aukj.r = q__1.r, aukj.i = q__1.i;
		}
		i__4 = i__ + j * work_dim1;
		i__5 = i__ + j * work_dim1;
		i__6 = k + i__ * u_dim1;
		q__2.r = u[i__6].r * aukj.r - u[i__6].i * aukj.i, q__2.i = u[
			i__6].r * aukj.i + u[i__6].i * aukj.r;
		q__1.r = work[i__5].r + q__2.r, q__1.i = work[i__5].i + 
			q__2.i;
		work[i__4].r = q__1.r, work[i__4].i = q__1.i;
/* L20: */
	    }
/* L30: */
	}
	i__2 = i__ + i__ * work_dim1;
	i__3 = i__ + i__ * work_dim1;
	i__4 = i__;
	q__1.r = work[i__3].r - sd[i__4], q__1.i = work[i__3].i;
	work[i__2].r = q__1.r, work[i__2].i = q__1.i;
	if (*kband == 1) {
	    if (i__ != 1) {
		i__2 = i__ + (i__ - 1) * work_dim1;
		i__3 = i__ + (i__ - 1) * work_dim1;
		i__4 = i__ - 1;
		q__1.r = work[i__3].r - se[i__4], q__1.i = work[i__3].i;
		work[i__2].r = q__1.r, work[i__2].i = q__1.i;
	    }
	    if (i__ != *n) {
		i__2 = i__ + (i__ + 1) * work_dim1;
		i__3 = i__ + (i__ + 1) * work_dim1;
		i__4 = i__;
		q__1.r = work[i__3].r - se[i__4], q__1.i = work[i__3].i;
		work[i__2].r = q__1.r, work[i__2].i = q__1.i;
	    }
	}
/* L40: */
    }

    wnorm = clansy_("1", "L", m, &work[work_offset], m, &rwork[1]);

    if (anorm > wnorm) {
	result[1] = wnorm / anorm / (*m * ulp);
    } else {
	if (anorm < 1.f) {
/* Computing MIN */
	    r__1 = wnorm, r__2 = *m * anorm;
	    result[1] = dmin(r__1,r__2) / anorm / (*m * ulp);
	} else {
/* Computing MIN */
	    r__1 = wnorm / anorm, r__2 = (real) (*m);
	    result[1] = dmin(r__1,r__2) / (*m * ulp);
	}
    }

/*     Do Test 2 */

/*     Compute  U*U - I */

    cgemm_("T", "N", m, m, n, &c_b2, &u[u_offset], ldu, &u[u_offset], ldu, &
	    c_b1, &work[work_offset], m);

    i__1 = *m;
    for (j = 1; j <= i__1; ++j) {
	i__2 = j + j * work_dim1;
	i__3 = j + j * work_dim1;
	q__1.r = work[i__3].r - 1.f, q__1.i = work[i__3].i;
	work[i__2].r = q__1.r, work[i__2].i = q__1.i;
/* L50: */
    }

/* Computing MIN */
    r__1 = (real) (*m), r__2 = clange_("1", m, m, &work[work_offset], m, &
	    rwork[1]);
    result[2] = dmin(r__1,r__2) / (*m * ulp);

    return 0;

/*     End of CSTT22 */

} /* cstt22_ */