コード例 #1
0
void STARPU_STRMM(const char *side, const char *uplo, const char *transA,
                 const char *diag, const int m, const int n,
                 const float alpha, const float *A, const int lda,
                 float *B, const int ldb)
{
	strmm_(side, uplo, transA, diag, &m, &n, &alpha, A, &lda, B, &ldb);
}
コード例 #2
0
ファイル: fblaswr.c プロジェクト: CIBC-Internal/clapack
int
f2c_strmm(char* side, char* uplo, char* trans, char* diag, 
          integer* M, integer* N,
          real* alpha,
          real* A, integer* lda,
          real* B, integer* ldb)
{
    strmm_(side, uplo, 
           trans, diag, 
           M, N, alpha, A, lda, B, ldb);
    return 0;
}
コード例 #3
0
ファイル: sneupd.f.c プロジェクト: cadarso/tensor
/* ----------------------------------------------------------------------- */
/* Subroutine */ int sneupd_(logical *rvec, char *howmny, logical *select, 
	real *dr, real *di, real *z__, integer *ldz, real *sigmar, real *
	sigmai, real *workev, char *bmat, integer *n, char *which, integer *
	nev, real *tol, real *resid, integer *ncv, real *v, integer *ldv, 
	integer *iparam, integer *ipntr, real *workd, real *workl, integer *
	lworkl, integer *info, ftnlen howmny_len, ftnlen bmat_len, ftnlen 
	which_len)
{
    /* System generated locals */
    integer v_dim1, v_offset, z_dim1, z_offset, i__1;
    real r__1, r__2;
    doublereal d__1;

    /* Local variables */
    static integer j, k, ih, jj, np;
    static real vl[1]	/* was [1][1] */;
    static integer ibd, ldh, ldq, iri;
    static real sep;
    static integer irr, wri, wrr, mode;
    static real eps23;
    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, 
	    integer *, real *, integer *, real *, integer *);
    static integer ierr;
    static real temp;
    static integer iwev;
    static char type__[6];
    static real temp1;
    extern doublereal snrm2_(integer *, real *, integer *);
    static integer ihbds, iconj;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
    static real conds;
    static logical reord;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *, 
	    ftnlen);
    static integer nconv, iwork[1];
    static real rnorm;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    static integer ritzi;
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
	    , ftnlen, ftnlen, ftnlen, ftnlen), ivout_(integer *, integer *, 
	    integer *, integer *, char *, ftnlen), smout_(integer *, integer *
	    , integer *, real *, integer *, integer *, char *, ftnlen);
    static integer ritzr;
    extern /* Subroutine */ int svout_(integer *, integer *, real *, integer *
	    , char *, ftnlen), sgeqr2_(integer *, integer *, real *, integer *
	    , real *, real *, integer *);
    static integer nconv2;
    extern doublereal slapy2_(real *, real *);
    extern /* Subroutine */ int sorm2r_(char *, char *, integer *, integer *, 
	    integer *, real *, integer *, real *, real *, integer *, real *, 
	    integer *, ftnlen, ftnlen);
    static integer iheigi, iheigr, bounds, invsub, iuptri, msglvl, outncv, 
	    ishift, numcnv;
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *, ftnlen), slahqr_(logical *, logical 
	    *, integer *, integer *, integer *, real *, integer *, real *, 
	    real *, integer *, integer *, real *, integer *, integer *), 
	    slaset_(char *, integer *, integer *, real *, real *, real *, 
	    integer *, ftnlen), strevc_(char *, char *, logical *, integer *, 
	    real *, integer *, real *, integer *, real *, integer *, integer *
	    , integer *, real *, integer *, ftnlen, ftnlen), strsen_(char *, 
	    char *, logical *, integer *, real *, integer *, real *, integer *
	    , real *, real *, integer *, real *, real *, real *, integer *, 
	    integer *, integer *, integer *, ftnlen, ftnlen);
    extern doublereal slamch_(char *, ftnlen);
    extern /* Subroutine */ int sngets_(integer *, char *, integer *, integer 
	    *, real *, real *, real *, real *, real *, ftnlen);


/*     %----------------------------------------------------% */
/*     | Include files for debugging and timing information | */
/*     %----------------------------------------------------% */


/* \SCCS Information: @(#) */
/* FILE: debug.h   SID: 2.3   DATE OF SID: 11/16/95   RELEASE: 2 */

/*     %---------------------------------% */
/*     | See debug.doc for documentation | */
/*     %---------------------------------% */

/*     %------------------% */
/*     | Scalar Arguments | */
/*     %------------------% */

/*     %--------------------------------% */
/*     | See stat.doc for documentation | */
/*     %--------------------------------% */

/* \SCCS Information: @(#) */
/* FILE: stat.h   SID: 2.2   DATE OF SID: 11/16/95   RELEASE: 2 */



/*     %-----------------% */
/*     | Array Arguments | */
/*     %-----------------% */


/*     %------------% */
/*     | Parameters | */
/*     %------------% */


/*     %---------------% */
/*     | Local Scalars | */
/*     %---------------% */


/*     %----------------------% */
/*     | External Subroutines | */
/*     %----------------------% */


/*     %--------------------% */
/*     | External Functions | */
/*     %--------------------% */


/*     %---------------------% */
/*     | Intrinsic Functions | */
/*     %---------------------% */


/*     %-----------------------% */
/*     | Executable Statements | */
/*     %-----------------------% */

/*     %------------------------% */
/*     | Set default parameters | */
/*     %------------------------% */

    /* Parameter adjustments */
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1;
    z__ -= z_offset;
    --workd;
    --resid;
    --di;
    --dr;
    --workev;
    --select;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    --iparam;
    --ipntr;
    --workl;

    /* Function Body */
    msglvl = debug_1.mneupd;
    mode = iparam[7];
    nconv = iparam[5];
    *info = 0;

/*     %---------------------------------% */
/*     | Get machine dependent constant. | */
/*     %---------------------------------% */

    eps23 = slamch_("Epsilon-Machine", (ftnlen)15);
    d__1 = (doublereal) eps23;
    eps23 = pow_dd(&d__1, &c_b3);

/*     %--------------% */
/*     | Quick return | */
/*     %--------------% */

    ierr = 0;

    if (nconv <= 0) {
	ierr = -14;
    } else if (*n <= 0) {
	ierr = -1;
    } else if (*nev <= 0) {
	ierr = -2;
    } else if (*ncv <= *nev + 1 || *ncv > *n) {
	ierr = -3;
    } else if (s_cmp(which, "LM", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, 
	    "SM", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, "LR", (ftnlen)2, 
	    (ftnlen)2) != 0 && s_cmp(which, "SR", (ftnlen)2, (ftnlen)2) != 0 
	    && s_cmp(which, "LI", (ftnlen)2, (ftnlen)2) != 0 && s_cmp(which, 
	    "SI", (ftnlen)2, (ftnlen)2) != 0) {
	ierr = -5;
    } else if (*(unsigned char *)bmat != 'I' && *(unsigned char *)bmat != 'G')
	     {
	ierr = -6;
    } else /* if(complicated condition) */ {
/* Computing 2nd power */
	i__1 = *ncv;
	if (*lworkl < i__1 * i__1 * 3 + *ncv * 6) {
	    ierr = -7;
	} else if (*(unsigned char *)howmny != 'A' && *(unsigned char *)
		howmny != 'P' && *(unsigned char *)howmny != 'S' && *rvec) {
	    ierr = -13;
	} else if (*(unsigned char *)howmny == 'S') {
	    ierr = -12;
	}
    }

    if (mode == 1 || mode == 2) {
	s_copy(type__, "REGULR", (ftnlen)6, (ftnlen)6);
    } else if (mode == 3 && *sigmai == 0.f) {
	s_copy(type__, "SHIFTI", (ftnlen)6, (ftnlen)6);
    } else if (mode == 3) {
	s_copy(type__, "REALPT", (ftnlen)6, (ftnlen)6);
    } else if (mode == 4) {
	s_copy(type__, "IMAGPT", (ftnlen)6, (ftnlen)6);
    } else {
	ierr = -10;
    }
    if (mode == 1 && *(unsigned char *)bmat == 'G') {
	ierr = -11;
    }

/*     %------------% */
/*     | Error Exit | */
/*     %------------% */

    if (ierr != 0) {
	*info = ierr;
	goto L9000;
    }

/*     %--------------------------------------------------------% */
/*     | Pointer into WORKL for address of H, RITZ, BOUNDS, Q   | */
/*     | etc... and the remaining workspace.                    | */
/*     | Also update pointer to be used on output.              | */
/*     | Memory is laid out as follows:                         | */
/*     | workl(1:ncv*ncv) := generated Hessenberg matrix        | */
/*     | workl(ncv*ncv+1:ncv*ncv+2*ncv) := real and imaginary   | */
/*     |                                   parts of ritz values | */
/*     | workl(ncv*ncv+2*ncv+1:ncv*ncv+3*ncv) := error bounds   | */
/*     %--------------------------------------------------------% */

/*     %-----------------------------------------------------------% */
/*     | The following is used and set by SNEUPD.                  | */
/*     | workl(ncv*ncv+3*ncv+1:ncv*ncv+4*ncv) := The untransformed | */
/*     |                             real part of the Ritz values. | */
/*     | workl(ncv*ncv+4*ncv+1:ncv*ncv+5*ncv) := The untransformed | */
/*     |                        imaginary part of the Ritz values. | */
/*     | workl(ncv*ncv+5*ncv+1:ncv*ncv+6*ncv) := The untransformed | */
/*     |                           error bounds of the Ritz values | */
/*     | workl(ncv*ncv+6*ncv+1:2*ncv*ncv+6*ncv) := Holds the upper | */
/*     |                             quasi-triangular matrix for H | */
/*     | workl(2*ncv*ncv+6*ncv+1: 3*ncv*ncv+6*ncv) := Holds the    | */
/*     |       associated matrix representation of the invariant   | */
/*     |       subspace for H.                                     | */
/*     | GRAND total of NCV * ( 3 * NCV + 6 ) locations.           | */
/*     %-----------------------------------------------------------% */

    ih = ipntr[5];
    ritzr = ipntr[6];
    ritzi = ipntr[7];
    bounds = ipntr[8];
    ldh = *ncv;
    ldq = *ncv;
    iheigr = bounds + ldh;
    iheigi = iheigr + ldh;
    ihbds = iheigi + ldh;
    iuptri = ihbds + ldh;
    invsub = iuptri + ldh * *ncv;
    ipntr[9] = iheigr;
    ipntr[10] = iheigi;
    ipntr[11] = ihbds;
    ipntr[12] = iuptri;
    ipntr[13] = invsub;
    wrr = 1;
    wri = *ncv + 1;
    iwev = wri + *ncv;

/*     %-----------------------------------------% */
/*     | irr points to the REAL part of the Ritz | */
/*     |     values computed by _neigh before    | */
/*     |     exiting _naup2.                     | */
/*     | iri points to the IMAGINARY part of the | */
/*     |     Ritz values computed by _neigh      | */
/*     |     before exiting _naup2.              | */
/*     | ibd points to the Ritz estimates        | */
/*     |     computed by _neigh before exiting   | */
/*     |     _naup2.                             | */
/*     %-----------------------------------------% */

    irr = ipntr[14] + *ncv * *ncv;
    iri = irr + *ncv;
    ibd = iri + *ncv;

/*     %------------------------------------% */
/*     | RNORM is B-norm of the RESID(1:N). | */
/*     %------------------------------------% */

    rnorm = workl[ih + 2];
    workl[ih + 2] = 0.f;

    if (msglvl > 2) {
	svout_(&debug_1.logfil, ncv, &workl[irr], &debug_1.ndigit, "_neupd: "
		"Real part of Ritz values passed in from _NAUPD.", (ftnlen)55);
	svout_(&debug_1.logfil, ncv, &workl[iri], &debug_1.ndigit, "_neupd: "
		"Imag part of Ritz values passed in from _NAUPD.", (ftnlen)55);
	svout_(&debug_1.logfil, ncv, &workl[ibd], &debug_1.ndigit, "_neupd: "
		"Ritz estimates passed in from _NAUPD.", (ftnlen)45);
    }

    if (*rvec) {

	reord = FALSE_;

/*        %---------------------------------------------------% */
/*        | Use the temporary bounds array to store indices   | */
/*        | These will be used to mark the select array later | */
/*        %---------------------------------------------------% */

	i__1 = *ncv;
	for (j = 1; j <= i__1; ++j) {
	    workl[bounds + j - 1] = (real) j;
	    select[j] = FALSE_;
/* L10: */
	}

/*        %-------------------------------------% */
/*        | Select the wanted Ritz values.      | */
/*        | Sort the Ritz values so that the    | */
/*        | wanted ones appear at the tailing   | */
/*        | NEV positions of workl(irr) and     | */
/*        | workl(iri).  Move the corresponding | */
/*        | error estimates in workl(bound)     | */
/*        | accordingly.                        | */
/*        %-------------------------------------% */

	np = *ncv - *nev;
	ishift = 0;
	sngets_(&ishift, which, nev, &np, &workl[irr], &workl[iri], &workl[
		bounds], &workl[1], &workl[np + 1], (ftnlen)2);

	if (msglvl > 2) {
	    svout_(&debug_1.logfil, ncv, &workl[irr], &debug_1.ndigit, "_neu"
		    "pd: Real part of Ritz values after calling _NGETS.", (
		    ftnlen)54);
	    svout_(&debug_1.logfil, ncv, &workl[iri], &debug_1.ndigit, "_neu"
		    "pd: Imag part of Ritz values after calling _NGETS.", (
		    ftnlen)54);
	    svout_(&debug_1.logfil, ncv, &workl[bounds], &debug_1.ndigit, 
		    "_neupd: Ritz value indices after calling _NGETS.", (
		    ftnlen)48);
	}

/*        %-----------------------------------------------------% */
/*        | Record indices of the converged wanted Ritz values  | */
/*        | Mark the select array for possible reordering       | */
/*        %-----------------------------------------------------% */

	numcnv = 0;
	i__1 = *ncv;
	for (j = 1; j <= i__1; ++j) {
/* Computing MAX */
	    r__1 = eps23, r__2 = slapy2_(&workl[irr + *ncv - j], &workl[iri + 
		    *ncv - j]);
	    temp1 = dmax(r__1,r__2);
	    jj = workl[bounds + *ncv - j];
	    if (numcnv < nconv && workl[ibd + jj - 1] <= *tol * temp1) {
		select[jj] = TRUE_;
		++numcnv;
		if (jj > nconv) {
		    reord = TRUE_;
		}
	    }
/* L11: */
	}

/*        %-----------------------------------------------------------% */
/*        | Check the count (numcnv) of converged Ritz values with    | */
/*        | the number (nconv) reported by dnaupd.  If these two      | */
/*        | are different then there has probably been an error       | */
/*        | caused by incorrect passing of the dnaupd data.           | */
/*        %-----------------------------------------------------------% */

	if (msglvl > 2) {
	    ivout_(&debug_1.logfil, &c__1, &numcnv, &debug_1.ndigit, "_neupd"
		    ": Number of specified eigenvalues", (ftnlen)39);
	    ivout_(&debug_1.logfil, &c__1, &nconv, &debug_1.ndigit, "_neupd:"
		    " Number of \"converged\" eigenvalues", (ftnlen)41);
	}

	if (numcnv != nconv) {
	    *info = -15;
	    goto L9000;
	}

/*        %-----------------------------------------------------------% */
/*        | Call LAPACK routine slahqr to compute the real Schur form | */
/*        | of the upper Hessenberg matrix returned by SNAUPD.        | */
/*        | Make a copy of the upper Hessenberg matrix.               | */
/*        | Initialize the Schur vector matrix Q to the identity.     | */
/*        %-----------------------------------------------------------% */

	i__1 = ldh * *ncv;
	scopy_(&i__1, &workl[ih], &c__1, &workl[iuptri], &c__1);
	slaset_("All", ncv, ncv, &c_b37, &c_b38, &workl[invsub], &ldq, (
		ftnlen)3);
	slahqr_(&c_true, &c_true, ncv, &c__1, ncv, &workl[iuptri], &ldh, &
		workl[iheigr], &workl[iheigi], &c__1, ncv, &workl[invsub], &
		ldq, &ierr);
	scopy_(ncv, &workl[invsub + *ncv - 1], &ldq, &workl[ihbds], &c__1);

	if (ierr != 0) {
	    *info = -8;
	    goto L9000;
	}

	if (msglvl > 1) {
	    svout_(&debug_1.logfil, ncv, &workl[iheigr], &debug_1.ndigit, 
		    "_neupd: Real part of the eigenvalues of H", (ftnlen)41);
	    svout_(&debug_1.logfil, ncv, &workl[iheigi], &debug_1.ndigit, 
		    "_neupd: Imaginary part of the Eigenvalues of H", (ftnlen)
		    46);
	    svout_(&debug_1.logfil, ncv, &workl[ihbds], &debug_1.ndigit, 
		    "_neupd: Last row of the Schur vector matrix", (ftnlen)43)
		    ;
	    if (msglvl > 3) {
		smout_(&debug_1.logfil, ncv, ncv, &workl[iuptri], &ldh, &
			debug_1.ndigit, "_neupd: The upper quasi-triangular "
			"matrix ", (ftnlen)42);
	    }
	}

	if (reord) {

/*           %-----------------------------------------------------% */
/*           | Reorder the computed upper quasi-triangular matrix. | */
/*           %-----------------------------------------------------% */

	    strsen_("None", "V", &select[1], ncv, &workl[iuptri], &ldh, &
		    workl[invsub], &ldq, &workl[iheigr], &workl[iheigi], &
		    nconv2, &conds, &sep, &workl[ihbds], ncv, iwork, &c__1, &
		    ierr, (ftnlen)4, (ftnlen)1);

	    if (nconv2 < nconv) {
		nconv = nconv2;
	    }
	    if (ierr == 1) {
		*info = 1;
		goto L9000;
	    }

	    if (msglvl > 2) {
		svout_(&debug_1.logfil, ncv, &workl[iheigr], &debug_1.ndigit, 
			"_neupd: Real part of the eigenvalues of H--reordered"
			, (ftnlen)52);
		svout_(&debug_1.logfil, ncv, &workl[iheigi], &debug_1.ndigit, 
			"_neupd: Imag part of the eigenvalues of H--reordered"
			, (ftnlen)52);
		if (msglvl > 3) {
		    smout_(&debug_1.logfil, ncv, ncv, &workl[iuptri], &ldq, &
			    debug_1.ndigit, "_neupd: Quasi-triangular matrix"
			    " after re-ordering", (ftnlen)49);
		}
	    }

	}

/*        %---------------------------------------% */
/*        | Copy the last row of the Schur vector | */
/*        | into workl(ihbds).  This will be used | */
/*        | to compute the Ritz estimates of      | */
/*        | converged Ritz values.                | */
/*        %---------------------------------------% */

	scopy_(ncv, &workl[invsub + *ncv - 1], &ldq, &workl[ihbds], &c__1);

/*        %----------------------------------------------------% */
/*        | Place the computed eigenvalues of H into DR and DI | */
/*        | if a spectral transformation was not used.         | */
/*        %----------------------------------------------------% */

	if (s_cmp(type__, "REGULR", (ftnlen)6, (ftnlen)6) == 0) {
	    scopy_(&nconv, &workl[iheigr], &c__1, &dr[1], &c__1);
	    scopy_(&nconv, &workl[iheigi], &c__1, &di[1], &c__1);
	}

/*        %----------------------------------------------------------% */
/*        | Compute the QR factorization of the matrix representing  | */
/*        | the wanted invariant subspace located in the first NCONV | */
/*        | columns of workl(invsub,ldq).                            | */
/*        %----------------------------------------------------------% */

	sgeqr2_(ncv, &nconv, &workl[invsub], &ldq, &workev[1], &workev[*ncv + 
		1], &ierr);

/*        %---------------------------------------------------------% */
/*        | * Postmultiply V by Q using sorm2r.                     | */
/*        | * Copy the first NCONV columns of VQ into Z.            | */
/*        | * Postmultiply Z by R.                                  | */
/*        | The N by NCONV matrix Z is now a matrix representation  | */
/*        | of the approximate invariant subspace associated with   | */
/*        | the Ritz values in workl(iheigr) and workl(iheigi)      | */
/*        | The first NCONV columns of V are now approximate Schur  | */
/*        | vectors associated with the real upper quasi-triangular | */
/*        | matrix of order NCONV in workl(iuptri)                  | */
/*        %---------------------------------------------------------% */

	sorm2r_("Right", "Notranspose", n, ncv, &nconv, &workl[invsub], &ldq, 
		&workev[1], &v[v_offset], ldv, &workd[*n + 1], &ierr, (ftnlen)
		5, (ftnlen)11);
	slacpy_("All", n, &nconv, &v[v_offset], ldv, &z__[z_offset], ldz, (
		ftnlen)3);

	i__1 = nconv;
	for (j = 1; j <= i__1; ++j) {

/*           %---------------------------------------------------% */
/*           | Perform both a column and row scaling if the      | */
/*           | diagonal element of workl(invsub,ldq) is negative | */
/*           | I'm lazy and don't take advantage of the upper    | */
/*           | quasi-triangular form of workl(iuptri,ldq)        | */
/*           | Note that since Q is orthogonal, R is a diagonal  | */
/*           | matrix consisting of plus or minus ones           | */
/*           %---------------------------------------------------% */

	    if (workl[invsub + (j - 1) * ldq + j - 1] < 0.f) {
		sscal_(&nconv, &c_b64, &workl[iuptri + j - 1], &ldq);
		sscal_(&nconv, &c_b64, &workl[iuptri + (j - 1) * ldq], &c__1);
	    }

/* L20: */
	}

	if (*(unsigned char *)howmny == 'A') {

/*           %--------------------------------------------% */
/*           | Compute the NCONV wanted eigenvectors of T | */
/*           | located in workl(iuptri,ldq).              | */
/*           %--------------------------------------------% */

	    i__1 = *ncv;
	    for (j = 1; j <= i__1; ++j) {
		if (j <= nconv) {
		    select[j] = TRUE_;
		} else {
		    select[j] = FALSE_;
		}
/* L30: */
	    }

	    strevc_("Right", "Select", &select[1], ncv, &workl[iuptri], &ldq, 
		    vl, &c__1, &workl[invsub], &ldq, ncv, &outncv, &workev[1],
		     &ierr, (ftnlen)5, (ftnlen)6);

	    if (ierr != 0) {
		*info = -9;
		goto L9000;
	    }

/*           %------------------------------------------------% */
/*           | Scale the returning eigenvectors so that their | */
/*           | Euclidean norms are all one. LAPACK subroutine | */
/*           | strevc returns each eigenvector normalized so  | */
/*           | that the element of largest magnitude has      | */
/*           | magnitude 1;                                   | */
/*           %------------------------------------------------% */

	    iconj = 0;
	    i__1 = nconv;
	    for (j = 1; j <= i__1; ++j) {

		if (workl[iheigi + j - 1] == 0.f) {

/*                 %----------------------% */
/*                 | real eigenvalue case | */
/*                 %----------------------% */

		    temp = snrm2_(ncv, &workl[invsub + (j - 1) * ldq], &c__1);
		    r__1 = 1.f / temp;
		    sscal_(ncv, &r__1, &workl[invsub + (j - 1) * ldq], &c__1);

		} else {

/*                 %-------------------------------------------% */
/*                 | Complex conjugate pair case. Note that    | */
/*                 | since the real and imaginary part of      | */
/*                 | the eigenvector are stored in consecutive | */
/*                 | columns, we further normalize by the      | */
/*                 | square root of two.                       | */
/*                 %-------------------------------------------% */

		    if (iconj == 0) {
			r__1 = snrm2_(ncv, &workl[invsub + (j - 1) * ldq], &
				c__1);
			r__2 = snrm2_(ncv, &workl[invsub + j * ldq], &c__1);
			temp = slapy2_(&r__1, &r__2);
			r__1 = 1.f / temp;
			sscal_(ncv, &r__1, &workl[invsub + (j - 1) * ldq], &
				c__1);
			r__1 = 1.f / temp;
			sscal_(ncv, &r__1, &workl[invsub + j * ldq], &c__1);
			iconj = 1;
		    } else {
			iconj = 0;
		    }

		}

/* L40: */
	    }

	    sgemv_("T", ncv, &nconv, &c_b38, &workl[invsub], &ldq, &workl[
		    ihbds], &c__1, &c_b37, &workev[1], &c__1, (ftnlen)1);

	    iconj = 0;
	    i__1 = nconv;
	    for (j = 1; j <= i__1; ++j) {
		if (workl[iheigi + j - 1] != 0.f) {

/*                 %-------------------------------------------% */
/*                 | Complex conjugate pair case. Note that    | */
/*                 | since the real and imaginary part of      | */
/*                 | the eigenvector are stored in consecutive | */
/*                 %-------------------------------------------% */

		    if (iconj == 0) {
			workev[j] = slapy2_(&workev[j], &workev[j + 1]);
			workev[j + 1] = workev[j];
			iconj = 1;
		    } else {
			iconj = 0;
		    }
		}
/* L45: */
	    }

	    if (msglvl > 2) {
		scopy_(ncv, &workl[invsub + *ncv - 1], &ldq, &workl[ihbds], &
			c__1);
		svout_(&debug_1.logfil, ncv, &workl[ihbds], &debug_1.ndigit, 
			"_neupd: Last row of the eigenvector matrix for T", (
			ftnlen)48);
		if (msglvl > 3) {
		    smout_(&debug_1.logfil, ncv, ncv, &workl[invsub], &ldq, &
			    debug_1.ndigit, "_neupd: The eigenvector matrix "
			    "for T", (ftnlen)36);
		}
	    }

/*           %---------------------------------------% */
/*           | Copy Ritz estimates into workl(ihbds) | */
/*           %---------------------------------------% */

	    scopy_(&nconv, &workev[1], &c__1, &workl[ihbds], &c__1);

/*           %---------------------------------------------------------% */
/*           | Compute the QR factorization of the eigenvector matrix  | */
/*           | associated with leading portion of T in the first NCONV | */
/*           | columns of workl(invsub,ldq).                           | */
/*           %---------------------------------------------------------% */

	    sgeqr2_(ncv, &nconv, &workl[invsub], &ldq, &workev[1], &workev[*
		    ncv + 1], &ierr);

/*           %----------------------------------------------% */
/*           | * Postmultiply Z by Q.                       | */
/*           | * Postmultiply Z by R.                       | */
/*           | The N by NCONV matrix Z is now contains the  | */
/*           | Ritz vectors associated with the Ritz values | */
/*           | in workl(iheigr) and workl(iheigi).          | */
/*           %----------------------------------------------% */

	    sorm2r_("Right", "Notranspose", n, ncv, &nconv, &workl[invsub], &
		    ldq, &workev[1], &z__[z_offset], ldz, &workd[*n + 1], &
		    ierr, (ftnlen)5, (ftnlen)11);

	    strmm_("Right", "Upper", "No transpose", "Non-unit", n, &nconv, &
		    c_b38, &workl[invsub], &ldq, &z__[z_offset], ldz, (ftnlen)
		    5, (ftnlen)5, (ftnlen)12, (ftnlen)8);

	}

    } else {

/*        %------------------------------------------------------% */
/*        | An approximate invariant subspace is not needed.     | */
/*        | Place the Ritz values computed SNAUPD into DR and DI | */
/*        %------------------------------------------------------% */

	scopy_(&nconv, &workl[ritzr], &c__1, &dr[1], &c__1);
	scopy_(&nconv, &workl[ritzi], &c__1, &di[1], &c__1);
	scopy_(&nconv, &workl[ritzr], &c__1, &workl[iheigr], &c__1);
	scopy_(&nconv, &workl[ritzi], &c__1, &workl[iheigi], &c__1);
	scopy_(&nconv, &workl[bounds], &c__1, &workl[ihbds], &c__1);
    }

/*     %------------------------------------------------% */
/*     | Transform the Ritz values and possibly vectors | */
/*     | and corresponding error bounds of OP to those  | */
/*     | of A*x = lambda*B*x.                           | */
/*     %------------------------------------------------% */

    if (s_cmp(type__, "REGULR", (ftnlen)6, (ftnlen)6) == 0) {

	if (*rvec) {
	    sscal_(ncv, &rnorm, &workl[ihbds], &c__1);
	}

    } else {

/*        %---------------------------------------% */
/*        |   A spectral transformation was used. | */
/*        | * Determine the Ritz estimates of the | */
/*        |   Ritz values in the original system. | */
/*        %---------------------------------------% */

	if (s_cmp(type__, "SHIFTI", (ftnlen)6, (ftnlen)6) == 0) {

	    if (*rvec) {
		sscal_(ncv, &rnorm, &workl[ihbds], &c__1);
	    }

	    i__1 = *ncv;
	    for (k = 1; k <= i__1; ++k) {
		temp = slapy2_(&workl[iheigr + k - 1], &workl[iheigi + k - 1])
			;
		workl[ihbds + k - 1] = (r__1 = workl[ihbds + k - 1], dabs(
			r__1)) / temp / temp;
/* L50: */
	    }

	} else if (s_cmp(type__, "REALPT", (ftnlen)6, (ftnlen)6) == 0) {

	    i__1 = *ncv;
	    for (k = 1; k <= i__1; ++k) {
/* L60: */
	    }

	} else if (s_cmp(type__, "IMAGPT", (ftnlen)6, (ftnlen)6) == 0) {

	    i__1 = *ncv;
	    for (k = 1; k <= i__1; ++k) {
/* L70: */
	    }

	}

/*        %-----------------------------------------------------------% */
/*        | *  Transform the Ritz values back to the original system. | */
/*        |    For TYPE = 'SHIFTI' the transformation is              | */
/*        |             lambda = 1/theta + sigma                      | */
/*        |    For TYPE = 'REALPT' or 'IMAGPT' the user must from     | */
/*        |    Rayleigh quotients or a projection. See remark 3 above.| */
/*        | NOTES:                                                    | */
/*        | *The Ritz vectors are not affected by the transformation. | */
/*        %-----------------------------------------------------------% */

	if (s_cmp(type__, "SHIFTI", (ftnlen)6, (ftnlen)6) == 0) {

	    i__1 = *ncv;
	    for (k = 1; k <= i__1; ++k) {
		temp = slapy2_(&workl[iheigr + k - 1], &workl[iheigi + k - 1])
			;
		workl[iheigr + k - 1] = workl[iheigr + k - 1] / temp / temp + 
			*sigmar;
		workl[iheigi + k - 1] = -workl[iheigi + k - 1] / temp / temp 
			+ *sigmai;
/* L80: */
	    }

	    scopy_(&nconv, &workl[iheigr], &c__1, &dr[1], &c__1);
	    scopy_(&nconv, &workl[iheigi], &c__1, &di[1], &c__1);

	} else if (s_cmp(type__, "REALPT", (ftnlen)6, (ftnlen)6) == 0 || 
		s_cmp(type__, "IMAGPT", (ftnlen)6, (ftnlen)6) == 0) {

	    scopy_(&nconv, &workl[iheigr], &c__1, &dr[1], &c__1);
	    scopy_(&nconv, &workl[iheigi], &c__1, &di[1], &c__1);

	}

    }

    if (s_cmp(type__, "SHIFTI", (ftnlen)6, (ftnlen)6) == 0 && msglvl > 1) {
	svout_(&debug_1.logfil, &nconv, &dr[1], &debug_1.ndigit, "_neupd: Un"
		"transformed real part of the Ritz valuess.", (ftnlen)52);
	svout_(&debug_1.logfil, &nconv, &di[1], &debug_1.ndigit, "_neupd: Un"
		"transformed imag part of the Ritz valuess.", (ftnlen)52);
	svout_(&debug_1.logfil, &nconv, &workl[ihbds], &debug_1.ndigit, "_ne"
		"upd: Ritz estimates of untransformed Ritz values.", (ftnlen)
		52);
    } else if (s_cmp(type__, "REGULR", (ftnlen)6, (ftnlen)6) == 0 && msglvl > 
	    1) {
	svout_(&debug_1.logfil, &nconv, &dr[1], &debug_1.ndigit, "_neupd: Re"
		"al parts of converged Ritz values.", (ftnlen)44);
	svout_(&debug_1.logfil, &nconv, &di[1], &debug_1.ndigit, "_neupd: Im"
		"ag parts of converged Ritz values.", (ftnlen)44);
	svout_(&debug_1.logfil, &nconv, &workl[ihbds], &debug_1.ndigit, "_ne"
		"upd: Associated Ritz estimates.", (ftnlen)34);
    }

/*     %-------------------------------------------------% */
/*     | Eigenvector Purification step. Formally perform | */
/*     | one of inverse subspace iteration. Only used    | */
/*     | for MODE = 2.                                   | */
/*     %-------------------------------------------------% */

    if (*rvec && *(unsigned char *)howmny == 'A' && s_cmp(type__, "SHIFTI", (
	    ftnlen)6, (ftnlen)6) == 0) {

/*        %------------------------------------------------% */
/*        | Purify the computed Ritz vectors by adding a   | */
/*        | little bit of the residual vector:             | */
/*        |                      T                         | */
/*        |          resid(:)*( e    s ) / theta           | */
/*        |                      NCV                       | */
/*        | where H s = s theta. Remember that when theta  | */
/*        | has nonzero imaginary part, the corresponding  | */
/*        | Ritz vector is stored across two columns of Z. | */
/*        %------------------------------------------------% */

	iconj = 0;
	i__1 = nconv;
	for (j = 1; j <= i__1; ++j) {
	    if (workl[iheigi + j - 1] == 0.f) {
		workev[j] = workl[invsub + (j - 1) * ldq + *ncv - 1] / workl[
			iheigr + j - 1];
	    } else if (iconj == 0) {
		temp = slapy2_(&workl[iheigr + j - 1], &workl[iheigi + j - 1])
			;
		workev[j] = (workl[invsub + (j - 1) * ldq + *ncv - 1] * workl[
			iheigr + j - 1] + workl[invsub + j * ldq + *ncv - 1] *
			 workl[iheigi + j - 1]) / temp / temp;
		workev[j + 1] = (workl[invsub + j * ldq + *ncv - 1] * workl[
			iheigr + j - 1] - workl[invsub + (j - 1) * ldq + *ncv 
			- 1] * workl[iheigi + j - 1]) / temp / temp;
		iconj = 1;
	    } else {
		iconj = 0;
	    }
/* L110: */
	}

/*        %---------------------------------------% */
/*        | Perform a rank one update to Z and    | */
/*        | purify all the Ritz vectors together. | */
/*        %---------------------------------------% */

	sger_(n, &nconv, &c_b38, &resid[1], &c__1, &workev[1], &c__1, &z__[
		z_offset], ldz);

    }

L9000:

    return 0;

/*     %---------------% */
/*     | End of SNEUPD | */
/*     %---------------% */

} /* sneupd_ */
コード例 #4
0
ファイル: slaqr5.c プロジェクト: 3deggi/levmar-ndk
/* Subroutine */ int slaqr5_(logical *wantt, logical *wantz, integer *kacc22, 
	integer *n, integer *ktop, integer *kbot, integer *nshfts, real *sr, 
	real *si, real *h__, integer *ldh, integer *iloz, integer *ihiz, real 
	*z__, integer *ldz, real *v, integer *ldv, real *u, integer *ldu, 
	integer *nv, real *wv, integer *ldwv, integer *nh, real *wh, integer *
	ldwh)
{
    /* System generated locals */
    integer h_dim1, h_offset, u_dim1, u_offset, v_dim1, v_offset, wh_dim1, 
	    wh_offset, wv_dim1, wv_offset, z_dim1, z_offset, i__1, i__2, i__3,
	     i__4, i__5, i__6, i__7;
    real r__1, r__2, r__3, r__4, r__5;

    /* Local variables */
    integer i__, j, k, m, i2, j2, i4, j4, k1;
    real h11, h12, h21, h22;
    integer m22, ns, nu;
    real vt[3], scl;
    integer kdu, kms;
    real ulp;
    integer knz, kzs;
    real tst1, tst2, beta;
    logical blk22, bmp22;
    integer mend, jcol, jlen, jbot, mbot;
    real swap;
    integer jtop, jrow, mtop;
    real alpha;
    logical accum;
    integer ndcol, incol;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    integer krcol, nbmps;
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
), slaqr1_(integer *, real *, 
	    integer *, real *, real *, real *, real *, real *), slabad_(real *
, real *);
    extern doublereal slamch_(char *);
    real safmin;
    extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, 
	    real *);
    real safmax;
    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, 
	    integer *, real *, integer *), slaset_(char *, integer *, 
	    integer *, real *, real *, real *, integer *);
    real refsum;
    integer mstart;
    real smlnum;


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*     This auxiliary subroutine called by SLAQR0 performs a */
/*     single small-bulge multi-shift QR sweep. */

/*      WANTT  (input) logical scalar */
/*             WANTT = .true. if the quasi-triangular Schur factor */
/*             is being computed.  WANTT is set to .false. otherwise. */

/*      WANTZ  (input) logical scalar */
/*             WANTZ = .true. if the orthogonal Schur factor is being */
/*             computed.  WANTZ is set to .false. otherwise. */

/*      KACC22 (input) integer with value 0, 1, or 2. */
/*             Specifies the computation mode of far-from-diagonal */
/*             orthogonal updates. */
/*        = 0: SLAQR5 does not accumulate reflections and does not */
/*             use matrix-matrix multiply to update far-from-diagonal */
/*             matrix entries. */
/*        = 1: SLAQR5 accumulates reflections and uses matrix-matrix */
/*             multiply to update the far-from-diagonal matrix entries. */
/*        = 2: SLAQR5 accumulates reflections, uses matrix-matrix */
/*             multiply to update the far-from-diagonal matrix entries, */
/*             and takes advantage of 2-by-2 block structure during */
/*             matrix multiplies. */

/*      N      (input) integer scalar */
/*             N is the order of the Hessenberg matrix H upon which this */
/*             subroutine operates. */

/*      KTOP   (input) integer scalar */
/*      KBOT   (input) integer scalar */
/*             These are the first and last rows and columns of an */
/*             isolated diagonal block upon which the QR sweep is to be */
/*             applied. It is assumed without a check that */
/*                       either KTOP = 1  or   H(KTOP,KTOP-1) = 0 */
/*             and */
/*                       either KBOT = N  or   H(KBOT+1,KBOT) = 0. */

/*      NSHFTS (input) integer scalar */
/*             NSHFTS gives the number of simultaneous shifts.  NSHFTS */
/*             must be positive and even. */

/*      SR     (input/output) REAL array of size (NSHFTS) */
/*      SI     (input/output) REAL array of size (NSHFTS) */
/*             SR contains the real parts and SI contains the imaginary */
/*             parts of the NSHFTS shifts of origin that define the */
/*             multi-shift QR sweep.  On output SR and SI may be */
/*             reordered. */

/*      H      (input/output) REAL array of size (LDH,N) */
/*             On input H contains a Hessenberg matrix.  On output a */
/*             multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied */
/*             to the isolated diagonal block in rows and columns KTOP */
/*             through KBOT. */

/*      LDH    (input) integer scalar */
/*             LDH is the leading dimension of H just as declared in the */
/*             calling procedure.  LDH.GE.MAX(1,N). */

/*      ILOZ   (input) INTEGER */
/*      IHIZ   (input) INTEGER */
/*             Specify the rows of Z to which transformations must be */
/*             applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N */

/*      Z      (input/output) REAL array of size (LDZ,IHI) */
/*             If WANTZ = .TRUE., then the QR Sweep orthogonal */
/*             similarity transformation is accumulated into */
/*             Z(ILOZ:IHIZ,ILO:IHI) from the right. */
/*             If WANTZ = .FALSE., then Z is unreferenced. */

/*      LDZ    (input) integer scalar */
/*             LDA is the leading dimension of Z just as declared in */
/*             the calling procedure. LDZ.GE.N. */

/*      V      (workspace) REAL array of size (LDV,NSHFTS/2) */

/*      LDV    (input) integer scalar */
/*             LDV is the leading dimension of V as declared in the */
/*             calling procedure.  LDV.GE.3. */

/*      U      (workspace) REAL array of size */
/*             (LDU,3*NSHFTS-3) */

/*      LDU    (input) integer scalar */
/*             LDU is the leading dimension of U just as declared in the */
/*             in the calling subroutine.  LDU.GE.3*NSHFTS-3. */

/*      NH     (input) integer scalar */
/*             NH is the number of columns in array WH available for */
/*             workspace. NH.GE.1. */

/*      WH     (workspace) REAL array of size (LDWH,NH) */

/*      LDWH   (input) integer scalar */
/*             Leading dimension of WH just as declared in the */
/*             calling procedure.  LDWH.GE.3*NSHFTS-3. */

/*      NV     (input) integer scalar */
/*             NV is the number of rows in WV agailable for workspace. */
/*             NV.GE.1. */

/*      WV     (workspace) REAL array of size */
/*             (LDWV,3*NSHFTS-3) */

/*      LDWV   (input) integer scalar */
/*             LDWV is the leading dimension of WV as declared in the */
/*             in the calling subroutine.  LDWV.GE.NV. */

/*     ================================================================ */
/*     Based on contributions by */
/*        Karen Braman and Ralph Byers, Department of Mathematics, */
/*        University of Kansas, USA */

/*     ================================================================ */
/*     Reference: */

/*     K. Braman, R. Byers and R. Mathias, The Multi-Shift QR */
/*     Algorithm Part I: Maintaining Well Focused Shifts, and */
/*     Level 3 Performance, SIAM Journal of Matrix Analysis, */
/*     volume 23, pages 929--947, 2002. */

/*     ================================================================ */
/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Intrinsic Functions .. */

/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Executable Statements .. */

/*     ==== If there are no shifts, then there is nothing to do. ==== */

    /* Parameter adjustments */
    --sr;
    --si;
    h_dim1 = *ldh;
    h_offset = 1 + h_dim1;
    h__ -= h_offset;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1;
    z__ -= z_offset;
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1;
    u -= u_offset;
    wv_dim1 = *ldwv;
    wv_offset = 1 + wv_dim1;
    wv -= wv_offset;
    wh_dim1 = *ldwh;
    wh_offset = 1 + wh_dim1;
    wh -= wh_offset;

    /* Function Body */
    if (*nshfts < 2) {
	return 0;
    }

/*     ==== If the active block is empty or 1-by-1, then there */
/*     .    is nothing to do. ==== */

    if (*ktop >= *kbot) {
	return 0;
    }

/*     ==== Shuffle shifts into pairs of real shifts and pairs */
/*     .    of complex conjugate shifts assuming complex */
/*     .    conjugate shifts are already adjacent to one */
/*     .    another. ==== */

    i__1 = *nshfts - 2;
    for (i__ = 1; i__ <= i__1; i__ += 2) {
	if (si[i__] != -si[i__ + 1]) {

	    swap = sr[i__];
	    sr[i__] = sr[i__ + 1];
	    sr[i__ + 1] = sr[i__ + 2];
	    sr[i__ + 2] = swap;

	    swap = si[i__];
	    si[i__] = si[i__ + 1];
	    si[i__ + 1] = si[i__ + 2];
	    si[i__ + 2] = swap;
	}
/* L10: */
    }

/*     ==== NSHFTS is supposed to be even, but if it is odd, */
/*     .    then simply reduce it by one.  The shuffle above */
/*     .    ensures that the dropped shift is real and that */
/*     .    the remaining shifts are paired. ==== */

    ns = *nshfts - *nshfts % 2;

/*     ==== Machine constants for deflation ==== */

    safmin = slamch_("SAFE MINIMUM");
    safmax = 1.f / safmin;
    slabad_(&safmin, &safmax);
    ulp = slamch_("PRECISION");
    smlnum = safmin * ((real) (*n) / ulp);

/*     ==== Use accumulated reflections to update far-from-diagonal */
/*     .    entries ? ==== */

    accum = *kacc22 == 1 || *kacc22 == 2;

/*     ==== If so, exploit the 2-by-2 block structure? ==== */

    blk22 = ns > 2 && *kacc22 == 2;

/*     ==== clear trash ==== */

    if (*ktop + 2 <= *kbot) {
	h__[*ktop + 2 + *ktop * h_dim1] = 0.f;
    }

/*     ==== NBMPS = number of 2-shift bulges in the chain ==== */

    nbmps = ns / 2;

/*     ==== KDU = width of slab ==== */

    kdu = nbmps * 6 - 3;

/*     ==== Create and chase chains of NBMPS bulges ==== */

    i__1 = *kbot - 2;
    i__2 = nbmps * 3 - 2;
    for (incol = (1 - nbmps) * 3 + *ktop - 1; i__2 < 0 ? incol >= i__1 : 
	    incol <= i__1; incol += i__2) {
	ndcol = incol + kdu;
	if (accum) {
	    slaset_("ALL", &kdu, &kdu, &c_b7, &c_b8, &u[u_offset], ldu);
	}

/*        ==== Near-the-diagonal bulge chase.  The following loop */
/*        .    performs the near-the-diagonal part of a small bulge */
/*        .    multi-shift QR sweep.  Each 6*NBMPS-2 column diagonal */
/*        .    chunk extends from column INCOL to column NDCOL */
/*        .    (including both column INCOL and column NDCOL). The */
/*        .    following loop chases a 3*NBMPS column long chain of */
/*        .    NBMPS bulges 3*NBMPS-2 columns to the right.  (INCOL */
/*        .    may be less than KTOP and and NDCOL may be greater than */
/*        .    KBOT indicating phantom columns from which to chase */
/*        .    bulges before they are actually introduced or to which */
/*        .    to chase bulges beyond column KBOT.)  ==== */

/* Computing MIN */
	i__4 = incol + nbmps * 3 - 3, i__5 = *kbot - 2;
	i__3 = min(i__4,i__5);
	for (krcol = incol; krcol <= i__3; ++krcol) {

/*           ==== Bulges number MTOP to MBOT are active double implicit */
/*           .    shift bulges.  There may or may not also be small */
/*           .    2-by-2 bulge, if there is room.  The inactive bulges */
/*           .    (if any) must wait until the active bulges have moved */
/*           .    down the diagonal to make room.  The phantom matrix */
/*           .    paradigm described above helps keep track.  ==== */

/* Computing MAX */
	    i__4 = 1, i__5 = (*ktop - 1 - krcol + 2) / 3 + 1;
	    mtop = max(i__4,i__5);
/* Computing MIN */
	    i__4 = nbmps, i__5 = (*kbot - krcol) / 3;
	    mbot = min(i__4,i__5);
	    m22 = mbot + 1;
	    bmp22 = mbot < nbmps && krcol + (m22 - 1) * 3 == *kbot - 2;

/*           ==== Generate reflections to chase the chain right */
/*           .    one column.  (The minimum value of K is KTOP-1.) ==== */

	    i__4 = mbot;
	    for (m = mtop; m <= i__4; ++m) {
		k = krcol + (m - 1) * 3;
		if (k == *ktop - 1) {
		    slaqr1_(&c__3, &h__[*ktop + *ktop * h_dim1], ldh, &sr[(m 
			    << 1) - 1], &si[(m << 1) - 1], &sr[m * 2], &si[m *
			     2], &v[m * v_dim1 + 1]);
		    alpha = v[m * v_dim1 + 1];
		    slarfg_(&c__3, &alpha, &v[m * v_dim1 + 2], &c__1, &v[m * 
			    v_dim1 + 1]);
		} else {
		    beta = h__[k + 1 + k * h_dim1];
		    v[m * v_dim1 + 2] = h__[k + 2 + k * h_dim1];
		    v[m * v_dim1 + 3] = h__[k + 3 + k * h_dim1];
		    slarfg_(&c__3, &beta, &v[m * v_dim1 + 2], &c__1, &v[m * 
			    v_dim1 + 1]);

/*                 ==== A Bulge may collapse because of vigilant */
/*                 .    deflation or destructive underflow.  In the */
/*                 .    underflow case, try the two-small-subdiagonals */
/*                 .    trick to try to reinflate the bulge.  ==== */

		    if (h__[k + 3 + k * h_dim1] != 0.f || h__[k + 3 + (k + 1) 
			    * h_dim1] != 0.f || h__[k + 3 + (k + 2) * h_dim1] 
			    == 0.f) {

/*                    ==== Typical case: not collapsed (yet). ==== */

			h__[k + 1 + k * h_dim1] = beta;
			h__[k + 2 + k * h_dim1] = 0.f;
			h__[k + 3 + k * h_dim1] = 0.f;
		    } else {

/*                    ==== Atypical case: collapsed.  Attempt to */
/*                    .    reintroduce ignoring H(K+1,K) and H(K+2,K). */
/*                    .    If the fill resulting from the new */
/*                    .    reflector is too large, then abandon it. */
/*                    .    Otherwise, use the new one. ==== */

			slaqr1_(&c__3, &h__[k + 1 + (k + 1) * h_dim1], ldh, &
				sr[(m << 1) - 1], &si[(m << 1) - 1], &sr[m * 
				2], &si[m * 2], vt);
			alpha = vt[0];
			slarfg_(&c__3, &alpha, &vt[1], &c__1, vt);
			refsum = vt[0] * (h__[k + 1 + k * h_dim1] + vt[1] * 
				h__[k + 2 + k * h_dim1]);

			if ((r__1 = h__[k + 2 + k * h_dim1] - refsum * vt[1], 
				dabs(r__1)) + (r__2 = refsum * vt[2], dabs(
				r__2)) > ulp * ((r__3 = h__[k + k * h_dim1], 
				dabs(r__3)) + (r__4 = h__[k + 1 + (k + 1) * 
				h_dim1], dabs(r__4)) + (r__5 = h__[k + 2 + (k 
				+ 2) * h_dim1], dabs(r__5)))) {

/*                       ==== Starting a new bulge here would */
/*                       .    create non-negligible fill.  Use */
/*                       .    the old one with trepidation. ==== */

			    h__[k + 1 + k * h_dim1] = beta;
			    h__[k + 2 + k * h_dim1] = 0.f;
			    h__[k + 3 + k * h_dim1] = 0.f;
			} else {

/*                       ==== Stating a new bulge here would */
/*                       .    create only negligible fill. */
/*                       .    Replace the old reflector with */
/*                       .    the new one. ==== */

			    h__[k + 1 + k * h_dim1] -= refsum;
			    h__[k + 2 + k * h_dim1] = 0.f;
			    h__[k + 3 + k * h_dim1] = 0.f;
			    v[m * v_dim1 + 1] = vt[0];
			    v[m * v_dim1 + 2] = vt[1];
			    v[m * v_dim1 + 3] = vt[2];
			}
		    }
		}
/* L20: */
	    }

/*           ==== Generate a 2-by-2 reflection, if needed. ==== */

	    k = krcol + (m22 - 1) * 3;
	    if (bmp22) {
		if (k == *ktop - 1) {
		    slaqr1_(&c__2, &h__[k + 1 + (k + 1) * h_dim1], ldh, &sr[(
			    m22 << 1) - 1], &si[(m22 << 1) - 1], &sr[m22 * 2], 
			     &si[m22 * 2], &v[m22 * v_dim1 + 1]);
		    beta = v[m22 * v_dim1 + 1];
		    slarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 
			    * v_dim1 + 1]);
		} else {
		    beta = h__[k + 1 + k * h_dim1];
		    v[m22 * v_dim1 + 2] = h__[k + 2 + k * h_dim1];
		    slarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 
			    * v_dim1 + 1]);
		    h__[k + 1 + k * h_dim1] = beta;
		    h__[k + 2 + k * h_dim1] = 0.f;
		}
	    }

/*           ==== Multiply H by reflections from the left ==== */

	    if (accum) {
		jbot = min(ndcol,*kbot);
	    } else if (*wantt) {
		jbot = *n;
	    } else {
		jbot = *kbot;
	    }
	    i__4 = jbot;
	    for (j = max(*ktop,krcol); j <= i__4; ++j) {
/* Computing MIN */
		i__5 = mbot, i__6 = (j - krcol + 2) / 3;
		mend = min(i__5,i__6);
		i__5 = mend;
		for (m = mtop; m <= i__5; ++m) {
		    k = krcol + (m - 1) * 3;
		    refsum = v[m * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + v[
			    m * v_dim1 + 2] * h__[k + 2 + j * h_dim1] + v[m * 
			    v_dim1 + 3] * h__[k + 3 + j * h_dim1]);
		    h__[k + 1 + j * h_dim1] -= refsum;
		    h__[k + 2 + j * h_dim1] -= refsum * v[m * v_dim1 + 2];
		    h__[k + 3 + j * h_dim1] -= refsum * v[m * v_dim1 + 3];
/* L30: */
		}
/* L40: */
	    }
	    if (bmp22) {
		k = krcol + (m22 - 1) * 3;
/* Computing MAX */
		i__4 = k + 1;
		i__5 = jbot;
		for (j = max(i__4,*ktop); j <= i__5; ++j) {
		    refsum = v[m22 * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + 
			    v[m22 * v_dim1 + 2] * h__[k + 2 + j * h_dim1]);
		    h__[k + 1 + j * h_dim1] -= refsum;
		    h__[k + 2 + j * h_dim1] -= refsum * v[m22 * v_dim1 + 2];
/* L50: */
		}
	    }

/*           ==== Multiply H by reflections from the right. */
/*           .    Delay filling in the last row until the */
/*           .    vigilant deflation check is complete. ==== */

	    if (accum) {
		jtop = max(*ktop,incol);
	    } else if (*wantt) {
		jtop = 1;
	    } else {
		jtop = *ktop;
	    }
	    i__5 = mbot;
	    for (m = mtop; m <= i__5; ++m) {
		if (v[m * v_dim1 + 1] != 0.f) {
		    k = krcol + (m - 1) * 3;
/* Computing MIN */
		    i__6 = *kbot, i__7 = k + 3;
		    i__4 = min(i__6,i__7);
		    for (j = jtop; j <= i__4; ++j) {
			refsum = v[m * v_dim1 + 1] * (h__[j + (k + 1) * 
				h_dim1] + v[m * v_dim1 + 2] * h__[j + (k + 2) 
				* h_dim1] + v[m * v_dim1 + 3] * h__[j + (k + 
				3) * h_dim1]);
			h__[j + (k + 1) * h_dim1] -= refsum;
			h__[j + (k + 2) * h_dim1] -= refsum * v[m * v_dim1 + 
				2];
			h__[j + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + 
				3];
/* L60: */
		    }

		    if (accum) {

/*                    ==== Accumulate U. (If necessary, update Z later */
/*                    .    with with an efficient matrix-matrix */
/*                    .    multiply.) ==== */

			kms = k - incol;
/* Computing MAX */
			i__4 = 1, i__6 = *ktop - incol;
			i__7 = kdu;
			for (j = max(i__4,i__6); j <= i__7; ++j) {
			    refsum = v[m * v_dim1 + 1] * (u[j + (kms + 1) * 
				    u_dim1] + v[m * v_dim1 + 2] * u[j + (kms 
				    + 2) * u_dim1] + v[m * v_dim1 + 3] * u[j 
				    + (kms + 3) * u_dim1]);
			    u[j + (kms + 1) * u_dim1] -= refsum;
			    u[j + (kms + 2) * u_dim1] -= refsum * v[m * 
				    v_dim1 + 2];
			    u[j + (kms + 3) * u_dim1] -= refsum * v[m * 
				    v_dim1 + 3];
/* L70: */
			}
		    } else if (*wantz) {

/*                    ==== U is not accumulated, so update Z */
/*                    .    now by multiplying by reflections */
/*                    .    from the right. ==== */

			i__7 = *ihiz;
			for (j = *iloz; j <= i__7; ++j) {
			    refsum = v[m * v_dim1 + 1] * (z__[j + (k + 1) * 
				    z_dim1] + v[m * v_dim1 + 2] * z__[j + (k 
				    + 2) * z_dim1] + v[m * v_dim1 + 3] * z__[
				    j + (k + 3) * z_dim1]);
			    z__[j + (k + 1) * z_dim1] -= refsum;
			    z__[j + (k + 2) * z_dim1] -= refsum * v[m * 
				    v_dim1 + 2];
			    z__[j + (k + 3) * z_dim1] -= refsum * v[m * 
				    v_dim1 + 3];
/* L80: */
			}
		    }
		}
/* L90: */
	    }

/*           ==== Special case: 2-by-2 reflection (if needed) ==== */

	    k = krcol + (m22 - 1) * 3;
	    if (bmp22 && v[m22 * v_dim1 + 1] != 0.f) {
/* Computing MIN */
		i__7 = *kbot, i__4 = k + 3;
		i__5 = min(i__7,i__4);
		for (j = jtop; j <= i__5; ++j) {
		    refsum = v[m22 * v_dim1 + 1] * (h__[j + (k + 1) * h_dim1] 
			    + v[m22 * v_dim1 + 2] * h__[j + (k + 2) * h_dim1])
			    ;
		    h__[j + (k + 1) * h_dim1] -= refsum;
		    h__[j + (k + 2) * h_dim1] -= refsum * v[m22 * v_dim1 + 2];
/* L100: */
		}

		if (accum) {
		    kms = k - incol;
/* Computing MAX */
		    i__5 = 1, i__7 = *ktop - incol;
		    i__4 = kdu;
		    for (j = max(i__5,i__7); j <= i__4; ++j) {
			refsum = v[m22 * v_dim1 + 1] * (u[j + (kms + 1) * 
				u_dim1] + v[m22 * v_dim1 + 2] * u[j + (kms + 
				2) * u_dim1]);
			u[j + (kms + 1) * u_dim1] -= refsum;
			u[j + (kms + 2) * u_dim1] -= refsum * v[m22 * v_dim1 
				+ 2];
/* L110: */
		    }
		} else if (*wantz) {
		    i__4 = *ihiz;
		    for (j = *iloz; j <= i__4; ++j) {
			refsum = v[m22 * v_dim1 + 1] * (z__[j + (k + 1) * 
				z_dim1] + v[m22 * v_dim1 + 2] * z__[j + (k + 
				2) * z_dim1]);
			z__[j + (k + 1) * z_dim1] -= refsum;
			z__[j + (k + 2) * z_dim1] -= refsum * v[m22 * v_dim1 
				+ 2];
/* L120: */
		    }
		}
	    }

/*           ==== Vigilant deflation check ==== */

	    mstart = mtop;
	    if (krcol + (mstart - 1) * 3 < *ktop) {
		++mstart;
	    }
	    mend = mbot;
	    if (bmp22) {
		++mend;
	    }
	    if (krcol == *kbot - 2) {
		++mend;
	    }
	    i__4 = mend;
	    for (m = mstart; m <= i__4; ++m) {
/* Computing MIN */
		i__5 = *kbot - 1, i__7 = krcol + (m - 1) * 3;
		k = min(i__5,i__7);

/*              ==== The following convergence test requires that */
/*              .    the tradition small-compared-to-nearby-diagonals */
/*              .    criterion and the Ahues & Tisseur (LAWN 122, 1997) */
/*              .    criteria both be satisfied.  The latter improves */
/*              .    accuracy in some examples. Falling back on an */
/*              .    alternate convergence criterion when TST1 or TST2 */
/*              .    is zero (as done here) is traditional but probably */
/*              .    unnecessary. ==== */

		if (h__[k + 1 + k * h_dim1] != 0.f) {
		    tst1 = (r__1 = h__[k + k * h_dim1], dabs(r__1)) + (r__2 = 
			    h__[k + 1 + (k + 1) * h_dim1], dabs(r__2));
		    if (tst1 == 0.f) {
			if (k >= *ktop + 1) {
			    tst1 += (r__1 = h__[k + (k - 1) * h_dim1], dabs(
				    r__1));
			}
			if (k >= *ktop + 2) {
			    tst1 += (r__1 = h__[k + (k - 2) * h_dim1], dabs(
				    r__1));
			}
			if (k >= *ktop + 3) {
			    tst1 += (r__1 = h__[k + (k - 3) * h_dim1], dabs(
				    r__1));
			}
			if (k <= *kbot - 2) {
			    tst1 += (r__1 = h__[k + 2 + (k + 1) * h_dim1], 
				    dabs(r__1));
			}
			if (k <= *kbot - 3) {
			    tst1 += (r__1 = h__[k + 3 + (k + 1) * h_dim1], 
				    dabs(r__1));
			}
			if (k <= *kbot - 4) {
			    tst1 += (r__1 = h__[k + 4 + (k + 1) * h_dim1], 
				    dabs(r__1));
			}
		    }
/* Computing MAX */
		    r__2 = smlnum, r__3 = ulp * tst1;
		    if ((r__1 = h__[k + 1 + k * h_dim1], dabs(r__1)) <= dmax(
			    r__2,r__3)) {
/* Computing MAX */
			r__3 = (r__1 = h__[k + 1 + k * h_dim1], dabs(r__1)), 
				r__4 = (r__2 = h__[k + (k + 1) * h_dim1], 
				dabs(r__2));
			h12 = dmax(r__3,r__4);
/* Computing MIN */
			r__3 = (r__1 = h__[k + 1 + k * h_dim1], dabs(r__1)), 
				r__4 = (r__2 = h__[k + (k + 1) * h_dim1], 
				dabs(r__2));
			h21 = dmin(r__3,r__4);
/* Computing MAX */
			r__3 = (r__1 = h__[k + 1 + (k + 1) * h_dim1], dabs(
				r__1)), r__4 = (r__2 = h__[k + k * h_dim1] - 
				h__[k + 1 + (k + 1) * h_dim1], dabs(r__2));
			h11 = dmax(r__3,r__4);
/* Computing MIN */
			r__3 = (r__1 = h__[k + 1 + (k + 1) * h_dim1], dabs(
				r__1)), r__4 = (r__2 = h__[k + k * h_dim1] - 
				h__[k + 1 + (k + 1) * h_dim1], dabs(r__2));
			h22 = dmin(r__3,r__4);
			scl = h11 + h12;
			tst2 = h22 * (h11 / scl);

/* Computing MAX */
			r__1 = smlnum, r__2 = ulp * tst2;
			if (tst2 == 0.f || h21 * (h12 / scl) <= dmax(r__1,
				r__2)) {
			    h__[k + 1 + k * h_dim1] = 0.f;
			}
		    }
		}
/* L130: */
	    }

/*           ==== Fill in the last row of each bulge. ==== */

/* Computing MIN */
	    i__4 = nbmps, i__5 = (*kbot - krcol - 1) / 3;
	    mend = min(i__4,i__5);
	    i__4 = mend;
	    for (m = mtop; m <= i__4; ++m) {
		k = krcol + (m - 1) * 3;
		refsum = v[m * v_dim1 + 1] * v[m * v_dim1 + 3] * h__[k + 4 + (
			k + 3) * h_dim1];
		h__[k + 4 + (k + 1) * h_dim1] = -refsum;
		h__[k + 4 + (k + 2) * h_dim1] = -refsum * v[m * v_dim1 + 2];
		h__[k + 4 + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + 3];
/* L140: */
	    }

/*           ==== End of near-the-diagonal bulge chase. ==== */

/* L150: */
	}

/*        ==== Use U (if accumulated) to update far-from-diagonal */
/*        .    entries in H.  If required, use U to update Z as */
/*        .    well. ==== */

	if (accum) {
	    if (*wantt) {
		jtop = 1;
		jbot = *n;
	    } else {
		jtop = *ktop;
		jbot = *kbot;
	    }
	    if (! blk22 || incol < *ktop || ndcol > *kbot || ns <= 2) {

/*              ==== Updates not exploiting the 2-by-2 block */
/*              .    structure of U.  K1 and NU keep track of */
/*              .    the location and size of U in the special */
/*              .    cases of introducing bulges and chasing */
/*              .    bulges off the bottom.  In these special */
/*              .    cases and in case the number of shifts */
/*              .    is NS = 2, there is no 2-by-2 block */
/*              .    structure to exploit.  ==== */

/* Computing MAX */
		i__3 = 1, i__4 = *ktop - incol;
		k1 = max(i__3,i__4);
/* Computing MAX */
		i__3 = 0, i__4 = ndcol - *kbot;
		nu = kdu - max(i__3,i__4) - k1 + 1;

/*              ==== Horizontal Multiply ==== */

		i__3 = jbot;
		i__4 = *nh;
		for (jcol = min(ndcol,*kbot) + 1; i__4 < 0 ? jcol >= i__3 : 
			jcol <= i__3; jcol += i__4) {
/* Computing MIN */
		    i__5 = *nh, i__7 = jbot - jcol + 1;
		    jlen = min(i__5,i__7);
		    sgemm_("C", "N", &nu, &jlen, &nu, &c_b8, &u[k1 + k1 * 
			    u_dim1], ldu, &h__[incol + k1 + jcol * h_dim1], 
			    ldh, &c_b7, &wh[wh_offset], ldwh);
		    slacpy_("ALL", &nu, &jlen, &wh[wh_offset], ldwh, &h__[
			    incol + k1 + jcol * h_dim1], ldh);
/* L160: */
		}

/*              ==== Vertical multiply ==== */

		i__4 = max(*ktop,incol) - 1;
		i__3 = *nv;
		for (jrow = jtop; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; 
			jrow += i__3) {
/* Computing MIN */
		    i__5 = *nv, i__7 = max(*ktop,incol) - jrow;
		    jlen = min(i__5,i__7);
		    sgemm_("N", "N", &jlen, &nu, &nu, &c_b8, &h__[jrow + (
			    incol + k1) * h_dim1], ldh, &u[k1 + k1 * u_dim1], 
			    ldu, &c_b7, &wv[wv_offset], ldwv);
		    slacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &h__[
			    jrow + (incol + k1) * h_dim1], ldh);
/* L170: */
		}

/*              ==== Z multiply (also vertical) ==== */

		if (*wantz) {
		    i__3 = *ihiz;
		    i__4 = *nv;
		    for (jrow = *iloz; i__4 < 0 ? jrow >= i__3 : jrow <= i__3;
			     jrow += i__4) {
/* Computing MIN */
			i__5 = *nv, i__7 = *ihiz - jrow + 1;
			jlen = min(i__5,i__7);
			sgemm_("N", "N", &jlen, &nu, &nu, &c_b8, &z__[jrow + (
				incol + k1) * z_dim1], ldz, &u[k1 + k1 * 
				u_dim1], ldu, &c_b7, &wv[wv_offset], ldwv);
			slacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &z__[
				jrow + (incol + k1) * z_dim1], ldz)
				;
/* L180: */
		    }
		}
	    } else {

/*              ==== Updates exploiting U's 2-by-2 block structure. */
/*              .    (I2, I4, J2, J4 are the last rows and columns */
/*              .    of the blocks.) ==== */

		i2 = (kdu + 1) / 2;
		i4 = kdu;
		j2 = i4 - i2;
		j4 = kdu;

/*              ==== KZS and KNZ deal with the band of zeros */
/*              .    along the diagonal of one of the triangular */
/*              .    blocks. ==== */

		kzs = j4 - j2 - (ns + 1);
		knz = ns + 1;

/*              ==== Horizontal multiply ==== */

		i__4 = jbot;
		i__3 = *nh;
		for (jcol = min(ndcol,*kbot) + 1; i__3 < 0 ? jcol >= i__4 : 
			jcol <= i__4; jcol += i__3) {
/* Computing MIN */
		    i__5 = *nh, i__7 = jbot - jcol + 1;
		    jlen = min(i__5,i__7);

/*                 ==== Copy bottom of H to top+KZS of scratch ==== */
/*                  (The first KZS rows get multiplied by zero.) ==== */

		    slacpy_("ALL", &knz, &jlen, &h__[incol + 1 + j2 + jcol * 
			    h_dim1], ldh, &wh[kzs + 1 + wh_dim1], ldwh);

/*                 ==== Multiply by U21' ==== */

		    slaset_("ALL", &kzs, &jlen, &c_b7, &c_b7, &wh[wh_offset], 
			    ldwh);
		    strmm_("L", "U", "C", "N", &knz, &jlen, &c_b8, &u[j2 + 1 
			    + (kzs + 1) * u_dim1], ldu, &wh[kzs + 1 + wh_dim1]
, ldwh);

/*                 ==== Multiply top of H by U11' ==== */

		    sgemm_("C", "N", &i2, &jlen, &j2, &c_b8, &u[u_offset], 
			    ldu, &h__[incol + 1 + jcol * h_dim1], ldh, &c_b8, 
			    &wh[wh_offset], ldwh);

/*                 ==== Copy top of H to bottom of WH ==== */

		    slacpy_("ALL", &j2, &jlen, &h__[incol + 1 + jcol * h_dim1]
, ldh, &wh[i2 + 1 + wh_dim1], ldwh);

/*                 ==== Multiply by U21' ==== */

		    strmm_("L", "L", "C", "N", &j2, &jlen, &c_b8, &u[(i2 + 1) 
			    * u_dim1 + 1], ldu, &wh[i2 + 1 + wh_dim1], ldwh);

/*                 ==== Multiply by U22 ==== */

		    i__5 = i4 - i2;
		    i__7 = j4 - j2;
		    sgemm_("C", "N", &i__5, &jlen, &i__7, &c_b8, &u[j2 + 1 + (
			    i2 + 1) * u_dim1], ldu, &h__[incol + 1 + j2 + 
			    jcol * h_dim1], ldh, &c_b8, &wh[i2 + 1 + wh_dim1], 
			     ldwh);

/*                 ==== Copy it back ==== */

		    slacpy_("ALL", &kdu, &jlen, &wh[wh_offset], ldwh, &h__[
			    incol + 1 + jcol * h_dim1], ldh);
/* L190: */
		}

/*              ==== Vertical multiply ==== */

		i__3 = max(incol,*ktop) - 1;
		i__4 = *nv;
		for (jrow = jtop; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; 
			jrow += i__4) {
/* Computing MIN */
		    i__5 = *nv, i__7 = max(incol,*ktop) - jrow;
		    jlen = min(i__5,i__7);

/*                 ==== Copy right of H to scratch (the first KZS */
/*                 .    columns get multiplied by zero) ==== */

		    slacpy_("ALL", &jlen, &knz, &h__[jrow + (incol + 1 + j2) *
			     h_dim1], ldh, &wv[(kzs + 1) * wv_dim1 + 1], ldwv);

/*                 ==== Multiply by U21 ==== */

		    slaset_("ALL", &jlen, &kzs, &c_b7, &c_b7, &wv[wv_offset], 
			    ldwv);
		    strmm_("R", "U", "N", "N", &jlen, &knz, &c_b8, &u[j2 + 1 
			    + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) * 
			    wv_dim1 + 1], ldwv);

/*                 ==== Multiply by U11 ==== */

		    sgemm_("N", "N", &jlen, &i2, &j2, &c_b8, &h__[jrow + (
			    incol + 1) * h_dim1], ldh, &u[u_offset], ldu, &
			    c_b8, &wv[wv_offset], ldwv);

/*                 ==== Copy left of H to right of scratch ==== */

		    slacpy_("ALL", &jlen, &j2, &h__[jrow + (incol + 1) * 
			    h_dim1], ldh, &wv[(i2 + 1) * wv_dim1 + 1], ldwv);

/*                 ==== Multiply by U21 ==== */

		    i__5 = i4 - i2;
		    strmm_("R", "L", "N", "N", &jlen, &i__5, &c_b8, &u[(i2 + 
			    1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * wv_dim1 + 1]
, ldwv);

/*                 ==== Multiply by U22 ==== */

		    i__5 = i4 - i2;
		    i__7 = j4 - j2;
		    sgemm_("N", "N", &jlen, &i__5, &i__7, &c_b8, &h__[jrow + (
			    incol + 1 + j2) * h_dim1], ldh, &u[j2 + 1 + (i2 + 
			    1) * u_dim1], ldu, &c_b8, &wv[(i2 + 1) * wv_dim1 
			    + 1], ldwv);

/*                 ==== Copy it back ==== */

		    slacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, &h__[
			    jrow + (incol + 1) * h_dim1], ldh);
/* L200: */
		}

/*              ==== Multiply Z (also vertical) ==== */

		if (*wantz) {
		    i__4 = *ihiz;
		    i__3 = *nv;
		    for (jrow = *iloz; i__3 < 0 ? jrow >= i__4 : jrow <= i__4;
			     jrow += i__3) {
/* Computing MIN */
			i__5 = *nv, i__7 = *ihiz - jrow + 1;
			jlen = min(i__5,i__7);

/*                    ==== Copy right of Z to left of scratch (first */
/*                    .     KZS columns get multiplied by zero) ==== */

			slacpy_("ALL", &jlen, &knz, &z__[jrow + (incol + 1 + 
				j2) * z_dim1], ldz, &wv[(kzs + 1) * wv_dim1 + 
				1], ldwv);

/*                    ==== Multiply by U12 ==== */

			slaset_("ALL", &jlen, &kzs, &c_b7, &c_b7, &wv[
				wv_offset], ldwv);
			strmm_("R", "U", "N", "N", &jlen, &knz, &c_b8, &u[j2 
				+ 1 + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) 
				* wv_dim1 + 1], ldwv);

/*                    ==== Multiply by U11 ==== */

			sgemm_("N", "N", &jlen, &i2, &j2, &c_b8, &z__[jrow + (
				incol + 1) * z_dim1], ldz, &u[u_offset], ldu, 
				&c_b8, &wv[wv_offset], ldwv);

/*                    ==== Copy left of Z to right of scratch ==== */

			slacpy_("ALL", &jlen, &j2, &z__[jrow + (incol + 1) * 
				z_dim1], ldz, &wv[(i2 + 1) * wv_dim1 + 1], 
				ldwv);

/*                    ==== Multiply by U21 ==== */

			i__5 = i4 - i2;
			strmm_("R", "L", "N", "N", &jlen, &i__5, &c_b8, &u[(
				i2 + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * 
				wv_dim1 + 1], ldwv);

/*                    ==== Multiply by U22 ==== */

			i__5 = i4 - i2;
			i__7 = j4 - j2;
			sgemm_("N", "N", &jlen, &i__5, &i__7, &c_b8, &z__[
				jrow + (incol + 1 + j2) * z_dim1], ldz, &u[j2 
				+ 1 + (i2 + 1) * u_dim1], ldu, &c_b8, &wv[(i2 
				+ 1) * wv_dim1 + 1], ldwv);

/*                    ==== Copy the result back to Z ==== */

			slacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, &
				z__[jrow + (incol + 1) * z_dim1], ldz);
/* L210: */
		    }
		}
	    }
	}
/* L220: */
    }

/*     ==== End of SLAQR5 ==== */

    return 0;
} /* slaqr5_ */
コード例 #5
0
ファイル: sgehrd.c プロジェクト: 0u812/roadrunner-backup
/* Subroutine */ int sgehrd_(integer *n, integer *ilo, integer *ihi, real *a, 
	integer *lda, real *tau, real *work, integer *lwork, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;

    /* Local variables */
    integer i__, j;
    real t[4160]	/* was [65][64] */;
    integer ib;
    real ei;
    integer nb, nh, nx, iws, nbmin, iinfo;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *), strmm_(char *, char *, char *, 
	     char *, integer *, integer *, real *, real *, integer *, real *, 
	    integer *), saxpy_(integer *, 
	    real *, real *, integer *, real *, integer *), sgehd2_(integer *, 
	    integer *, integer *, real *, integer *, real *, real *, integer *
), slahr2_(integer *, integer *, integer *, real *, integer *, 
	    real *, real *, integer *, real *, integer *), slarfb_(char *, 
	    char *, char *, char *, integer *, integer *, integer *, real *, 
	    integer *, real *, integer *, real *, integer *, real *, integer *
), xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *);
    integer ldwork, lwkopt;
    logical lquery;


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SGEHRD reduces a real general matrix A to upper Hessenberg form H by */
/*  an orthogonal similarity transformation:  Q' * A * Q = H . */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  ILO     (input) INTEGER */
/*  IHI     (input) INTEGER */
/*          It is assumed that A is already upper triangular in rows */
/*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */
/*          set by a previous call to SGEBAL; otherwise they should be */
/*          set to 1 and N respectively. See Further Details. */
/*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */

/*  A       (input/output) REAL array, dimension (LDA,N) */
/*          On entry, the N-by-N general matrix to be reduced. */
/*          On exit, the upper triangle and the first subdiagonal of A */
/*          are overwritten with the upper Hessenberg matrix H, and the */
/*          elements below the first subdiagonal, with the array TAU, */
/*          represent the orthogonal matrix Q as a product of elementary */
/*          reflectors. See Further Details. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  TAU     (output) REAL array, dimension (N-1) */
/*          The scalar factors of the elementary reflectors (see Further */
/*          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to */
/*          zero. */

/*  WORK    (workspace/output) REAL array, dimension (LWORK) */
/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */

/*  LWORK   (input) INTEGER */
/*          The length of the array WORK.  LWORK >= max(1,N). */
/*          For optimum performance LWORK >= N*NB, where NB is the */
/*          optimal blocksize. */

/*          If LWORK = -1, then a workspace query is assumed; the routine */
/*          only calculates the optimal size of the WORK array, returns */
/*          this value as the first entry of the WORK array, and no error */
/*          message related to LWORK is issued by XERBLA. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */

/*  Further Details */
/*  =============== */

/*  The matrix Q is represented as a product of (ihi-ilo) elementary */
/*  reflectors */

/*     Q = H(ilo) H(ilo+1) . . . H(ihi-1). */

/*  Each H(i) has the form */

/*     H(i) = I - tau * v * v' */

/*  where tau is a real scalar, and v is a real vector with */
/*  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */
/*  exit in A(i+2:ihi,i), and tau in TAU(i). */

/*  The contents of A are illustrated by the following example, with */
/*  n = 7, ilo = 2 and ihi = 6: */

/*  on entry,                        on exit, */

/*  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a ) */
/*  (     a   a   a   a   a   a )    (      a   h   h   h   h   a ) */
/*  (     a   a   a   a   a   a )    (      h   h   h   h   h   h ) */
/*  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h ) */
/*  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h ) */
/*  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h ) */
/*  (                         a )    (                          a ) */

/*  where a denotes an element of the original matrix A, h denotes a */
/*  modified element of the upper Hessenberg matrix H, and vi denotes an */
/*  element of the vector defining H(i). */

/*  This file is a slight modification of LAPACK-3.0's SGEHRD */
/*  subroutine incorporating improvements proposed by Quintana-Orti and */
/*  Van de Geijn (2005). */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --tau;
    --work;

    /* Function Body */
    *info = 0;
/* Computing MIN */
    i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1);
    nb = min(i__1,i__2);
    lwkopt = *n * nb;
    work[1] = (real) lwkopt;
    lquery = *lwork == -1;
    if (*n < 0) {
	*info = -1;
    } else if (*ilo < 1 || *ilo > max(1,*n)) {
	*info = -2;
    } else if (*ihi < min(*ilo,*n) || *ihi > *n) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*lwork < max(1,*n) && ! lquery) {
	*info = -8;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SGEHRD", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */

    i__1 = *ilo - 1;
    for (i__ = 1; i__ <= i__1; ++i__) {
	tau[i__] = 0.f;
/* L10: */
    }
    i__1 = *n - 1;
    for (i__ = max(1,*ihi); i__ <= i__1; ++i__) {
	tau[i__] = 0.f;
/* L20: */
    }

/*     Quick return if possible */

    nh = *ihi - *ilo + 1;
    if (nh <= 1) {
	work[1] = 1.f;
	return 0;
    }

/*     Determine the block size */

/* Computing MIN */
    i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1);
    nb = min(i__1,i__2);
    nbmin = 2;
    iws = 1;
    if (nb > 1 && nb < nh) {

/*        Determine when to cross over from blocked to unblocked code */
/*        (last block is always handled by unblocked code) */

/* Computing MAX */
	i__1 = nb, i__2 = ilaenv_(&c__3, "SGEHRD", " ", n, ilo, ihi, &c_n1);
	nx = max(i__1,i__2);
	if (nx < nh) {

/*           Determine if workspace is large enough for blocked code */

	    iws = *n * nb;
	    if (*lwork < iws) {

/*              Not enough workspace to use optimal NB:  determine the */
/*              minimum value of NB, and reduce NB or force use of */
/*              unblocked code */

/* Computing MAX */
		i__1 = 2, i__2 = ilaenv_(&c__2, "SGEHRD", " ", n, ilo, ihi, &
			c_n1);
		nbmin = max(i__1,i__2);
		if (*lwork >= *n * nbmin) {
		    nb = *lwork / *n;
		} else {
		    nb = 1;
		}
	    }
	}
    }
    ldwork = *n;

    if (nb < nbmin || nb >= nh) {

/*        Use unblocked code below */

	i__ = *ilo;

    } else {

/*        Use blocked code */

	i__1 = *ihi - 1 - nx;
	i__2 = nb;
	for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
	    i__3 = nb, i__4 = *ihi - i__;
	    ib = min(i__3,i__4);

/*           Reduce columns i:i+ib-1 to Hessenberg form, returning the */
/*           matrices V and T of the block reflector H = I - V*T*V' */
/*           which performs the reduction, and also the matrix Y = A*V*T */

	    slahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, &
		    c__65, &work[1], &ldwork);

/*           Apply the block reflector H to A(1:ihi,i+ib:ihi) from the */
/*           right, computing  A := A - Y * V'. V(i+ib,ib-1) must be set */
/*           to 1 */

	    ei = a[i__ + ib + (i__ + ib - 1) * a_dim1];
	    a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.f;
	    i__3 = *ihi - i__ - ib + 1;
	    sgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b25, &
		    work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &
		    c_b26, &a[(i__ + ib) * a_dim1 + 1], lda);
	    a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei;

/*           Apply the block reflector H to A(1:i,i+1:i+ib-1) from the */
/*           right */

	    i__3 = ib - 1;
	    strmm_("Right", "Lower", "Transpose", "Unit", &i__, &i__3, &c_b26, 
		     &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork);
	    i__3 = ib - 2;
	    for (j = 0; j <= i__3; ++j) {
		saxpy_(&i__, &c_b25, &work[ldwork * j + 1], &c__1, &a[(i__ + 
			j + 1) * a_dim1 + 1], &c__1);
/* L30: */
	    }

/*           Apply the block reflector H to A(i+1:ihi,i+ib:n) from the */
/*           left */

	    i__3 = *ihi - i__;
	    i__4 = *n - i__ - ib + 1;
	    slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
		    i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[
		    i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork);
/* L40: */
	}
    }

/*     Use unblocked code to reduce the rest of the matrix */

    sgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
    work[1] = (real) iws;

    return 0;

/*     End of SGEHRD */

} /* sgehrd_ */
コード例 #6
0
ファイル: ssygv.c プロジェクト: juanjosegarciaripoll/cblapack
/* Subroutine */ int ssygv_(integer *itype, char *jobz, char *uplo, integer *
	n, real *a, integer *lda, real *b, integer *ldb, real *w, real *work, 
	integer *lwork, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;

    /* Local variables */
    integer nb, neig;
    char trans[1];
    logical upper;
    logical wantz;
    integer lwkmin;
    integer lwkopt;
    logical lquery;

/*  -- LAPACK driver routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  SSYGV computes all the eigenvalues, and optionally, the eigenvectors */
/*  of a real generalized symmetric-definite eigenproblem, of the form */
/*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x. */
/*  Here A and B are assumed to be symmetric and B is also */
/*  positive definite. */

/*  Arguments */
/*  ========= */

/*  ITYPE   (input) INTEGER */
/*          Specifies the problem type to be solved: */
/*          = 1:  A*x = (lambda)*B*x */
/*          = 2:  A*B*x = (lambda)*x */
/*          = 3:  B*A*x = (lambda)*x */

/*  JOBZ    (input) CHARACTER*1 */
/*          = 'N':  Compute eigenvalues only; */
/*          = 'V':  Compute eigenvalues and eigenvectors. */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  Upper triangles of A and B are stored; */
/*          = 'L':  Lower triangles of A and B are stored. */

/*  N       (input) INTEGER */
/*          The order of the matrices A and B.  N >= 0. */

/*  A       (input/output) REAL array, dimension (LDA, N) */
/*          On entry, the symmetric matrix A.  If UPLO = 'U', the */
/*          leading N-by-N upper triangular part of A contains the */
/*          upper triangular part of the matrix A.  If UPLO = 'L', */
/*          the leading N-by-N lower triangular part of A contains */
/*          the lower triangular part of the matrix A. */

/*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
/*          matrix Z of eigenvectors.  The eigenvectors are normalized */
/*          as follows: */
/*          if ITYPE = 1 or 2, Z**T*B*Z = I; */
/*          if ITYPE = 3, Z**T*inv(B)*Z = I. */
/*          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */
/*          or the lower triangle (if UPLO='L') of A, including the */
/*          diagonal, is destroyed. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  B       (input/output) REAL array, dimension (LDB, N) */
/*          On entry, the symmetric positive definite matrix B. */
/*          If UPLO = 'U', the leading N-by-N upper triangular part of B */
/*          contains the upper triangular part of the matrix B. */
/*          If UPLO = 'L', the leading N-by-N lower triangular part of B */
/*          contains the lower triangular part of the matrix B. */

/*          On exit, if INFO <= N, the part of B containing the matrix is */
/*          overwritten by the triangular factor U or L from the Cholesky */
/*          factorization B = U**T*U or B = L*L**T. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= max(1,N). */

/*  W       (output) REAL array, dimension (N) */
/*          If INFO = 0, the eigenvalues in ascending order. */

/*  WORK    (workspace/output) REAL array, dimension (MAX(1,LWORK)) */
/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */

/*  LWORK   (input) INTEGER */
/*          The length of the array WORK.  LWORK >= max(1,3*N-1). */
/*          For optimal efficiency, LWORK >= (NB+2)*N, */
/*          where NB is the blocksize for SSYTRD returned by ILAENV. */

/*          If LWORK = -1, then a workspace query is assumed; the routine */
/*          only calculates the optimal size of the WORK array, returns */
/*          this value as the first entry of the WORK array, and no error */
/*          message related to LWORK is issued by XERBLA. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
/*          > 0:  SPOTRF or SSYEV returned an error code: */
/*             <= N:  if INFO = i, SSYEV failed to converge; */
/*                    i off-diagonal elements of an intermediate */
/*                    tridiagonal form did not converge to zero; */
/*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading */
/*                    minor of order i of B is not positive definite. */
/*                    The factorization of B could not be completed and */
/*                    no eigenvalues or eigenvectors were computed. */

/*  ===================================================================== */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --w;
    --work;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1;

    *info = 0;
    if (*itype < 1 || *itype > 3) {
	*info = -1;
    } else if (! (wantz || lsame_(jobz, "N"))) {
	*info = -2;
    } else if (! (upper || lsame_(uplo, "L"))) {
	*info = -3;
    } else if (*n < 0) {
	*info = -4;
    } else if (*lda < max(1,*n)) {
	*info = -6;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    }

    if (*info == 0) {
/* Computing MAX */
	i__1 = 1, i__2 = *n * 3 - 1;
	lwkmin = max(i__1,i__2);
	nb = ilaenv_(&c__1, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1);
/* Computing MAX */
	i__1 = lwkmin, i__2 = (nb + 2) * *n;
	lwkopt = max(i__1,i__2);
	work[1] = (real) lwkopt;

	if (*lwork < lwkmin && ! lquery) {
	    *info = -11;
	}
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SSYGV ", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Form a Cholesky factorization of B. */

    spotrf_(uplo, n, &b[b_offset], ldb, info);
    if (*info != 0) {
	*info = *n + *info;
	return 0;
    }

/*     Transform problem to standard eigenvalue problem and solve. */

    ssygst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    ssyev_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, info);

    if (wantz) {

/*        Backtransform eigenvectors to the original problem. */

	neig = *n;
	if (*info > 0) {
	    neig = *info - 1;
	}
	if (*itype == 1 || *itype == 2) {

/*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */
/*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */

	    if (upper) {
		*(unsigned char *)trans = 'N';
	    } else {
		*(unsigned char *)trans = 'T';
	    }

	    strsm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b16, &b[
		    b_offset], ldb, &a[a_offset], lda);

	} else if (*itype == 3) {

/*           For B*A*x=(lambda)*x; */
/*           backtransform eigenvectors: x = L*y or U'*y */

	    if (upper) {
		*(unsigned char *)trans = 'T';
	    } else {
		*(unsigned char *)trans = 'N';
	    }

	    strmm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b16, &b[
		    b_offset], ldb, &a[a_offset], lda);
	}
    }

    work[1] = (real) lwkopt;
    return 0;

/*     End of SSYGV */

} /* ssygv_ */
コード例 #7
0
ファイル: ssygvd.c プロジェクト: 3deggi/levmar-ndk
/* Subroutine */ int ssygvd_(integer *itype, char *jobz, char *uplo, integer *
	n, real *a, integer *lda, real *b, integer *ldb, real *w, real *work, 
	integer *lwork, integer *iwork, integer *liwork, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
    real r__1, r__2;

    /* Local variables */
    integer lopt;
    extern logical lsame_(char *, char *);
    integer lwmin;
    char trans[1];
    integer liopt;
    logical upper;
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
);
    logical wantz;
    extern /* Subroutine */ int strsm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
), xerbla_(char *, integer *);
    integer liwmin;
    extern /* Subroutine */ int spotrf_(char *, integer *, real *, integer *, 
	    integer *), ssyevd_(char *, char *, integer *, real *, 
	    integer *, real *, real *, integer *, integer *, integer *, 
	    integer *);
    logical lquery;
    extern /* Subroutine */ int ssygst_(integer *, char *, integer *, real *, 
	    integer *, real *, integer *, integer *);


/*  -- LAPACK driver routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SSYGVD computes all the eigenvalues, and optionally, the eigenvectors */
/*  of a real generalized symmetric-definite eigenproblem, of the form */
/*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and */
/*  B are assumed to be symmetric and B is also positive definite. */
/*  If eigenvectors are desired, it uses a divide and conquer algorithm. */

/*  The divide and conquer algorithm makes very mild assumptions about */
/*  floating point arithmetic. It will work on machines with a guard */
/*  digit in add/subtract, or on those binary machines without guard */
/*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or */
/*  Cray-2. It could conceivably fail on hexadecimal or decimal machines */
/*  without guard digits, but we know of none. */

/*  Arguments */
/*  ========= */

/*  ITYPE   (input) INTEGER */
/*          Specifies the problem type to be solved: */
/*          = 1:  A*x = (lambda)*B*x */
/*          = 2:  A*B*x = (lambda)*x */
/*          = 3:  B*A*x = (lambda)*x */

/*  JOBZ    (input) CHARACTER*1 */
/*          = 'N':  Compute eigenvalues only; */
/*          = 'V':  Compute eigenvalues and eigenvectors. */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  Upper triangles of A and B are stored; */
/*          = 'L':  Lower triangles of A and B are stored. */

/*  N       (input) INTEGER */
/*          The order of the matrices A and B.  N >= 0. */

/*  A       (input/output) REAL array, dimension (LDA, N) */
/*          On entry, the symmetric matrix A.  If UPLO = 'U', the */
/*          leading N-by-N upper triangular part of A contains the */
/*          upper triangular part of the matrix A.  If UPLO = 'L', */
/*          the leading N-by-N lower triangular part of A contains */
/*          the lower triangular part of the matrix A. */

/*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the */
/*          matrix Z of eigenvectors.  The eigenvectors are normalized */
/*          as follows: */
/*          if ITYPE = 1 or 2, Z**T*B*Z = I; */
/*          if ITYPE = 3, Z**T*inv(B)*Z = I. */
/*          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') */
/*          or the lower triangle (if UPLO='L') of A, including the */
/*          diagonal, is destroyed. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  B       (input/output) REAL array, dimension (LDB, N) */
/*          On entry, the symmetric matrix B.  If UPLO = 'U', the */
/*          leading N-by-N upper triangular part of B contains the */
/*          upper triangular part of the matrix B.  If UPLO = 'L', */
/*          the leading N-by-N lower triangular part of B contains */
/*          the lower triangular part of the matrix B. */

/*          On exit, if INFO <= N, the part of B containing the matrix is */
/*          overwritten by the triangular factor U or L from the Cholesky */
/*          factorization B = U**T*U or B = L*L**T. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= max(1,N). */

/*  W       (output) REAL array, dimension (N) */
/*          If INFO = 0, the eigenvalues in ascending order. */

/*  WORK    (workspace/output) REAL array, dimension (MAX(1,LWORK)) */
/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK. */
/*          If N <= 1,               LWORK >= 1. */
/*          If JOBZ = 'N' and N > 1, LWORK >= 2*N+1. */
/*          If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2. */

/*          If LWORK = -1, then a workspace query is assumed; the routine */
/*          only calculates the optimal sizes of the WORK and IWORK */
/*          arrays, returns these values as the first entries of the WORK */
/*          and IWORK arrays, and no error message related to LWORK or */
/*          LIWORK is issued by XERBLA. */

/*  IWORK   (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) */
/*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. */

/*  LIWORK  (input) INTEGER */
/*          The dimension of the array IWORK. */
/*          If N <= 1,                LIWORK >= 1. */
/*          If JOBZ  = 'N' and N > 1, LIWORK >= 1. */
/*          If JOBZ  = 'V' and N > 1, LIWORK >= 3 + 5*N. */

/*          If LIWORK = -1, then a workspace query is assumed; the */
/*          routine only calculates the optimal sizes of the WORK and */
/*          IWORK arrays, returns these values as the first entries of */
/*          the WORK and IWORK arrays, and no error message related to */
/*          LWORK or LIWORK is issued by XERBLA. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
/*          > 0:  SPOTRF or SSYEVD returned an error code: */
/*             <= N:  if INFO = i and JOBZ = 'N', then the algorithm */
/*                    failed to converge; i off-diagonal elements of an */
/*                    intermediate tridiagonal form did not converge to */
/*                    zero; */
/*                    if INFO = i and JOBZ = 'V', then the algorithm */
/*                    failed to compute an eigenvalue while working on */
/*                    the submatrix lying in rows and columns INFO/(N+1) */
/*                    through mod(INFO,N+1); */
/*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading */
/*                    minor of order i of B is not positive definite. */
/*                    The factorization of B could not be completed and */
/*                    no eigenvalues or eigenvectors were computed. */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */

/*  Modified so that no backsubstitution is performed if SSYEVD fails to */
/*  converge (NEIG in old code could be greater than N causing out of */
/*  bounds reference to A - reported by Ralf Meyer).  Also corrected the */
/*  description of INFO and the test on ITYPE. Sven, 16 Feb 05. */
/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --w;
    --work;
    --iwork;

    /* Function Body */
    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");
    lquery = *lwork == -1 || *liwork == -1;

    *info = 0;
    if (*n <= 1) {
	liwmin = 1;
	lwmin = 1;
    } else if (wantz) {
	liwmin = *n * 5 + 3;
/* Computing 2nd power */
	i__1 = *n;
	lwmin = *n * 6 + 1 + (i__1 * i__1 << 1);
    } else {
	liwmin = 1;
	lwmin = (*n << 1) + 1;
    }
    lopt = lwmin;
    liopt = liwmin;
    if (*itype < 1 || *itype > 3) {
	*info = -1;
    } else if (! (wantz || lsame_(jobz, "N"))) {
	*info = -2;
    } else if (! (upper || lsame_(uplo, "L"))) {
	*info = -3;
    } else if (*n < 0) {
	*info = -4;
    } else if (*lda < max(1,*n)) {
	*info = -6;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    }

    if (*info == 0) {
	work[1] = (real) lopt;
	iwork[1] = liopt;

	if (*lwork < lwmin && ! lquery) {
	    *info = -11;
	} else if (*liwork < liwmin && ! lquery) {
	    *info = -13;
	}
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SSYGVD", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Form a Cholesky factorization of B. */

    spotrf_(uplo, n, &b[b_offset], ldb, info);
    if (*info != 0) {
	*info = *n + *info;
	return 0;
    }

/*     Transform problem to standard eigenvalue problem and solve. */

    ssygst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    ssyevd_(jobz, uplo, n, &a[a_offset], lda, &w[1], &work[1], lwork, &iwork[
	    1], liwork, info);
/* Computing MAX */
    r__1 = (real) lopt;
    lopt = dmax(r__1,work[1]);
/* Computing MAX */
    r__1 = (real) liopt, r__2 = (real) iwork[1];
    liopt = dmax(r__1,r__2);

    if (wantz && *info == 0) {

/*        Backtransform eigenvectors to the original problem. */

	if (*itype == 1 || *itype == 2) {

/*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */
/*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */

	    if (upper) {
		*(unsigned char *)trans = 'N';
	    } else {
		*(unsigned char *)trans = 'T';
	    }

	    strsm_("Left", uplo, trans, "Non-unit", n, n, &c_b11, &b[b_offset]
, ldb, &a[a_offset], lda);

	} else if (*itype == 3) {

/*           For B*A*x=(lambda)*x; */
/*           backtransform eigenvectors: x = L*y or U'*y */

	    if (upper) {
		*(unsigned char *)trans = 'T';
	    } else {
		*(unsigned char *)trans = 'N';
	    }

	    strmm_("Left", uplo, trans, "Non-unit", n, n, &c_b11, &b[b_offset]
, ldb, &a[a_offset], lda);
	}
    }

    work[1] = (real) lopt;
    iwork[1] = liopt;

    return 0;

/*     End of SSYGVD */

} /* ssygvd_ */
コード例 #8
0
ファイル: slahr2.c プロジェクト: TakuroNegishi/PDRTAM
/* Subroutine */ int slahr2_(integer *n, integer *k, integer *nb, real *a,
                             integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy)
{
    /* System generated locals */
    integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2,
            i__3;
    real r__1;

    /* Local variables */
    integer i__;
    real ei;
    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
           sgemm_(char *, char *, integer *, integer *, integer *, real *,
                  real *, integer *, real *, integer *, real *, real *, integer *), sgemv_(char *, integer *, integer *, real *,
                          real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *),
           strmm_(char *, char *, char *, char *, integer *, integer *, real
                  *, real *, integer *, real *, integer *), saxpy_(integer *, real *, real *, integer *, real *,
                          integer *), strmv_(char *, char *, char *, integer *, real *,
                                  integer *, real *, integer *), slarfg_(
               integer *, real *, real *, integer *, real *), slacpy_(char *,
                       integer *, integer *, real *, integer *, real *, integer *);


    /*  -- LAPACK auxiliary routine (version 3.2) -- */
    /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
    /*     November 2006 */

    /*     .. Scalar Arguments .. */
    /*     .. */
    /*     .. Array Arguments .. */
    /*     .. */

    /*  Purpose */
    /*  ======= */

    /*  SLAHR2 reduces the first NB columns of A real general n-BY-(n-k+1) */
    /*  matrix A so that elements below the k-th subdiagonal are zero. The */
    /*  reduction is performed by an orthogonal similarity transformation */
    /*  Q' * A * Q. The routine returns the matrices V and T which determine */
    /*  Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. */

    /*  This is an auxiliary routine called by SGEHRD. */

    /*  Arguments */
    /*  ========= */

    /*  N       (input) INTEGER */
    /*          The order of the matrix A. */

    /*  K       (input) INTEGER */
    /*          The offset for the reduction. Elements below the k-th */
    /*          subdiagonal in the first NB columns are reduced to zero. */
    /*          K < N. */

    /*  NB      (input) INTEGER */
    /*          The number of columns to be reduced. */

    /*  A       (input/output) REAL array, dimension (LDA,N-K+1) */
    /*          On entry, the n-by-(n-k+1) general matrix A. */
    /*          On exit, the elements on and above the k-th subdiagonal in */
    /*          the first NB columns are overwritten with the corresponding */
    /*          elements of the reduced matrix; the elements below the k-th */
    /*          subdiagonal, with the array TAU, represent the matrix Q as a */
    /*          product of elementary reflectors. The other columns of A are */
    /*          unchanged. See Further Details. */

    /*  LDA     (input) INTEGER */
    /*          The leading dimension of the array A.  LDA >= max(1,N). */

    /*  TAU     (output) REAL array, dimension (NB) */
    /*          The scalar factors of the elementary reflectors. See Further */
    /*          Details. */

    /*  T       (output) REAL array, dimension (LDT,NB) */
    /*          The upper triangular matrix T. */

    /*  LDT     (input) INTEGER */
    /*          The leading dimension of the array T.  LDT >= NB. */

    /*  Y       (output) REAL array, dimension (LDY,NB) */
    /*          The n-by-nb matrix Y. */

    /*  LDY     (input) INTEGER */
    /*          The leading dimension of the array Y. LDY >= N. */

    /*  Further Details */
    /*  =============== */

    /*  The matrix Q is represented as a product of nb elementary reflectors */

    /*     Q = H(1) H(2) . . . H(nb). */

    /*  Each H(i) has the form */

    /*     H(i) = I - tau * v * v' */

    /*  where tau is a real scalar, and v is a real vector with */
    /*  v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in */
    /*  A(i+k+1:n,i), and tau in TAU(i). */

    /*  The elements of the vectors v together form the (n-k+1)-by-nb matrix */
    /*  V which is needed, with T and Y, to apply the transformation to the */
    /*  unreduced part of the matrix, using an update of the form: */
    /*  A := (I - V*T*V') * (A - Y*V'). */

    /*  The contents of A on exit are illustrated by the following example */
    /*  with n = 7, k = 3 and nb = 2: */

    /*     ( a   a   a   a   a ) */
    /*     ( a   a   a   a   a ) */
    /*     ( a   a   a   a   a ) */
    /*     ( h   h   a   a   a ) */
    /*     ( v1  h   a   a   a ) */
    /*     ( v1  v2  a   a   a ) */
    /*     ( v1  v2  a   a   a ) */

    /*  where a denotes an element of the original matrix A, h denotes a */
    /*  modified element of the upper Hessenberg matrix H, and vi denotes an */
    /*  element of the vector defining H(i). */

    /*  This file is a slight modification of LAPACK-3.0's SLAHRD */
    /*  incorporating improvements proposed by Quintana-Orti and Van de */
    /*  Gejin. Note that the entries of A(1:K,2:NB) differ from those */
    /*  returned by the original LAPACK routine. This function is */
    /*  not backward compatible with LAPACK3.0. */

    /*  ===================================================================== */

    /*     .. Parameters .. */
    /*     .. */
    /*     .. Local Scalars .. */
    /*     .. */
    /*     .. External Subroutines .. */
    /*     .. */
    /*     .. Intrinsic Functions .. */
    /*     .. */
    /*     .. Executable Statements .. */

    /*     Quick return if possible */

    /* Parameter adjustments */
    --tau;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1;
    t -= t_offset;
    y_dim1 = *ldy;
    y_offset = 1 + y_dim1;
    y -= y_offset;

    /* Function Body */
    if (*n <= 1) {
        return 0;
    }

    i__1 = *nb;
    for (i__ = 1; i__ <= i__1; ++i__) {
        if (i__ > 1) {

            /*           Update A(K+1:N,I) */

            /*           Update I-th column of A - Y * V' */

            i__2 = *n - *k;
            i__3 = i__ - 1;
            sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b4, &y[*k + 1 + y_dim1],
                   ldy, &a[*k + i__ - 1 + a_dim1], lda, &c_b5, &a[*k + 1 +
                           i__ * a_dim1], &c__1);

            /*           Apply I - V * T' * V' to this column (call it b) from the */
            /*           left, using the last column of T as workspace */

            /*           Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows) */
            /*                    ( V2 )             ( b2 ) */

            /*           where V1 is unit lower triangular */

            /*           w := V1' * b1 */

            i__2 = i__ - 1;
            scopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 +
                    1], &c__1);
            i__2 = i__ - 1;
            strmv_("Lower", "Transpose", "UNIT", &i__2, &a[*k + 1 + a_dim1],
                   lda, &t[*nb * t_dim1 + 1], &c__1);

            /*           w := w + V2'*b2 */

            i__2 = *n - *k - i__ + 1;
            i__3 = i__ - 1;
            sgemv_("Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1],
                   lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b5, &t[*nb *
                           t_dim1 + 1], &c__1);

            /*           w := T'*w */

            i__2 = i__ - 1;
            strmv_("Upper", "Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt,
                   &t[*nb * t_dim1 + 1], &c__1);

            /*           b2 := b2 - V2*w */

            i__2 = *n - *k - i__ + 1;
            i__3 = i__ - 1;
            sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b4, &a[*k + i__ + a_dim1],
                   lda, &t[*nb * t_dim1 + 1], &c__1, &c_b5, &a[*k + i__ +
                           i__ * a_dim1], &c__1);

            /*           b1 := b1 - V1*w */

            i__2 = i__ - 1;
            strmv_("Lower", "NO TRANSPOSE", "UNIT", &i__2, &a[*k + 1 + a_dim1]
                   , lda, &t[*nb * t_dim1 + 1], &c__1);
            i__2 = i__ - 1;
            saxpy_(&i__2, &c_b4, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__
                    * a_dim1], &c__1);

            a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei;
        }

        /*        Generate the elementary reflector H(I) to annihilate */
        /*        A(K+I+1:N,I) */

        i__2 = *n - *k - i__ + 1;
        /* Computing MIN */
        i__3 = *k + i__ + 1;
        slarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3, *n)+ i__ *
                a_dim1], &c__1, &tau[i__]);
        ei = a[*k + i__ + i__ * a_dim1];
        a[*k + i__ + i__ * a_dim1] = 1.f;

        /*        Compute  Y(K+1:N,I) */

        i__2 = *n - *k;
        i__3 = *n - *k - i__ + 1;
        sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b5, &a[*k + 1 + (i__ + 1) *
                a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &y[*
                        k + 1 + i__ * y_dim1], &c__1);
        i__2 = *n - *k - i__ + 1;
        i__3 = i__ - 1;
        sgemv_("Transpose", &i__2, &i__3, &c_b5, &a[*k + i__ + a_dim1], lda, &
               a[*k + i__ + i__ * a_dim1], &c__1, &c_b38, &t[i__ * t_dim1 +
                       1], &c__1);
        i__2 = *n - *k;
        i__3 = i__ - 1;
        sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b4, &y[*k + 1 + y_dim1], ldy,
               &t[i__ * t_dim1 + 1], &c__1, &c_b5, &y[*k + 1 + i__ * y_dim1],
               &c__1);
        i__2 = *n - *k;
        sscal_(&i__2, &tau[i__], &y[*k + 1 + i__ * y_dim1], &c__1);

        /*        Compute T(1:I,I) */

        i__2 = i__ - 1;
        r__1 = -tau[i__];
        sscal_(&i__2, &r__1, &t[i__ * t_dim1 + 1], &c__1);
        i__2 = i__ - 1;
        strmv_("Upper", "No Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt,
               &t[i__ * t_dim1 + 1], &c__1)
        ;
        t[i__ + i__ * t_dim1] = tau[i__];

        /* L10: */
    }
    a[*k + *nb + *nb * a_dim1] = ei;

    /*     Compute Y(1:K,1:NB) */

    slacpy_("ALL", k, nb, &a[(a_dim1 << 1) + 1], lda, &y[y_offset], ldy);
    strmm_("RIGHT", "Lower", "NO TRANSPOSE", "UNIT", k, nb, &c_b5, &a[*k + 1
            + a_dim1], lda, &y[y_offset], ldy);
    if (*n > *k + *nb) {
        i__1 = *n - *k - *nb;
        sgemm_("NO TRANSPOSE", "NO TRANSPOSE", k, nb, &i__1, &c_b5, &a[(*nb +
                2) * a_dim1 + 1], lda, &a[*k + 1 + *nb + a_dim1], lda, &c_b5,
               &y[y_offset], ldy);
    }
    strmm_("RIGHT", "Upper", "NO TRANSPOSE", "NON-UNIT", k, nb, &c_b5, &t[
               t_offset], ldt, &y[y_offset], ldy);

    return 0;

    /*     End of SLAHR2 */

} /* slahr2_ */
コード例 #9
0
ファイル: test_trmm.c プロジェクト: figual/blis
int main( int argc, char** argv )
{
	obj_t    a, c;
	obj_t    c_save;
	obj_t    alpha;
	dim_t    m, n;
	dim_t    p;
	dim_t    p_begin, p_end, p_inc;
	int      m_input, n_input;
	ind_t    ind;
	num_t    dt;
	char     dt_ch;
	int      r, n_repeats;
	side_t   side;
	uplo_t   uploa;
	trans_t  transa;
	diag_t   diaga;
	f77_char f77_side;
	f77_char f77_uploa;
	f77_char f77_transa;
	f77_char f77_diaga;

	double   dtime;
	double   dtime_save;
	double   gflops;

	//bli_init();

	//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

	n_repeats = 3;

	dt      = DT;

	ind     = IND;

	p_begin = P_BEGIN;
	p_end   = P_END;
	p_inc   = P_INC;

	m_input = -1;
	n_input = -1;


	// Supress compiler warnings about unused variable 'ind'.
	( void )ind;

#if 0

	cntx_t* cntx;

	ind_t ind_mod = ind;

	// A hack to use 3m1 as 1mpb (with 1m as 1mbp).
	if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M;

	// Initialize a context for the current induced method and datatype.
	cntx = bli_gks_query_ind_cntx( ind_mod, dt );

	// Set k to the kc blocksize for the current datatype.
	k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx );

#elif 1

	//k_input = 256;

#endif

	// Choose the char corresponding to the requested datatype.
	if      ( bli_is_float( dt ) )    dt_ch = 's';
	else if ( bli_is_double( dt ) )   dt_ch = 'd';
	else if ( bli_is_scomplex( dt ) ) dt_ch = 'c';
	else                              dt_ch = 'z';

#if 0
	side   = BLIS_LEFT;
#else
	side   = BLIS_RIGHT;
#endif
#if 0
	uploa  = BLIS_LOWER;
#else
	uploa  = BLIS_UPPER;
#endif
	transa = BLIS_NO_TRANSPOSE;
	diaga  = BLIS_NONUNIT_DIAG;

	bli_param_map_blis_to_netlib_side( side, &f77_side );
	bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa );
	bli_param_map_blis_to_netlib_trans( transa, &f77_transa );
	bli_param_map_blis_to_netlib_diag( diaga, &f77_diaga );

	// Begin with initializing the last entry to zero so that
	// matlab allocates space for the entire array once up-front.
	for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
#ifdef BLIS
	printf( "data_%s_%ctrmm_%s_blis", THR_STR, dt_ch, STR );
#else
	printf( "data_%s_%ctrmm_%s",      THR_STR, dt_ch, STR );
#endif
	printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
	        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
	        ( unsigned long )0,
	        ( unsigned long )0, 0.0 );


	for ( p = p_begin; p <= p_end; p += p_inc )
	{

		if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( n_input < 0 ) n = p / ( dim_t )abs(n_input);
		else               n =     ( dim_t )    n_input;

		bli_obj_create( dt, 1, 1, 0, 0, &alpha );

		if ( bli_does_trans( side ) )
			bli_obj_create( dt, m, m, 0, 0, &a );
        else
			bli_obj_create( dt, n, n, 0, 0, &a );
		bli_obj_create( dt, m, n, 0, 0, &c );
		bli_obj_create( dt, m, n, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &c );

		bli_obj_set_struc( BLIS_TRIANGULAR, &a );
		bli_obj_set_uplo( uploa, &a );
		bli_obj_set_conjtrans( transa, &a );
		bli_obj_set_diag( diaga, &a );

		bli_randm( &a );
		bli_mktrim( &a );

		bli_setsc(  (2.0/1.0), 0.0, &alpha );

		bli_copym( &c, &c_save );
	
#if 0 //def BLIS
		bli_ind_disable_all_dt( dt );
		bli_ind_enable_dt( ind, dt );
#endif

		dtime_save = DBL_MAX;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );


			dtime = bli_clock();


#ifdef PRINT
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS

			bli_trmm( side,
			          &alpha,
			          &a,
			          &c );

#else

		if ( bli_is_float( dt ) )
		{
			f77_int   mm     = bli_obj_length( &c );
			f77_int   kk     = bli_obj_width( &c );
			f77_int   lda    = bli_obj_col_stride( &a );
			f77_int   ldc    = bli_obj_col_stride( &c );
			float*    alphap = bli_obj_buffer( &alpha );
			float*    ap     = bli_obj_buffer( &a );
			float*    cp     = bli_obj_buffer( &c );

			strmm_( &f77_side,
			        &f77_uploa,
			        &f77_transa,
			        &f77_diaga,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        cp, &ldc );
		}
		else if ( bli_is_double( dt ) )
		{
			f77_int   mm     = bli_obj_length( &c );
			f77_int   kk     = bli_obj_width( &c );
			f77_int   lda    = bli_obj_col_stride( &a );
			f77_int   ldc    = bli_obj_col_stride( &c );
			double*   alphap = bli_obj_buffer( &alpha );
			double*   ap     = bli_obj_buffer( &a );
			double*   cp     = bli_obj_buffer( &c );

			dtrmm_( &f77_side,
			        &f77_uploa,
			        &f77_transa,
			        &f77_diaga,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        cp, &ldc );
		}
		else if ( bli_is_scomplex( dt ) )
		{
			f77_int   mm     = bli_obj_length( &c );
			f77_int   kk     = bli_obj_width( &c );
			f77_int   lda    = bli_obj_col_stride( &a );
			f77_int   ldc    = bli_obj_col_stride( &c );
			scomplex* alphap = bli_obj_buffer( &alpha );
			scomplex* ap     = bli_obj_buffer( &a );
			scomplex* cp     = bli_obj_buffer( &c );

			ctrmm_( &f77_side,
			        &f77_uploa,
			        &f77_transa,
			        &f77_diaga,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        cp, &ldc );
		}
		else if ( bli_is_dcomplex( dt ) )
		{
			f77_int    mm     = bli_obj_length( &c );
			f77_int    kk     = bli_obj_width( &c );
			f77_int    lda    = bli_obj_col_stride( &a );
			f77_int    ldc    = bli_obj_col_stride( &c );
			dcomplex*  alphap = bli_obj_buffer( &alpha );
			dcomplex*  ap     = bli_obj_buffer( &a );
			dcomplex*  cp     = bli_obj_buffer( &c );

			ztrmm_( &f77_side,
			        &f77_uploa,
			        &f77_transa,
			        &f77_diaga,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        cp, &ldc );
		}
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%4.1f", "" );
			exit(1);
#endif


			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		if ( bli_is_left( side ) )
			gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 );
		else
			gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 );

		if ( bli_is_complex( dt ) ) gflops *= 4.0;

#ifdef BLIS
		printf( "data_%s_%ctrmm_%s_blis", THR_STR, dt_ch, STR );
#else
		printf( "data_%s_%ctrmm_%s",      THR_STR, dt_ch, STR );
#endif
		printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )n, gflops );

		bli_obj_free( &alpha );

		bli_obj_free( &a );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	//bli_finalize();

	return 0;
}
コード例 #10
0
ファイル: ssygst.c プロジェクト: MichaelH13/sdkpub
/* Subroutine */ int ssygst_(integer *itype, char *uplo, integer *n, real *a, 
	integer *lda, real *b, integer *ldb, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SSYGST reduces a real symmetric-definite generalized eigenproblem   
    to standard form.   

    If ITYPE = 1, the problem is A*x = lambda*B*x,   
    and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)   

    If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or   
    B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.   

    B must have been previously factorized as U**T*U or L*L**T by SPOTRF.   

    Arguments   
    =========   

    ITYPE   (input) INTEGER   
            = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T);   
            = 2 or 3: compute U*A*U**T or L**T*A*L.   

    UPLO    (input) CHARACTER   
            = 'U':  Upper triangle of A is stored and B is factored as   
                    U**T*U;   
            = 'L':  Lower triangle of A is stored and B is factored as   
                    L*L**T.   

    N       (input) INTEGER   
            The order of the matrices A and B.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading   
            N-by-N upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading N-by-N lower triangular part of A contains the lower   
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   

            On exit, if INFO = 0, the transformed matrix, stored in the   
            same format as A.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    B       (input) REAL array, dimension (LDB,N)   
            The triangular factor from the Cholesky factorization of B,   
            as returned by SPOTRF.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static real c_b14 = 1.f;
    static real c_b16 = -.5f;
    static real c_b19 = -1.f;
    static real c_b52 = .5f;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
    /* Local variables */
    static integer k;
    extern logical lsame_(char *, char *);
    static logical upper;
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
	    ), ssymm_(char *, char *, integer 
	    *, integer *, real *, real *, integer *, real *, integer *, real *
	    , real *, integer *), strsm_(char *, char *, char 
	    *, char *, integer *, integer *, real *, real *, integer *, real *
	    , integer *);
    static integer kb, nb;
    extern /* Subroutine */ int ssygs2_(integer *, char *, integer *, real *, 
	    integer *, real *, integer *, integer *), ssyr2k_(char *, 
	    char *, integer *, integer *, real *, real *, integer *, real *, 
	    integer *, real *, real *, integer *), xerbla_(
	    char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (*itype < 1 || *itype > 3) {
	*info = -1;
    } else if (! upper && ! lsame_(uplo, "L")) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -7;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SSYGST", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "SSYGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);

    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code */

	ssygs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    } else {

/*        Use blocked code */

	if (*itype == 1) {
	    if (upper) {

/*              Compute inv(U')*A*inv(U) */

		i__1 = *n;
		i__2 = nb;
		for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
/* Computing MIN */
		    i__3 = *n - k + 1;
		    kb = min(i__3,nb);

/*                 Update the upper triangle of A(k:n,k:n) */

		    ssygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k),
			     ldb, info);
		    if (k + kb <= *n) {
			i__3 = *n - k - kb + 1;
			strsm_("Left", uplo, "Transpose", "Non-unit", &kb, &
				i__3, &c_b14, &b_ref(k, k), ldb, &a_ref(k, k 
				+ kb), lda);
			i__3 = *n - k - kb + 1;
			ssymm_("Left", uplo, &kb, &i__3, &c_b16, &a_ref(k, k),
				 lda, &b_ref(k, k + kb), ldb, &c_b14, &a_ref(
				k, k + kb), lda);
			i__3 = *n - k - kb + 1;
			ssyr2k_(uplo, "Transpose", &i__3, &kb, &c_b19, &a_ref(
				k, k + kb), lda, &b_ref(k, k + kb), ldb, &
				c_b14, &a_ref(k + kb, k + kb), lda);
			i__3 = *n - k - kb + 1;
			ssymm_("Left", uplo, &kb, &i__3, &c_b16, &a_ref(k, k),
				 lda, &b_ref(k, k + kb), ldb, &c_b14, &a_ref(
				k, k + kb), lda);
			i__3 = *n - k - kb + 1;
			strsm_("Right", uplo, "No transpose", "Non-unit", &kb,
				 &i__3, &c_b14, &b_ref(k + kb, k + kb), ldb, &
				a_ref(k, k + kb), lda);
		    }
/* L10: */
		}
	    } else {

/*              Compute inv(L)*A*inv(L') */

		i__2 = *n;
		i__1 = nb;
		for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
/* Computing MIN */
		    i__3 = *n - k + 1;
		    kb = min(i__3,nb);

/*                 Update the lower triangle of A(k:n,k:n) */

		    ssygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k),
			     ldb, info);
		    if (k + kb <= *n) {
			i__3 = *n - k - kb + 1;
			strsm_("Right", uplo, "Transpose", "Non-unit", &i__3, 
				&kb, &c_b14, &b_ref(k, k), ldb, &a_ref(k + kb,
				 k), lda);
			i__3 = *n - k - kb + 1;
			ssymm_("Right", uplo, &i__3, &kb, &c_b16, &a_ref(k, k)
				, lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref(
				k + kb, k), lda);
			i__3 = *n - k - kb + 1;
			ssyr2k_(uplo, "No transpose", &i__3, &kb, &c_b19, &
				a_ref(k + kb, k), lda, &b_ref(k + kb, k), ldb,
				 &c_b14, &a_ref(k + kb, k + kb), lda);
			i__3 = *n - k - kb + 1;
			ssymm_("Right", uplo, &i__3, &kb, &c_b16, &a_ref(k, k)
				, lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref(
				k + kb, k), lda);
			i__3 = *n - k - kb + 1;
			strsm_("Left", uplo, "No transpose", "Non-unit", &
				i__3, &kb, &c_b14, &b_ref(k + kb, k + kb), 
				ldb, &a_ref(k + kb, k), lda);
		    }
/* L20: */
		}
	    }
	} else {
	    if (upper) {

/*              Compute U*A*U' */

		i__1 = *n;
		i__2 = nb;
		for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
/* Computing MIN */
		    i__3 = *n - k + 1;
		    kb = min(i__3,nb);

/*                 Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */

		    i__3 = k - 1;
		    strmm_("Left", uplo, "No transpose", "Non-unit", &i__3, &
			    kb, &c_b14, &b[b_offset], ldb, &a_ref(1, k), lda);
		    i__3 = k - 1;
		    ssymm_("Right", uplo, &i__3, &kb, &c_b52, &a_ref(k, k), 
			    lda, &b_ref(1, k), ldb, &c_b14, &a_ref(1, k), lda);
		    i__3 = k - 1;
		    ssyr2k_(uplo, "No transpose", &i__3, &kb, &c_b14, &a_ref(
			    1, k), lda, &b_ref(1, k), ldb, &c_b14, &a[
			    a_offset], lda);
		    i__3 = k - 1;
		    ssymm_("Right", uplo, &i__3, &kb, &c_b52, &a_ref(k, k), 
			    lda, &b_ref(1, k), ldb, &c_b14, &a_ref(1, k), lda);
		    i__3 = k - 1;
		    strmm_("Right", uplo, "Transpose", "Non-unit", &i__3, &kb,
			     &c_b14, &b_ref(k, k), ldb, &a_ref(1, k), lda);
		    ssygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k),
			     ldb, info);
/* L30: */
		}
	    } else {

/*              Compute L'*A*L */

		i__2 = *n;
		i__1 = nb;
		for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
/* Computing MIN */
		    i__3 = *n - k + 1;
		    kb = min(i__3,nb);

/*                 Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */

		    i__3 = k - 1;
		    strmm_("Right", uplo, "No transpose", "Non-unit", &kb, &
			    i__3, &c_b14, &b[b_offset], ldb, &a_ref(k, 1), 
			    lda);
		    i__3 = k - 1;
		    ssymm_("Left", uplo, &kb, &i__3, &c_b52, &a_ref(k, k), 
			    lda, &b_ref(k, 1), ldb, &c_b14, &a_ref(k, 1), lda);
		    i__3 = k - 1;
		    ssyr2k_(uplo, "Transpose", &i__3, &kb, &c_b14, &a_ref(k, 
			    1), lda, &b_ref(k, 1), ldb, &c_b14, &a[a_offset], 
			    lda);
		    i__3 = k - 1;
		    ssymm_("Left", uplo, &kb, &i__3, &c_b52, &a_ref(k, k), 
			    lda, &b_ref(k, 1), ldb, &c_b14, &a_ref(k, 1), lda);
		    i__3 = k - 1;
		    strmm_("Left", uplo, "Transpose", "Non-unit", &kb, &i__3, 
			    &c_b14, &b_ref(k, k), ldb, &a_ref(k, 1), lda);
		    ssygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k),
			     ldb, info);
/* L40: */
		}
	    }
	}
    }
    return 0;

/*     End of SSYGST */

} /* ssygst_ */
コード例 #11
0
ファイル: slauum.c プロジェクト: CJACQUEL/flash-opencv
/* Subroutine */ int slauum_(char *uplo, integer *n, real *a, integer *lda, 
	integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;

    /* Local variables */
    integer i__, ib, nb;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    logical upper;
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
), ssyrk_(char *, char *, integer 
	    *, integer *, real *, real *, integer *, real *, real *, integer *
), slauu2_(char *, integer *, real *, integer *, 
	    integer *), xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *);


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLAUUM computes the product U * U' or L' * L, where the triangular */
/*  factor U or L is stored in the upper or lower triangular part of */
/*  the array A. */

/*  If UPLO = 'U' or 'u' then the upper triangle of the result is stored, */
/*  overwriting the factor U in A. */
/*  If UPLO = 'L' or 'l' then the lower triangle of the result is stored, */
/*  overwriting the factor L in A. */

/*  This is the blocked form of the algorithm, calling Level 3 BLAS. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the triangular factor stored in the array A */
/*          is upper or lower triangular: */
/*          = 'U':  Upper triangular */
/*          = 'L':  Lower triangular */

/*  N       (input) INTEGER */
/*          The order of the triangular factor U or L.  N >= 0. */

/*  A       (input/output) REAL array, dimension (LDA,N) */
/*          On entry, the triangular factor U or L. */
/*          On exit, if UPLO = 'U', the upper triangle of A is */
/*          overwritten with the upper triangle of the product U * U'; */
/*          if UPLO = 'L', the lower triangle of A is overwritten with */
/*          the lower triangle of the product L' * L. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  INFO    (output) INTEGER */
/*          = 0: successful exit */
/*          < 0: if INFO = -k, the k-th argument had an illegal value */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAUUM", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "SLAUUM", uplo, n, &c_n1, &c_n1, &c_n1);

    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code */

	slauu2_(uplo, n, &a[a_offset], lda, info);
    } else {

/*        Use blocked code */

	if (upper) {

/*           Compute the product U * U'. */

	    i__1 = *n;
	    i__2 = nb;
	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);
		i__3 = i__ - 1;
		strmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, 
			&c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1 
			+ 1], lda)
			;
		slauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info);
		if (i__ + ib <= *n) {
		    i__3 = i__ - 1;
		    i__4 = *n - i__ - ib + 1;
		    sgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, &
			    c_b15, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ + 
			    (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ * 
			    a_dim1 + 1], lda);
		    i__3 = *n - i__ - ib + 1;
		    ssyrk_("Upper", "No transpose", &ib, &i__3, &c_b15, &a[
			    i__ + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ + 
			    i__ * a_dim1], lda);
		}
/* L10: */
	    }
	} else {

/*           Compute the product L' * L. */

	    i__2 = *n;
	    i__1 = nb;
	    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);
		i__3 = i__ - 1;
		strmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, &
			c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1], 
			lda);
		slauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info);
		if (i__ + ib <= *n) {
		    i__3 = i__ - 1;
		    i__4 = *n - i__ - ib + 1;
		    sgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, &
			    c_b15, &a[i__ + ib + i__ * a_dim1], lda, &a[i__ + 
			    ib + a_dim1], lda, &c_b15, &a[i__ + a_dim1], lda);
		    i__3 = *n - i__ - ib + 1;
		    ssyrk_("Lower", "Transpose", &ib, &i__3, &c_b15, &a[i__ + 
			    ib + i__ * a_dim1], lda, &c_b15, &a[i__ + i__ * 
			    a_dim1], lda);
		}
/* L20: */
	    }
	}
    }

    return 0;

/*     End of SLAUUM */

} /* slauum_ */
コード例 #12
0
ファイル: strtri.c プロジェクト: deepakantony/vispack
/* Subroutine */ int strtri_(char *uplo, char *diag, integer *n, real *a, 
	integer *lda, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       March 31, 1993   


    Purpose   
    =======   

    STRTRI computes the inverse of a real upper or lower triangular   
    matrix A.   

    This is the Level 3 BLAS version of the algorithm.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  A is upper triangular;   
            = 'L':  A is lower triangular.   

    DIAG    (input) CHARACTER*1   
            = 'N':  A is non-unit triangular;   
            = 'U':  A is unit triangular.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the triangular matrix A.  If UPLO = 'U', the   
            leading N-by-N upper triangular part of the array A contains 
  
            the upper triangular matrix, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading N-by-N lower triangular part of the array A contains 
  
            the lower triangular matrix, and the strictly upper   
            triangular part of A is not referenced.  If DIAG = 'U', the   
            diagonal elements of A are also not referenced and are   
            assumed to be 1.   
            On exit, the (triangular) inverse of the original matrix, in 
  
            the same storage format.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   
            > 0: if INFO = i, A(i,i) is exactly zero.  The triangular   
                 matrix is singular and its inverse can not be computed. 
  

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static integer c__2 = 2;
    static real c_b18 = 1.f;
    static real c_b22 = -1.f;
    
    /* System generated locals */
    address a__1[2];
    integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5;
    char ch__1[2];
    /* Builtin functions   
       Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
    /* Local variables */
    static integer j;
    extern logical lsame_(char *, char *);
    static logical upper;
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
	    ), strsm_(char *, char *, char *, 
	    char *, integer *, integer *, real *, real *, integer *, real *, 
	    integer *);
    static integer jb, nb;
    extern /* Subroutine */ int strti2_(char *, char *, integer *, real *, 
	    integer *, integer *);
    static integer nn;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    static logical nounit;




#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    *info = 0;
    upper = lsame_(uplo, "U");
    nounit = lsame_(diag, "N");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (! nounit && ! lsame_(diag, "U")) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("STRTRI", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Check for singularity if non-unit. */

    if (nounit) {
	i__1 = *n;
	for (*info = 1; *info <= i__1; ++(*info)) {
	    if (A(*info,*info) == 0.f) {
		return 0;
	    }
/* L10: */
	}
	*info = 0;
    }

/*     Determine the block size for this environment.   

   Writing concatenation */
    i__2[0] = 1, a__1[0] = uplo;
    i__2[1] = 1, a__1[1] = diag;
    s_cat(ch__1, a__1, i__2, &c__2, 2L);
    nb = ilaenv_(&c__1, "STRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, 6L, 2L);
    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code */

	strti2_(uplo, diag, n, &A(1,1), lda, info);
    } else {

/*        Use blocked code */

	if (upper) {

/*           Compute inverse of upper triangular matrix */

	    i__1 = *n;
	    i__3 = nb;
	    for (j = 1; nb < 0 ? j >= *n : j <= *n; j += nb) {
/* Computing MIN */
		i__4 = nb, i__5 = *n - j + 1;
		jb = min(i__4,i__5);

/*              Compute rows 1:j-1 of current block column */

		i__4 = j - 1;
		strmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, &
			c_b18, &A(1,1), lda, &A(1,j), lda);
		i__4 = j - 1;
		strsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, &
			c_b22, &A(j,j), lda, &A(1,j), 
			lda);

/*              Compute inverse of current diagonal block */

		strti2_("Upper", diag, &jb, &A(j,j), lda, info);
/* L20: */
	    }
	} else {

/*           Compute inverse of lower triangular matrix */

	    nn = (*n - 1) / nb * nb + 1;
	    i__3 = -nb;
	    for (j = nn; -nb < 0 ? j >= 1 : j <= 1; j += -nb) {
/* Computing MIN */
		i__1 = nb, i__4 = *n - j + 1;
		jb = min(i__1,i__4);
		if (j + jb <= *n) {

/*                 Compute rows j+jb:n of current block co
lumn */

		    i__1 = *n - j - jb + 1;
		    strmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, 
			    &c_b18, &A(j+jb,j+jb), lda, &A(j+jb,j), lda);
		    i__1 = *n - j - jb + 1;
		    strsm_("Right", "Lower", "No transpose", diag, &i__1, &jb,
			     &c_b22, &A(j,j), lda, &A(j+jb,j), lda);
		}

/*              Compute inverse of current diagonal block */

		strti2_("Lower", diag, &jb, &A(j,j), lda, info);
/* L30: */
	    }
	}
    }

    return 0;

/*     End of STRTRI */

} /* strtri_ */
コード例 #13
0
ファイル: stprfb.c プロジェクト: flame/libflame
/* Subroutine */
int stprfb_(char *side, char *trans, char *direct, char * storev, integer *m, integer *n, integer *k, integer *l, real *v, integer *ldv, real *t, integer *ldt, real *a, integer *lda, real *b, integer *ldb, real *work, integer *ldwork)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, work_offset, i__1, i__2;
    /* Local variables */
    logical backward;
    integer i__, j, kp, mp, np;
    logical row, left;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */
    int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *);
    logical right;
    extern /* Subroutine */
    int strmm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * );
    logical column, forward;
    /* -- LAPACK auxiliary routine (version 3.4.2) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* September 2012 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ========================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Quick return if possible */
    /* Parameter adjustments */
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1;
    t -= t_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    work_dim1 = *ldwork;
    work_offset = 1 + work_dim1;
    work -= work_offset;
    /* Function Body */
    if (*m <= 0 || *n <= 0 || *k <= 0 || *l < 0)
    {
        return 0;
    }
    if (lsame_(storev, "C"))
    {
        column = TRUE_;
        row = FALSE_;
    }
    else if (lsame_(storev, "R"))
    {
        column = FALSE_;
        row = TRUE_;
    }
    else
    {
        column = FALSE_;
        row = FALSE_;
    }
    if (lsame_(side, "L"))
    {
        left = TRUE_;
        right = FALSE_;
    }
    else if (lsame_(side, "R"))
    {
        left = FALSE_;
        right = TRUE_;
    }
    else
    {
        left = FALSE_;
        right = FALSE_;
    }
    if (lsame_(direct, "F"))
    {
        forward = TRUE_;
        backward = FALSE_;
    }
    else if (lsame_(direct, "B"))
    {
        forward = FALSE_;
        backward = TRUE_;
    }
    else
    {
        forward = FALSE_;
        backward = FALSE_;
    }
    /* --------------------------------------------------------------------------- */
    if (column && forward && left)
    {
        /* --------------------------------------------------------------------------- */
        /* Let W = [ I ] (K-by-K) */
        /* [ V ] (M-by-K) */
        /* Form H C or H^H C where C = [ A ] (K-by-N) */
        /* [ B ] (M-by-N) */
        /* H = I - W T W^H or H^H = I - W T^H W^H */
        /* A = A - T (A + V^H B) or A = A - T^H (A + V^H B) */
        /* B = B - V T (A + V^H B) or B = B - V T^H (A + V^H B) */
        /* --------------------------------------------------------------------------- */
        /* Computing MIN */
        i__1 = *m - *l + 1;
        mp = min(i__1,*m);
        /* Computing MIN */
        i__1 = *l + 1;
        kp = min(i__1,*k);
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *l;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                work[i__ + j * work_dim1] = b[*m - *l + i__ + j * b_dim1];
            }
        }
        strmm_("L", "U", "T", "N", l, n, &c_b12, &v[mp + v_dim1], ldv, &work[ work_offset], ldwork);
        i__1 = *m - *l;
        sgemm_("T", "N", l, n, &i__1, &c_b12, &v[v_offset], ldv, &b[b_offset], ldb, &c_b12, &work[work_offset], ldwork);
        i__1 = *k - *l;
        sgemm_("T", "N", &i__1, n, m, &c_b12, &v[kp * v_dim1 + 1], ldv, &b[ b_offset], ldb, &c_b20, &work[kp + work_dim1], ldwork);
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *k;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                work[i__ + j * work_dim1] += a[i__ + j * a_dim1];
            }
        }
        strmm_("L", "U", trans, "N", k, n, &c_b12, &t[t_offset], ldt, &work[ work_offset], ldwork);
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *k;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                a[i__ + j * a_dim1] -= work[i__ + j * work_dim1];
            }
        }
        i__1 = *m - *l;
        sgemm_("N", "N", &i__1, n, k, &c_b27, &v[v_offset], ldv, &work[ work_offset], ldwork, &c_b12, &b[b_offset], ldb);
        i__1 = *k - *l;
        sgemm_("N", "N", l, n, &i__1, &c_b27, &v[mp + kp * v_dim1], ldv, & work[kp + work_dim1], ldwork, &c_b12, &b[mp + b_dim1], ldb);
        strmm_("L", "U", "N", "N", l, n, &c_b12, &v[mp + v_dim1], ldv, &work[ work_offset], ldwork);
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *l;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                b[*m - *l + i__ + j * b_dim1] -= work[i__ + j * work_dim1];
            }
        }
        /* --------------------------------------------------------------------------- */
    }
    else if (column && forward && right)
    {
        /* --------------------------------------------------------------------------- */
        /* Let W = [ I ] (K-by-K) */
        /* [ V ] (N-by-K) */
        /* Form C H or C H^H where C = [ A B ] (A is M-by-K, B is M-by-N) */
        /* H = I - W T W^H or H^H = I - W T^H W^H */
        /* A = A - (A + B V) T or A = A - (A + B V) T^H */
        /* B = B - (A + B V) T V^H or B = B - (A + B V) T^H V^H */
        /* --------------------------------------------------------------------------- */
        /* Computing MIN */
        i__1 = *n - *l + 1;
        np = min(i__1,*n);
        /* Computing MIN */
        i__1 = *l + 1;
        kp = min(i__1,*k);
        i__1 = *l;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *m;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                work[i__ + j * work_dim1] = b[i__ + (*n - *l + j) * b_dim1];
            }
        }
        strmm_("R", "U", "N", "N", m, l, &c_b12, &v[np + v_dim1], ldv, &work[ work_offset], ldwork);
        i__1 = *n - *l;
        sgemm_("N", "N", m, l, &i__1, &c_b12, &b[b_offset], ldb, &v[v_offset], ldv, &c_b12, &work[work_offset], ldwork);
        i__1 = *k - *l;
        sgemm_("N", "N", m, &i__1, n, &c_b12, &b[b_offset], ldb, &v[kp * v_dim1 + 1], ldv, &c_b20, &work[kp * work_dim1 + 1], ldwork);
        i__1 = *k;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *m;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                work[i__ + j * work_dim1] += a[i__ + j * a_dim1];
            }
        }
        strmm_("R", "U", trans, "N", m, k, &c_b12, &t[t_offset], ldt, &work[ work_offset], ldwork);
        i__1 = *k;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *m;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                a[i__ + j * a_dim1] -= work[i__ + j * work_dim1];
            }
        }
        i__1 = *n - *l;
        sgemm_("N", "T", m, &i__1, k, &c_b27, &work[work_offset], ldwork, &v[ v_offset], ldv, &c_b12, &b[b_offset], ldb);
        i__1 = *k - *l;
        sgemm_("N", "T", m, l, &i__1, &c_b27, &work[kp * work_dim1 + 1], ldwork, &v[np + kp * v_dim1], ldv, &c_b12, &b[np * b_dim1 + 1] , ldb);
        strmm_("R", "U", "T", "N", m, l, &c_b12, &v[np + v_dim1], ldv, &work[ work_offset], ldwork);
        i__1 = *l;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *m;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                b[i__ + (*n - *l + j) * b_dim1] -= work[i__ + j * work_dim1];
            }
        }
        /* --------------------------------------------------------------------------- */
    }
    else if (column && backward && left)
    {
        /* --------------------------------------------------------------------------- */
        /* Let W = [ V ] (M-by-K) */
        /* [ I ] (K-by-K) */
        /* Form H C or H^H C where C = [ B ] (M-by-N) */
        /* [ A ] (K-by-N) */
        /* H = I - W T W^H or H^H = I - W T^H W^H */
        /* A = A - T (A + V^H B) or A = A - T^H (A + V^H B) */
        /* B = B - V T (A + V^H B) or B = B - V T^H (A + V^H B) */
        /* --------------------------------------------------------------------------- */
        /* Computing MIN */
        i__1 = *l + 1;
        mp = min(i__1,*m);
        /* Computing MIN */
        i__1 = *k - *l + 1;
        kp = min(i__1,*k);
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *l;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                work[*k - *l + i__ + j * work_dim1] = b[i__ + j * b_dim1];
            }
        }
        strmm_("L", "L", "T", "N", l, n, &c_b12, &v[kp * v_dim1 + 1], ldv, & work[kp + work_dim1], ldwork);
        i__1 = *m - *l;
        sgemm_("T", "N", l, n, &i__1, &c_b12, &v[mp + kp * v_dim1], ldv, &b[ mp + b_dim1], ldb, &c_b12, &work[kp + work_dim1], ldwork);
        i__1 = *k - *l;
        sgemm_("T", "N", &i__1, n, m, &c_b12, &v[v_offset], ldv, &b[b_offset], ldb, &c_b20, &work[work_offset], ldwork);
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *k;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                work[i__ + j * work_dim1] += a[i__ + j * a_dim1];
            }
        }
        strmm_("L", "L", trans, "N", k, n, &c_b12, &t[t_offset], ldt, &work[ work_offset], ldwork);
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *k;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                a[i__ + j * a_dim1] -= work[i__ + j * work_dim1];
            }
        }
        i__1 = *m - *l;
        sgemm_("N", "N", &i__1, n, k, &c_b27, &v[mp + v_dim1], ldv, &work[ work_offset], ldwork, &c_b12, &b[mp + b_dim1], ldb);
        i__1 = *k - *l;
        sgemm_("N", "N", l, n, &i__1, &c_b27, &v[v_offset], ldv, &work[ work_offset], ldwork, &c_b12, &b[b_offset], ldb);
        strmm_("L", "L", "N", "N", l, n, &c_b12, &v[kp * v_dim1 + 1], ldv, & work[kp + work_dim1], ldwork);
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *l;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                b[i__ + j * b_dim1] -= work[*k - *l + i__ + j * work_dim1];
            }
        }
        /* --------------------------------------------------------------------------- */
    }
    else if (column && backward && right)
    {
        /* --------------------------------------------------------------------------- */
        /* Let W = [ V ] (N-by-K) */
        /* [ I ] (K-by-K) */
        /* Form C H or C H^H where C = [ B A ] (B is M-by-N, A is M-by-K) */
        /* H = I - W T W^H or H^H = I - W T^H W^H */
        /* A = A - (A + B V) T or A = A - (A + B V) T^H */
        /* B = B - (A + B V) T V^H or B = B - (A + B V) T^H V^H */
        /* --------------------------------------------------------------------------- */
        /* Computing MIN */
        i__1 = *l + 1;
        np = min(i__1,*n);
        /* Computing MIN */
        i__1 = *k - *l + 1;
        kp = min(i__1,*k);
        i__1 = *l;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *m;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                work[i__ + (*k - *l + j) * work_dim1] = b[i__ + j * b_dim1];
            }
        }
        strmm_("R", "L", "N", "N", m, l, &c_b12, &v[kp * v_dim1 + 1], ldv, & work[kp * work_dim1 + 1], ldwork);
        i__1 = *n - *l;
        sgemm_("N", "N", m, l, &i__1, &c_b12, &b[np * b_dim1 + 1], ldb, &v[np + kp * v_dim1], ldv, &c_b12, &work[kp * work_dim1 + 1], ldwork);
        i__1 = *k - *l;
        sgemm_("N", "N", m, &i__1, n, &c_b12, &b[b_offset], ldb, &v[v_offset], ldv, &c_b20, &work[work_offset], ldwork);
        i__1 = *k;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *m;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                work[i__ + j * work_dim1] += a[i__ + j * a_dim1];
            }
        }
        strmm_("R", "L", trans, "N", m, k, &c_b12, &t[t_offset], ldt, &work[ work_offset], ldwork);
        i__1 = *k;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *m;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                a[i__ + j * a_dim1] -= work[i__ + j * work_dim1];
            }
        }
        i__1 = *n - *l;
        sgemm_("N", "T", m, &i__1, k, &c_b27, &work[work_offset], ldwork, &v[ np + v_dim1], ldv, &c_b12, &b[np * b_dim1 + 1], ldb);
        i__1 = *k - *l;
        sgemm_("N", "T", m, l, &i__1, &c_b27, &work[work_offset], ldwork, &v[ v_offset], ldv, &c_b12, &b[b_offset], ldb);
        strmm_("R", "L", "T", "N", m, l, &c_b12, &v[kp * v_dim1 + 1], ldv, & work[kp * work_dim1 + 1], ldwork);
        i__1 = *l;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *m;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                b[i__ + j * b_dim1] -= work[i__ + (*k - *l + j) * work_dim1];
            }
        }
        /* --------------------------------------------------------------------------- */
    }
    else if (row && forward && left)
    {
        /* --------------------------------------------------------------------------- */
        /* Let W = [ I V ] ( I is K-by-K, V is K-by-M ) */
        /* Form H C or H^H C where C = [ A ] (K-by-N) */
        /* [ B ] (M-by-N) */
        /* H = I - W^H T W or H^H = I - W^H T^H W */
        /* A = A - T (A + V B) or A = A - T^H (A + V B) */
        /* B = B - V^H T (A + V B) or B = B - V^H T^H (A + V B) */
        /* --------------------------------------------------------------------------- */
        /* Computing MIN */
        i__1 = *m - *l + 1;
        mp = min(i__1,*m);
        /* Computing MIN */
        i__1 = *l + 1;
        kp = min(i__1,*k);
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *l;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                work[i__ + j * work_dim1] = b[*m - *l + i__ + j * b_dim1];
            }
        }
        strmm_("L", "L", "N", "N", l, n, &c_b12, &v[mp * v_dim1 + 1], ldv, & work[work_offset], ldb);
        i__1 = *m - *l;
        sgemm_("N", "N", l, n, &i__1, &c_b12, &v[v_offset], ldv, &b[b_offset], ldb, &c_b12, &work[work_offset], ldwork);
        i__1 = *k - *l;
        sgemm_("N", "N", &i__1, n, m, &c_b12, &v[kp + v_dim1], ldv, &b[ b_offset], ldb, &c_b20, &work[kp + work_dim1], ldwork);
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *k;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                work[i__ + j * work_dim1] += a[i__ + j * a_dim1];
            }
        }
        strmm_("L", "U", trans, "N", k, n, &c_b12, &t[t_offset], ldt, &work[ work_offset], ldwork);
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *k;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                a[i__ + j * a_dim1] -= work[i__ + j * work_dim1];
            }
        }
        i__1 = *m - *l;
        sgemm_("T", "N", &i__1, n, k, &c_b27, &v[v_offset], ldv, &work[ work_offset], ldwork, &c_b12, &b[b_offset], ldb);
        i__1 = *k - *l;
        sgemm_("T", "N", l, n, &i__1, &c_b27, &v[kp + mp * v_dim1], ldv, & work[kp + work_dim1], ldwork, &c_b12, &b[mp + b_dim1], ldb);
        strmm_("L", "L", "T", "N", l, n, &c_b12, &v[mp * v_dim1 + 1], ldv, & work[work_offset], ldwork);
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *l;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                b[*m - *l + i__ + j * b_dim1] -= work[i__ + j * work_dim1];
            }
        }
        /* --------------------------------------------------------------------------- */
    }
    else if (row && forward && right)
    {
        /* --------------------------------------------------------------------------- */
        /* Let W = [ I V ] ( I is K-by-K, V is K-by-N ) */
        /* Form C H or C H^H where C = [ A B ] (A is M-by-K, B is M-by-N) */
        /* H = I - W^H T W or H^H = I - W^H T^H W */
        /* A = A - (A + B V^H) T or A = A - (A + B V^H) T^H */
        /* B = B - (A + B V^H) T V or B = B - (A + B V^H) T^H V */
        /* --------------------------------------------------------------------------- */
        /* Computing MIN */
        i__1 = *n - *l + 1;
        np = min(i__1,*n);
        /* Computing MIN */
        i__1 = *l + 1;
        kp = min(i__1,*k);
        i__1 = *l;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *m;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                work[i__ + j * work_dim1] = b[i__ + (*n - *l + j) * b_dim1];
            }
        }
        strmm_("R", "L", "T", "N", m, l, &c_b12, &v[np * v_dim1 + 1], ldv, & work[work_offset], ldwork);
        i__1 = *n - *l;
        sgemm_("N", "T", m, l, &i__1, &c_b12, &b[b_offset], ldb, &v[v_offset], ldv, &c_b12, &work[work_offset], ldwork);
        i__1 = *k - *l;
        sgemm_("N", "T", m, &i__1, n, &c_b12, &b[b_offset], ldb, &v[kp + v_dim1], ldv, &c_b20, &work[kp * work_dim1 + 1], ldwork);
        i__1 = *k;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *m;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                work[i__ + j * work_dim1] += a[i__ + j * a_dim1];
            }
        }
        strmm_("R", "U", trans, "N", m, k, &c_b12, &t[t_offset], ldt, &work[ work_offset], ldwork);
        i__1 = *k;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *m;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                a[i__ + j * a_dim1] -= work[i__ + j * work_dim1];
            }
        }
        i__1 = *n - *l;
        sgemm_("N", "N", m, &i__1, k, &c_b27, &work[work_offset], ldwork, &v[ v_offset], ldv, &c_b12, &b[b_offset], ldb);
        i__1 = *k - *l;
        sgemm_("N", "N", m, l, &i__1, &c_b27, &work[kp * work_dim1 + 1], ldwork, &v[kp + np * v_dim1], ldv, &c_b12, &b[np * b_dim1 + 1] , ldb);
        strmm_("R", "L", "N", "N", m, l, &c_b12, &v[np * v_dim1 + 1], ldv, & work[work_offset], ldwork);
        i__1 = *l;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *m;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                b[i__ + (*n - *l + j) * b_dim1] -= work[i__ + j * work_dim1];
            }
        }
        /* --------------------------------------------------------------------------- */
    }
    else if (row && backward && left)
    {
        /* --------------------------------------------------------------------------- */
        /* Let W = [ V I ] ( I is K-by-K, V is K-by-M ) */
        /* Form H C or H^H C where C = [ B ] (M-by-N) */
        /* [ A ] (K-by-N) */
        /* H = I - W^H T W or H^H = I - W^H T^H W */
        /* A = A - T (A + V B) or A = A - T^H (A + V B) */
        /* B = B - V^H T (A + V B) or B = B - V^H T^H (A + V B) */
        /* --------------------------------------------------------------------------- */
        /* Computing MIN */
        i__1 = *l + 1;
        mp = min(i__1,*m);
        /* Computing MIN */
        i__1 = *k - *l + 1;
        kp = min(i__1,*k);
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *l;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                work[*k - *l + i__ + j * work_dim1] = b[i__ + j * b_dim1];
            }
        }
        strmm_("L", "U", "N", "N", l, n, &c_b12, &v[kp + v_dim1], ldv, &work[ kp + work_dim1], ldwork);
        i__1 = *m - *l;
        sgemm_("N", "N", l, n, &i__1, &c_b12, &v[kp + mp * v_dim1], ldv, &b[ mp + b_dim1], ldb, &c_b12, &work[kp + work_dim1], ldwork);
        i__1 = *k - *l;
        sgemm_("N", "N", &i__1, n, m, &c_b12, &v[v_offset], ldv, &b[b_offset], ldb, &c_b20, &work[work_offset], ldwork);
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *k;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                work[i__ + j * work_dim1] += a[i__ + j * a_dim1];
            }
        }
        strmm_("L", "L ", trans, "N", k, n, &c_b12, &t[t_offset], ldt, &work[ work_offset], ldwork);
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *k;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                a[i__ + j * a_dim1] -= work[i__ + j * work_dim1];
            }
        }
        i__1 = *m - *l;
        sgemm_("T", "N", &i__1, n, k, &c_b27, &v[mp * v_dim1 + 1], ldv, &work[ work_offset], ldwork, &c_b12, &b[mp + b_dim1], ldb);
        i__1 = *k - *l;
        sgemm_("T", "N", l, n, &i__1, &c_b27, &v[v_offset], ldv, &work[ work_offset], ldwork, &c_b12, &b[b_offset], ldb);
        strmm_("L", "U", "T", "N", l, n, &c_b12, &v[kp + v_dim1], ldv, &work[ kp + work_dim1], ldwork);
        i__1 = *n;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *l;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                b[i__ + j * b_dim1] -= work[*k - *l + i__ + j * work_dim1];
            }
        }
        /* --------------------------------------------------------------------------- */
    }
    else if (row && backward && right)
    {
        /* --------------------------------------------------------------------------- */
        /* Let W = [ V I ] ( I is K-by-K, V is K-by-N ) */
        /* Form C H or C H^H where C = [ B A ] (A is M-by-K, B is M-by-N) */
        /* H = I - W^H T W or H^H = I - W^H T^H W */
        /* A = A - (A + B V^H) T or A = A - (A + B V^H) T^H */
        /* B = B - (A + B V^H) T V or B = B - (A + B V^H) T^H V */
        /* --------------------------------------------------------------------------- */
        /* Computing MIN */
        i__1 = *l + 1;
        np = min(i__1,*n);
        /* Computing MIN */
        i__1 = *k - *l + 1;
        kp = min(i__1,*k);
        i__1 = *l;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *m;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                work[i__ + (*k - *l + j) * work_dim1] = b[i__ + j * b_dim1];
            }
        }
        strmm_("R", "U", "T", "N", m, l, &c_b12, &v[kp + v_dim1], ldv, &work[ kp * work_dim1 + 1], ldwork);
        i__1 = *n - *l;
        sgemm_("N", "T", m, l, &i__1, &c_b12, &b[np * b_dim1 + 1], ldb, &v[kp + np * v_dim1], ldv, &c_b12, &work[kp * work_dim1 + 1], ldwork);
        i__1 = *k - *l;
        sgemm_("N", "T", m, &i__1, n, &c_b12, &b[b_offset], ldb, &v[v_offset], ldv, &c_b20, &work[work_offset], ldwork);
        i__1 = *k;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *m;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                work[i__ + j * work_dim1] += a[i__ + j * a_dim1];
            }
        }
        strmm_("R", "L", trans, "N", m, k, &c_b12, &t[t_offset], ldt, &work[ work_offset], ldwork);
        i__1 = *k;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *m;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                a[i__ + j * a_dim1] -= work[i__ + j * work_dim1];
            }
        }
        i__1 = *n - *l;
        sgemm_("N", "N", m, &i__1, k, &c_b27, &work[work_offset], ldwork, &v[ np * v_dim1 + 1], ldv, &c_b12, &b[np * b_dim1 + 1], ldb);
        i__1 = *k - *l;
        sgemm_("N", "N", m, l, &i__1, &c_b27, &work[work_offset], ldwork, &v[ v_offset], ldv, &c_b12, &b[b_offset], ldb);
        strmm_("R", "U", "N", "N", m, l, &c_b12, &v[kp + v_dim1], ldv, &work[ kp * work_dim1 + 1], ldwork);
        i__1 = *l;
        for (j = 1;
                j <= i__1;
                ++j)
        {
            i__2 = *m;
            for (i__ = 1;
                    i__ <= i__2;
                    ++i__)
            {
                b[i__ + j * b_dim1] -= work[i__ + (*k - *l + j) * work_dim1];
            }
        }
    }
    return 0;
    /* End of STPRFB */
}
コード例 #14
0
/* Subroutine */ int slauum_(char *uplo, integer *n, real *a, integer *lda, 
	integer *info)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    SLAUUM computes the product U * U' or L' * L, where the triangular   
    factor U or L is stored in the upper or lower triangular part of   
    the array A.   

    If UPLO = 'U' or 'u' then the upper triangle of the result is stored,   
    overwriting the factor U in A.   
    If UPLO = 'L' or 'l' then the lower triangle of the result is stored,   
    overwriting the factor L in A.   

    This is the blocked form of the algorithm, calling Level 3 BLAS.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            Specifies whether the triangular factor stored in the array A   
            is upper or lower triangular:   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    N       (input) INTEGER   
            The order of the triangular factor U or L.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the triangular factor U or L.   
            On exit, if UPLO = 'U', the upper triangle of A is   
            overwritten with the upper triangle of the product U * U';   
            if UPLO = 'L', the lower triangle of A is overwritten with   
            the lower triangle of the product L' * L.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -k, the k-th argument had an illegal value   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static real c_b15 = 1.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    static integer i__;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    static logical upper;
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
	    ), ssyrk_(char *, char *, integer 
	    *, integer *, real *, real *, integer *, real *, real *, integer *
	    );
    static integer ib;
    extern /* Subroutine */ int slauu2_(char *, integer *, real *, integer *, 
	    integer *);
    static integer nb;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SLAUUM", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "SLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);

    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code */

	slauu2_(uplo, n, &a[a_offset], lda, info);
    } else {

/*        Use blocked code */

	if (upper) {

/*           Compute the product U * U'. */

	    i__1 = *n;
	    i__2 = nb;
	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);
		i__3 = i__ - 1;
		strmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, 
			&c_b15, &a_ref(i__, i__), lda, &a_ref(1, i__), lda);
		slauu2_("Upper", &ib, &a_ref(i__, i__), lda, info);
		if (i__ + ib <= *n) {
		    i__3 = i__ - 1;
		    i__4 = *n - i__ - ib + 1;
		    sgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, &
			    c_b15, &a_ref(1, i__ + ib), lda, &a_ref(i__, i__ 
			    + ib), lda, &c_b15, &a_ref(1, i__), lda);
		    i__3 = *n - i__ - ib + 1;
		    ssyrk_("Upper", "No transpose", &ib, &i__3, &c_b15, &
			    a_ref(i__, i__ + ib), lda, &c_b15, &a_ref(i__, 
			    i__), lda);
		}
/* L10: */
	    }
	} else {

/*           Compute the product L' * L. */

	    i__2 = *n;
	    i__1 = nb;
	    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);
		i__3 = i__ - 1;
		strmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, &
			c_b15, &a_ref(i__, i__), lda, &a_ref(i__, 1), lda);
		slauu2_("Lower", &ib, &a_ref(i__, i__), lda, info);
		if (i__ + ib <= *n) {
		    i__3 = i__ - 1;
		    i__4 = *n - i__ - ib + 1;
		    sgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, &
			    c_b15, &a_ref(i__ + ib, i__), lda, &a_ref(i__ + 
			    ib, 1), lda, &c_b15, &a_ref(i__, 1), lda);
		    i__3 = *n - i__ - ib + 1;
		    ssyrk_("Lower", "Transpose", &ib, &i__3, &c_b15, &a_ref(
			    i__ + ib, i__), lda, &c_b15, &a_ref(i__, i__), 
			    lda);
		}
/* L20: */
	    }
	}
    }

    return 0;

/*     End of SLAUUM */

} /* slauum_ */
コード例 #15
0
ファイル: ssygv.c プロジェクト: deepakantony/vispack
/* Subroutine */ int ssygv_(integer *itype, char *jobz, char *uplo, integer *
	n, real *a, integer *lda, real *b, integer *ldb, real *w, real *work, 
	integer *lwork, integer *info)
{
/*  -- LAPACK driver routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    SSYGV computes all the eigenvalues, and optionally, the eigenvectors 
  
    of a real generalized symmetric-definite eigenproblem, of the form   
    A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.   
    Here A and B are assumed to be symmetric and B is also   
    positive definite.   

    Arguments   
    =========   

    ITYPE   (input) INTEGER   
            Specifies the problem type to be solved:   
            = 1:  A*x = (lambda)*B*x   
            = 2:  A*B*x = (lambda)*x   
            = 3:  B*A*x = (lambda)*x   

    JOBZ    (input) CHARACTER*1   
            = 'N':  Compute eigenvalues only;   
            = 'V':  Compute eigenvalues and eigenvectors.   

    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangles of A and B are stored;   
            = 'L':  Lower triangles of A and B are stored.   

    N       (input) INTEGER   
            The order of the matrices A and B.  N >= 0.   

    A       (input/output) REAL array, dimension (LDA, N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the   
            leading N-by-N upper triangular part of A contains the   
            upper triangular part of the matrix A.  If UPLO = 'L',   
            the leading N-by-N lower triangular part of A contains   
            the lower triangular part of the matrix A.   

            On exit, if JOBZ = 'V', then if INFO = 0, A contains the   
            matrix Z of eigenvectors.  The eigenvectors are normalized   
            as follows:   
            if ITYPE = 1 or 2, Z**T*B*Z = I;   
            if ITYPE = 3, Z**T*inv(B)*Z = I.   
            If JOBZ = 'N', then on exit the upper triangle (if UPLO='U') 
  
            or the lower triangle (if UPLO='L') of A, including the   
            diagonal, is destroyed.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    B       (input/output) REAL array, dimension (LDB, N)   
            On entry, the symmetric matrix B.  If UPLO = 'U', the   
            leading N-by-N upper triangular part of B contains the   
            upper triangular part of the matrix B.  If UPLO = 'L',   
            the leading N-by-N lower triangular part of B contains   
            the lower triangular part of the matrix B.   

            On exit, if INFO <= N, the part of B containing the matrix is 
  
            overwritten by the triangular factor U or L from the Cholesky 
  
            factorization B = U**T*U or B = L*L**T.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    W       (output) REAL array, dimension (N)   
            If INFO = 0, the eigenvalues in ascending order.   

    WORK    (workspace/output) REAL array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The length of the array WORK.  LWORK >= max(1,3*N-1).   
            For optimal efficiency, LWORK >= (NB+2)*N,   
            where NB is the blocksize for SSYTRD returned by ILAENV.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  SPOTRF or SSYEV returned an error code:   
               <= N:  if INFO = i, SSYEV failed to converge;   
                      i off-diagonal elements of an intermediate   
                      tridiagonal form did not converge to zero;   
               > N:   if INFO = N + i, for 1 <= i <= N, then the leading 
  
                      minor of order i of B is not positive definite.   
                      The factorization of B could not be completed and   
                      no eigenvalues or eigenvectors were computed.   

    ===================================================================== 
  


       Test the input parameters.   

    
   Parameter adjustments   
       Function Body */
    /* Table of constant values */
    static real c_b11 = 1.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
    /* Local variables */
    static integer neig;
    extern logical lsame_(char *, char *);
    static char trans[1];
    static logical upper;
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
	    );
    static logical wantz;
    extern /* Subroutine */ int strsm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
	    ), ssyev_(char *, char *, integer 
	    *, real *, integer *, real *, real *, integer *, integer *), xerbla_(char *, integer *), spotrf_(char 
	    *, integer *, real *, integer *, integer *), ssygst_(
	    integer *, char *, integer *, real *, integer *, real *, integer *
	    , integer *);



#define W(I) w[(I)-1]
#define WORK(I) work[(I)-1]

#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)]

    wantz = lsame_(jobz, "V");
    upper = lsame_(uplo, "U");

    *info = 0;
    if (*itype < 0 || *itype > 3) {
	*info = -1;
    } else if (! (wantz || lsame_(jobz, "N"))) {
	*info = -2;
    } else if (! (upper || lsame_(uplo, "L"))) {
	*info = -3;
    } else if (*n < 0) {
	*info = -4;
    } else if (*lda < max(1,*n)) {
	*info = -6;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    } else /* if(complicated condition) */ {
/* Computing MAX */
	i__1 = 1, i__2 = *n * 3 - 1;
	if (*lwork < max(i__1,i__2)) {
	    *info = -11;
	}
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SSYGV ", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Form a Cholesky factorization of B. */

    spotrf_(uplo, n, &B(1,1), ldb, info);
    if (*info != 0) {
	*info = *n + *info;
	return 0;
    }

/*     Transform problem to standard eigenvalue problem and solve. */

    ssygst_(itype, uplo, n, &A(1,1), lda, &B(1,1), ldb, info);
    ssyev_(jobz, uplo, n, &A(1,1), lda, &W(1), &WORK(1), lwork, info);

    if (wantz) {

/*        Backtransform eigenvectors to the original problem. */

	neig = *n;
	if (*info > 0) {
	    neig = *info - 1;
	}
	if (*itype == 1 || *itype == 2) {

/*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;   
             backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
 */

	    if (upper) {
		*(unsigned char *)trans = 'N';
	    } else {
		*(unsigned char *)trans = 'T';
	    }

	    strsm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b11, &B(1,1), ldb, &A(1,1), lda);

	} else if (*itype == 3) {

/*           For B*A*x=(lambda)*x;   
             backtransform eigenvectors: x = L*y or U'*y */

	    if (upper) {
		*(unsigned char *)trans = 'T';
	    } else {
		*(unsigned char *)trans = 'N';
	    }

	    strmm_("Left", uplo, trans, "Non-unit", n, &neig, &c_b11, &B(1,1), ldb, &A(1,1), lda);
	}
    }
    return 0;

/*     End of SSYGV */

} /* ssygv_ */
コード例 #16
0
ファイル: blas-lapack.c プロジェクト: BenjaminCoquelle/clBLAS
void
strmm(char side, char uplo, char transa, char diag, int m, int n, float alpha, float *a, int lda, float *b, int ldb)
{
    strmm_(&side, &uplo, &transa, &diag, &m, &n, &alpha, a, &lda, b, &ldb);
}
コード例 #17
0
ファイル: stftri.c プロジェクト: GuillaumeFuchs/Ensimag
int stftri_(char *transr, char *uplo, char *diag, int *n,
            float *a, int *info)
{
    /* System generated locals */
    int i__1, i__2;

    /* Local variables */
    int k, n1, n2;
    int normaltransr;
    extern int lsame_(char *, char *);
    int lower;
    extern  int strmm_(char *, char *, char *, char *,
                       int *, int *, float *, float *, int *, float *, int *
                      ), xerbla_(char *, int *);
    int nisodd;
    extern  int strtri_(char *, char *, int *, float *,
                        int *, int *);


    /*  -- LAPACK routine (version 3.2)                                    -- */

    /*  -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */
    /*  -- November 2008                                                   -- */

    /*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
    /*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */

    /*     .. Scalar Arguments .. */
    /*     .. */
    /*     .. Array Arguments .. */
    /*     .. */

    /*  Purpose */
    /*  ======= */

    /*  STFTRI computes the inverse of a triangular matrix A stored in RFP */
    /*  format. */

    /*  This is a Level 3 BLAS version of the algorithm. */

    /*  Arguments */
    /*  ========= */

    /*  TRANSR    (input) CHARACTER */
    /*          = 'N':  The Normal TRANSR of RFP A is stored; */
    /*          = 'T':  The Transpose TRANSR of RFP A is stored. */

    /*  UPLO    (input) CHARACTER */
    /*          = 'U':  A is upper triangular; */
    /*          = 'L':  A is lower triangular. */

    /*  DIAG    (input) CHARACTER */
    /*          = 'N':  A is non-unit triangular; */
    /*          = 'U':  A is unit triangular. */

    /*  N       (input) INTEGER */
    /*          The order of the matrix A.  N >= 0. */

    /*  A       (input/output) REAL array, dimension (NT); */
    /*          NT=N*(N+1)/2. On entry, the triangular factor of a Hermitian */
    /*          Positive Definite matrix A in RFP format. RFP format is */
    /*          described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' */
    /*          then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is */
    /*          (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'T' then RFP is */
    /*          the transpose of RFP A as defined when */
    /*          TRANSR = 'N'. The contents of RFP A are defined by UPLO as */
    /*          follows: If UPLO = 'U' the RFP A contains the nt elements of */
    /*          upper packed A; If UPLO = 'L' the RFP A contains the nt */
    /*          elements of lower packed A. The LDA of RFP A is (N+1)/2 when */
    /*          TRANSR = 'T'. When TRANSR is 'N' the LDA is N+1 when N is */
    /*          even and N is odd. See the Note below for more details. */

    /*          On exit, the (triangular) inverse of the original matrix, in */
    /*          the same storage format. */

    /*  INFO    (output) INTEGER */
    /*          = 0: successful exit */
    /*          < 0: if INFO = -i, the i-th argument had an illegal value */
    /*          > 0: if INFO = i, A(i,i) is exactly zero.  The triangular */
    /*               matrix is singular and its inverse can not be computed. */

    /*  Notes */
    /*  ===== */

    /*  We first consider Rectangular Full Packed (RFP) Format when N is */
    /*  even. We give an example where N = 6. */

    /*      AP is Upper             AP is Lower */

    /*   00 01 02 03 04 05       00 */
    /*      11 12 13 14 15       10 11 */
    /*         22 23 24 25       20 21 22 */
    /*            33 34 35       30 31 32 33 */
    /*               44 45       40 41 42 43 44 */
    /*                  55       50 51 52 53 54 55 */


    /*  Let TRANSR = 'N'. RFP holds AP as follows: */
    /*  For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */
    /*  three columns of AP upper. The lower triangle A(4:6,0:2) consists of */
    /*  the transpose of the first three columns of AP upper. */
    /*  For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */
    /*  three columns of AP lower. The upper triangle A(0:2,0:2) consists of */
    /*  the transpose of the last three columns of AP lower. */
    /*  This covers the case N even and TRANSR = 'N'. */

    /*         RFP A                   RFP A */

    /*        03 04 05                33 43 53 */
    /*        13 14 15                00 44 54 */
    /*        23 24 25                10 11 55 */
    /*        33 34 35                20 21 22 */
    /*        00 44 45                30 31 32 */
    /*        01 11 55                40 41 42 */
    /*        02 12 22                50 51 52 */

    /*  Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */
    /*  transpose of RFP A above. One therefore gets: */


    /*           RFP A                   RFP A */

    /*     03 13 23 33 00 01 02    33 00 10 20 30 40 50 */
    /*     04 14 24 34 44 11 12    43 44 11 21 31 41 51 */
    /*     05 15 25 35 45 55 22    53 54 55 22 32 42 52 */


    /*  We first consider Rectangular Full Packed (RFP) Format when N is */
    /*  odd. We give an example where N = 5. */

    /*     AP is Upper                 AP is Lower */

    /*   00 01 02 03 04              00 */
    /*      11 12 13 14              10 11 */
    /*         22 23 24              20 21 22 */
    /*            33 34              30 31 32 33 */
    /*               44              40 41 42 43 44 */


    /*  Let TRANSR = 'N'. RFP holds AP as follows: */
    /*  For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */
    /*  three columns of AP upper. The lower triangle A(3:4,0:1) consists of */
    /*  the transpose of the first two columns of AP upper. */
    /*  For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */
    /*  three columns of AP lower. The upper triangle A(0:1,1:2) consists of */
    /*  the transpose of the last two columns of AP lower. */
    /*  This covers the case N odd and TRANSR = 'N'. */

    /*         RFP A                   RFP A */

    /*        02 03 04                00 33 43 */
    /*        12 13 14                10 11 44 */
    /*        22 23 24                20 21 22 */
    /*        00 33 34                30 31 32 */
    /*        01 11 44                40 41 42 */

    /*  Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */
    /*  transpose of RFP A above. One therefore gets: */

    /*           RFP A                   RFP A */

    /*     02 12 22 00 01             00 10 20 30 40 50 */
    /*     03 13 23 33 11             33 11 21 31 41 51 */
    /*     04 14 24 34 44             43 44 22 32 42 52 */

    /*  ===================================================================== */

    /*     .. Parameters .. */
    /*     .. */
    /*     .. Local Scalars .. */
    /*     .. */
    /*     .. External Functions .. */
    /*     .. */
    /*     .. External Subroutines .. */
    /*     .. */
    /*     .. Intrinsic Functions .. */
    /*     .. */
    /*     .. Executable Statements .. */

    /*     Test the input parameters. */

    *info = 0;
    normaltransr = lsame_(transr, "N");
    lower = lsame_(uplo, "L");
    if (! normaltransr && ! lsame_(transr, "T")) {
        *info = -1;
    } else if (! lower && ! lsame_(uplo, "U")) {
        *info = -2;
    } else if (! lsame_(diag, "N") && ! lsame_(diag,
               "U")) {
        *info = -3;
    } else if (*n < 0) {
        *info = -4;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("STFTRI", &i__1);
        return 0;
    }

    /*     Quick return if possible */

    if (*n == 0) {
        return 0;
    }

    /*     If N is odd, set NISODD = .TRUE. */
    /*     If N is even, set K = N/2 and NISODD = .FALSE. */

    if (*n % 2 == 0) {
        k = *n / 2;
        nisodd = FALSE;
    } else {
        nisodd = TRUE;
    }

    /*     Set N1 and N2 depending on LOWER */

    if (lower) {
        n2 = *n / 2;
        n1 = *n - n2;
    } else {
        n1 = *n / 2;
        n2 = *n - n1;
    }


    /*     start execution: there are eight cases */

    if (nisodd) {

        /*        N is odd */

        if (normaltransr) {

            /*           N is odd and TRANSR = 'N' */

            if (lower) {

                /*             SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) */
                /*             T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) */
                /*             T1 -> a(0), T2 -> a(n), S -> a(n1) */

                strtri_("L", diag, &n1, a, n, info);
                if (*info > 0) {
                    return 0;
                }
                strmm_("R", "L", "N", diag, &n2, &n1, &c_b13, a, n, &a[n1], n);
                strtri_("U", diag, &n2, &a[*n], n, info)
                ;
                if (*info > 0) {
                    *info += n1;
                }
                if (*info > 0) {
                    return 0;
                }
                strmm_("L", "U", "T", diag, &n2, &n1, &c_b18, &a[*n], n, &a[
                           n1], n);

            } else {

                /*             SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) */
                /*             T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) */
                /*             T1 -> a(n2), T2 -> a(n1), S -> a(0) */

                strtri_("L", diag, &n1, &a[n2], n, info)
                ;
                if (*info > 0) {
                    return 0;
                }
                strmm_("L", "L", "T", diag, &n1, &n2, &c_b13, &a[n2], n, a, n);
                strtri_("U", diag, &n2, &a[n1], n, info)
                ;
                if (*info > 0) {
                    *info += n1;
                }
                if (*info > 0) {
                    return 0;
                }
                strmm_("R", "U", "N", diag, &n1, &n2, &c_b18, &a[n1], n, a, n);

            }

        } else {

            /*           N is odd and TRANSR = 'T' */

            if (lower) {

                /*              SRPA for LOWER, TRANSPOSE and N is odd */
                /*              T1 -> a(0), T2 -> a(1), S -> a(0+n1*n1) */

                strtri_("U", diag, &n1, a, &n1, info);
                if (*info > 0) {
                    return 0;
                }
                strmm_("L", "U", "N", diag, &n1, &n2, &c_b13, a, &n1, &a[n1 *
                        n1], &n1);
                strtri_("L", diag, &n2, &a[1], &n1, info);
                if (*info > 0) {
                    *info += n1;
                }
                if (*info > 0) {
                    return 0;
                }
                strmm_("R", "L", "T", diag, &n1, &n2, &c_b18, &a[1], &n1, &a[
                           n1 * n1], &n1);

            } else {

                /*              SRPA for UPPER, TRANSPOSE and N is odd */
                /*              T1 -> a(0+n2*n2), T2 -> a(0+n1*n2), S -> a(0) */

                strtri_("U", diag, &n1, &a[n2 * n2], &n2, info);
                if (*info > 0) {
                    return 0;
                }
                strmm_("R", "U", "T", diag, &n2, &n1, &c_b13, &a[n2 * n2], &
                       n2, a, &n2);
                strtri_("L", diag, &n2, &a[n1 * n2], &n2, info);
                if (*info > 0) {
                    *info += n1;
                }
                if (*info > 0) {
                    return 0;
                }
                strmm_("L", "L", "N", diag, &n2, &n1, &c_b18, &a[n1 * n2], &
                       n2, a, &n2);
            }

        }

    } else {

        /*        N is even */

        if (normaltransr) {

            /*           N is even and TRANSR = 'N' */

            if (lower) {

                /*              SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) */
                /*              T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) */
                /*              T1 -> a(1), T2 -> a(0), S -> a(k+1) */

                i__1 = *n + 1;
                strtri_("L", diag, &k, &a[1], &i__1, info);
                if (*info > 0) {
                    return 0;
                }
                i__1 = *n + 1;
                i__2 = *n + 1;
                strmm_("R", "L", "N", diag, &k, &k, &c_b13, &a[1], &i__1, &a[
                           k + 1], &i__2);
                i__1 = *n + 1;
                strtri_("U", diag, &k, a, &i__1, info);
                if (*info > 0) {
                    *info += k;
                }
                if (*info > 0) {
                    return 0;
                }
                i__1 = *n + 1;
                i__2 = *n + 1;
                strmm_("L", "U", "T", diag, &k, &k, &c_b18, a, &i__1, &a[k +
                        1], &i__2)
                ;

            } else {

                /*              SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) */
                /*              T1 -> a(k+1,0) ,  T2 -> a(k,0),   S -> a(0,0) */
                /*              T1 -> a(k+1), T2 -> a(k), S -> a(0) */

                i__1 = *n + 1;
                strtri_("L", diag, &k, &a[k + 1], &i__1, info);
                if (*info > 0) {
                    return 0;
                }
                i__1 = *n + 1;
                i__2 = *n + 1;
                strmm_("L", "L", "T", diag, &k, &k, &c_b13, &a[k + 1], &i__1,
                       a, &i__2);
                i__1 = *n + 1;
                strtri_("U", diag, &k, &a[k], &i__1, info);
                if (*info > 0) {
                    *info += k;
                }
                if (*info > 0) {
                    return 0;
                }
                i__1 = *n + 1;
                i__2 = *n + 1;
                strmm_("R", "U", "N", diag, &k, &k, &c_b18, &a[k], &i__1, a, &
                       i__2);
            }
        } else {

            /*           N is even and TRANSR = 'T' */

            if (lower) {

                /*              SRPA for LOWER, TRANSPOSE and N is even (see paper) */
                /*              T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) */
                /*              T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k */

                strtri_("U", diag, &k, &a[k], &k, info);
                if (*info > 0) {
                    return 0;
                }
                strmm_("L", "U", "N", diag, &k, &k, &c_b13, &a[k], &k, &a[k *
                        (k + 1)], &k);
                strtri_("L", diag, &k, a, &k, info);
                if (*info > 0) {
                    *info += k;
                }
                if (*info > 0) {
                    return 0;
                }
                strmm_("R", "L", "T", diag, &k, &k, &c_b18, a, &k, &a[k * (k
                        + 1)], &k)
                ;
            } else {

                /*              SRPA for UPPER, TRANSPOSE and N is even (see paper) */
                /*              T1 -> B(0,k+1),     T2 -> B(0,k),   S -> B(0,0) */
                /*              T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k */

                strtri_("U", diag, &k, &a[k * (k + 1)], &k, info);
                if (*info > 0) {
                    return 0;
                }
                strmm_("R", "U", "T", diag, &k, &k, &c_b13, &a[k * (k + 1)], &
                       k, a, &k);
                strtri_("L", diag, &k, &a[k * k], &k, info);
                if (*info > 0) {
                    *info += k;
                }
                if (*info > 0) {
                    return 0;
                }
                strmm_("L", "L", "N", diag, &k, &k, &c_b18, &a[k * k], &k, a,
                       &k);
            }
        }
    }

    return 0;

    /*     End of STFTRI */

} /* stftri_ */
コード例 #18
0
ファイル: ssygst.c プロジェクト: 0u812/roadrunner-backup
/* Subroutine */ int ssygst_(integer *itype, char *uplo, integer *n, real *a, 
	integer *lda, real *b, integer *ldb, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;

    /* Local variables */
    integer k, kb, nb;
    extern logical lsame_(char *, char *);
    logical upper;
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
), ssymm_(char *, char *, integer 
	    *, integer *, real *, real *, integer *, real *, integer *, real *
, real *, integer *), strsm_(char *, char *, char 
	    *, char *, integer *, integer *, real *, real *, integer *, real *
, integer *), ssygs2_(integer *, 
	    char *, integer *, real *, integer *, real *, integer *, integer *
), ssyr2k_(char *, char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *), xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *);


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SSYGST reduces a real symmetric-definite generalized eigenproblem */
/*  to standard form. */

/*  If ITYPE = 1, the problem is A*x = lambda*B*x, */
/*  and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) */

/*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or */
/*  B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. */

/*  B must have been previously factorized as U**T*U or L*L**T by SPOTRF. */

/*  Arguments */
/*  ========= */

/*  ITYPE   (input) INTEGER */
/*          = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); */
/*          = 2 or 3: compute U*A*U**T or L**T*A*L. */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  Upper triangle of A is stored and B is factored as */
/*                  U**T*U; */
/*          = 'L':  Lower triangle of A is stored and B is factored as */
/*                  L*L**T. */

/*  N       (input) INTEGER */
/*          The order of the matrices A and B.  N >= 0. */

/*  A       (input/output) REAL array, dimension (LDA,N) */
/*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading */
/*          N-by-N upper triangular part of A contains the upper */
/*          triangular part of the matrix A, and the strictly lower */
/*          triangular part of A is not referenced.  If UPLO = 'L', the */
/*          leading N-by-N lower triangular part of A contains the lower */
/*          triangular part of the matrix A, and the strictly upper */
/*          triangular part of A is not referenced. */

/*          On exit, if INFO = 0, the transformed matrix, stored in the */
/*          same format as A. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  B       (input) REAL array, dimension (LDB,N) */
/*          The triangular factor from the Cholesky factorization of B, */
/*          as returned by SPOTRF. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= max(1,N). */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (*itype < 1 || *itype > 3) {
	*info = -1;
    } else if (! upper && ! lsame_(uplo, "L")) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -7;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SSYGST", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "SSYGST", uplo, n, &c_n1, &c_n1, &c_n1);

    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code */

	ssygs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    } else {

/*        Use blocked code */

	if (*itype == 1) {
	    if (upper) {

/*              Compute inv(U')*A*inv(U) */

		i__1 = *n;
		i__2 = nb;
		for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
/* Computing MIN */
		    i__3 = *n - k + 1;
		    kb = min(i__3,nb);

/*                 Update the upper triangle of A(k:n,k:n) */

		    ssygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + 
			    k * b_dim1], ldb, info);
		    if (k + kb <= *n) {
			i__3 = *n - k - kb + 1;
			strsm_("Left", uplo, "Transpose", "Non-unit", &kb, &
				i__3, &c_b14, &b[k + k * b_dim1], ldb, &a[k + 
				(k + kb) * a_dim1], lda);
			i__3 = *n - k - kb + 1;
			ssymm_("Left", uplo, &kb, &i__3, &c_b16, &a[k + k * 
				a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, 
				&c_b14, &a[k + (k + kb) * a_dim1], lda);
			i__3 = *n - k - kb + 1;
			ssyr2k_(uplo, "Transpose", &i__3, &kb, &c_b19, &a[k + 
				(k + kb) * a_dim1], lda, &b[k + (k + kb) * 
				b_dim1], ldb, &c_b14, &a[k + kb + (k + kb) * 
				a_dim1], lda);
			i__3 = *n - k - kb + 1;
			ssymm_("Left", uplo, &kb, &i__3, &c_b16, &a[k + k * 
				a_dim1], lda, &b[k + (k + kb) * b_dim1], ldb, 
				&c_b14, &a[k + (k + kb) * a_dim1], lda);
			i__3 = *n - k - kb + 1;
			strsm_("Right", uplo, "No transpose", "Non-unit", &kb, 
				 &i__3, &c_b14, &b[k + kb + (k + kb) * b_dim1]
, ldb, &a[k + (k + kb) * a_dim1], lda);
		    }
/* L10: */
		}
	    } else {

/*              Compute inv(L)*A*inv(L') */

		i__2 = *n;
		i__1 = nb;
		for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
/* Computing MIN */
		    i__3 = *n - k + 1;
		    kb = min(i__3,nb);

/*                 Update the lower triangle of A(k:n,k:n) */

		    ssygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + 
			    k * b_dim1], ldb, info);
		    if (k + kb <= *n) {
			i__3 = *n - k - kb + 1;
			strsm_("Right", uplo, "Transpose", "Non-unit", &i__3, 
				&kb, &c_b14, &b[k + k * b_dim1], ldb, &a[k + 
				kb + k * a_dim1], lda);
			i__3 = *n - k - kb + 1;
			ssymm_("Right", uplo, &i__3, &kb, &c_b16, &a[k + k * 
				a_dim1], lda, &b[k + kb + k * b_dim1], ldb, &
				c_b14, &a[k + kb + k * a_dim1], lda);
			i__3 = *n - k - kb + 1;
			ssyr2k_(uplo, "No transpose", &i__3, &kb, &c_b19, &a[
				k + kb + k * a_dim1], lda, &b[k + kb + k * 
				b_dim1], ldb, &c_b14, &a[k + kb + (k + kb) * 
				a_dim1], lda);
			i__3 = *n - k - kb + 1;
			ssymm_("Right", uplo, &i__3, &kb, &c_b16, &a[k + k * 
				a_dim1], lda, &b[k + kb + k * b_dim1], ldb, &
				c_b14, &a[k + kb + k * a_dim1], lda);
			i__3 = *n - k - kb + 1;
			strsm_("Left", uplo, "No transpose", "Non-unit", &
				i__3, &kb, &c_b14, &b[k + kb + (k + kb) * 
				b_dim1], ldb, &a[k + kb + k * a_dim1], lda);
		    }
/* L20: */
		}
	    }
	} else {
	    if (upper) {

/*              Compute U*A*U' */

		i__1 = *n;
		i__2 = nb;
		for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
/* Computing MIN */
		    i__3 = *n - k + 1;
		    kb = min(i__3,nb);

/*                 Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */

		    i__3 = k - 1;
		    strmm_("Left", uplo, "No transpose", "Non-unit", &i__3, &
			    kb, &c_b14, &b[b_offset], ldb, &a[k * a_dim1 + 1], 
			     lda)
			    ;
		    i__3 = k - 1;
		    ssymm_("Right", uplo, &i__3, &kb, &c_b52, &a[k + k * 
			    a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, &a[
			    k * a_dim1 + 1], lda);
		    i__3 = k - 1;
		    ssyr2k_(uplo, "No transpose", &i__3, &kb, &c_b14, &a[k * 
			    a_dim1 + 1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, 
			     &a[a_offset], lda);
		    i__3 = k - 1;
		    ssymm_("Right", uplo, &i__3, &kb, &c_b52, &a[k + k * 
			    a_dim1], lda, &b[k * b_dim1 + 1], ldb, &c_b14, &a[
			    k * a_dim1 + 1], lda);
		    i__3 = k - 1;
		    strmm_("Right", uplo, "Transpose", "Non-unit", &i__3, &kb, 
			     &c_b14, &b[k + k * b_dim1], ldb, &a[k * a_dim1 + 
			    1], lda);
		    ssygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + 
			    k * b_dim1], ldb, info);
/* L30: */
		}
	    } else {

/*              Compute L'*A*L */

		i__2 = *n;
		i__1 = nb;
		for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
/* Computing MIN */
		    i__3 = *n - k + 1;
		    kb = min(i__3,nb);

/*                 Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */

		    i__3 = k - 1;
		    strmm_("Right", uplo, "No transpose", "Non-unit", &kb, &
			    i__3, &c_b14, &b[b_offset], ldb, &a[k + a_dim1], 
			    lda);
		    i__3 = k - 1;
		    ssymm_("Left", uplo, &kb, &i__3, &c_b52, &a[k + k * 
			    a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[k + 
			    a_dim1], lda);
		    i__3 = k - 1;
		    ssyr2k_(uplo, "Transpose", &i__3, &kb, &c_b14, &a[k + 
			    a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[
			    a_offset], lda);
		    i__3 = k - 1;
		    ssymm_("Left", uplo, &kb, &i__3, &c_b52, &a[k + k * 
			    a_dim1], lda, &b[k + b_dim1], ldb, &c_b14, &a[k + 
			    a_dim1], lda);
		    i__3 = k - 1;
		    strmm_("Left", uplo, "Transpose", "Non-unit", &kb, &i__3, 
			    &c_b14, &b[k + k * b_dim1], ldb, &a[k + a_dim1], 
			    lda);
		    ssygs2_(itype, uplo, &kb, &a[k + k * a_dim1], lda, &b[k + 
			    k * b_dim1], ldb, info);
/* L40: */
		}
	    }
	}
    }
    return 0;

/*     End of SSYGST */

} /* ssygst_ */
コード例 #19
0
ファイル: spftri.c プロジェクト: csapng/libflame
/* Subroutine */
int spftri_(char *transr, char *uplo, integer *n, real *a, integer *info)
{
    /* System generated locals */
    integer i__1, i__2;
    /* Local variables */
    integer k, n1, n2;
    logical normaltransr;
    extern logical lsame_(char *, char *);
    logical lower;
    extern /* Subroutine */
    int strmm_(char *, char *, char *, char *, integer *, integer *, real *, real *, integer *, real *, integer * ), ssyrk_(char *, char *, integer *, integer *, real *, real *, integer *, real *, real *, integer * ), xerbla_(char *, integer *);
    logical nisodd;
    extern /* Subroutine */
    int slauum_(char *, integer *, real *, integer *, integer *), stftri_(char *, char *, char *, integer *, real *, integer *);
    /* -- LAPACK computational routine (version 3.4.0) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* November 2011 */
    /* .. Scalar Arguments .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. */
    /* .. Local Scalars .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. Intrinsic Functions .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Test the input parameters. */
    *info = 0;
    normaltransr = lsame_(transr, "N");
    lower = lsame_(uplo, "L");
    if (! normaltransr && ! lsame_(transr, "T"))
    {
        *info = -1;
    }
    else if (! lower && ! lsame_(uplo, "U"))
    {
        *info = -2;
    }
    else if (*n < 0)
    {
        *info = -3;
    }
    if (*info != 0)
    {
        i__1 = -(*info);
        xerbla_("SPFTRI", &i__1);
        return 0;
    }
    /* Quick return if possible */
    if (*n == 0)
    {
        return 0;
    }
    /* Invert the triangular Cholesky factor U or L. */
    stftri_(transr, uplo, "N", n, a, info);
    if (*info > 0)
    {
        return 0;
    }
    /* If N is odd, set NISODD = .TRUE. */
    /* If N is even, set K = N/2 and NISODD = .FALSE. */
    if (*n % 2 == 0)
    {
        k = *n / 2;
        nisodd = FALSE_;
    }
    else
    {
        nisodd = TRUE_;
    }
    /* Set N1 and N2 depending on LOWER */
    if (lower)
    {
        n2 = *n / 2;
        n1 = *n - n2;
    }
    else
    {
        n1 = *n / 2;
        n2 = *n - n1;
    }
    /* Start execution of triangular matrix multiply: inv(U)*inv(U)^C or */
    /* inv(L)^C*inv(L). There are eight cases. */
    if (nisodd)
    {
        /* N is odd */
        if (normaltransr)
        {
            /* N is odd and TRANSR = 'N' */
            if (lower)
            {
                /* SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:N1-1) ) */
                /* T1 -> a(0,0), T2 -> a(0,1), S -> a(N1,0) */
                /* T1 -> a(0), T2 -> a(n), S -> a(N1) */
                slauum_("L", &n1, a, n, info);
                ssyrk_("L", "T", &n1, &n2, &c_b11, &a[n1], n, &c_b11, a, n);
                strmm_("L", "U", "N", "N", &n2, &n1, &c_b11, &a[*n], n, &a[n1] , n);
                slauum_("U", &n2, &a[*n], n, info);
            }
            else
            {
                /* SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:N2-1) */
                /* T1 -> a(N1+1,0), T2 -> a(N1,0), S -> a(0,0) */
                /* T1 -> a(N2), T2 -> a(N1), S -> a(0) */
                slauum_("L", &n1, &a[n2], n, info);
                ssyrk_("L", "N", &n1, &n2, &c_b11, a, n, &c_b11, &a[n2], n);
                strmm_("R", "U", "T", "N", &n1, &n2, &c_b11, &a[n1], n, a, n);
                slauum_("U", &n2, &a[n1], n, info);
            }
        }
        else
        {
            /* N is odd and TRANSR = 'T' */
            if (lower)
            {
                /* SRPA for LOWER, TRANSPOSE, and N is odd */
                /* T1 -> a(0), T2 -> a(1), S -> a(0+N1*N1) */
                slauum_("U", &n1, a, &n1, info);
                ssyrk_("U", "N", &n1, &n2, &c_b11, &a[n1 * n1], &n1, &c_b11, a, &n1);
                strmm_("R", "L", "N", "N", &n1, &n2, &c_b11, &a[1], &n1, &a[ n1 * n1], &n1);
                slauum_("L", &n2, &a[1], &n1, info);
            }
            else
            {
                /* SRPA for UPPER, TRANSPOSE, and N is odd */
                /* T1 -> a(0+N2*N2), T2 -> a(0+N1*N2), S -> a(0) */
                slauum_("U", &n1, &a[n2 * n2], &n2, info);
                ssyrk_("U", "T", &n1, &n2, &c_b11, a, &n2, &c_b11, &a[n2 * n2] , &n2);
                strmm_("L", "L", "T", "N", &n2, &n1, &c_b11, &a[n1 * n2], &n2, a, &n2);
                slauum_("L", &n2, &a[n1 * n2], &n2, info);
            }
        }
    }
    else
    {
        /* N is even */
        if (normaltransr)
        {
            /* N is even and TRANSR = 'N' */
            if (lower)
            {
                /* SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) */
                /* T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) */
                /* T1 -> a(1), T2 -> a(0), S -> a(k+1) */
                i__1 = *n + 1;
                slauum_("L", &k, &a[1], &i__1, info);
                i__1 = *n + 1;
                i__2 = *n + 1;
                ssyrk_("L", "T", &k, &k, &c_b11, &a[k + 1], &i__1, &c_b11, &a[ 1], &i__2);
                i__1 = *n + 1;
                i__2 = *n + 1;
                strmm_("L", "U", "N", "N", &k, &k, &c_b11, a, &i__1, &a[k + 1] , &i__2);
                i__1 = *n + 1;
                slauum_("U", &k, a, &i__1, info);
            }
            else
            {
                /* SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) */
                /* T1 -> a(k+1,0) , T2 -> a(k,0), S -> a(0,0) */
                /* T1 -> a(k+1), T2 -> a(k), S -> a(0) */
                i__1 = *n + 1;
                slauum_("L", &k, &a[k + 1], &i__1, info);
                i__1 = *n + 1;
                i__2 = *n + 1;
                ssyrk_("L", "N", &k, &k, &c_b11, a, &i__1, &c_b11, &a[k + 1], &i__2);
                i__1 = *n + 1;
                i__2 = *n + 1;
                strmm_("R", "U", "T", "N", &k, &k, &c_b11, &a[k], &i__1, a, & i__2);
                i__1 = *n + 1;
                slauum_("U", &k, &a[k], &i__1, info);
            }
        }
        else
        {
            /* N is even and TRANSR = 'T' */
            if (lower)
            {
                /* SRPA for LOWER, TRANSPOSE, and N is even (see paper) */
                /* T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1), */
                /* T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1));
                lda=k */
                slauum_("U", &k, &a[k], &k, info);
                ssyrk_("U", "N", &k, &k, &c_b11, &a[k * (k + 1)], &k, &c_b11, &a[k], &k);
                strmm_("R", "L", "N", "N", &k, &k, &c_b11, a, &k, &a[k * (k + 1)], &k);
                slauum_("L", &k, a, &k, info);
            }
            else
            {
                /* SRPA for UPPER, TRANSPOSE, and N is even (see paper) */
                /* T1 -> B(0,k+1), T2 -> B(0,k), S -> B(0,0), */
                /* T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0));
                lda=k */
                slauum_("U", &k, &a[k * (k + 1)], &k, info);
                ssyrk_("U", "T", &k, &k, &c_b11, a, &k, &c_b11, &a[k * (k + 1) ], &k);
                strmm_("L", "L", "T", "N", &k, &k, &c_b11, &a[k * k], &k, a, & k);
                slauum_("L", &k, &a[k * k], &k, info);
            }
        }
    }
    return 0;
    /* End of SPFTRI */
}
コード例 #20
0
ファイル: strtri.c プロジェクト: Electrostatics/FETK
/* Subroutine */ int strtri_(char *uplo, char *diag, integer *n, real *a, 
	integer *lda, integer *info, ftnlen uplo_len, ftnlen diag_len)
{
    /* System generated locals */
    address a__1[2];
    integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5;
    char ch__1[2];

    /* Builtin functions */
    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);

    /* Local variables */
    static integer j, jb, nb, nn;
    extern logical lsame_(char *, char *, ftnlen, ftnlen);
    static logical upper;
    extern /* Subroutine */ int strmm_(char *, char *, char *, char *, 
	    integer *, integer *, real *, real *, integer *, real *, integer *
	    , ftnlen, ftnlen, ftnlen, ftnlen), strsm_(char *, char *, char *, 
	    char *, integer *, integer *, real *, real *, integer *, real *, 
	    integer *, ftnlen, ftnlen, ftnlen, ftnlen), strti2_(char *, char *
	    , integer *, real *, integer *, integer *, ftnlen, ftnlen), 
	    xerbla_(char *, integer *, ftnlen);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    static logical nounit;


/*  -- LAPACK routine (version 3.0) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
/*     Courant Institute, Argonne National Lab, and Rice University */
/*     March 31, 1993 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  STRTRI computes the inverse of a real upper or lower triangular */
/*  matrix A. */

/*  This is the Level 3 BLAS version of the algorithm. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  A is upper triangular; */
/*          = 'L':  A is lower triangular. */

/*  DIAG    (input) CHARACTER*1 */
/*          = 'N':  A is non-unit triangular; */
/*          = 'U':  A is unit triangular. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  A       (input/output) REAL array, dimension (LDA,N) */
/*          On entry, the triangular matrix A.  If UPLO = 'U', the */
/*          leading N-by-N upper triangular part of the array A contains */
/*          the upper triangular matrix, and the strictly lower */
/*          triangular part of A is not referenced.  If UPLO = 'L', the */
/*          leading N-by-N lower triangular part of the array A contains */
/*          the lower triangular matrix, and the strictly upper */
/*          triangular part of A is not referenced.  If DIAG = 'U', the */
/*          diagonal elements of A are also not referenced and are */
/*          assumed to be 1. */
/*          On exit, the (triangular) inverse of the original matrix, in */
/*          the same storage format. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  INFO    (output) INTEGER */
/*          = 0: successful exit */
/*          < 0: if INFO = -i, the i-th argument had an illegal value */
/*          > 0: if INFO = i, A(i,i) is exactly zero.  The triangular */
/*               matrix is singular and its inverse can not be computed. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
	*info = -1;
    } else if (! nounit && ! lsame_(diag, "U", (ftnlen)1, (ftnlen)1)) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("STRTRI", &i__1, (ftnlen)6);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Check for singularity if non-unit. */

    if (nounit) {
	i__1 = *n;
	for (*info = 1; *info <= i__1; ++(*info)) {
	    if (a[*info + *info * a_dim1] == 0.f) {
		return 0;
	    }
/* L10: */
	}
	*info = 0;
    }

/*     Determine the block size for this environment. */

/* Writing concatenation */
    i__2[0] = 1, a__1[0] = uplo;
    i__2[1] = 1, a__1[1] = diag;
    s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2);
    nb = ilaenv_(&c__1, "STRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)2);
    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code */

	strti2_(uplo, diag, n, &a[a_offset], lda, info, (ftnlen)1, (ftnlen)1);
    } else {

/*        Use blocked code */

	if (upper) {

/*           Compute inverse of upper triangular matrix */

	    i__1 = *n;
	    i__3 = nb;
	    for (j = 1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) {
/* Computing MIN */
		i__4 = nb, i__5 = *n - j + 1;
		jb = min(i__4,i__5);

/*              Compute rows 1:j-1 of current block column */

		i__4 = j - 1;
		strmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, &
			c_b18, &a[a_offset], lda, &a[j * a_dim1 + 1], lda, (
			ftnlen)4, (ftnlen)5, (ftnlen)12, (ftnlen)1);
		i__4 = j - 1;
		strsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, &
			c_b22, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1], 
			lda, (ftnlen)5, (ftnlen)5, (ftnlen)12, (ftnlen)1);

/*              Compute inverse of current diagonal block */

		strti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info, (
			ftnlen)5, (ftnlen)1);
/* L20: */
	    }
	} else {

/*           Compute inverse of lower triangular matrix */

	    nn = (*n - 1) / nb * nb + 1;
	    i__3 = -nb;
	    for (j = nn; i__3 < 0 ? j >= 1 : j <= 1; j += i__3) {
/* Computing MIN */
		i__1 = nb, i__4 = *n - j + 1;
		jb = min(i__1,i__4);
		if (j + jb <= *n) {

/*                 Compute rows j+jb:n of current block column */

		    i__1 = *n - j - jb + 1;
		    strmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, 
			    &c_b18, &a[j + jb + (j + jb) * a_dim1], lda, &a[j 
			    + jb + j * a_dim1], lda, (ftnlen)4, (ftnlen)5, (
			    ftnlen)12, (ftnlen)1);
		    i__1 = *n - j - jb + 1;
		    strsm_("Right", "Lower", "No transpose", diag, &i__1, &jb,
			     &c_b22, &a[j + j * a_dim1], lda, &a[j + jb + j * 
			    a_dim1], lda, (ftnlen)5, (ftnlen)5, (ftnlen)12, (
			    ftnlen)1);
		}

/*              Compute inverse of current diagonal block */

		strti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info, (
			ftnlen)5, (ftnlen)1);
/* L30: */
	    }
	}
    }

    return 0;

/*     End of STRTRI */

} /* strtri_ */
コード例 #21
0
ファイル: slarzb.c プロジェクト: 0u812/roadrunner-backup
/* Subroutine */ int slarzb_(char *side, char *trans, char *direct, char *
	storev, integer *m, integer *n, integer *k, integer *l, real *v, 
	integer *ldv, real *t, integer *ldt, real *c__, integer *ldc, real *
	work, integer *ldwork)
{
    /* System generated locals */
    integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, 
	    work_offset, i__1, i__2;

    /* Local variables */
    integer i__, j, info;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *), scopy_(integer *, real *, 
	    integer *, real *, integer *), strmm_(char *, char *, char *, 
	    char *, integer *, integer *, real *, real *, integer *, real *, 
	    integer *), xerbla_(char *, 
	    integer *);
    char transt[1];


/*  -- LAPACK routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLARZB applies a real block reflector H or its transpose H**T to */
/*  a real distributed M-by-N  C from the left or the right. */

/*  Currently, only STOREV = 'R' and DIRECT = 'B' are supported. */

/*  Arguments */
/*  ========= */

/*  SIDE    (input) CHARACTER*1 */
/*          = 'L': apply H or H' from the Left */
/*          = 'R': apply H or H' from the Right */

/*  TRANS   (input) CHARACTER*1 */
/*          = 'N': apply H (No transpose) */
/*          = 'C': apply H' (Transpose) */

/*  DIRECT  (input) CHARACTER*1 */
/*          Indicates how H is formed from a product of elementary */
/*          reflectors */
/*          = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet) */
/*          = 'B': H = H(k) . . . H(2) H(1) (Backward) */

/*  STOREV  (input) CHARACTER*1 */
/*          Indicates how the vectors which define the elementary */
/*          reflectors are stored: */
/*          = 'C': Columnwise                        (not supported yet) */
/*          = 'R': Rowwise */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix C. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix C. */

/*  K       (input) INTEGER */
/*          The order of the matrix T (= the number of elementary */
/*          reflectors whose product defines the block reflector). */

/*  L       (input) INTEGER */
/*          The number of columns of the matrix V containing the */
/*          meaningful part of the Householder reflectors. */
/*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0. */

/*  V       (input) REAL array, dimension (LDV,NV). */
/*          If STOREV = 'C', NV = K; if STOREV = 'R', NV = L. */

/*  LDV     (input) INTEGER */
/*          The leading dimension of the array V. */
/*          If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K. */

/*  T       (input) REAL array, dimension (LDT,K) */
/*          The triangular K-by-K matrix T in the representation of the */
/*          block reflector. */

/*  LDT     (input) INTEGER */
/*          The leading dimension of the array T. LDT >= K. */

/*  C       (input/output) REAL array, dimension (LDC,N) */
/*          On entry, the M-by-N matrix C. */
/*          On exit, C is overwritten by H*C or H'*C or C*H or C*H'. */

/*  LDC     (input) INTEGER */
/*          The leading dimension of the array C. LDC >= max(1,M). */

/*  WORK    (workspace) REAL array, dimension (LDWORK,K) */

/*  LDWORK  (input) INTEGER */
/*          The leading dimension of the array WORK. */
/*          If SIDE = 'L', LDWORK >= max(1,N); */
/*          if SIDE = 'R', LDWORK >= max(1,M). */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Quick return if possible */

    /* Parameter adjustments */
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1;
    t -= t_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    work_dim1 = *ldwork;
    work_offset = 1 + work_dim1;
    work -= work_offset;

    /* Function Body */
    if (*m <= 0 || *n <= 0) {
	return 0;
    }

/*     Check for currently supported options */

    info = 0;
    if (! lsame_(direct, "B")) {
	info = -3;
    } else if (! lsame_(storev, "R")) {
	info = -4;
    }
    if (info != 0) {
	i__1 = -info;
	xerbla_("SLARZB", &i__1);
	return 0;
    }

    if (lsame_(trans, "N")) {
	*(unsigned char *)transt = 'T';
    } else {
	*(unsigned char *)transt = 'N';
    }

    if (lsame_(side, "L")) {

/*        Form  H * C  or  H' * C */

/*        W( 1:n, 1:k ) = C( 1:k, 1:n )' */

	i__1 = *k;
	for (j = 1; j <= i__1; ++j) {
	    scopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], &c__1);
/* L10: */
	}

/*        W( 1:n, 1:k ) = W( 1:n, 1:k ) + ... */
/*                        C( m-l+1:m, 1:n )' * V( 1:k, 1:l )' */

	if (*l > 0) {
	    sgemm_("Transpose", "Transpose", n, k, l, &c_b13, &c__[*m - *l + 
		    1 + c_dim1], ldc, &v[v_offset], ldv, &c_b13, &work[
		    work_offset], ldwork);
	}

/*        W( 1:n, 1:k ) = W( 1:n, 1:k ) * T'  or  W( 1:m, 1:k ) * T */

	strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b13, &t[
		t_offset], ldt, &work[work_offset], ldwork);

/*        C( 1:k, 1:n ) = C( 1:k, 1:n ) - W( 1:n, 1:k )' */

	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    i__2 = *k;
	    for (i__ = 1; i__ <= i__2; ++i__) {
		c__[i__ + j * c_dim1] -= work[j + i__ * work_dim1];
/* L20: */
	    }
/* L30: */
	}

/*        C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ... */
/*                            V( 1:k, 1:l )' * W( 1:n, 1:k )' */

	if (*l > 0) {
	    sgemm_("Transpose", "Transpose", l, n, k, &c_b23, &v[v_offset], 
		    ldv, &work[work_offset], ldwork, &c_b13, &c__[*m - *l + 1 
		    + c_dim1], ldc);
	}

    } else if (lsame_(side, "R")) {

/*        Form  C * H  or  C * H' */

/*        W( 1:m, 1:k ) = C( 1:m, 1:k ) */

	i__1 = *k;
	for (j = 1; j <= i__1; ++j) {
	    scopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &
		    c__1);
/* L40: */
	}

/*        W( 1:m, 1:k ) = W( 1:m, 1:k ) + ... */
/*                        C( 1:m, n-l+1:n ) * V( 1:k, 1:l )' */

	if (*l > 0) {
	    sgemm_("No transpose", "Transpose", m, k, l, &c_b13, &c__[(*n - *
		    l + 1) * c_dim1 + 1], ldc, &v[v_offset], ldv, &c_b13, &
		    work[work_offset], ldwork);
	}

/*        W( 1:m, 1:k ) = W( 1:m, 1:k ) * T  or  W( 1:m, 1:k ) * T' */

	strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b13, &t[t_offset]
, ldt, &work[work_offset], ldwork);

/*        C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k ) */

	i__1 = *k;
	for (j = 1; j <= i__1; ++j) {
	    i__2 = *m;
	    for (i__ = 1; i__ <= i__2; ++i__) {
		c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1];
/* L50: */
	    }
/* L60: */
	}

/*        C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ... */
/*                            W( 1:m, 1:k ) * V( 1:k, 1:l ) */

	if (*l > 0) {
	    sgemm_("No transpose", "No transpose", m, l, k, &c_b23, &work[
		    work_offset], ldwork, &v[v_offset], ldv, &c_b13, &c__[(*n 
		    - *l + 1) * c_dim1 + 1], ldc);
	}

    }

    return 0;

/*     End of SLARZB */

} /* slarzb_ */
コード例 #22
0
ファイル: slarfb.c プロジェクト: GuillaumeFuchs/Ensimag
 int slarfb_(char *side, char *trans, char *direct, char *
	storev, int *m, int *n, int *k, float *v, int *ldv, 
	float *t, int *ldt, float *c__, int *ldc, float *work, int *
	ldwork)
{
    /* System generated locals */
    int c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1, 
	    work_offset, i__1, i__2;

    /* Local variables */
    int i__, j;
    extern int lsame_(char *, char *);
    int lastc;
    extern  int sgemm_(char *, char *, int *, int *, 
	    int *, float *, float *, int *, float *, int *, float *, 
	    float *, int *);
    int lastv;
    extern  int scopy_(int *, float *, int *, float *, 
	    int *), strmm_(char *, char *, char *, char *, int *, 
	    int *, float *, float *, int *, float *, int *);
    extern int ilaslc_(int *, int *, float *, int *), ilaslr_(
	    int *, int *, float *, int *);
    char transt[1];


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  SLARFB applies a float block reflector H or its transpose H' to a */
/*  float m by n matrix C, from either the left or the right. */

/*  Arguments */
/*  ========= */

/*  SIDE    (input) CHARACTER*1 */
/*          = 'L': apply H or H' from the Left */
/*          = 'R': apply H or H' from the Right */

/*  TRANS   (input) CHARACTER*1 */
/*          = 'N': apply H (No transpose) */
/*          = 'T': apply H' (Transpose) */

/*  DIRECT  (input) CHARACTER*1 */
/*          Indicates how H is formed from a product of elementary */
/*          reflectors */
/*          = 'F': H = H(1) H(2) . . . H(k) (Forward) */
/*          = 'B': H = H(k) . . . H(2) H(1) (Backward) */

/*  STOREV  (input) CHARACTER*1 */
/*          Indicates how the vectors which define the elementary */
/*          reflectors are stored: */
/*          = 'C': Columnwise */
/*          = 'R': Rowwise */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix C. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix C. */

/*  K       (input) INTEGER */
/*          The order of the matrix T (= the number of elementary */
/*          reflectors whose product defines the block reflector). */

/*  V       (input) REAL array, dimension */
/*                                (LDV,K) if STOREV = 'C' */
/*                                (LDV,M) if STOREV = 'R' and SIDE = 'L' */
/*                                (LDV,N) if STOREV = 'R' and SIDE = 'R' */
/*          The matrix V. See further details. */

/*  LDV     (input) INTEGER */
/*          The leading dimension of the array V. */
/*          If STOREV = 'C' and SIDE = 'L', LDV >= MAX(1,M); */
/*          if STOREV = 'C' and SIDE = 'R', LDV >= MAX(1,N); */
/*          if STOREV = 'R', LDV >= K. */

/*  T       (input) REAL array, dimension (LDT,K) */
/*          The triangular k by k matrix T in the representation of the */
/*          block reflector. */

/*  LDT     (input) INTEGER */
/*          The leading dimension of the array T. LDT >= K. */

/*  C       (input/output) REAL array, dimension (LDC,N) */
/*          On entry, the m by n matrix C. */
/*          On exit, C is overwritten by H*C or H'*C or C*H or C*H'. */

/*  LDC     (input) INTEGER */
/*          The leading dimension of the array C. LDA >= MAX(1,M). */

/*  WORK    (workspace) REAL array, dimension (LDWORK,K) */

/*  LDWORK  (input) INTEGER */
/*          The leading dimension of the array WORK. */
/*          If SIDE = 'L', LDWORK >= MAX(1,N); */
/*          if SIDE = 'R', LDWORK >= MAX(1,M). */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Quick return if possible */

    /* Parameter adjustments */
    v_dim1 = *ldv;
    v_offset = 1 + v_dim1;
    v -= v_offset;
    t_dim1 = *ldt;
    t_offset = 1 + t_dim1;
    t -= t_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    work_dim1 = *ldwork;
    work_offset = 1 + work_dim1;
    work -= work_offset;

    /* Function Body */
    if (*m <= 0 || *n <= 0) {
	return 0;
    }

    if (lsame_(trans, "N")) {
	*(unsigned char *)transt = 'T';
    } else {
	*(unsigned char *)transt = 'N';
    }

    if (lsame_(storev, "C")) {

	if (lsame_(direct, "F")) {

/*           Let  V =  ( V1 )    (first K rows) */
/*                     ( V2 ) */
/*           where  V1  is unit lower triangular. */

	    if (lsame_(side, "L")) {

/*              Form  H * C  or  H' * C  where  C = ( C1 ) */
/*                                                  ( C2 ) */

/* Computing MAX */
		i__1 = *k, i__2 = ilaslr_(m, k, &v[v_offset], ldv);
		lastv = MAX(i__1,i__2);
		lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc);

/*              W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK) */

/*              W := C1' */

		i__1 = *k;
		for (j = 1; j <= i__1; ++j) {
		    scopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 
			    + 1], &c__1);
/* L10: */
		}

/*              W := W * V1 */

		strmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, &
			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
		if (lastv > *k) {

/*                 W := W + C2'*V2 */

		    i__1 = lastv - *k;
		    sgemm_("Transpose", "No transpose", &lastc, k, &i__1, &
			    c_b14, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + 
			    v_dim1], ldv, &c_b14, &work[work_offset], ldwork);
		}

/*              W := W * T'  or  W * T */

		strmm_("Right", "Upper", transt, "Non-unit", &lastc, k, &
			c_b14, &t[t_offset], ldt, &work[work_offset], ldwork);

/*              C := C - V * W' */

		if (lastv > *k) {

/*                 C2 := C2 - V2 * W' */

		    i__1 = lastv - *k;
		    sgemm_("No transpose", "Transpose", &i__1, &lastc, k, &
			    c_b25, &v[*k + 1 + v_dim1], ldv, &work[
			    work_offset], ldwork, &c_b14, &c__[*k + 1 + 
			    c_dim1], ldc);
		}

/*              W := W * V1' */

		strmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, &
			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);

/*              C1 := C1 - W' */

		i__1 = *k;
		for (j = 1; j <= i__1; ++j) {
		    i__2 = lastc;
		    for (i__ = 1; i__ <= i__2; ++i__) {
			c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1];
/* L20: */
		    }
/* L30: */
		}

	    } else if (lsame_(side, "R")) {

/*              Form  C * H  or  C * H'  where  C = ( C1  C2 ) */

/* Computing MAX */
		i__1 = *k, i__2 = ilaslr_(n, k, &v[v_offset], ldv);
		lastv = MAX(i__1,i__2);
		lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc);

/*              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK) */

/*              W := C1 */

		i__1 = *k;
		for (j = 1; j <= i__1; ++j) {
		    scopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * 
			    work_dim1 + 1], &c__1);
/* L40: */
		}

/*              W := W * V1 */

		strmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, &
			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
		if (lastv > *k) {

/*                 W := W + C2 * V2 */

		    i__1 = lastv - *k;
		    sgemm_("No transpose", "No transpose", &lastc, k, &i__1, &
			    c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + 
			    1 + v_dim1], ldv, &c_b14, &work[work_offset], 
			    ldwork);
		}

/*              W := W * T  or  W * T' */

		strmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b14, 
			 &t[t_offset], ldt, &work[work_offset], ldwork);

/*              C := C - W * V' */

		if (lastv > *k) {

/*                 C2 := C2 - W * V2' */

		    i__1 = lastv - *k;
		    sgemm_("No transpose", "Transpose", &lastc, &i__1, k, &
			    c_b25, &work[work_offset], ldwork, &v[*k + 1 + 
			    v_dim1], ldv, &c_b14, &c__[(*k + 1) * c_dim1 + 1], 
			     ldc);
		}

/*              W := W * V1' */

		strmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, &
			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);

/*              C1 := C1 - W */

		i__1 = *k;
		for (j = 1; j <= i__1; ++j) {
		    i__2 = lastc;
		    for (i__ = 1; i__ <= i__2; ++i__) {
			c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1];
/* L50: */
		    }
/* L60: */
		}
	    }

	} else {

/*           Let  V =  ( V1 ) */
/*                     ( V2 )    (last K rows) */
/*           where  V2  is unit upper triangular. */

	    if (lsame_(side, "L")) {

/*              Form  H * C  or  H' * C  where  C = ( C1 ) */
/*                                                  ( C2 ) */

/* Computing MAX */
		i__1 = *k, i__2 = ilaslr_(m, k, &v[v_offset], ldv);
		lastv = MAX(i__1,i__2);
		lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc);

/*              W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK) */

/*              W := C2' */

		i__1 = *k;
		for (j = 1; j <= i__1; ++j) {
		    scopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[
			    j * work_dim1 + 1], &c__1);
/* L70: */
		}

/*              W := W * V2 */

		strmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, &
			c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[
			work_offset], ldwork);
		if (lastv > *k) {

/*                 W := W + C1'*V1 */

		    i__1 = lastv - *k;
		    sgemm_("Transpose", "No transpose", &lastc, k, &i__1, &
			    c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, &
			    c_b14, &work[work_offset], ldwork);
		}

/*              W := W * T'  or  W * T */

		strmm_("Right", "Lower", transt, "Non-unit", &lastc, k, &
			c_b14, &t[t_offset], ldt, &work[work_offset], ldwork);

/*              C := C - V * W' */

		if (lastv > *k) {

/*                 C1 := C1 - V1 * W' */

		    i__1 = lastv - *k;
		    sgemm_("No transpose", "Transpose", &i__1, &lastc, k, &
			    c_b25, &v[v_offset], ldv, &work[work_offset], 
			    ldwork, &c_b14, &c__[c_offset], ldc);
		}

/*              W := W * V2' */

		strmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, &
			c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[
			work_offset], ldwork);

/*              C2 := C2 - W' */

		i__1 = *k;
		for (j = 1; j <= i__1; ++j) {
		    i__2 = lastc;
		    for (i__ = 1; i__ <= i__2; ++i__) {
			c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * 
				work_dim1];
/* L80: */
		    }
/* L90: */
		}

	    } else if (lsame_(side, "R")) {

/*              Form  C * H  or  C * H'  where  C = ( C1  C2 ) */

/* Computing MAX */
		i__1 = *k, i__2 = ilaslr_(n, k, &v[v_offset], ldv);
		lastv = MAX(i__1,i__2);
		lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc);

/*              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK) */

/*              W := C2 */

		i__1 = *k;
		for (j = 1; j <= i__1; ++j) {
		    scopy_(&lastc, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &
			    work[j * work_dim1 + 1], &c__1);
/* L100: */
		}

/*              W := W * V2 */

		strmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, &
			c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[
			work_offset], ldwork);
		if (lastv > *k) {

/*                 W := W + C1 * V1 */

		    i__1 = lastv - *k;
		    sgemm_("No transpose", "No transpose", &lastc, k, &i__1, &
			    c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, &
			    c_b14, &work[work_offset], ldwork);
		}

/*              W := W * T  or  W * T' */

		strmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b14, 
			 &t[t_offset], ldt, &work[work_offset], ldwork);

/*              C := C - W * V' */

		if (lastv > *k) {

/*                 C1 := C1 - W * V1' */

		    i__1 = lastv - *k;
		    sgemm_("No transpose", "Transpose", &lastc, &i__1, k, &
			    c_b25, &work[work_offset], ldwork, &v[v_offset], 
			    ldv, &c_b14, &c__[c_offset], ldc);
		}

/*              W := W * V2' */

		strmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, &
			c_b14, &v[lastv - *k + 1 + v_dim1], ldv, &work[
			work_offset], ldwork);

/*              C2 := C2 - W */

		i__1 = *k;
		for (j = 1; j <= i__1; ++j) {
		    i__2 = lastc;
		    for (i__ = 1; i__ <= i__2; ++i__) {
			c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j *
				 work_dim1];
/* L110: */
		    }
/* L120: */
		}
	    }
	}

    } else if (lsame_(storev, "R")) {

	if (lsame_(direct, "F")) {

/*           Let  V =  ( V1  V2 )    (V1: first K columns) */
/*           where  V1  is unit upper triangular. */

	    if (lsame_(side, "L")) {

/*              Form  H * C  or  H' * C  where  C = ( C1 ) */
/*                                                  ( C2 ) */

/* Computing MAX */
		i__1 = *k, i__2 = ilaslc_(k, m, &v[v_offset], ldv);
		lastv = MAX(i__1,i__2);
		lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc);

/*              W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK) */

/*              W := C1' */

		i__1 = *k;
		for (j = 1; j <= i__1; ++j) {
		    scopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 
			    + 1], &c__1);
/* L130: */
		}

/*              W := W * V1' */

		strmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, &
			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
		if (lastv > *k) {

/*                 W := W + C2'*V2' */

		    i__1 = lastv - *k;
		    sgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b14, 
			     &c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 
			    + 1], ldv, &c_b14, &work[work_offset], ldwork);
		}

/*              W := W * T'  or  W * T */

		strmm_("Right", "Upper", transt, "Non-unit", &lastc, k, &
			c_b14, &t[t_offset], ldt, &work[work_offset], ldwork);

/*              C := C - V' * W' */

		if (lastv > *k) {

/*                 C2 := C2 - V2' * W' */

		    i__1 = lastv - *k;
		    sgemm_("Transpose", "Transpose", &i__1, &lastc, k, &c_b25, 
			     &v[(*k + 1) * v_dim1 + 1], ldv, &work[
			    work_offset], ldwork, &c_b14, &c__[*k + 1 + 
			    c_dim1], ldc);
		}

/*              W := W * V1 */

		strmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, &
			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);

/*              C1 := C1 - W' */

		i__1 = *k;
		for (j = 1; j <= i__1; ++j) {
		    i__2 = lastc;
		    for (i__ = 1; i__ <= i__2; ++i__) {
			c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1];
/* L140: */
		    }
/* L150: */
		}

	    } else if (lsame_(side, "R")) {

/*              Form  C * H  or  C * H'  where  C = ( C1  C2 ) */

/* Computing MAX */
		i__1 = *k, i__2 = ilaslc_(k, n, &v[v_offset], ldv);
		lastv = MAX(i__1,i__2);
		lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc);

/*              W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK) */

/*              W := C1 */

		i__1 = *k;
		for (j = 1; j <= i__1; ++j) {
		    scopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * 
			    work_dim1 + 1], &c__1);
/* L160: */
		}

/*              W := W * V1' */

		strmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, &
			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);
		if (lastv > *k) {

/*                 W := W + C2 * V2' */

		    i__1 = lastv - *k;
		    sgemm_("No transpose", "Transpose", &lastc, k, &i__1, &
			    c_b14, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + 
			    1) * v_dim1 + 1], ldv, &c_b14, &work[work_offset], 
			     ldwork);
		}

/*              W := W * T  or  W * T' */

		strmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b14, 
			 &t[t_offset], ldt, &work[work_offset], ldwork);

/*              C := C - W * V */

		if (lastv > *k) {

/*                 C2 := C2 - W * V2 */

		    i__1 = lastv - *k;
		    sgemm_("No transpose", "No transpose", &lastc, &i__1, k, &
			    c_b25, &work[work_offset], ldwork, &v[(*k + 1) * 
			    v_dim1 + 1], ldv, &c_b14, &c__[(*k + 1) * c_dim1 
			    + 1], ldc);
		}

/*              W := W * V1 */

		strmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, &
			c_b14, &v[v_offset], ldv, &work[work_offset], ldwork);

/*              C1 := C1 - W */

		i__1 = *k;
		for (j = 1; j <= i__1; ++j) {
		    i__2 = lastc;
		    for (i__ = 1; i__ <= i__2; ++i__) {
			c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1];
/* L170: */
		    }
/* L180: */
		}

	    }

	} else {

/*           Let  V =  ( V1  V2 )    (V2: last K columns) */
/*           where  V2  is unit lower triangular. */

	    if (lsame_(side, "L")) {

/*              Form  H * C  or  H' * C  where  C = ( C1 ) */
/*                                                  ( C2 ) */

/* Computing MAX */
		i__1 = *k, i__2 = ilaslc_(k, m, &v[v_offset], ldv);
		lastv = MAX(i__1,i__2);
		lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc);

/*              W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK) */

/*              W := C2' */

		i__1 = *k;
		for (j = 1; j <= i__1; ++j) {
		    scopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[
			    j * work_dim1 + 1], &c__1);
/* L190: */
		}

/*              W := W * V2' */

		strmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, &
			c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[
			work_offset], ldwork);
		if (lastv > *k) {

/*                 W := W + C1'*V1' */

		    i__1 = lastv - *k;
		    sgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b14, 
			     &c__[c_offset], ldc, &v[v_offset], ldv, &c_b14, &
			    work[work_offset], ldwork);
		}

/*              W := W * T'  or  W * T */

		strmm_("Right", "Lower", transt, "Non-unit", &lastc, k, &
			c_b14, &t[t_offset], ldt, &work[work_offset], ldwork);

/*              C := C - V' * W' */

		if (lastv > *k) {

/*                 C1 := C1 - V1' * W' */

		    i__1 = lastv - *k;
		    sgemm_("Transpose", "Transpose", &i__1, &lastc, k, &c_b25, 
			     &v[v_offset], ldv, &work[work_offset], ldwork, &
			    c_b14, &c__[c_offset], ldc);
		}

/*              W := W * V2 */

		strmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, &
			c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[
			work_offset], ldwork);

/*              C2 := C2 - W' */

		i__1 = *k;
		for (j = 1; j <= i__1; ++j) {
		    i__2 = lastc;
		    for (i__ = 1; i__ <= i__2; ++i__) {
			c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * 
				work_dim1];
/* L200: */
		    }
/* L210: */
		}

	    } else if (lsame_(side, "R")) {

/*              Form  C * H  or  C * H'  where  C = ( C1  C2 ) */

/* Computing MAX */
		i__1 = *k, i__2 = ilaslc_(k, n, &v[v_offset], ldv);
		lastv = MAX(i__1,i__2);
		lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc);

/*              W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK) */

/*              W := C2 */

		i__1 = *k;
		for (j = 1; j <= i__1; ++j) {
		    scopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, 
			     &work[j * work_dim1 + 1], &c__1);
/* L220: */
		}

/*              W := W * V2' */

		strmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, &
			c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[
			work_offset], ldwork);
		if (lastv > *k) {

/*                 W := W + C1 * V1' */

		    i__1 = lastv - *k;
		    sgemm_("No transpose", "Transpose", &lastc, k, &i__1, &
			    c_b14, &c__[c_offset], ldc, &v[v_offset], ldv, &
			    c_b14, &work[work_offset], ldwork);
		}

/*              W := W * T  or  W * T' */

		strmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b14, 
			 &t[t_offset], ldt, &work[work_offset], ldwork);

/*              C := C - W * V */

		if (lastv > *k) {

/*                 C1 := C1 - W * V1 */

		    i__1 = lastv - *k;
		    sgemm_("No transpose", "No transpose", &lastc, &i__1, k, &
			    c_b25, &work[work_offset], ldwork, &v[v_offset], 
			    ldv, &c_b14, &c__[c_offset], ldc);
		}

/*              W := W * V2 */

		strmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, &
			c_b14, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[
			work_offset], ldwork);

/*              C1 := C1 - W */

		i__1 = *k;
		for (j = 1; j <= i__1; ++j) {
		    i__2 = lastc;
		    for (i__ = 1; i__ <= i__2; ++i__) {
			c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j *
				 work_dim1];
/* L230: */
		    }
/* L240: */
		}

	    }

	}
    }

    return 0;

/*     End of SLARFB */

} /* slarfb_ */
コード例 #23
0
/* Subroutine */ int ssygvx_(integer *itype, char *jobz, char *range, char *
	uplo, integer *n, real *a, integer *lda, real *b, integer *ldb, real *
	vl, real *vu, integer *il, integer *iu, real *abstol, integer *m, 
	real *w, real *z__, integer *ldz, real *work, integer *lwork, integer 
	*iwork, integer *ifail, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, z_dim1, z_offset, i__1, i__2;

    /* Local variables */
    integer nb;
    char trans[1];
    logical upper;
    logical wantz;
    logical alleig, indeig, valeig;
    integer lwkmin;
    integer lwkopt;
    logical lquery;

/*  -- LAPACK driver routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  SSYGVX computes selected eigenvalues, and optionally, eigenvectors */
/*  of a real generalized symmetric-definite eigenproblem, of the form */
/*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A */
/*  and B are assumed to be symmetric and B is also positive definite. */
/*  Eigenvalues and eigenvectors can be selected by specifying either a */
/*  range of values or a range of indices for the desired eigenvalues. */

/*  Arguments */
/*  ========= */

/*  ITYPE   (input) INTEGER */
/*          Specifies the problem type to be solved: */
/*          = 1:  A*x = (lambda)*B*x */
/*          = 2:  A*B*x = (lambda)*x */
/*          = 3:  B*A*x = (lambda)*x */

/*  JOBZ    (input) CHARACTER*1 */
/*          = 'N':  Compute eigenvalues only; */
/*          = 'V':  Compute eigenvalues and eigenvectors. */

/*  RANGE   (input) CHARACTER*1 */
/*          = 'A': all eigenvalues will be found. */
/*          = 'V': all eigenvalues in the half-open interval (VL,VU] */
/*                 will be found. */
/*          = 'I': the IL-th through IU-th eigenvalues will be found. */

/*  UPLO    (input) CHARACTER*1 */
/*          = 'U':  Upper triangle of A and B are stored; */
/*          = 'L':  Lower triangle of A and B are stored. */

/*  N       (input) INTEGER */
/*          The order of the matrix pencil (A,B).  N >= 0. */

/*  A       (input/output) REAL array, dimension (LDA, N) */
/*          On entry, the symmetric matrix A.  If UPLO = 'U', the */
/*          leading N-by-N upper triangular part of A contains the */
/*          upper triangular part of the matrix A.  If UPLO = 'L', */
/*          the leading N-by-N lower triangular part of A contains */
/*          the lower triangular part of the matrix A. */

/*          On exit, the lower triangle (if UPLO='L') or the upper */
/*          triangle (if UPLO='U') of A, including the diagonal, is */
/*          destroyed. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  B       (input/output) REAL array, dimension (LDA, N) */
/*          On entry, the symmetric matrix B.  If UPLO = 'U', the */
/*          leading N-by-N upper triangular part of B contains the */
/*          upper triangular part of the matrix B.  If UPLO = 'L', */
/*          the leading N-by-N lower triangular part of B contains */
/*          the lower triangular part of the matrix B. */

/*          On exit, if INFO <= N, the part of B containing the matrix is */
/*          overwritten by the triangular factor U or L from the Cholesky */
/*          factorization B = U**T*U or B = L*L**T. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  LDB >= max(1,N). */

/*  VL      (input) REAL */
/*  VU      (input) REAL */
/*          If RANGE='V', the lower and upper bounds of the interval to */
/*          be searched for eigenvalues. VL < VU. */
/*          Not referenced if RANGE = 'A' or 'I'. */

/*  IL      (input) INTEGER */
/*  IU      (input) INTEGER */
/*          If RANGE='I', the indices (in ascending order) of the */
/*          smallest and largest eigenvalues to be returned. */
/*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. */
/*          Not referenced if RANGE = 'A' or 'V'. */

/*  ABSTOL  (input) REAL */
/*          The absolute error tolerance for the eigenvalues. */
/*          An approximate eigenvalue is accepted as converged */
/*          when it is determined to lie in an interval [a,b] */
/*          of width less than or equal to */

/*                  ABSTOL + EPS *   max( |a|,|b| ) , */

/*          where EPS is the machine precision.  If ABSTOL is less than */
/*          or equal to zero, then  EPS*|T|  will be used in its place, */
/*          where |T| is the 1-norm of the tridiagonal matrix obtained */
/*          by reducing A to tridiagonal form. */

/*          Eigenvalues will be computed most accurately when ABSTOL is */
/*          set to twice the underflow threshold 2*DLAMCH('S'), not zero. */
/*          If this routine returns with INFO>0, indicating that some */
/*          eigenvectors did not converge, try setting ABSTOL to */
/*          2*SLAMCH('S'). */

/*  M       (output) INTEGER */
/*          The total number of eigenvalues found.  0 <= M <= N. */
/*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1. */

/*  W       (output) REAL array, dimension (N) */
/*          On normal exit, the first M elements contain the selected */
/*          eigenvalues in ascending order. */

/*  Z       (output) REAL array, dimension (LDZ, max(1,M)) */
/*          If JOBZ = 'N', then Z is not referenced. */
/*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z */
/*          contain the orthonormal eigenvectors of the matrix A */
/*          corresponding to the selected eigenvalues, with the i-th */
/*          column of Z holding the eigenvector associated with W(i). */
/*          The eigenvectors are normalized as follows: */
/*          if ITYPE = 1 or 2, Z**T*B*Z = I; */
/*          if ITYPE = 3, Z**T*inv(B)*Z = I. */

/*          If an eigenvector fails to converge, then that column of Z */
/*          contains the latest approximation to the eigenvector, and the */
/*          index of the eigenvector is returned in IFAIL. */
/*          Note: the user must ensure that at least max(1,M) columns are */
/*          supplied in the array Z; if RANGE = 'V', the exact value of M */
/*          is not known in advance and an upper bound must be used. */

/*  LDZ     (input) INTEGER */
/*          The leading dimension of the array Z.  LDZ >= 1, and if */
/*          JOBZ = 'V', LDZ >= max(1,N). */

/*  WORK    (workspace/output) REAL array, dimension (MAX(1,LWORK)) */
/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */

/*  LWORK   (input) INTEGER */
/*          The length of the array WORK.  LWORK >= max(1,8*N). */
/*          For optimal efficiency, LWORK >= (NB+3)*N, */
/*          where NB is the blocksize for SSYTRD returned by ILAENV. */

/*          If LWORK = -1, then a workspace query is assumed; the routine */
/*          only calculates the optimal size of the WORK array, returns */
/*          this value as the first entry of the WORK array, and no error */
/*          message related to LWORK is issued by XERBLA. */

/*  IWORK   (workspace) INTEGER array, dimension (5*N) */

/*  IFAIL   (output) INTEGER array, dimension (N) */
/*          If JOBZ = 'V', then if INFO = 0, the first M elements of */
/*          IFAIL are zero.  If INFO > 0, then IFAIL contains the */
/*          indices of the eigenvectors that failed to converge. */
/*          If JOBZ = 'N', then IFAIL is not referenced. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
/*          > 0:  SPOTRF or SSYEVX returned an error code: */
/*             <= N:  if INFO = i, SSYEVX failed to converge; */
/*                    i eigenvectors failed to converge.  Their indices */
/*                    are stored in array IFAIL. */
/*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading */
/*                    minor of order i of B is not positive definite. */
/*                    The factorization of B could not be completed and */
/*                    no eigenvalues or eigenvectors were computed. */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA */

/* ===================================================================== */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --w;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1;
    z__ -= z_offset;
    --work;
    --iwork;
    --ifail;

    /* Function Body */
    upper = lsame_(uplo, "U");
    wantz = lsame_(jobz, "V");
    alleig = lsame_(range, "A");
    valeig = lsame_(range, "V");
    indeig = lsame_(range, "I");
    lquery = *lwork == -1;

    *info = 0;
    if (*itype < 1 || *itype > 3) {
	*info = -1;
    } else if (! (wantz || lsame_(jobz, "N"))) {
	*info = -2;
    } else if (! (alleig || valeig || indeig)) {
	*info = -3;
    } else if (! (upper || lsame_(uplo, "L"))) {
	*info = -4;
    } else if (*n < 0) {
	*info = -5;
    } else if (*lda < max(1,*n)) {
	*info = -7;
    } else if (*ldb < max(1,*n)) {
	*info = -9;
    } else {
	if (valeig) {
	    if (*n > 0 && *vu <= *vl) {
		*info = -11;
	    }
	} else if (indeig) {
	    if (*il < 1 || *il > max(1,*n)) {
		*info = -12;
	    } else if (*iu < min(*n,*il) || *iu > *n) {
		*info = -13;
	    }
	}
    }
    if (*info == 0) {
	if (*ldz < 1 || wantz && *ldz < *n) {
	    *info = -18;
	}
    }

    if (*info == 0) {
/* Computing MAX */
	i__1 = 1, i__2 = *n << 3;
	lwkmin = max(i__1,i__2);
	nb = ilaenv_(&c__1, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1);
/* Computing MAX */
	i__1 = lwkmin, i__2 = (nb + 3) * *n;
	lwkopt = max(i__1,i__2);
	work[1] = (real) lwkopt;

	if (*lwork < lwkmin && ! lquery) {
	    *info = -20;
	}
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SSYGVX", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    *m = 0;
    if (*n == 0) {
	return 0;
    }

/*     Form a Cholesky factorization of B. */

    spotrf_(uplo, n, &b[b_offset], ldb, info);
    if (*info != 0) {
	*info = *n + *info;
	return 0;
    }

/*     Transform problem to standard eigenvalue problem and solve. */

    ssygst_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    ssyevx_(jobz, range, uplo, n, &a[a_offset], lda, vl, vu, il, iu, abstol, 
	    m, &w[1], &z__[z_offset], ldz, &work[1], lwork, &iwork[1], &ifail[
	    1], info);

    if (wantz) {

/*        Backtransform eigenvectors to the original problem. */

	if (*info > 0) {
	    *m = *info - 1;
	}
	if (*itype == 1 || *itype == 2) {

/*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x; */
/*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y */

	    if (upper) {
		*(unsigned char *)trans = 'N';
	    } else {
		*(unsigned char *)trans = 'T';
	    }

	    strsm_("Left", uplo, trans, "Non-unit", n, m, &c_b19, &b[b_offset]
, ldb, &z__[z_offset], ldz);

	} else if (*itype == 3) {

/*           For B*A*x=(lambda)*x; */
/*           backtransform eigenvectors: x = L*y or U'*y */

	    if (upper) {
		*(unsigned char *)trans = 'T';
	    } else {
		*(unsigned char *)trans = 'N';
	    }

	    strmm_("Left", uplo, trans, "Non-unit", n, m, &c_b19, &b[b_offset]
, ldb, &z__[z_offset], ldz);
	}
    }

/*     Set WORK(1) to optimal workspace size. */

    work[1] = (real) lwkopt;

    return 0;

/*     End of SSYGVX */

} /* ssygvx_ */